hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)

Git-commit: b987718bbb References: bsc#1207205 (CVE-2023-0330) We cannot use the generic reentrancy guard in the LSI code, so we have to manually prevent endless reentrancy here. The problematic lsi_execute_script() function has already a way to detect whether too many instructions have been executed - we just have to slightly change the logic here that it also takes into account if the function has been called too often in a reentrant way. The code in fuzz-lsi53c895a-test.c has been taken from an earlier patch by Mauro Matteo Cascella. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 Message-Id: <20230522091011.1082574-1-thuth@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Alexander Bulekov <alxndr@bu.edu> Signed-off-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
virtio-crypto: verify src&dst buffer length for sym request
2023-10-20 16:02:13 +02:00 · 2023-10-20 15:56:38 +02:00 · 2023-10-20 15:50:22 +02:00 · 2023-10-20 15:47:57 +02:00 · 2023-10-20 15:46:57 +02:00 · 2023-03-08 09:59:28 +01:00
369 changed files with 11438 additions and 2835 deletions
--- a/3
+++ b/3
@@ -957,6 +957,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
 F: include/hw/scsi/*
 F: hw/scsi/*
+F: tests/scsi-disk-test.c
 F: tests/virtio-scsi-test.c
 T: git git://github.com/bonzini/qemu.git scsi-next

@@ -1395,6 +1396,7 @@ S: Supported
 F: qobject/
 F: include/qapi/qmp/
 X: include/qapi/qmp/dispatch.h
+F: scripts/coccinelle/qobject.cocci
 F: tests/check-qdict.c
 F: tests/check-qfloat.c
 F: tests/check-qint.c
@@ -1419,6 +1421,7 @@ F: qom/
 X: qom/cpu.c
 F: tests/check-qom-interface.c
 F: tests/check-qom-proplist.c
+F: tests/check-qom-props.c
 F: tests/qom-test.c

 QMP
--- a/Makefile.target
+++ b/Makefile.target
@@ -36,6 +36,10 @@ endif
 PROGS=$(QEMU_PROG) $(QEMU_PROGW)
 STPFILES=

+ifdef CONFIG_LINUX_USER
+PROGS+=$(QEMU_PROG)-binfmt
+endif
+
 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

@@ -121,6 +125,8 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
 obj-y += linux-user/
 obj-y += gdbstub.o thunk.o user-exec.o user-exec-stub.o

+obj-binfmt-y += linux-user/
+
 endif #CONFIG_LINUX_USER

 #########################################################
@@ -169,7 +175,11 @@ endif # CONFIG_SOFTMMU
 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

+ifdef CONFIG_LINUX_USER
+dummy := $(call unnest-vars,,obj-y obj-binfmt-y)
+else
 dummy := $(call unnest-vars,,obj-y)
+endif
 all-obj-y := $(obj-y)

 target-obj-y :=
@@ -211,6 +221,9 @@ ifdef CONFIG_DARWIN
 	$(call quiet-command,SetFile -a C $@,"SETFILE","$(TARGET_DIR)$@")
 endif

+$(QEMU_PROG)-binfmt: $(obj-binfmt-y)
+	$(call LINK,$^)
+
 gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 	$(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"GEN","$(TARGET_DIR)$@")

--- a/2
+++ b/2
@@ -1 +1 @@
-2.9.0
+2.9.1
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2028,6 +2028,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
                    sw = sw1;
                }
                QLIST_REMOVE (cap, entries);
+                g_free (cap->hw.mix_buf);
+                g_free (cap->buf);
                g_free (cap);
            }
            return;
--- a/block.c
+++ b/block.c
@@ -937,16 +937,14 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 static void update_options_from_flags(QDict *options, int flags)
 {
    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
-        qdict_put(options, BDRV_OPT_CACHE_DIRECT,
-                  qbool_from_bool(flags & BDRV_O_NOCACHE));
+        qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
    }
    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
-        qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
-                  qbool_from_bool(flags & BDRV_O_NO_FLUSH));
+        qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
+                       flags & BDRV_O_NO_FLUSH);
    }
    if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
-        qdict_put(options, BDRV_OPT_READ_ONLY,
-                  qbool_from_bool(!(flags & BDRV_O_RDWR)));
+        qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
    }
 }

@@ -1167,7 +1165,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
        filename = qdict_get_try_str(options, "filename");
    }

-    if (drv->bdrv_needs_filename && !filename) {
+    if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
@@ -1362,7 +1360,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
-            qdict_put(*options, "filename", qstring_from_str(filename));
+            qdict_put_str(*options, "filename", filename);
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
@@ -1383,7 +1381,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
            }

            drvname = drv->format_name;
-            qdict_put(*options, "driver", qstring_from_str(drvname));
+            qdict_put_str(*options, "driver", drvname);
        } else {
            error_setg(errp, "Must specify either driver or file");
            return -EINVAL;
@@ -2038,7 +2036,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
    }

    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
-        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
+        qdict_put_str(options, "driver", bs->backing_format);
    }

    backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
@@ -2193,12 +2191,9 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
    }

    /* Prepare options QDict for the temporary file */
-    qdict_put(snapshot_options, "file.driver",
-              qstring_from_str("file"));
-    qdict_put(snapshot_options, "file.filename",
-              qstring_from_str(tmp_filename));
-    qdict_put(snapshot_options, "driver",
-              qstring_from_str("qcow2"));
+    qdict_put_str(snapshot_options, "file.driver", "file");
+    qdict_put_str(snapshot_options, "file.filename", tmp_filename);
+    qdict_put_str(snapshot_options, "driver", "qcow2");

    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
    snapshot_options = NULL;
@@ -2373,8 +2368,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
                goto fail;
            }

-            qdict_put(options, "file",
-                      qstring_from_str(bdrv_get_node_name(file_bs)));
+            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
        }
    }

@@ -2396,8 +2390,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
         * sure to update both bs->options (which has the full effective
         * options for bs) and options (which has file.* already removed).
         */
-        qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
-        qdict_put(options, "driver", qstring_from_str(drv->format_name));
+        qdict_put_str(bs->options, "driver", drv->format_name);
+        qdict_put_str(options, "driver", drv->format_name);
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        goto fail;
@@ -2772,12 +2766,12 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
     * that they are checked at the end of this function. */
    value = qemu_opt_get(opts, "node-name");
    if (value) {
-        qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
+        qdict_put_str(reopen_state->options, "node-name", value);
    }

    value = qemu_opt_get(opts, "driver");
    if (value) {
-        qdict_put(reopen_state->options, "driver", qstring_from_str(value));
+        qdict_put_str(reopen_state->options, "driver", value);
    }

    /* if we are to stay read-only, do not allow permission change
@@ -3268,7 +3262,7 @@ exit:
 /**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
-int bdrv_truncate(BdrvChild *child, int64_t offset)
+int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
 {
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
@@ -3280,12 +3274,18 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
     *        cannot assert this permission in that case. */
    // assert(child->perm & BLK_PERM_RESIZE);

-    if (!drv)
+    if (!drv) {
+        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
-    if (!drv->bdrv_truncate)
+    }
+    if (!drv->bdrv_truncate) {
+        error_setg(errp, "Image format driver does not support resize");
        return -ENOTSUP;
-    if (bs->read_only)
+    }
+    if (bs->read_only) {
+        error_setg(errp, "Image is read-only");
        return -EACCES;
+    }

    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
@@ -3293,6 +3293,8 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
        bdrv_dirty_bitmap_truncate(bs);
        bdrv_parent_cb_resize(bs);
        ++bs->write_gen;
+    } else {
+        error_setg_errno(errp, -ret, "Failed to resize image");
    }
    return ret;
 }
@@ -3861,19 +3863,6 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
    return retval;
 }

-int bdrv_get_backing_file_depth(BlockDriverState *bs)
-{
-    if (!bs->drv) {
-        return 0;
-    }
-
-    if (!bs->backing) {
-        return 0;
-    }
-
-    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
-}
-
 void bdrv_init(void)
 {
    module_call_init(MODULE_INIT_BLOCK);
@@ -4268,8 +4257,7 @@ void bdrv_img_create(const char *filename, const char *fmt,

            if (backing_fmt) {
                backing_options = qdict_new();
-                qdict_put(backing_options, "driver",
-                          qstring_from_str(backing_fmt));
+                qdict_put_str(backing_options, "driver", backing_fmt);
            }

            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
@@ -4674,11 +4662,9 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->bs->full_open_options) {
-            qdict_put_obj(opts, "driver",
-                          QOBJECT(qstring_from_str(drv->format_name)));
+            qdict_put_str(opts, "driver", drv->format_name);
            QINCREF(bs->file->bs->full_open_options);
-            qdict_put_obj(opts, "file",
-                          QOBJECT(bs->file->bs->full_open_options));
+            qdict_put(opts, "file", bs->file->bs->full_open_options);

            bs->full_open_options = opts;
        } else {
@@ -4694,8 +4680,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)

        opts = qdict_new();
        append_open_options(opts, bs);
-        qdict_put_obj(opts, "driver",
-                      QOBJECT(qstring_from_str(drv->format_name)));
+        qdict_put_str(opts, "driver", drv->format_name);

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
@@ -4705,8 +4690,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
-            qdict_put_obj(opts, "filename",
-                          QOBJECT(qstring_from_str(bs->exact_filename)));
+            qdict_put_str(opts, "filename", bs->exact_filename);
        }

        bs->full_open_options = opts;
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,6 +21,8 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
+block-obj-y += dictzip.o
+block-obj-y += tar.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -1,6 +1,7 @@
 /*
 * Block protocol for I/O error injection
 *
+ * Copyright (C) 2016-2017 Red Hat, Inc.
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -37,7 +38,12 @@
 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
-    int align;
+    uint64_t align;
+    uint64_t max_transfer;
+    uint64_t opt_write_zero;
+    uint64_t max_write_zero;
+    uint64_t opt_discard;
+    uint64_t max_discard;

    /* For blkdebug_refresh_filename() */
    char *config_file;
@@ -301,7 +307,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkdebug:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        qdict_put_str(options, "x-image", filename);
        return;
    }

@@ -320,7 +326,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
+    qdict_put_str(options, "x-image", filename);
 }

 static QemuOptsList runtime_opts = {
@@ -342,6 +348,31 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
+        {
+            .name = "max-transfer",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum transfer size in bytes",
+        },
+        {
+            .name = "opt-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum write zero alignment in bytes",
+        },
+        {
+            .name = "max-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum write zero size in bytes",
+        },
+        {
+            .name = "opt-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum discard alignment in bytes",
+        },
+        {
+            .name = "max-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum discard size in bytes",
+        },
        { /* end of list */ }
    },
 };
@@ -352,8 +383,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    uint64_t align;
    int ret;
+    uint64_t align;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -382,21 +413,69 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", 0);
-    if (align < INT_MAX && is_power_of_2(align)) {
-        s->align = align;
-    } else if (align) {
-        error_setg(errp, "Invalid alignment");
-        ret = -EINVAL;
-        goto fail_unref;
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+        bs->file->bs->supported_zero_flags;
+    ret = -EINVAL;
+
+    /* Set alignment overrides */
+    s->align = qemu_opt_get_size(opts, "align", 0);
+    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
+        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
+                   s->align);
+        goto out;
+    }
+    align = MAX(s->align, bs->file->bs->bl.request_alignment);
+
+    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
+    if (s->max_transfer &&
+        (s->max_transfer >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
+        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
+                   s->max_transfer);
+        goto out;
+    }
+
+    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
+    if (s->opt_write_zero &&
+        (s->opt_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
+                   s->opt_write_zero);
+        goto out;
+    }
+
+    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
+    if (s->max_write_zero &&
+        (s->max_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_write_zero,
+                          MAX(s->opt_write_zero, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
+                   s->max_write_zero);
+        goto out;
+    }
+
+    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
+    if (s->opt_discard &&
+        (s->opt_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
+                   s->opt_discard);
+        goto out;
+    }
+
+    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
+    if (s->max_discard &&
+        (s->max_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_discard,
+                          MAX(s->opt_discard, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
+                   s->max_discard);
+        goto out;
    }

    ret = 0;
-    goto out;
-
-fail_unref:
-    bdrv_unref_child(bs, bs->file);
 out:
    if (ret < 0) {
        g_free(s->config_file);
@@ -405,11 +484,30 @@ out:
    return ret;
 }

-static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
+static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    int error = rule->options.inject.error;
-    bool immediately = rule->options.inject.immediately;
+    BlkdebugRule *rule = NULL;
+    int error;
+    bool immediately;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (bytes && inject_offset >= offset &&
+             inject_offset < offset + bytes))
+        {
+            break;
+        }
+    }
+
+    if (!rule || !rule->options.inject.error) {
+        return 0;
+    }
+
+    immediately = rule->options.inject.immediately;
+    error = rule->options.inject.error;

    if (rule->options.inject.once) {
        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
@@ -428,21 +526,18 @@ static int coroutine_fn
 blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                   QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
    }

-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
    }

    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -452,21 +547,18 @@ static int coroutine_fn
 blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
    }

-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
    }

    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -474,22 +566,81 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,

 static int blkdebug_co_flush(BlockDriverState *bs)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err = rule_check(bs, 0, 0);

-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.offset == -1) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    if (err) {
+        return err;
    }

    return bdrv_co_flush(bs->file->bs);
 }

+static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
+                                                  int64_t offset, int count,
+                                                  BdrvRequestFlags flags)
+{
+    uint32_t align = MAX(bs->bl.request_alignment,
+                         bs->bl.pwrite_zeroes_alignment);
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * preferred alignment (so that we test the fallback to writes on
+     * unaligned portions), and check that the block layer never hands
+     * us anything unaligned that crosses an alignment boundary.  */
+    if (count < align) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, align));
+    assert(QEMU_IS_ALIGNED(count, align));
+    if (bs->bl.max_pwrite_zeroes) {
+        assert(count <= bs->bl.max_pwrite_zeroes);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
+                                             int64_t offset, int count)
+{
+    uint32_t align = bs->bl.pdiscard_alignment;
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * minimum alignment, and ensure that unaligned requests do not
+     * cross optimum discard boundaries. */
+    if (count < bs->bl.request_alignment) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
+    if (align && count >= align) {
+        assert(QEMU_IS_ALIGNED(offset, align));
+        assert(QEMU_IS_ALIGNED(count, align));
+    }
+    if (bs->bl.max_pdiscard) {
+        assert(count <= bs->bl.max_pdiscard);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
+}

 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -663,7 +814,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)

 static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, NULL);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -689,16 +840,20 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    if (!force_json && bs->file->bs->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s", s->config_file ?: "",
-                 bs->file->bs->exact_filename);
+        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                           "blkdebug:%s:%s", s->config_file ?: "",
+                           bs->file->bs->exact_filename);
+        if (ret >= sizeof(bs->exact_filename)) {
+            /* An overflow makes the filename unusable, so do not report any */
+            bs->exact_filename[0] = 0;
+        }
    }

    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
+    qdict_put_str(opts, "driver", "blkdebug");

    QINCREF(bs->file->bs->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
+    qdict_put(opts, "image", bs->file->bs->full_open_options);

    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image")) {
@@ -717,6 +872,21 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
    if (s->align) {
        bs->bl.request_alignment = s->align;
    }
+    if (s->max_transfer) {
+        bs->bl.max_transfer = s->max_transfer;
+    }
+    if (s->opt_write_zero) {
+        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
+    }
+    if (s->max_write_zero) {
+        bs->bl.max_pwrite_zeroes = s->max_write_zero;
+    }
+    if (s->opt_discard) {
+        bs->bl.pdiscard_alignment = s->opt_discard;
+    }
+    if (s->max_discard) {
+        bs->bl.max_pdiscard = s->max_discard;
+    }
 }

 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -744,6 +914,8 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_co_preadv         = blkdebug_co_preadv,
    .bdrv_co_pwritev        = blkdebug_co_pwritev,
    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
+    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
+    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -37,9 +37,6 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
    return ret;
 }

--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -67,7 +67,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkverify:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        qdict_put_str(options, "x-image", filename);
        return;
    }

@@ -84,7 +84,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
+    qdict_put_str(options, "x-image", filename);
 }

 static QemuOptsList runtime_opts = {
@@ -142,9 +142,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -291,13 +288,12 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
        && s->test_file->bs->full_open_options)
    {
        QDict *opts = qdict_new();
-        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
+        qdict_put_str(opts, "driver", "blkverify");

        QINCREF(bs->file->bs->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+        qdict_put(opts, "raw", bs->file->bs->full_open_options);
        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put_obj(opts, "test",
-                      QOBJECT(s->test_file->bs->full_open_options));
+        qdict_put(opts, "test", s->test_file->bs->full_open_options);

        bs->full_open_options = opts;
    }
@@ -305,10 +301,14 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
    if (bs->file->bs->exact_filename[0]
        && s->test_file->bs->exact_filename[0])
    {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkverify:%s:%s",
-                 bs->file->bs->exact_filename,
-                 s->test_file->bs->exact_filename);
+        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                           "blkverify:%s:%s",
+                           bs->file->bs->exact_filename,
+                           s->test_file->bs->exact_filename);
+        if (ret >= sizeof(bs->exact_filename)) {
+            /* An overflow makes the filename unusable, so do not report any */
+            bs->exact_filename[0] = 0;
+        }
    }
 }

--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1746,13 +1746,21 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset)
+int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
 {
    if (!blk_is_available(blk)) {
+        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset);
+    return bdrv_truncate(blk->root, offset, errp);
+}
+
+void blk_legacy_resize_cb(BlockBackend *blk)
+{
+    if (blk->legacy_dev) {
+        xen_blk_resize_cb(blk->dev);
+    }
 }

 static void blk_pdiscard_entry(void *opaque)
--- a/block/commit.c
+++ b/block/commit.c
@@ -89,6 +89,12 @@ static void commit_complete(BlockJob *job, void *opaque)
    int ret = data->ret;
    bool remove_commit_top_bs = false;

+    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
+    bdrv_ref(top);
+    if (overlay_bs) {
+        bdrv_ref(overlay_bs);
+    }
+
    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
     * the normal backing chain can be restored. */
    blk_unref(s->base);
@@ -115,6 +121,13 @@ static void commit_complete(BlockJob *job, void *opaque)
    }
    g_free(s->backing_file_str);
    blk_unref(s->top);
+
+    /* If there is more than one reference to the job (e.g. if called from
+     * block_job_finish_sync()), block_job_completed() won't free it and
+     * therefore the blockers on the intermediate nodes remain. This would
+     * cause bdrv_set_backing_hd() to fail. */
+    block_job_remove_all_bdrv(job);
+
    block_job_completed(&s->common, ret);
    g_free(data);

@@ -124,6 +137,9 @@ static void commit_complete(BlockJob *job, void *opaque)
    if (remove_commit_top_bs) {
        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
    }
+
+    bdrv_unref(overlay_bs);
+    bdrv_unref(top);
 }

 static void coroutine_fn commit_run(void *opaque)
@@ -151,7 +167,7 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len);
+        ret = blk_truncate(s->base, s->common.len, NULL);
        if (ret) {
            goto out;
        }
@@ -335,6 +351,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    if (commit_top_bs == NULL) {
        goto fail;
    }
+    if (!filter_node_name) {
+        commit_top_bs->implicit = true;
+    }
    commit_top_bs->total_sectors = top->total_sectors;
    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));

@@ -511,8 +530,9 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length);
+        ret = blk_truncate(backing, length, &local_err);
        if (ret < 0) {
+            error_report_err(local_err);
            goto ro_cleanup;
        }
    }
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -389,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, NULL);
 }

 static void block_crypto_close(BlockDriverState *bs)
--- a/block/curl.c
+++ b/block/curl.c
@@ -147,6 +147,7 @@ static void curl_multi_do(void *arg);
 static void curl_multi_read(void *arg);

 #ifdef NEED_CURL_TIMER_CALLBACK
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -163,6 +164,7 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 }
 #endif

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *userp, void *sp)
 {
@@ -212,6 +214,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    return 0;
 }

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -226,6 +229,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    return realsize;
 }

+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    CURLState *s = ((CURLState*)opaque);
@@ -264,7 +268,9 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
                                  request_length - offset);
            }

+            qemu_mutex_unlock(&s->s->mutex);
            acb->common.cb(acb->common.opaque, 0);
+            qemu_mutex_lock(&s->s->mutex);
            qemu_aio_unref(acb);
            s->acb[i] = NULL;
        }
@@ -275,6 +281,7 @@ read_end:
    return size * nmemb;
 }

+/* Called with s->mutex held.  */
 static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
                         CURLAIOCB *acb)
 {
@@ -305,8 +312,6 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
            if (clamped_len < len) {
                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
            }
-            acb->common.cb(acb->common.opaque, 0);
-
            return FIND_RET_OK;
        }

@@ -449,6 +454,7 @@ static void curl_multi_timeout_do(void *arg)
 #endif
 }

+/* Called with s->mutex held.  */
 static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
    CURLState *state = NULL;
@@ -467,7 +473,9 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
            break;
        }
        if (!state) {
+            qemu_mutex_unlock(&s->mutex);
            aio_poll(bdrv_get_aio_context(bs), true);
+            qemu_mutex_lock(&s->mutex);
        }
    } while(!state);

@@ -530,8 +538,14 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
    return state;
 }

+/* Called with s->mutex held.  */
 static void curl_clean_state(CURLState *s)
 {
+    int j;
+    for (j = 0; j < CURL_NUM_ACB; j++) {
+        assert(!s->acb[j]);
+    }
+
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);

@@ -548,7 +562,7 @@ static void curl_clean_state(CURLState *s)
 static void curl_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
+    qdict_put_str(options, CURL_BLOCK_OPT_URL, filename);
 }

 static void curl_detach_aio_context(BlockDriverState *bs)
@@ -556,6 +570,7 @@ static void curl_detach_aio_context(BlockDriverState *bs)
    BDRVCURLState *s = bs->opaque;
    int i;

+    qemu_mutex_lock(&s->mutex);
    for (i = 0; i < CURL_NUM_STATES; i++) {
        if (s->states[i].in_use) {
            curl_clean_state(&s->states[i]);
@@ -571,6 +586,7 @@ static void curl_detach_aio_context(BlockDriverState *bs)
        curl_multi_cleanup(s->multi);
        s->multi = NULL;
    }
+    qemu_mutex_unlock(&s->mutex);

    timer_del(&s->timer);
 }
@@ -668,6 +684,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        return -EROFS;
    }

+    qemu_mutex_init(&s->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -738,7 +755,9 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    DPRINTF("CURL: Opening %s\n", file);
    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
+    qemu_mutex_lock(&s->mutex);
    state = curl_init_state(bs, s);
+    qemu_mutex_unlock(&s->mutex);
    if (!state)
        goto out_noclean;

@@ -782,11 +801,12 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }
    DPRINTF("CURL: Size = %zd\n", s->len);

+    qemu_mutex_lock(&s->mutex);
    curl_clean_state(state);
+    qemu_mutex_unlock(&s->mutex);
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

-    qemu_mutex_init(&s->mutex);
    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));

    qemu_opts_del(opts);
@@ -797,6 +817,7 @@ out:
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
+    qemu_mutex_destroy(&s->mutex);
    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
@@ -827,8 +848,8 @@ static void curl_readv_bh_cb(void *p)
    // we can just call the callback and be done.
    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
-            qemu_aio_unref(acb);
-            // fall through
+            ret = 0;
+            goto out;
        case FIND_RET_WAIT:
            goto out;
        default:
--- a/block/dictzip.c
+++ b/block/dictzip.c
@@ -0,0 +1,586 @@
+/*
+ * DictZip Block driver for dictzip enabled gzip files
+ *
+ * Use the "dictzip" tool from the "dictd" package to create gzip files that
+ * contain the extra DictZip headers.
+ *
+ * dictzip(1) is a compression program which creates compressed files in the
+ * gzip format (see RFC 1952). However, unlike gzip(1), dictzip(1) compresses
+ * the file in pieces and stores an index to the pieces in the gzip header.
+ * This allows random access to the file at the granularity of the compressed
+ * pieces (currently about 64kB) while maintaining good compression ratios
+ * (within 5% of the expected ratio for dictionary data).
+ * dictd(8) uses files stored in this format.
+ *
+ * For details on DictZip see http://dict.org/.
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include <zlib.h>
+
+// #define DEBUG
+
+#ifdef DEBUG
+#define dprintf(fmt, ...) do { printf("dzip: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) do { } while (0)
+#endif
+
+#define SECTOR_SIZE 512
+#define Z_STREAM_COUNT 4
+#define CACHE_COUNT 20
+
+/* magic values */
+
+#define GZ_MAGIC1     0x1f
+#define GZ_MAGIC2     0x8b
+#define DZ_MAGIC1      'R'
+#define DZ_MAGIC2      'A'
+
+#define GZ_FEXTRA     0x04      /* Optional field (random access index)    */
+#define GZ_FNAME      0x08      /* Original name                           */
+#define GZ_COMMENT    0x10      /* Zero-terminated, human-readable comment */
+#define GZ_FHCRC      0x02      /* Header CRC16                            */
+
+/* offsets */
+
+#define GZ_ID            0      /* GZ_MAGIC (16bit)                        */
+#define GZ_FLG           3      /* FLaGs (see above)                       */
+#define GZ_XLEN         10      /* eXtra LENgth (16bit)                    */
+#define GZ_SI           12      /* Subfield ID (16bit)                     */
+#define GZ_VERSION      16      /* Version for subfield format             */
+#define GZ_CHUNKSIZE    18      /* Chunk size (16bit)                      */
+#define GZ_CHUNKCNT     20      /* Number of chunks (16bit)                */
+#define GZ_RNDDATA      22      /* Random access data (16bit)              */
+
+#define GZ_99_CHUNKSIZE 18      /* Chunk size (32bit)                      */
+#define GZ_99_CHUNKCNT  22      /* Number of chunks (32bit)                */
+#define GZ_99_FILESIZE  26      /* Size of unpacked file (64bit)           */
+#define GZ_99_RNDDATA   34      /* Random access data (32bit)              */
+
+struct BDRVDictZipState;
+
+typedef struct DictZipAIOCB {
+    BlockAIOCB common;
+    struct BDRVDictZipState *s;
+    QEMUIOVector *qiov;          /* QIOV of the original request */
+    QEMUIOVector *qiov_gz;       /* QIOV of the gz subrequest */
+    QEMUBH *bh;                  /* BH for cache */
+    z_stream *zStream;           /* stream to use for decoding */
+    int zStream_id;              /* stream id of the above pointer */
+    size_t start;                /* offset into the uncompressed file */
+    size_t len;                  /* uncompressed bytes to read */
+    uint8_t *gzipped;            /* the gzipped data */
+    uint8_t *buf;                /* cached result */
+    size_t gz_len;               /* amount of gzip data */
+    size_t gz_start;             /* uncompressed starting point of gzip data */
+    uint64_t offset;             /* offset for "start" into the uncompressed chunk */
+    int chunks_len;              /* amount of uncompressed data in all gzip data */
+} DictZipAIOCB;
+
+typedef struct dict_cache {
+    size_t start;
+    size_t len;
+    uint8_t *buf;
+} DictCache;
+
+typedef struct BDRVDictZipState {
+    BlockDriverState *hd;
+    z_stream zStream[Z_STREAM_COUNT];
+    DictCache cache[CACHE_COUNT];
+    int cache_index;
+    uint8_t  stream_in_use;
+    uint64_t chunk_len;
+    uint32_t chunk_cnt;
+    uint16_t *chunks;
+    uint32_t *chunks32;
+    uint64_t *offsets;
+    int64_t file_len;
+} BDRVDictZipState;
+
+static int start_zStream(z_stream *zStream)
+{
+    zStream->zalloc    = NULL;
+    zStream->zfree     = NULL;
+    zStream->opaque    = NULL;
+    zStream->next_in   = 0;
+    zStream->avail_in  = 0;
+    zStream->next_out  = NULL;
+    zStream->avail_out = 0;
+
+    return inflateInit2( zStream, -15 );
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "dzip",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the dictzip file",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int dictzip_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    BDRVDictZipState *s = bs->opaque;
+    const char *err = "Unknown (read error?)";
+    uint8_t magic[2];
+    char buf[100];
+    uint8_t header_flags;
+    uint16_t chunk_len16;
+    uint16_t chunk_cnt16;
+    uint32_t chunk_len32;
+    uint16_t header_ver;
+    uint16_t tmp_short;
+    uint64_t offset;
+    int chunks_len;
+    int headerLength = GZ_XLEN - 1;
+    int rnd_offs;
+    int ret;
+    int i;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    const char *filename;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+
+    if (!strncmp(filename, "dzip://", 7))
+        filename += 7;
+    else if (!strncmp(filename, "dzip:", 5))
+        filename += 5;
+
+    s->hd = bdrv_open(filename, NULL, NULL, flags | BDRV_O_PROTOCOL, errp);
+    if (!s->hd) {
+        ret = -EINVAL;
+        qemu_opts_del(opts);
+        return ret;
+    }
+
+    /* initialize zlib streams */
+    for (i = 0; i < Z_STREAM_COUNT; i++) {
+        if (start_zStream( &s->zStream[i] ) != Z_OK) {
+            err = s->zStream[i].msg;
+            goto fail;
+        }
+    }
+
+    /* gzip header */
+    if (bdrv_pread(s->hd->file, GZ_ID, &magic, sizeof(magic)) != sizeof(magic))
+        goto fail;
+
+    if (!((magic[0] == GZ_MAGIC1) && (magic[1] == GZ_MAGIC2))) {
+        err = "No gzip file";
+        goto fail;
+    }
+
+    /* dzip header */
+    if (bdrv_pread(s->hd->file, GZ_FLG, &header_flags, 1) != 1)
+        goto fail;
+
+    if (!(header_flags & GZ_FEXTRA)) {
+        err = "Not a dictzip file (wrong flags)";
+        goto fail;
+    }
+
+    /* extra length */
+    if (bdrv_pread(s->hd->file, GZ_XLEN, &tmp_short, 2) != 2)
+        goto fail;
+
+    headerLength += le16_to_cpu(tmp_short) + 2;
+
+    /* DictZip magic */
+    if (bdrv_pread(s->hd->file, GZ_SI, &magic, 2) != 2)
+        goto fail;
+
+    if (magic[0] != DZ_MAGIC1 || magic[1] != DZ_MAGIC2) {
+        err = "Not a dictzip file (missing extra magic)";
+        goto fail;
+    }
+
+    /* DictZip version */
+    if (bdrv_pread(s->hd->file, GZ_VERSION, &header_ver, 2) != 2)
+        goto fail;
+
+    header_ver = le16_to_cpu(header_ver);
+
+    switch (header_ver) {
+        case 1: /* Normal DictZip */
+            /* number of chunks */
+            if (bdrv_pread(s->hd->file, GZ_CHUNKSIZE, &chunk_len16, 2) != 2)
+                goto fail;
+
+            s->chunk_len = le16_to_cpu(chunk_len16);
+
+            /* chunk count */
+            if (bdrv_pread(s->hd->file, GZ_CHUNKCNT, &chunk_cnt16, 2) != 2)
+                goto fail;
+
+            s->chunk_cnt = le16_to_cpu(chunk_cnt16);
+            chunks_len = sizeof(short) * s->chunk_cnt;
+            rnd_offs = GZ_RNDDATA;
+            break;
+        case 99: /* Special Alex pigz version */
+            /* number of chunks */
+            if (bdrv_pread(s->hd->file, GZ_99_CHUNKSIZE, &chunk_len32, 4) != 4)
+                goto fail;
+
+            dprintf("chunk len [%#x] = %d\n", GZ_99_CHUNKSIZE, chunk_len32);
+            s->chunk_len = le32_to_cpu(chunk_len32);
+
+            /* chunk count */
+            if (bdrv_pread(s->hd->file, GZ_99_CHUNKCNT, &s->chunk_cnt, 4) != 4)
+                goto fail;
+
+            s->chunk_cnt = le32_to_cpu(s->chunk_cnt);
+
+            dprintf("chunk len | count = %"PRId64" | %d\n", s->chunk_len, s->chunk_cnt);
+
+            /* file size */
+            if (bdrv_pread(s->hd->file, GZ_99_FILESIZE, &s->file_len, 8) != 8)
+                goto fail;
+
+            s->file_len = le64_to_cpu(s->file_len);
+            chunks_len = sizeof(int) * s->chunk_cnt;
+            rnd_offs = GZ_99_RNDDATA;
+            break;
+        default:
+            err = "Invalid DictZip version";
+            goto fail;
+    }
+
+    /* random access data */
+    s->chunks = g_malloc(chunks_len);
+    if (header_ver == 99)
+        s->chunks32 = (uint32_t *)s->chunks;
+
+    if (bdrv_pread(s->hd->file, rnd_offs, s->chunks, chunks_len) != chunks_len)
+        goto fail;
+
+    /* orig filename */
+    if (header_flags & GZ_FNAME) {
+        if (bdrv_pread(s->hd->file, headerLength + 1, buf, sizeof(buf)) != sizeof(buf))
+            goto fail;
+
+        buf[sizeof(buf) - 1] = '\0';
+        headerLength += strlen(buf) + 1;
+
+        if (strlen(buf) == sizeof(buf))
+            goto fail;
+
+        dprintf("filename: %s\n", buf);
+    }
+
+    /* comment field */
+    if (header_flags & GZ_COMMENT) {
+        if (bdrv_pread(s->hd->file, headerLength, buf, sizeof(buf)) != sizeof(buf))
+            goto fail;
+
+        buf[sizeof(buf) - 1] = '\0';
+        headerLength += strlen(buf) + 1;
+
+        if (strlen(buf) == sizeof(buf))
+            goto fail;
+
+        dprintf("comment: %s\n", buf);
+    }
+
+    if (header_flags & GZ_FHCRC)
+        headerLength += 2;
+
+    /* uncompressed file length*/
+    if (!s->file_len) {
+        uint32_t file_len;
+
+        if (bdrv_pread(s->hd->file, bdrv_getlength(s->hd) - 4, &file_len, 4) != 4)
+            goto fail;
+
+        s->file_len = le32_to_cpu(file_len);
+    }
+
+    /* compute offsets */
+    s->offsets = g_malloc(sizeof( *s->offsets ) * s->chunk_cnt);
+
+    for (offset = headerLength + 1, i = 0; i < s->chunk_cnt; i++) {
+        s->offsets[i] = offset;
+        switch (header_ver) {
+        case 1:
+            offset += le16_to_cpu(s->chunks[i]);
+            break;
+        case 99:
+            offset += le32_to_cpu(s->chunks32[i]);
+            break;
+        }
+
+        dprintf("chunk %#"PRIx64" - %#"PRIx64" = offset %#"PRIx64" -> %#"PRIx64"\n", i * s->chunk_len, (i+1) * s->chunk_len, s->offsets[i], offset);
+    }
+    qemu_opts_del(opts);
+
+    return 0;
+
+fail:
+    fprintf(stderr, "DictZip: Error opening file: %s\n", err);
+    bdrv_unref(s->hd);
+    if (s->chunks)
+        g_free(s->chunks);
+    qemu_opts_del(opts);
+    return -EINVAL;
+}
+
+/* This callback gets invoked when we have the result in cache already */
+static void dictzip_cache_cb(void *opaque)
+{
+    DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
+
+    qemu_iovec_from_buf(acb->qiov, 0, acb->buf, acb->len);
+    acb->common.cb(acb->common.opaque, 0);
+    qemu_bh_delete(acb->bh);
+    qemu_aio_unref(acb);
+}
+
+/* This callback gets invoked by the underlying block reader when we have
+ * all compressed data. We uncompress in here. */
+static void dictzip_read_cb(void *opaque, int ret)
+{
+    DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
+    struct BDRVDictZipState *s = acb->s;
+    uint8_t *buf;
+    DictCache *cache;
+    int r, i;
+
+    buf = g_malloc(acb->chunks_len);
+
+    /* try to find zlib stream for decoding */
+    do {
+        for (i = 0; i < Z_STREAM_COUNT; i++) {
+            if (!(s->stream_in_use & (1 << i))) {
+                s->stream_in_use |= (1 << i);
+                acb->zStream_id = i;
+                acb->zStream = &s->zStream[i];
+                break;
+            }
+        }
+    } while(!acb->zStream);
+
+    /* sure, we could handle more streams, but this callback should be single
+       threaded and when it's not, we really want to know! */
+    assert(i == 0);
+
+    /* uncompress the chunk */
+    acb->zStream->next_in   = acb->gzipped;
+    acb->zStream->avail_in  = acb->gz_len;
+    acb->zStream->next_out  = buf;
+    acb->zStream->avail_out = acb->chunks_len;
+
+    r = inflate( acb->zStream,  Z_PARTIAL_FLUSH );
+    if ( (r != Z_OK) && (r != Z_STREAM_END) )
+        fprintf(stderr, "Error inflating: [%d] %s\n", r, acb->zStream->msg);
+
+    if ( r == Z_STREAM_END )
+        inflateReset(acb->zStream);
+
+    dprintf("inflating [%d] left: %d | %d bytes\n", r, acb->zStream->avail_in, acb->zStream->avail_out);
+    s->stream_in_use &= ~(1 << acb->zStream_id);
+
+    /* nofity the caller */
+    qemu_iovec_from_buf(acb->qiov, 0, buf + acb->offset, acb->len);
+    acb->common.cb(acb->common.opaque, 0);
+
+    /* fill the cache */
+    cache = &s->cache[s->cache_index];
+    s->cache_index++;
+    if (s->cache_index == CACHE_COUNT)
+        s->cache_index = 0;
+
+    cache->len = 0;
+    if (cache->buf)
+        g_free(cache->buf);
+    cache->start = acb->gz_start;
+    cache->buf = buf;
+    cache->len = acb->chunks_len;
+
+    /* free occupied ressources */
+    g_free(acb->qiov_gz);
+    qemu_aio_unref(acb);
+}
+
+static const AIOCBInfo dictzip_aiocb_info = {
+    .aiocb_size         = sizeof(DictZipAIOCB),
+};
+
+/* This is where we get a request from a caller to read something */
+static BlockAIOCB *dictzip_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVDictZipState *s = bs->opaque;
+    DictZipAIOCB *acb;
+    QEMUIOVector *qiov_gz;
+    struct iovec *iov;
+    uint8_t *buf;
+    size_t  start = sector_num * SECTOR_SIZE;
+    size_t  len = nb_sectors * SECTOR_SIZE;
+    size_t  end = start + len;
+    size_t  gz_start;
+    size_t  gz_len;
+    int64_t gz_sector_num;
+    int     gz_nb_sectors;
+    int     first_chunk, last_chunk;
+    int     first_offset;
+    int     i;
+
+    acb = qemu_aio_get(&dictzip_aiocb_info, bs, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    /* Search Cache */
+    for (i = 0; i < CACHE_COUNT; i++) {
+        if (!s->cache[i].len)
+            continue;
+
+        if ((start >= s->cache[i].start) &&
+            (end <= (s->cache[i].start + s->cache[i].len))) {
+            acb->buf = s->cache[i].buf + (start - s->cache[i].start);
+            acb->len = len;
+            acb->qiov = qiov;
+            acb->bh = qemu_bh_new(dictzip_cache_cb, acb);
+            qemu_bh_schedule(acb->bh);
+
+            return &acb->common;
+        }
+    }
+
+    /* No cache, so let's decode */
+    /* We need to read these chunks */
+    first_chunk  = start / s->chunk_len;
+    first_offset = start - first_chunk * s->chunk_len;
+    last_chunk   = end / s->chunk_len;
+
+    gz_start = s->offsets[first_chunk];
+    gz_len = 0;
+    for (i = first_chunk; i <= last_chunk; i++) {
+        if (s->chunks32)
+            gz_len += le32_to_cpu(s->chunks32[i]);
+        else
+            gz_len += le16_to_cpu(s->chunks[i]);
+    }
+
+    gz_sector_num = gz_start / SECTOR_SIZE;
+    gz_nb_sectors = (gz_len / SECTOR_SIZE);
+
+    /* account for tail and heads */
+    while ((gz_start + gz_len) > ((gz_sector_num + gz_nb_sectors) * SECTOR_SIZE))
+        gz_nb_sectors++;
+
+    /* Allocate qiov, iov and buf in one chunk so we only need to free qiov */
+    qiov_gz = g_malloc0(sizeof(QEMUIOVector) + sizeof(struct iovec) +
+                           (gz_nb_sectors * SECTOR_SIZE));
+    iov = (struct iovec *)(((char *)qiov_gz) + sizeof(QEMUIOVector));
+    buf = ((uint8_t *)iov) + sizeof(struct iovec *);
+
+    /* Kick off the read by the backing file, so we can start decompressing */
+    iov->iov_base = (void *)buf;
+    iov->iov_len = gz_nb_sectors * 512;
+    qemu_iovec_init_external(qiov_gz, iov, 1);
+
+    dprintf("read %zd - %zd => %zd - %zd\n", start, end, gz_start, gz_start + gz_len);
+
+    acb->s = s;
+    acb->qiov = qiov;
+    acb->qiov_gz = qiov_gz;
+    acb->start = start;
+    acb->len = len;
+    acb->gzipped = buf + (gz_start % SECTOR_SIZE);
+    acb->gz_len = gz_len;
+    acb->gz_start = first_chunk * s->chunk_len;
+    acb->offset = first_offset;
+    acb->chunks_len = (last_chunk - first_chunk + 1) * s->chunk_len;
+
+    return bdrv_aio_readv(s->hd->file, gz_sector_num, qiov_gz, gz_nb_sectors,
+                          dictzip_read_cb, acb);
+}
+
+static void dictzip_close(BlockDriverState *bs)
+{
+    BDRVDictZipState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < CACHE_COUNT; i++) {
+        if (!s->cache[i].len)
+            continue;
+
+        g_free(s->cache[i].buf);
+    }
+
+    for (i = 0; i < Z_STREAM_COUNT; i++) {
+        inflateEnd(&s->zStream[i]);
+    }
+
+    if (s->chunks)
+        g_free(s->chunks);
+
+    if (s->offsets)
+        g_free(s->offsets);
+
+    dprintf("Close\n");
+}
+
+static int64_t dictzip_getlength(BlockDriverState *bs)
+{
+    BDRVDictZipState *s = bs->opaque;
+    dprintf("getlength -> %ld\n", s->file_len);
+    return s->file_len;
+}
+
+static BlockDriver bdrv_dictzip = {
+    .format_name     = "dzip",
+    .protocol_name   = "dzip",
+
+    .instance_size   = sizeof(BDRVDictZipState),
+    .bdrv_file_open  = dictzip_open,
+    .bdrv_close      = dictzip_close,
+    .bdrv_getlength  = dictzip_getlength,
+
+    .bdrv_aio_readv  = dictzip_aio_readv,
+};
+
+static void dictzip_block_init(void)
+{
+    bdrv_register(&bdrv_dictzip);
+}
+
+block_init(dictzip_block_init);
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -345,7 +345,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm);
+        info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS;
        info->granularity = bdrv_dirty_bitmap_granularity(bm);
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -377,7 +377,7 @@ static void raw_parse_filename(const char *filename, QDict *options,
     * function call can be ignored. */
    strstart(filename, "file:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static QemuOptsList raw_runtime_opts = {
@@ -2150,7 +2150,7 @@ static void hdev_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_device:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static bool hdev_is_sg(BlockDriverState *bs)
@@ -2239,7 +2239,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
            goto hdev_open_Mac_error;
        }

-        qdict_put(options, "filename", qstring_from_str(bsd_path));
+        qdict_put_str(options, "filename", bsd_path);

 hdev_open_Mac_error:
        g_free(mediaType);
@@ -2449,7 +2449,7 @@ static void cdrom_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_cdrom:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }
 #endif

--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -282,7 +282,7 @@ static void raw_parse_filename(const char *filename, QDict *options,
     * function call can be ignored. */
    strstart(filename, "file:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static QemuOptsList raw_runtime_opts = {
@@ -669,7 +669,7 @@ static void hdev_parse_filename(const char *filename, QDict *options,
    /* The prefix is optional, just as for "file". */
    strstart(filename, "host_device:", &filename);

-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+    qdict_put_str(options, "filename", filename);
 }

 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
--- a/block/io.c
+++ b/block/io.c
@@ -1757,6 +1757,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    int64_t n;
    int64_t ret, ret2;

+    *file = NULL;
    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
@@ -1777,11 +1778,11 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+            *file = bs;
        }
        return ret;
    }

-    *file = NULL;
    bdrv_inc_in_flight(bs);
    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
                                            file);
@@ -1791,9 +1792,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    }

    if (ret & BDRV_BLOCK_RAW) {
-        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
-                                    *pnum, pnum, file);
+        assert(ret & BDRV_BLOCK_OFFSET_VALID && *file);
+        ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+                                       *pnum, pnum, file);
        goto out;
    }

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -684,7 +684,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
-    int64_t ret;
+    int64_t ret, max_sector;

    iscsi_co_init_iscsitask(iscsilun, &iTask);

@@ -703,6 +703,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
        goto out;
    }

+    max_sector = iscsilun->num_blocks - sector_num;
    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
@@ -750,7 +751,7 @@ retry:
        goto out_unlock;
    }

-    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+    *pnum = MIN(sector_lun2qemu(lbasd->num_blocks, iscsilun), max_sector);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
@@ -965,8 +966,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;

        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
-        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
-             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
+        ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
    }

--- a/block/mirror.c
+++ b/block/mirror.c
@@ -514,7 +514,12 @@ static void mirror_exit(BlockJob *job, void *opaque)

    /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
     * inserting target_bs at s->to_replace, where we might not be able to get
-     * these permissions. */
+     * these permissions.
+     *
+     * Note that blk_unref() alone doesn't necessarily drop permissions because
+     * we might be running nested inside mirror_drain(), which takes an extra
+     * reference, so use an explicit blk_set_perm() first. */
+    blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(s->target);
    s->target = NULL;

@@ -724,7 +729,7 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        if (s->bdev_length > base_length) {
-            ret = blk_truncate(s->target, s->bdev_length);
+            ret = blk_truncate(s->target, s->bdev_length, NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
@@ -1147,6 +1152,9 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
    if (mirror_top_bs == NULL) {
        return;
    }
+    if (!filter_node_name) {
+        mirror_top_bs->implicit = true;
+    }
    mirror_top_bs->total_sectors = bs->total_sectors;
    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));

--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -352,14 +352,14 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
    aio_co_schedule(new_context, client->read_reply_co);
 }

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -65,11 +65,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    }

    /* transport */
-    if (!strcmp(uri->scheme, "nbd")) {
+    if (!g_strcmp0(uri->scheme, "nbd")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+unix")) {
        is_unix = true;
    } else {
        ret = -EINVAL;
@@ -79,7 +79,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    p = uri->path ? uri->path : "/";
    p += strspn(p, "/");
    if (p[0]) {
-        qdict_put(options, "export", qstring_from_str(p));
+        qdict_put_str(options, "export", p);
    }

    qp = query_params_parse(uri->query);
@@ -94,9 +94,8 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.path",
-                  qstring_from_str(qp->p[0].value));
+        qdict_put_str(options, "server.type", "unix");
+        qdict_put_str(options, "server.path", qp->p[0].value);
    } else {
        QString *host;
        char *port_str;
@@ -115,11 +114,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }

-        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put_str(options, "server.type", "inet");
        qdict_put(options, "server.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put(options, "server.port", qstring_from_str(port_str));
+        qdict_put_str(options, "server.port", port_str);
        g_free(port_str);
    }

@@ -181,7 +180,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        export_name[0] = 0; /* truncate 'file' */
        export_name += strlen(EN_OPTSTR);

-        qdict_put(options, "export", qstring_from_str(export_name));
+        qdict_put_str(options, "export", export_name);
    }

    /* extract the host_spec - fail if it's not nbd:... */
@@ -196,8 +195,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.path", qstring_from_str(unixpath));
+        qdict_put_str(options, "server.type", "unix");
+        qdict_put_str(options, "server.path", unixpath);
    } else {
        InetSocketAddress *addr = NULL;

@@ -206,9 +205,9 @@ static void nbd_parse_filename(const char *filename, QDict *options,
            goto out;
        }

-        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.host", qstring_from_str(addr->host));
-        qdict_put(options, "server.port", qstring_from_str(addr->port));
+        qdict_put_str(options, "server.type", "inet");
+        qdict_put_str(options, "server.host", addr->host);
+        qdict_put_str(options, "server.port", addr->port);
        qapi_free_InetSocketAddress(addr);
    }

@@ -247,13 +246,13 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
            return false;
        }

-        qdict_put(output_options, "server.type", qstring_from_str("unix"));
-        qdict_put(output_options, "server.path", qstring_from_str(path));
+        qdict_put_str(output_options, "server.type", "unix");
+        qdict_put_str(output_options, "server.path", path);
    } else if (host) {
-        qdict_put(output_options, "server.type", qstring_from_str("inet"));
-        qdict_put(output_options, "server.host", qstring_from_str(host));
-        qdict_put(output_options, "server.port",
-                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
+        qdict_put_str(output_options, "server.type", "inet");
+        qdict_put_str(output_options, "server.host", host);
+        qdict_put_str(output_options, "server.port",
+                      port ?: stringify(NBD_DEFAULT_PORT));
    }

    return true;
@@ -528,7 +527,7 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
        path = s->saddr->u.q_unix.path;
    } /* else can't represent as pseudo-filename */

-    qdict_put(opts, "driver", qstring_from_str("nbd"));
+    qdict_put_str(opts, "driver", "nbd");

    if (path && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -551,10 +550,10 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    qdict_put_obj(opts, "server", saddr_qdict);

    if (s->export) {
-        qdict_put(opts, "export", qstring_from_str(s->export));
+        qdict_put_str(opts, "export", s->export);
    }
    if (s->tlscredsid) {
-        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
+        qdict_put_str(opts, "tls-creds", s->tlscredsid);
    }

    qdict_flatten(opts);
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -83,7 +83,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        error_setg(errp, "Invalid URI specified");
        goto out;
    }
-    if (strcmp(uri->scheme, "nfs") != 0) {
+    if (g_strcmp0(uri->scheme, "nfs") != 0) {
        error_setg(errp, "URI scheme must be 'nfs'");
        goto out;
    }
@@ -104,9 +104,9 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        goto out;
    }

-    qdict_put(options, "server.host", qstring_from_str(uri->server));
-    qdict_put(options, "server.type", qstring_from_str("inet"));
-    qdict_put(options, "path", qstring_from_str(uri->path));
+    qdict_put_str(options, "server.host", uri->server);
+    qdict_put_str(options, "server.type", "inet");
+    qdict_put_str(options, "path", uri->path);

    for (i = 0; i < qp->n; i++) {
        unsigned long long val;
@@ -121,23 +121,17 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
            goto out;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
-            qdict_put(options, "user",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "user", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "gid")) {
-            qdict_put(options, "group",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "group", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            qdict_put(options, "tcp-syn-count",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            qdict_put(options, "readahead-size",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "readahead-size", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
-            qdict_put(options, "page-cache-size",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "page-cache-size", qp->p[i].value);
        } else if (!strcmp(qp->p[i].name, "debug")) {
-            qdict_put(options, "debug",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "debug", qp->p[i].value);
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
@@ -440,19 +434,23 @@ static void nfs_client_close(NFSClient *client)
    if (client->context) {
        if (client->fh) {
            nfs_close(client->context, client->fh);
+            client->fh = NULL;
        }
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
                           false, NULL, NULL, NULL, NULL);
        nfs_destroy_context(client->context);
+        client->context = NULL;
    }
-    memset(client, 0, sizeof(NFSClient));
+    g_free(client->path);
+    qemu_mutex_destroy(&client->mutex);
+    qapi_free_NFSServer(client->server);
+    client->server = NULL;
 }

 static void nfs_file_close(BlockDriverState *bs)
 {
    NFSClient *client = bs->opaque;
    nfs_client_close(client);
-    qemu_mutex_destroy(&client->mutex);
 }

 static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -505,6 +503,7 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
    struct stat st;
    char *file = NULL, *strp = NULL;

+    qemu_mutex_init(&client->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -667,7 +666,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
    if (ret < 0) {
        return ret;
    }
-    qemu_mutex_init(&client->mutex);
+
    bs->total_sectors = ret;
    ret = 0;
    return ret;
@@ -811,7 +810,7 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    QObject *server_qdict;
    Visitor *ov;

-    qdict_put(opts, "driver", qstring_from_str("nfs"));
+    qdict_put_str(opts, "driver", "nfs");

    if (client->uid && !client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -834,28 +833,25 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
    qdict_put_obj(opts, "server", server_qdict);
-    qdict_put(opts, "path", qstring_from_str(client->path));
+    qdict_put_str(opts, "path", client->path);

    if (client->uid) {
-        qdict_put(opts, "user", qint_from_int(client->uid));
+        qdict_put_int(opts, "user", client->uid);
    }
    if (client->gid) {
-        qdict_put(opts, "group", qint_from_int(client->gid));
+        qdict_put_int(opts, "group", client->gid);
    }
    if (client->tcp_syncnt) {
-        qdict_put(opts, "tcp-syn-cnt",
-                  qint_from_int(client->tcp_syncnt));
+        qdict_put_int(opts, "tcp-syn-cnt", client->tcp_syncnt);
    }
    if (client->readahead) {
-        qdict_put(opts, "readahead-size",
-                  qint_from_int(client->readahead));
+        qdict_put_int(opts, "readahead-size", client->readahead);
    }
    if (client->pagecache) {
-        qdict_put(opts, "page-cache-size",
-                  qint_from_int(client->pagecache));
+        qdict_put_int(opts, "page-cache-size", client->pagecache);
    }
    if (client->debug) {
-        qdict_put(opts, "debug", qint_from_int(client->debug));
+        qdict_put_int(opts, "debug", client->debug);
    }

    visit_free(ov);
--- a/block/null.c
+++ b/block/null.c
@@ -232,7 +232,7 @@ static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
                 bs->drv->format_name);
    }

-    qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+    qdict_put_str(opts, "driver", bs->drv->format_name);
    bs->full_open_options = opts;
 }

--- a/block/parallels.c
+++ b/block/parallels.c
@@ -223,7 +223,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS);
+                                (s->data_end + space) << BDRV_SECTOR_BITS,
+                                NULL);
        }
        if (ret < 0) {
            return ret;
@@ -456,8 +457,10 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file, res->image_end_offset);
+            Error *local_err = NULL;
+            ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
            if (ret < 0) {
+                error_report_err(local_err);
                res->check_errors++;
                return ret;
            }
@@ -504,7 +507,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0);
+    ret = blk_truncate(file, 0, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -696,7 +699,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -739,7 +742,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
    }

    g_free(s->bat_dirty_bmap);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -64,7 +64,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
        info->backing_file = g_strdup(bs->backing_file);
    }

-    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
    info->detect_zeroes = bs->detect_zeroes;

    if (blk && blk_get_public(blk)->throttle_state) {
@@ -125,6 +124,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,

    bs0 = bs;
    p_image_info = &info->image;
+    info->backing_file_depth = 0;
    while (1) {
        Error *local_err = NULL;
        bdrv_query_image_info(bs0, p_image_info, &local_err);
@@ -133,13 +133,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
            qapi_free_BlockDeviceInfo(info);
            return NULL;
        }
+
        if (bs0->drv && bs0->backing) {
+            info->backing_file_depth++;
            bs0 = bs0->backing->bs;
            (*p_image_info)->has_backing_image = true;
            p_image_info = &((*p_image_info)->backing_image);
        } else {
            break;
        }
+
+        /* Skip automatically inserted nodes that the user isn't aware of for
+         * query-block (blk != NULL), but not for query-named-block-nodes */
+        while (blk && bs0 && bs0->drv && bs0->implicit) {
+            bs0 = backing_bs(bs0);
+        }
    }

    return info;
@@ -322,6 +330,12 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
    BlockDriverState *bs = blk_bs(blk);
+
+    /* Skip automatically inserted nodes that the user isn't aware of */
+    while (bs && bs->drv && bs->implicit) {
+        bs = backing_bs(bs);
+    }
+
    info->device = g_strdup(blk_name(blk));
    info->type = g_strdup("unknown");
    info->locked = blk_dev_is_medium_locked(blk);
@@ -424,8 +438,8 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
    }
 }

-static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
-                                 bool query_backing)
+static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
+                                        bool blk_level)
 {
    BlockStats *s = NULL;

@@ -436,6 +450,14 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
        return s;
    }

+    /* Skip automatically inserted nodes that the user isn't aware of in
+     * a BlockBackend-level command. Stay at the exact node for a node-level
+     * command. */
+    while (blk_level && bs->drv && bs->implicit) {
+        bs = backing_bs(bs);
+        assert(bs);
+    }
+
    if (bdrv_get_node_name(bs)[0]) {
        s->has_node_name = true;
        s->node_name = g_strdup(bdrv_get_node_name(bs));
@@ -445,12 +467,12 @@ static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_bds_stats(bs->file->bs, query_backing);
+        s->parent = bdrv_query_bds_stats(bs->file->bs, blk_level);
    }

-    if (query_backing && bs->backing) {
+    if (blk_level && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_bds_stats(bs->backing->bs, query_backing);
+        s->backing = bdrv_query_bds_stats(bs->backing->bs, blk_level);
    }

    return s;
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -473,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
@@ -833,7 +833,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0);
+    ret = blk_truncate(qcow_blk, 0, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -916,7 +916,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
    if (ret < 0)
        return ret;

--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1728,14 +1728,17 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
+                Error *local_err = NULL;

                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
+                                    &local_err);
                if (ret < 0) {
+                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -524,25 +524,32 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                             uint64_t *refcount_cache_size, Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t combined_cache_size;
+    uint64_t combined_cache_size, l2_cache_max_setting;
    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
+    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
+    uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+    uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);

    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);

    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
-    *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0);
+    l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
+                                             DEFAULT_L2_CACHE_MAX_SIZE);
    *refcount_cache_size = qemu_opt_get_size(opts,
                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);

+    *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
+
    if (combined_cache_size_set) {
        if (l2_cache_size_set && refcount_cache_size_set) {
            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
                       "the same time");
            return;
-        } else if (*l2_cache_size > combined_cache_size) {
+        } else if (l2_cache_size_set &&
+                   (l2_cache_max_setting > combined_cache_size)) {
            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
@@ -557,25 +564,20 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
        } else if (refcount_cache_size_set) {
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        } else {
-            *refcount_cache_size = combined_cache_size
-                                 / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
-            *l2_cache_size = combined_cache_size - *refcount_cache_size;
-        }
-    } else {
-        if (!l2_cache_size_set && !refcount_cache_size_set) {
-            *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
-                                 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
-                                 * s->cluster_size);
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!l2_cache_size_set) {
-            *l2_cache_size = *refcount_cache_size
-                           * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!refcount_cache_size_set) {
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
+            /* Assign as much memory as possible to the L2 cache, and
+             * use the remainder for the refcount cache */
+            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
+                *l2_cache_size = max_l2_cache;
+                *refcount_cache_size = combined_cache_size - *l2_cache_size;
+            } else {
+                *refcount_cache_size =
+                    MIN(combined_cache_size, min_refcount_cache);
+                *l2_cache_size = combined_cache_size - *refcount_cache_size;
+            }
        }
    }
+    /* l2_cache_size and refcount_cache_size are ensured to have at least
+     * their minimum values in qcow2_update_options_prepare() */
 }

 typedef struct Qcow2ReopenState {
@@ -667,7 +669,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
-                            s->cache_clean_interval);
+                            DEFAULT_CACHE_CLEAN_INTERVAL);
 #ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
@@ -997,7 +999,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;
-    bs->total_sectors = header.size / 512;
+    bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
@@ -2265,7 +2267,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     * table)
     */
    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
+    qdict_put_str(options, "driver", "qcow2");
    blk = blk_new_open(filename, NULL, options,
                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
                       &local_err);
@@ -2294,9 +2296,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    /* Okay, now that we have a valid image, let's give it the right size */
-    ret = blk_truncate(blk, total_size);
+    ret = blk_truncate(blk, total_size, errp);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not resize image");
+        error_prepend(errp, "Could not resize image: ");
        goto out;
    }

@@ -2327,7 +2329,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
+    qdict_put_str(options, "driver", "qcow2");
    blk = blk_new_open(filename, NULL, options,
                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
    if (blk == NULL) {
@@ -2584,7 +2586,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file->bs);
-        return bdrv_truncate(bs->file, cluster_offset);
+        return bdrv_truncate(bs->file, cluster_offset, NULL);
    }

    buf = qemu_blockalign(bs, s->cluster_size);
@@ -2674,6 +2676,7 @@ fail:
 static int make_completely_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
+    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
@@ -2798,8 +2801,10 @@ static int make_completely_empty(BlockDriverState *bs)
        goto fail;
    }

-    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
+                        &local_err);
    if (ret < 0) {
+        error_report_err(local_err);
        goto fail;
    }

@@ -3273,9 +3278,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            return ret;
        }

-        ret = blk_truncate(blk, new_size);
+        ret = blk_truncate(blk, new_size, &local_err);
        blk_unref(blk);
        if (ret < 0) {
+            error_report_err(local_err);
            return ret;
        }
    }
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -27,6 +27,7 @@

 #include "crypto/cipher.h"
 #include "qemu/coroutine.h"
+#include "qemu/units.h"

 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -42,11 +43,11 @@

 /* 8 MB refcount table is enough for 2 PB images at 64k cluster size
 * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
-#define QCOW_MAX_REFTABLE_SIZE 0x800000
+#define QCOW_MAX_REFTABLE_SIZE S_8MiB

 /* 32 MB L1 table is enough for 2 PB images at 64k cluster size
 * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
-#define QCOW_MAX_L1_SIZE 0x2000000
+#define QCOW_MAX_L1_SIZE S_32MiB

 /* Allow for an average of 1k per snapshot table entry, should be plenty of
 * space for snapshot names and IDs */
@@ -68,16 +69,16 @@
 /* Must be at least 4 to cover all cases of refcount table growth */
 #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */

-/* Whichever is more */
-#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
-#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
-
-#define DEFAULT_CLUSTER_SIZE 65536
+#ifdef CONFIG_LINUX
+#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB
+#define DEFAULT_CACHE_CLEAN_INTERVAL 600  /* seconds */
+#else
+#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB
+/* Cache clean interval is currently available only on Linux, so must be 0 */
+#define DEFAULT_CACHE_CLEAN_INTERVAL 0
+#endif

+#define DEFAULT_CLUSTER_SIZE S_64KiB

 #define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
 #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
--- a/block/qed.c
+++ b/block/qed.c
@@ -635,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    blk_set_allow_write_beyond_eof(blk, true);

    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0);
+    ret = blk_truncate(blk, 0, errp);
    if (ret < 0) {
        goto out;
    }
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1096,19 +1096,15 @@ static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append_obj(children,
-                         QOBJECT(s->children[i]->bs->full_open_options));
+        qlist_append(children, s->children[i]->bs->full_open_options);
    }

    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
-    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
-                  QOBJECT(qint_from_int(s->threshold)));
-    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
-                  QOBJECT(qbool_from_bool(s->is_blkverify)));
-    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
-                  QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
-    qdict_put_obj(opts, "children", QOBJECT(children));
+    qdict_put_str(opts, "driver", "quorum");
+    qdict_put_int(opts, QUORUM_OPT_VOTE_THRESHOLD, s->threshold);
+    qdict_put_bool(opts, QUORUM_OPT_BLKVERIFY, s->is_blkverify);
+    qdict_put_bool(opts, QUORUM_OPT_REWRITE, s->rewrite_corrupted);
+    qdict_put(opts, "children", children);

    bs->full_open_options = opts;
 }
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file, offset, NULL);
 }

 static int raw_media_changed(BlockDriverState *bs)
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -154,20 +154,20 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
        goto done;
    }
    qemu_rbd_unescape(found_str);
-    qdict_put(options, "pool", qstring_from_str(found_str));
+    qdict_put_str(options, "pool", found_str);

    if (strchr(p, '@')) {
        found_str = qemu_rbd_next_tok(p, '@', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "image", qstring_from_str(found_str));
+        qdict_put_str(options, "image", found_str);

        found_str = qemu_rbd_next_tok(p, ':', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "snapshot", qstring_from_str(found_str));
+        qdict_put_str(options, "snapshot", found_str);
    } else {
        found_str = qemu_rbd_next_tok(p, ':', &p);
        qemu_rbd_unescape(found_str);
-        qdict_put(options, "image", qstring_from_str(found_str));
+        qdict_put_str(options, "image", found_str);
    }
    if (!p) {
        goto done;
@@ -189,9 +189,9 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
        qemu_rbd_unescape(value);

        if (!strcmp(name, "conf")) {
-            qdict_put(options, "conf", qstring_from_str(value));
+            qdict_put_str(options, "conf", value);
        } else if (!strcmp(name, "id")) {
-            qdict_put(options, "user" , qstring_from_str(value));
+            qdict_put_str(options, "user", value);
        } else {
            /*
             * We pass these internally to qemu_rbd_set_keypairs(), so
@@ -204,8 +204,8 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
            if (!keypairs) {
                keypairs = qlist_new();
            }
-            qlist_append(keypairs, qstring_from_str(name));
-            qlist_append(keypairs, qstring_from_str(value));
+            qlist_append_str(keypairs, name);
+            qlist_append_str(keypairs, value);
        }
    }

--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1052,11 +1052,11 @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
    }

    /* transport */
-    if (!strcmp(uri->scheme, "sheepdog")) {
+    if (!g_strcmp0(uri->scheme, "sheepdog")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
        is_unix = true;
    } else {
        error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -200,7 +200,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,

        qdict_extract_subqdict(options, &file_options, "file.");
        QDECREF(file_options);
-        qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file)));
+        qdict_put_str(options, "file", bdrv_get_node_name(file));

        drv->bdrv_close(bs);
        bdrv_unref_child(bs, bs->file);
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -205,7 +205,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
        return -EINVAL;
    }

-    if (strcmp(uri->scheme, "ssh") != 0) {
+    if (g_strcmp0(uri->scheme, "ssh") != 0) {
        error_setg(errp, "URI scheme must be 'ssh'");
        goto err;
    }
@@ -227,24 +227,23 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    }

    if(uri->user && strcmp(uri->user, "") != 0) {
-        qdict_put(options, "user", qstring_from_str(uri->user));
+        qdict_put_str(options, "user", uri->user);
    }

-    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put_str(options, "server.host", uri->server);

    port_str = g_strdup_printf("%d", uri->port ?: 22);
-    qdict_put(options, "server.port", qstring_from_str(port_str));
+    qdict_put_str(options, "server.port", port_str);
    g_free(port_str);

-    qdict_put(options, "path", qstring_from_str(uri->path));
+    qdict_put_str(options, "path", uri->path);

    /* Pick out any query parameters that we understand, and ignore
     * the rest.
     */
    for (i = 0; i < qp->n; ++i) {
        if (strcmp(qp->p[i].name, "host_key_check") == 0) {
-            qdict_put(options, "host_key_check",
-                      qstring_from_str(qp->p[i].value));
+            qdict_put_str(options, "host_key_check", qp->p[i].value);
        }
    }

@@ -574,9 +573,8 @@ static bool ssh_process_legacy_socket_options(QDict *output_opts,
    }

    if (host) {
-        qdict_put(output_opts, "server.host", qstring_from_str(host));
-        qdict_put(output_opts, "server.port",
-                  qstring_from_str(port ?: stringify(22)));
+        qdict_put_str(output_opts, "server.host", host);
+        qdict_put_str(output_opts, "server.port", port ?: stringify(22));
    }

    return true;
--- a/block/stream.c
+++ b/block/stream.c
@@ -280,6 +280,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,

 fail:
    if (orig_bs_flags != bdrv_get_flags(bs)) {
-        bdrv_reopen(bs, s->bs_flags, NULL);
+        bdrv_reopen(bs, orig_bs_flags, NULL);
    }
 }
--- a/block/tar.c
+++ b/block/tar.c
@@ -0,0 +1,379 @@
+/*
+ * Tar block driver
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+
+// #define DEBUG
+
+#ifdef DEBUG
+#define dprintf(fmt, ...) do { printf("tar: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) do { } while (0)
+#endif
+
+#define SECTOR_SIZE      512
+
+#define POSIX_TAR_MAGIC  "ustar"
+#define OFFS_LENGTH      0x7c
+#define OFFS_TYPE        0x9c
+#define OFFS_MAGIC       0x101
+
+#define OFFS_S_SP        0x182
+#define OFFS_S_EXT       0x1e2
+#define OFFS_S_LENGTH    0x1e3
+#define OFFS_SX_EXT      0x1f8
+
+typedef struct SparseCache {
+    uint64_t start;
+    uint64_t end;
+} SparseCache;
+
+typedef struct BDRVTarState {
+    BlockDriverState *hd;
+    size_t file_sec;
+    uint64_t file_len;
+    SparseCache *sparse;
+    int sparse_num;
+    uint64_t last_end;
+    char longfile[2048];
+} BDRVTarState;
+
+static int str_ends(char *str, const char *end)
+{
+    int end_len = strlen(end);
+    int str_len = strlen(str);
+
+    if (str_len < end_len)
+        return 0;
+
+    return !strncmp(str + str_len - end_len, end, end_len);
+}
+
+static int is_target_file(BlockDriverState *bs, char *filename,
+                          char *header)
+{
+    int retval = 0;
+
+    if (str_ends(filename, ".raw"))
+        retval = 1;
+
+    if (str_ends(filename, ".qcow"))
+        retval = 1;
+
+    if (str_ends(filename, ".qcow2"))
+        retval = 1;
+
+    if (str_ends(filename, ".vmdk"))
+        retval = 1;
+
+    if (retval &&
+        (header[OFFS_TYPE] != '0') &&
+        (header[OFFS_TYPE] != 'S')) {
+        retval = 0;
+    }
+
+    dprintf("does filename %s match? %s\n", filename, retval ? "yes" : "no");
+
+    /* make sure we're not using this name again */
+    filename[0] = '\0';
+
+    return retval;
+}
+
+static uint64_t tar2u64(char *ptr)
+{
+    uint64_t retval;
+    char oldend = ptr[12];
+
+    ptr[12] = '\0';
+    if (*ptr & 0x80) {
+        /* XXX we only support files up to 64 bit length */
+        retval = be64_to_cpu(*(uint64_t *)(ptr+4));
+        dprintf("Convert %lx -> %#lx\n", *(uint64_t*)(ptr+4), retval);
+    } else {
+        retval = strtol(ptr, NULL, 8);
+        dprintf("Convert %s -> %#lx\n", ptr, retval);
+    }
+
+    ptr[12] = oldend;
+
+    return retval;
+}
+
+static void tar_sparse(BDRVTarState *s, uint64_t offs, uint64_t len)
+{
+    SparseCache *sparse;
+
+    if (!len)
+        return;
+    if (!(offs - s->last_end)) {
+        s->last_end += len;
+        return;
+    }
+    if (s->last_end > offs)
+        return;
+
+    dprintf("Last chunk until %lx new chunk at %lx\n", s->last_end, offs);
+
+    s->sparse = g_realloc(s->sparse, (s->sparse_num + 1) * sizeof(SparseCache));
+    sparse = &s->sparse[s->sparse_num];
+    sparse->start = s->last_end;
+    sparse->end = offs;
+    s->last_end = offs + len;
+    s->sparse_num++;
+    dprintf("Sparse at %lx end=%lx\n", sparse->start,
+                                       sparse->end);
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "tar",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the tar file",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int tar_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    BDRVTarState *s = bs->opaque;
+    char header[SECTOR_SIZE];
+    char *real_file = header;
+    char *magic;
+    size_t header_offs = 0;
+    int ret;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    const char *filename;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+
+    if (!strncmp(filename, "tar://", 6))
+        filename += 6;
+    else if (!strncmp(filename, "tar:", 4))
+        filename += 4;
+
+    s->hd = bdrv_open(filename, NULL, NULL, flags | BDRV_O_PROTOCOL, errp);
+    if (!s->hd) {
+        ret = -EINVAL;
+        qemu_opts_del(opts);
+        return ret;
+    }
+
+    /* Search the file for an image */
+
+    do {
+        /* tar header */
+        if (bdrv_pread(s->hd->file, header_offs, header, SECTOR_SIZE) != SECTOR_SIZE)
+            goto fail;
+
+        if ((header_offs > 1) && !header[0]) {
+            fprintf(stderr, "Tar: No image file found in archive\n");
+            goto fail;
+        }
+
+        magic = &header[OFFS_MAGIC];
+        if (strncmp(magic, POSIX_TAR_MAGIC, 5)) {
+            fprintf(stderr, "Tar: Invalid magic: %s\n", magic);
+            goto fail;
+        }
+
+        dprintf("file type: %c\n", header[OFFS_TYPE]);
+
+        /* file length*/
+        s->file_len = (tar2u64(&header[OFFS_LENGTH]) + (SECTOR_SIZE - 1)) &
+                      ~(SECTOR_SIZE - 1);
+        s->file_sec = (header_offs / SECTOR_SIZE) + 1;
+
+        header_offs += s->file_len + SECTOR_SIZE;
+
+        if (header[OFFS_TYPE] == 'L') {
+            bdrv_pread(s->hd->file, header_offs - s->file_len, s->longfile,
+                       sizeof(s->longfile));
+            s->longfile[sizeof(s->longfile)-1] = '\0';
+            real_file = header;
+        } else if (s->longfile[0]) {
+            real_file = s->longfile;
+        } else {
+            real_file = header;
+        }
+    } while(!is_target_file(bs, real_file, header));
+
+    /* We found an image! */
+
+    if (header[OFFS_TYPE] == 'S') {
+        uint8_t isextended;
+        int i;
+
+        for (i = OFFS_S_SP; i < (OFFS_S_SP + (4 * 24)); i += 24)
+            tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
+
+        s->file_len = tar2u64(&header[OFFS_S_LENGTH]);
+        isextended = header[OFFS_S_EXT];
+
+        while (isextended) {
+            if (bdrv_pread(s->hd->file, s->file_sec * SECTOR_SIZE, header,
+                           SECTOR_SIZE) != SECTOR_SIZE)
+                goto fail;
+
+            for (i = 0; i < (21 * 24); i += 24)
+                tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
+            isextended = header[OFFS_SX_EXT];
+            s->file_sec++;
+        }
+        tar_sparse(s, s->file_len, 1);
+    }
+    qemu_opts_del(opts);
+
+    return 0;
+
+fail:
+    fprintf(stderr, "Tar: Error opening file\n");
+    bdrv_unref(s->hd);
+    qemu_opts_del(opts);
+    return -EINVAL;
+}
+
+typedef struct TarAIOCB {
+    BlockAIOCB common;
+    QEMUBH *bh;
+} TarAIOCB;
+
+/* This callback gets invoked when we have pure sparseness */
+static void tar_sparse_cb(void *opaque)
+{
+    TarAIOCB *acb = (TarAIOCB *)opaque;
+
+    acb->common.cb(acb->common.opaque, 0);
+    qemu_bh_delete(acb->bh);
+    qemu_aio_unref(acb);
+}
+
+static AIOCBInfo tar_aiocb_info = {
+    .aiocb_size         = sizeof(TarAIOCB),
+};
+
+/* This is where we get a request from a caller to read something */
+static BlockAIOCB *tar_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVTarState *s = bs->opaque;
+    SparseCache *sparse;
+    int64_t sec_file = sector_num + s->file_sec;
+    int64_t start = sector_num * SECTOR_SIZE;
+    int64_t end = start + (nb_sectors * SECTOR_SIZE);
+    int i;
+    TarAIOCB *acb;
+
+    for (i = 0; i < s->sparse_num; i++) {
+        sparse = &s->sparse[i];
+        if (sparse->start > end) {
+            /* We expect the cache to be start increasing */
+            break;
+        } else if ((sparse->start < start) && (sparse->end <= start)) {
+            /* sparse before our offset */
+            sec_file -= (sparse->end - sparse->start) / SECTOR_SIZE;
+        } else if ((sparse->start <= start) && (sparse->end >= end)) {
+            /* all our sectors are sparse */
+            char *buf = g_malloc0(nb_sectors * SECTOR_SIZE);
+
+            acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
+            qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
+            g_free(buf);
+            acb->bh = qemu_bh_new(tar_sparse_cb, acb);
+            qemu_bh_schedule(acb->bh);
+
+            return &acb->common;
+        } else if (((sparse->start >= start) && (sparse->start < end)) ||
+                   ((sparse->end >= start) && (sparse->end < end))) {
+            /* we're semi-sparse (worst case) */
+            /* let's go synchronous and read all sectors individually */
+            char *buf = g_malloc(nb_sectors * SECTOR_SIZE);
+            uint64_t offs;
+
+            for (offs = 0; offs < (nb_sectors * SECTOR_SIZE);
+                 offs += SECTOR_SIZE) {
+                bdrv_pread(bs->file, (sector_num * SECTOR_SIZE) + offs,
+                           buf + offs, SECTOR_SIZE);
+            }
+
+            qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
+            acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
+            acb->bh = qemu_bh_new(tar_sparse_cb, acb);
+            qemu_bh_schedule(acb->bh);
+
+            return &acb->common;
+        }
+    }
+
+    return bdrv_aio_readv(s->hd->file, sec_file, qiov, nb_sectors,
+                          cb, opaque);
+}
+
+static void tar_close(BlockDriverState *bs)
+{
+    dprintf("Close\n");
+}
+
+static int64_t tar_getlength(BlockDriverState *bs)
+{
+    BDRVTarState *s = bs->opaque;
+    dprintf("getlength -> %ld\n", s->file_len);
+    return s->file_len;
+}
+
+static BlockDriver bdrv_tar = {
+    .format_name     = "tar",
+    .protocol_name   = "tar",
+
+    .instance_size   = sizeof(BDRVTarState),
+    .bdrv_file_open  = tar_open,
+    .bdrv_close      = tar_close,
+    .bdrv_getlength  = tar_getlength,
+
+    .bdrv_aio_readv  = tar_aio_readv,
+};
+
+static void tar_block_init(void)
+{
+    bdrv_register(&bdrv_tar);
+}
+
+block_init(tar_block_init);
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -832,9 +832,9 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size);
+        ret = blk_truncate(blk, offset + blocks * block_size, errp);
        if (ret < 0) {
-            error_setg(errp, "Failed to statically allocate %s", filename);
+            error_prepend(errp, "Failed to statically allocate %s", filename);
            goto exit;
        }
    }
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size);
+                bdrv_truncate(bs->file, new_file_size, NULL);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1171,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size);
+    return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
 }

 /*
@@ -1586,7 +1586,7 @@ exit:
 static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
                           uint64_t image_size, VHDXImageType type,
                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length)
+                           uint32_t length, Error **errp)
 {
    int ret = 0;
    uint64_t data_file_offset;
@@ -1607,16 +1607,21 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset);
+        ret = blk_truncate(blk, data_file_offset, errp);
        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                            "Failed to resize the underlying file");
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size);
+        ret = blk_truncate(blk, data_file_offset + image_size, errp);
        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                            "Failed to resize the underlying file");
            goto exit;
        }
    } else {
+        error_setg(errp, "Unsupported image type");
        ret = -ENOTSUP;
        goto exit;
    }
@@ -1627,6 +1632,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        /* for a fixed file, the default BAT entry is not zero */
        s->bat = g_try_malloc0(length);
        if (length && s->bat == NULL) {
+            error_setg(errp, "Failed to allocate memory for the BAT");
            ret = -ENOMEM;
            goto exit;
        }
@@ -1646,6 +1652,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        }
        ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to write the BAT");
            goto exit;
        }
    }
@@ -1671,7 +1678,8 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
                                        uint32_t log_size,
                                        bool use_zero_blocks,
                                        VHDXImageType type,
-                                        uint64_t *metadata_offset)
+                                        uint64_t *metadata_offset,
+                                        Error **errp)
 {
    int ret = 0;
    uint32_t offset = 0;
@@ -1740,7 +1748,7 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    /* The region table gives us the data we need to create the BAT,
     * so do that now */
    ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
-                          bat_file_offset, bat_length);
+                          bat_file_offset, bat_length, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -1749,12 +1757,14 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write first region table");
        goto exit;
    }

    ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write second region table");
        goto exit;
    }

@@ -1825,6 +1835,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
        ret = -ENOTSUP;
        goto exit;
    } else {
+        error_setg(errp, "Invalid subformat '%s'", type);
        ret = -EINVAL;
        goto exit;
    }
@@ -1879,12 +1890,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
                     0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write file signature");
        goto delete_and_exit;
    }
    if (creator) {
        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
                         creator, creator_items * sizeof(gunichar2), 0);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to write creator field");
            goto delete_and_exit;
        }
    }
@@ -1893,13 +1906,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    /* Creates (B),(C) */
    ret = vhdx_create_new_headers(blk, image_size, log_size);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to write image headers");
        goto delete_and_exit;
    }

    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
    ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
                                       log_size, use_zero_blocks, image_type,
-                                       &metadata_offset);
+                                       &metadata_offset, errp);
    if (ret < 0) {
        goto delete_and_exit;
    }
@@ -1908,6 +1922,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
                                   metadata_offset, image_type);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to initialize metadata");
        goto delete_and_exit;
    }

--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1714,10 +1714,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    blk_set_allow_write_beyond_eof(blk, true);

    if (flat) {
-        ret = blk_truncate(blk, filesize);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
+        ret = blk_truncate(blk, filesize, errp);
        goto exit;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1780,9 +1777,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
    }

@@ -2090,10 +2086,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
+        ret = blk_truncate(new_blk, desc_len, errp);
    }
 exit:
    if (new_blk) {
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -851,20 +851,21 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 }

 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
-                             int64_t total_size)
+                             int64_t total_size, Error **errp)
 {
    int ret;

    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = blk_truncate(blk, total_size);
+    ret = blk_truncate(blk, total_size, errp);
    if (ret < 0) {
        return ret;
    }

    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Unable to write VHD header");
        return ret;
    }

@@ -996,11 +997,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)

    if (disk_type == VHD_DYNAMIC) {
        ret = create_dynamic_disk(blk, buf, total_sectors);
+        if (ret < 0) {
+            error_setg(errp, "Unable to create or write VHD header");
+        }
    } else {
-        ret = create_fixed_disk(blk, buf, total_size);
-    }
-    if (ret < 0) {
-        error_setg(errp, "Unable to create or write VHD header");
+        ret = create_fixed_disk(blk, buf, total_size, errp);
    }

 out:
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1057,10 +1057,10 @@ static void vvfat_parse_filename(const char *filename, QDict *options,
    }

    /* Fill in the options QDict */
-    qdict_put(options, "dir", qstring_from_str(filename));
-    qdict_put(options, "fat-type", qint_from_int(fat_type));
-    qdict_put(options, "floppy", qbool_from_bool(floppy));
-    qdict_put(options, "rw", qbool_from_bool(rw));
+    qdict_put_str(options, "dir", filename);
+    qdict_put_int(options, "fat-type", fat_type);
+    qdict_put_bool(options, "floppy", floppy);
+    qdict_put_bool(options, "rw", rw);
 }

 static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
@@ -2958,8 +2958,7 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
 	int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
 {
-    BDRVVVFATState* s = bs->opaque;
-    *n = s->sector_count - sector_num;
+    *n = bs->total_sectors - sector_num;
    if (*n > nb_sectors) {
        *n = nb_sectors;
    } else if (*n < 0) {
@@ -3040,7 +3039,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    }

    options = qdict_new();
-    qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
+    qdict_put_str(options, "write-target.driver", "qcow");
    s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
                              &child_vvfat_qcow, false, errp);
    QDECREF(options);
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,6 +27,10 @@ typedef struct NBDServerData {

 static NBDServerData *nbd_server;

+static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+{
+    nbd_client_put(client);
+}

 static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
    qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
    nbd_client_new(NULL, cioc,
                   nbd_server->tlscreds, NULL,
-                   nbd_client_put);
+                   nbd_blockdev_client_closed);
    object_unref(OBJECT(cioc));
    return TRUE;
 }
--- a/blockdev.c
+++ b/blockdev.c
@@ -527,7 +527,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
            error_setg(errp, "Cannot specify both 'driver' and 'format'");
            goto early_err;
        }
-        qdict_put(bs_opts, "driver", qstring_from_str(buf));
+        qdict_put_str(bs_opts, "driver", buf);
    }

    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
@@ -903,10 +903,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        copy_on_read = false;
    }

-    qdict_put(bs_opts, BDRV_OPT_READ_ONLY,
-              qstring_from_str(read_only ? "on" : "off"));
-    qdict_put(bs_opts, "copy-on-read",
-              qstring_from_str(copy_on_read ? "on" :"off"));
+    qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
+    qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");

    /* Controller type */
    value = qemu_opt_get(legacy_opts, "if");
@@ -1030,7 +1028,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            new_id = g_strdup_printf("%s%s%i", if_name[type],
                                     mediastr, unit_id);
        }
-        qdict_put(bs_opts, "id", qstring_from_str(new_id));
+        qdict_put_str(bs_opts, "id", new_id);
        g_free(new_id);
    }

@@ -1067,7 +1065,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            error_report("werror is not supported by this bus type");
            goto fail;
        }
-        qdict_put(bs_opts, "werror", qstring_from_str(werror));
+        qdict_put_str(bs_opts, "werror", werror);
    }

    rerror = qemu_opt_get(legacy_opts, "rerror");
@@ -1077,7 +1075,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            error_report("rerror is not supported by this bus type");
            goto fail;
        }
-        qdict_put(bs_opts, "rerror", qstring_from_str(rerror));
+        qdict_put_str(bs_opts, "rerror", rerror);
    }

    /* Actual block device init: Functionality shared with blockdev-add */
@@ -1737,10 +1735,9 @@ static void external_snapshot_prepare(BlkActionState *common,

        options = qdict_new();
        if (s->has_snapshot_node_name) {
-            qdict_put(options, "node-name",
-                      qstring_from_str(snapshot_node_name));
+            qdict_put_str(options, "node-name", snapshot_node_name);
        }
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);

        flags |= BDRV_O_NO_BACKING;
    }
@@ -2579,11 +2576,10 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,

    options = qdict_new();
    detect_zeroes = blk_get_detect_zeroes_from_root_state(blk);
-    qdict_put(options, "detect-zeroes",
-              qstring_from_str(detect_zeroes ? "on" : "off"));
+    qdict_put_str(options, "detect-zeroes", detect_zeroes ? "on" : "off");

    if (has_format) {
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);
    }

    medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
@@ -2892,6 +2888,7 @@ void qmp_block_resize(bool has_device, const char *device,
    Error *local_err = NULL;
    BlockBackend *blk = NULL;
    BlockDriverState *bs;
+    BlockBackend *cb_blk = NULL;
    AioContext *aio_context;
    int ret;

@@ -2903,6 +2900,10 @@ void qmp_block_resize(bool has_device, const char *device,
        return;
    }

+    if (has_device) {
+        cb_blk = blk_by_name(device);
+    }
+
    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);

@@ -2927,29 +2928,12 @@ void qmp_block_resize(bool has_device, const char *device,
        goto out;
    }

-    /* complete all in-flight operations before resizing the device */
-    bdrv_drain_all();
-
-    ret = blk_truncate(blk, size);
-    switch (ret) {
-    case 0:
-        break;
-    case -ENOMEDIUM:
-        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-        break;
-    case -ENOTSUP:
-        error_setg(errp, QERR_UNSUPPORTED);
-        break;
-    case -EACCES:
-        error_setg(errp, "Device '%s' is read only", device);
-        break;
-    case -EBUSY:
-        error_setg(errp, QERR_DEVICE_IN_USE, device);
-        break;
-    default:
-        error_setg_errno(errp, -ret, "Could not resize");
-        break;
+    bdrv_drained_begin(bs);
+    ret = blk_truncate(blk, size, errp);
+    if (!ret && cb_blk) {
+        blk_legacy_resize_cb(cb_blk);
    }
+    bdrv_drained_end(bs);

 out:
    blk_unref(blk);
@@ -3174,6 +3158,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    Error *local_err = NULL;
    int flags;
    int64_t size;
+    bool set_backing_hd = false;

    if (!backup->has_speed) {
        backup->speed = 0;
@@ -3224,6 +3209,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    }
    if (backup->sync == MIRROR_SYNC_MODE_NONE) {
        source = bs;
+        flags |= BDRV_O_NO_BACKING;
+        set_backing_hd = true;
    }

    size = bdrv_getlength(bs);
@@ -3250,8 +3237,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    }

    if (backup->format) {
-        options = qdict_new();
-        qdict_put(options, "driver", qstring_from_str(backup->format));
+        if (!options) {
+            options = qdict_new();
+        }
+        qdict_put_str(options, "driver", backup->format);
    }

    target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
@@ -3261,6 +3250,14 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,

    bdrv_set_aio_context(target_bs, aio_context);

+    if (set_backing_hd) {
+        bdrv_set_backing_hd(target_bs, source, &local_err);
+        if (local_err) {
+            bdrv_unref(target_bs);
+            goto out;
+        }
+    }
+
    if (backup->has_bitmap) {
        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
        if (!bmap) {
@@ -3555,10 +3552,10 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)

    options = qdict_new();
    if (arg->has_node_name) {
-        qdict_put(options, "node-name", qstring_from_str(arg->node_name));
+        qdict_put_str(options, "node-name", arg->node_name);
    }
    if (format) {
-        qdict_put(options, "driver", qstring_from_str(format));
+        qdict_put_str(options, "driver", format);
    }

    /* Mirroring takes care of copy-on-write using the source's backing
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -179,6 +179,15 @@ static void mux_chr_accept_input(Chardev *chr)
        be->chr_read(be->opaque,
                     &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
    }
+
+#if defined(TARGET_S390X)
+    /* We're still not able to sync producer and consumer, so let's wait a bit
+       and try again by then. */
+    if (d->prod[m] != d->cons[m]) {
+        qemu_mod_timer(d->accept_timer, qemu_get_clock_ns(vm_clock)
+                                        + (int64_t)100000);
+    }
+#endif
 }

 static int mux_chr_can_read(void *opaque)
@@ -308,6 +317,10 @@ static void qemu_chr_open_mux(Chardev *chr,
    }

    d->focus = -1;
+#if defined(TARGET_S390X)
+    d->accept_timer = qemu_new_timer_ns(vm_clock,
+                                     (QEMUTimerCB*)mux_chr_accept_input, chr);
+#endif
    /* only default to opened state if we've realized the initial
     * set of muxes
     */
--- a/chardev/char-mux.h
+++ b/chardev/char-mux.h
@@ -35,6 +35,9 @@ typedef struct MuxChardev {
    Chardev parent;
    CharBackend *backends[MAX_MUX];
    CharBackend chr;
+#if defined(TARGET_S390X)
+    QEMUTimer *accept_timer;
+#endif
    int focus;
    int mux_cnt;
    int term_got_escape;
--- a/15
+++ b/15
@@ -1600,7 +1600,7 @@ fi

 if test "$pie" = ""; then
  case "$cpu-$targetos" in
-    i386-Linux|x86_64-Linux|x32-Linux|i386-OpenBSD|x86_64-OpenBSD)
+    i386-Linux|x86_64-Linux|x32-Linux|ppc*-Linux|i386-OpenBSD|x86_64-OpenBSD)
      ;;
    *)
      pie="no"
@@ -1640,7 +1640,7 @@ EOF

  if compile_prog "-Werror -fno-pie" "-nopie"; then
    CFLAGS_NOPIE="-fno-pie"
-    LDFLAGS_NOPIE="-nopie"
+    LDFLAGS_NOPIE="-no-pie"
  fi
 fi

@@ -1947,13 +1947,10 @@ fi
 ##########################################
 # libseccomp check

+libseccomp_minver="2.2.0"
 if test "$seccomp" != "no" ; then
    case "$cpu" in
-    i386|x86_64)
-        libseccomp_minver="2.1.0"
-        ;;
-    mips)
-        libseccomp_minver="2.2.0"
+    i386|x86_64|mips)
        ;;
    arm|aarch64)
        libseccomp_minver="2.2.3"
@@ -1961,6 +1958,9 @@ if test "$seccomp" != "no" ; then
    ppc|ppc64)
        libseccomp_minver="2.3.0"
        ;;
+    s390|s390x)
+        libseccomp_minver="2.2.0"
+        ;;
    *)
        libseccomp_minver=""
        ;;
@@ -5843,7 +5843,6 @@ else
  echo "AUTOCONF_HOST := "                             >> $config_host_mak
 fi
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
-echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
 echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
 echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
 echo "LIBS+=$LIBS" >> $config_host_mak
--- a/cpus.c
+++ b/cpus.c
@@ -1999,6 +1999,18 @@ exit:
    fclose(f);
 }

+bool spec_ctrl_is_inconsistent(void)
+{
+#if defined(TARGET_I386)
+    X86CPU *x86_cpu = X86_CPU(current_cpu);
+    CPUX86State *env = x86_cpu != NULL ? &x86_cpu->env : NULL;
+    if (env && !(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_SPEC_CTRL) &&
+	    env->spec_ctrl)
+        return true;
+#endif
+    return false;
+}
+
 void qmp_inject_nmi(Error **errp)
 {
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
--- a/device_tree.c
+++ b/device_tree.c
@@ -91,7 +91,7 @@ void *load_device_tree(const char *filename_path, int *sizep)
    /* First allocate space in qemu for device tree */
    fdt = g_malloc0(dt_size);

-    dt_file_load_size = load_image(filename_path, fdt);
+    dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
    if (dt_file_load_size < 0) {
        error_report("Unable to open device tree file '%s'",
                     filename_path);
--- a/docs/qcow2-cache.txt
+++ b/docs/qcow2-cache.txt
@@ -79,14 +79,14 @@ Choosing the right cache sizes
 In order to choose the cache sizes we need to know how they relate to
 the amount of allocated space.

-The amount of virtual disk that can be mapped by the L2 and refcount
+The part of the virtual disk that can be mapped by the L2 and refcount
 caches (in bytes) is:

   disk_size = l2_cache_size * cluster_size / 8
   disk_size = refcount_cache_size * cluster_size * 8 / refcount_bits

 With the default values for cluster_size (64KB) and refcount_bits
-(16), that is
+(16), this becomes:

   disk_size = l2_cache_size * 8192
   disk_size = refcount_cache_size * 32768
@@ -97,12 +97,16 @@ need:
   l2_cache_size = disk_size_GB * 131072
   refcount_cache_size = disk_size_GB * 32768

-QEMU has a default L2 cache of 1MB (1048576 bytes) and a refcount
-cache of 256KB (262144 bytes), so using the formulas we've just seen
-we have
+For example, 1MB of L2 cache is needed to cover every 8 GB of the virtual
+image size (given that the default cluster size is used):

-   1048576 / 131072 = 8 GB of virtual disk covered by that cache
-    262144 /  32768 = 8 GB
+   8 GB / 8192 = 1 MB
+
+The refcount cache is 4 times the cluster size by default. With the default
+cluster size of 64 KB, it is 256 KB (262144 bytes). This is sufficient for
+8 GB of image size:
+
+   262144 * 32768 = 8 GB


 How to configure the cache sizes
@@ -116,31 +120,39 @@ There are three options available, and all of them take bytes:
 "refcount-cache-size":   maximum size of the refcount block cache
 "cache-size":            maximum size of both caches combined

-There are two things that need to be taken into account:
+There are a few things that need to be taken into account:

 - Both caches must have a size that is a multiple of the cluster
   size.

- - If you only set one of the options above, QEMU will automatically
-   adjust the others so that the L2 cache is 4 times bigger than the
-   refcount cache.
+ - The maximum L2 cache size is 32 MB by default on Linux platforms (enough
+   for full coverage of 256 GB images, with the default cluster size). This
+   value can be modified using the "l2-cache-size" option. QEMU will not use
+   more memory than needed to hold all of the image's L2 tables, regardless
+   of this max. value.
+   On non-Linux platforms the maximal value is smaller by default (8 MB) and
+   this difference stems from the fact that on Linux the cache can be cleared
+   periodically if needed, using the "cache-clean-interval" option (see below).
+   The minimal L2 cache size is 2 clusters (or 2 cache entries, see below).

-This means that these options are equivalent:
+ - The default (and minimum) refcount cache size is 4 clusters.

-   -drive file=hd.qcow2,l2-cache-size=2097152
-   -drive file=hd.qcow2,refcount-cache-size=524288
-   -drive file=hd.qcow2,cache-size=2621440
+ - If only "cache-size" is specified then QEMU will assign as much
+   memory as possible to the L2 cache before increasing the refcount
+   cache size.

-The reason for this 1/4 ratio is to ensure that both caches cover the
-same amount of disk space. Note however that this is only valid with
-the default value of refcount_bits (16). If you are using a different
-value you might want to calculate both cache sizes yourself since QEMU
-will always use the same 1/4 ratio.
+ - At most two of "l2-cache-size", "refcount-cache-size", and "cache-size"
+   can be set simultaneously.

-It's also worth mentioning that there's no strict need for both caches
-to cover the same amount of disk space. The refcount cache is used
-much less often than the L2 cache, so it's perfectly reasonable to
-keep it small.
+Unlike L2 tables, refcount blocks are not used during normal I/O but
+only during allocations and internal snapshots. In most cases they are
+accessed sequentially (even during random guest I/O) so increasing the
+refcount cache size won't have any measurable effect in performance
+(this can change if you are using internal snapshots, so you may want
+to think about increasing the cache size if you use them heavily).
+
+Now the refcount cache is as small as possible unless overridden by the
+user.


 Reducing the memory usage
@@ -156,8 +168,8 @@ This example removes all unused cache entries every 15 minutes:

   -drive file=hd.qcow2,cache-clean-interval=900

-If unset, the default value for this parameter is 0 and it disables
-this feature.
+If unset, the default value for this parameter is 600. Setting it to 0
+disables this feature.

 Note that this functionality currently relies on the MADV_DONTNEED
 argument for madvise() to actually free the memory. This is a
--- a/exec.c
+++ b/exec.c
@@ -223,6 +223,12 @@ struct CPUAddressSpace {
    MemoryListener tcg_as_listener;
 };

+struct DirtyBitmapSnapshot {
+    ram_addr_t start;
+    ram_addr_t end;
+    unsigned long dirty[];
+};
+
 #endif

 #if !defined(CONFIG_USER_ONLY)
@@ -1061,6 +1067,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
    return dirty;
 }

+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+     (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+    ram_addr_t last  = QEMU_ALIGN_UP(start + length, align);
+    DirtyBitmapSnapshot *snap;
+    unsigned long page, end, dest;
+
+    snap = g_malloc0(sizeof(*snap) +
+                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
+    snap->start = first;
+    snap->end   = last;
+
+    page = first >> TARGET_PAGE_BITS;
+    end  = last  >> TARGET_PAGE_BITS;
+    dest = 0;
+
+    rcu_read_lock();
+
+    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    while (page < end) {
+        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+        offset >>= BITS_PER_LEVEL;
+
+        bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                     blocks->blocks[idx] + offset,
+                                     num);
+        page += num;
+        dest += num >> BITS_PER_LEVEL;
+    }
+
+    rcu_read_unlock();
+
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+
+    return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length)
+{
+    unsigned long page, end;
+
+    assert(start >= snap->start);
+    assert(start + length <= snap->end);
+
+    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+    page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+    while (page < end) {
+        if (test_bit(page, snap->dirty)) {
+            return true;
+        }
+        page++;
+    }
+    return false;
+}
+
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
@@ -1362,11 +1437,13 @@ static void *file_ram_alloc(RAMBlock *block,
    int fd = -1;
    int64_t file_size;

+#ifndef TARGET_PPC
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        return NULL;
    }
+#endif

    for (;;) {
        fd = open(path, O_RDWR);
@@ -2010,10 +2087,10 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, 0, 0);
+            return xen_map_cache(addr, 0, 0, false);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
    }
    return ramblock_ptr(block, addr);
 }
@@ -2024,7 +2101,7 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
 * Called within RCU critical section.
 */
 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
-                                 hwaddr *size)
+                                 hwaddr *size, bool lock)
 {
    RAMBlock *block = ram_block;
    if (*size == 0) {
@@ -2043,10 +2120,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
         * In that case just map the requested area.
         */
        if (block->offset == 0) {
-            return xen_map_cache(addr, *size, 1);
+            return xen_map_cache(addr, *size, lock, lock);
        }

-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
    }

    return ramblock_ptr(block, addr);
@@ -2765,7 +2842,7 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
            }
        } else {
            /* RAM case */
-            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }
@@ -2856,7 +2933,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
            }
        } else {
            /* RAM case */
-            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
            memcpy(buf, ptr, l);
        }

@@ -3144,6 +3221,7 @@ void *address_space_map(AddressSpace *as,
    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
+            *plen = 0;
            return NULL;
        }
        /* Avoid unbounded allocations */
@@ -3167,7 +3245,7 @@ void *address_space_map(AddressSpace *as,

    memory_region_ref(mr);
    *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
-    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen);
+    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
    rcu_read_unlock();

    return ptr;
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -278,17 +278,27 @@ update_map_file:

 static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
 {
+    struct stat stbuf;
    int fd, ret;

    /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
-     * Unfortunately, the linux kernel doesn't implement it yet. As an
-     * alternative, let's open the file and use fchmod() instead. This
-     * may fail depending on the permissions of the file, but it is the
-     * best we can do to avoid TOCTTOU. We first try to open read-only
-     * in case name points to a directory. If that fails, we try write-only
-     * in case name doesn't point to a directory.
+     * Unfortunately, the linux kernel doesn't implement it yet.
     */
-    fd = openat_file(dirfd, name, O_RDONLY, 0);
+
+     /* First, we clear non-racing symlinks out of the way. */
+    if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW)) {
+        return -1;
+    }
+    if (S_ISLNK(stbuf.st_mode)) {
+        errno = ELOOP;
+        return -1;
+    }
+
+    /* Access modes are ignored when O_PATH is supported. We try O_RDONLY and
+     * O_WRONLY for old-systems that don't support O_PATH.
+     */
+    fd = openat_file(dirfd, name, O_RDONLY | O_PATH_9P_UTIL, 0);
+#if O_PATH_9P_UTIL == 0
    if (fd == -1) {
        /* In case the file is writable-only and isn't a directory. */
        if (errno == EACCES) {
@@ -302,6 +312,24 @@ static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
        return -1;
    }
    ret = fchmod(fd, mode);
+#else
+    if (fd == -1) {
+        return -1;
+    }
+
+    /* Now we handle racing symlinks. */
+    ret = fstat(fd, &stbuf);
+    if (!ret) {
+        if (S_ISLNK(stbuf.st_mode)) {
+            errno = ELOOP;
+            ret = -1;
+        } else {
+            char *proc_path = g_strdup_printf("/proc/self/fd/%d", fd);
+            ret = chmod(proc_path, mode);
+            g_free(proc_path);
+        }
+    }
+#endif
    close_preserve_errno(fd);
    return ret;
 }
@@ -452,6 +480,11 @@ static off_t local_telldir(FsContext *ctx, V9fsFidOpenState *fs)
    return telldir(fs->dir.stream);
 }

+static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name)
+{
+    return !strcmp(name, VIRTFS_META_DIR);
+}
+
 static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs)
 {
    struct dirent *entry;
@@ -465,8 +498,8 @@ again:
    if (ctx->export_flags & V9FS_SM_MAPPED) {
        entry->d_type = DT_UNKNOWN;
    } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        if (!strcmp(entry->d_name, VIRTFS_META_DIR)) {
-            /* skp the meta data directory */
+        if (local_is_mapped_file_metadata(ctx, entry->d_name)) {
+            /* skip the meta data directory */
            goto again;
        }
        entry->d_type = DT_UNKNOWN;
@@ -559,6 +592,12 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -605,6 +644,12 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -694,6 +739,12 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    /*
     * Mark all the open to not follow symlinks
     */
@@ -752,6 +803,12 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
    int err = -1;
    int dirfd;

+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
    if (dirfd == -1) {
        return -1;
@@ -826,6 +883,12 @@ static int local_link(FsContext *ctx, V9fsPath *oldpath,
    int ret = -1;
    int odirfd, ndirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    odirfd = local_opendir_nofollow(ctx, odirpath);
    if (odirfd == -1) {
        goto out;
@@ -957,6 +1020,14 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name,
    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
        int map_dirfd;

+        /* We need to remove the metadata as well:
+         * - the metadata directory if we're removing a directory
+         * - the metadata file in the parent's metadata directory
+         *
+         * If any of these are missing (ie, ENOENT) then we're probably
+         * trying to remove something that wasn't created in mapped-file
+         * mode. We just ignore the error.
+         */
        if (flags == AT_REMOVEDIR) {
            int fd;

@@ -964,32 +1035,20 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name,
            if (fd == -1) {
                goto err_out;
            }
-            /*
-             * If directory remove .virtfs_metadata contained in the
-             * directory
-             */
            ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR);
            close_preserve_errno(fd);
            if (ret < 0 && errno != ENOENT) {
-                /*
-                 * We didn't had the .virtfs_metadata file. May be file created
-                 * in non-mapped mode ?. Ignore ENOENT.
-                 */
                goto err_out;
            }
        }
-        /*
-         * Now remove the name from parent directory
-         * .virtfs_metadata directory.
-         */
        map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
-        ret = unlinkat(map_dirfd, name, 0);
-        close_preserve_errno(map_dirfd);
-        if (ret < 0 && errno != ENOENT) {
-            /*
-             * We didn't had the .virtfs_metadata file. May be file created
-             * in non-mapped mode ?. Ignore ENOENT.
-             */
+        if (map_dirfd != -1) {
+            ret = unlinkat(map_dirfd, name, 0);
+            close_preserve_errno(map_dirfd);
+            if (ret < 0 && errno != ENOENT) {
+                goto err_out;
+            }
+        } else if (errno != ENOENT) {
            goto err_out;
        }
    }
@@ -1013,7 +1072,7 @@ static int local_remove(FsContext *ctx, const char *path)
        goto out;
    }

-    if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) {
+    if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) {
        goto err_out;
    }

@@ -1096,6 +1155,12 @@ static int local_lremovexattr(FsContext *ctx, V9fsPath *fs_path,
 static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path,
                              const char *name, V9fsPath *target)
 {
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    if (dir_path) {
        v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
    } else if (strcmp(name, "/")) {
@@ -1116,6 +1181,13 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir,
    int ret;
    int odirfd, ndirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        (local_is_mapped_file_metadata(ctx, old_name) ||
+         local_is_mapped_file_metadata(ctx, new_name))) {
+        errno = EINVAL;
+        return -1;
+    }
+
    odirfd = local_opendir_nofollow(ctx, olddir->data);
    if (odirfd == -1) {
        return -1;
@@ -1206,6 +1278,12 @@ static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
    int ret;
    int dirfd;

+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
    dirfd = local_opendir_nofollow(ctx, dir->data);
    if (dirfd == -1) {
        return -1;
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -13,6 +13,12 @@
 #ifndef QEMU_9P_UTIL_H
 #define QEMU_9P_UTIL_H

+#ifdef O_PATH
+#define O_PATH_9P_UTIL O_PATH
+#else
+#define O_PATH_9P_UTIL 0
+#endif
+
 static inline void close_preserve_errno(int fd)
 {
    int serrno = errno;
@@ -22,13 +28,8 @@ static inline void close_preserve_errno(int fd)

 static inline int openat_dir(int dirfd, const char *name)
 {
-#ifdef O_PATH
-#define OPENAT_DIR_O_PATH O_PATH
-#else
-#define OPENAT_DIR_O_PATH 0
-#endif
    return openat(dirfd, name,
-                  O_DIRECTORY | O_RDONLY | O_NOFOLLOW | OPENAT_DIR_O_PATH);
+                  O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL);
 }

 static inline int openat_file(int dirfd, const char *name, int flags,
@@ -43,9 +44,14 @@ static inline int openat_file(int dirfd, const char *name, int flags,
    }

    serrno = errno;
-    /* O_NONBLOCK was only needed to open the file. Let's drop it. */
-    ret = fcntl(fd, F_SETFL, flags);
-    assert(!ret);
+    /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't
+     * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat()
+     * ignored it anyway.
+     */
+    if (!(flags & O_PATH_9P_UTIL)) {
+        ret = fcntl(fd, F_SETFL, flags);
+        assert(!ret);
+    }
    errno = serrno;
    return fd;
 }
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -503,9 +503,9 @@ static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 {
    int err;
    V9fsState *s = pdu->s;
-    V9fsFidState *fidp, head_fid;
+    V9fsFidState *fidp;

-    head_fid.next = s->fid_list;
+again:
    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
        if (fidp->path.size != path->size) {
            continue;
@@ -525,7 +525,7 @@ static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
             * switched to the worker thread
             */
            if (err == 0) {
-                fidp = &head_fid;
+                goto again;
            }
        }
    }
@@ -1372,7 +1372,9 @@ static void coroutine_fn v9fs_walk(void *opaque)
            err = -EINVAL;
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
@@ -2132,6 +2134,7 @@ static void coroutine_fn v9fs_create(void *opaque)
    V9fsString extension;
    int iounit;
    V9fsPDU *pdu = opaque;
+    V9fsState *s = pdu->s;

    v9fs_path_init(&path);
    v9fs_string_init(&name);
@@ -2172,7 +2175,9 @@ static void coroutine_fn v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
@@ -2188,7 +2193,9 @@ static void coroutine_fn v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_LINK) {
        int32_t ofid = atoi(extension.data);
        V9fsFidState *ofidp = get_fid(pdu, ofid);
@@ -2206,7 +2213,9 @@ static void coroutine_fn v9fs_create(void *opaque)
            fidp->fid_type = P9_FID_NONE;
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
        if (err < 0) {
            fidp->fid_type = P9_FID_NONE;
@@ -2244,7 +2253,9 @@ static void coroutine_fn v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFIFO | (perm & 0777), &stbuf);
@@ -2255,7 +2266,9 @@ static void coroutine_fn v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_SOCKET) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFSOCK | (perm & 0777), &stbuf);
@@ -2266,7 +2279,9 @@ static void coroutine_fn v9fs_create(void *opaque)
        if (err < 0) {
            goto out;
        }
+        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
+        v9fs_path_unlock(s);
    } else {
        err = v9fs_co_open2(pdu, fidp, &name, -1,
                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
@@ -2794,6 +2809,7 @@ static void coroutine_fn v9fs_wstat(void *opaque)
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
+    V9fsState *s = pdu->s;

    v9fs_stat_init(&v9stat);
    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
@@ -2859,7 +2875,9 @@ static void coroutine_fn v9fs_wstat(void *opaque)
        }
    }
    if (v9stat.name.size != 0) {
+        v9fs_path_write_lock(s);
        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
+        v9fs_path_unlock(s);
        if (err < 0) {
            goto out;
        }
@@ -3220,7 +3238,7 @@ static void coroutine_fn v9fs_xattrwalk(void *opaque)
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        if (size) {
-            xattr_fidp->fs.xattr.value = g_malloc(size);
+            xattr_fidp->fs.xattr.value = g_malloc0(size);
            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
                                     xattr_fidp->fs.xattr.value,
                                     xattr_fidp->fs.xattr.len);
@@ -3253,7 +3271,7 @@ static void coroutine_fn v9fs_xattrwalk(void *opaque)
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        if (size) {
-            xattr_fidp->fs.xattr.value = g_malloc(size);
+            xattr_fidp->fs.xattr.value = g_malloc0(size);
            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                    &name, xattr_fidp->fs.xattr.value,
                                    xattr_fidp->fs.xattr.len);
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -140,10 +140,10 @@ int coroutine_fn v9fs_co_open2(V9fsPDU *pdu, V9fsFidState *fidp,
    cred.fc_gid = gid;
    /*
     * Hold the directory fid lock so that directory path name
-     * don't change. Read lock is fine because this fid cannot
-     * be used by any other operation.
+     * don't change. Take the write lock to be sure this fid
+     * cannot be used by another operation.
     */
-    v9fs_path_read_lock(s);
+    v9fs_path_write_lock(s);
    v9fs_co_run_in_worker(
        {
            err = s->ops->open2(&s->ctx, &fidp->path,
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -462,7 +462,8 @@ static void acpi_pm_evt_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_evt_ops = {
    .read = acpi_pm_evt_read,
    .write = acpi_pm_evt_write,
-    .valid.min_access_size = 2,
+    .impl.min_access_size = 2,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 2,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
@@ -530,7 +531,8 @@ static void acpi_pm_tmr_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_tmr_ops = {
    .read = acpi_pm_tmr_read,
    .write = acpi_pm_tmr_write,
-    .valid.min_access_size = 4,
+    .impl.min_access_size = 4,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 4,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
@@ -602,7 +604,8 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val,
 static const MemoryRegionOps acpi_pm_cnt_ops = {
    .read = acpi_pm_cnt_read,
    .write = acpi_pm_cnt_write,
-    .valid.min_access_size = 2,
+    .impl.min_access_size = 2,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 2,
    .endianness = DEVICE_LITTLE_ENDIAN,
 };
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -49,6 +49,7 @@

 #define ACPI_PCIHP_ADDR 0xae00
 #define ACPI_PCIHP_SIZE 0x0014
+#define ACPI_PCIHP_LEGACY_SIZE 0x000f
 #define PCI_UP_BASE 0x0000
 #define PCI_DOWN_BASE 0x0004
 #define PCI_EJ_BASE 0x0008
@@ -301,6 +302,16 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
    s->root= root_bus;
    s->legacy_piix = !bridges_enabled;

+    if (s->legacy_piix) {
+        unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
+
+        s->io_len = ACPI_PCIHP_LEGACY_SIZE;
+
+        *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
+        object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
+                                       bus_bsel, NULL);
+    }
+
    memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                          "acpi-pci-hotplug", s->io_len);
    memory_region_add_subregion(address_space_io, s->io_base, &s->io);
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -311,7 +311,7 @@ static const VMStateDescription vmstate_cpuhp_state = {
 static const VMStateDescription vmstate_acpi = {
    .name = "piix4_pm",
    .version_id = 3,
-    .minimum_version_id = 3,
+    .minimum_version_id = 2, /* qemu-kvm */
    .minimum_version_id_old = 1,
    .load_state_old = acpi_load_old,
    .post_load = vmstate_acpi_post_load,
@@ -385,7 +385,10 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
                                dev, errp);
        }
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
-        acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp);
+        if (!xen_enabled()) {
+            acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
+                                      errp);
+        }
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
        if (s->cpu_hotplug_legacy) {
            legacy_acpi_cpu_plug_cb(hotplug_dev, &s->gpe_cpu, dev, errp);
@@ -408,8 +411,10 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev,
        acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug,
                                      dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
-        acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
-                                    errp);
+        if (!xen_enabled()) {
+            acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev,
+                                        errp);
+        }
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) &&
               !s->cpu_hotplug_legacy) {
        acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
@@ -700,7 +705,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
     * Reason: part of PIIX4 southbridge, needs to be wired up,
     * e.g. by mips_malta_init()
     */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
    dc->hotpluggable = false;
    hc->plug = piix4_device_plug_cb;
    hc->unplug_request = piix4_device_unplug_request_cb;
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -1076,7 +1076,7 @@ static void sl_nand_class_init(ObjectClass *klass, void *data)
    dc->vmsd = &vmstate_sl_nand_info;
    dc->props = sl_nand_properties;
    /* Reason: init() method uses drive_get() */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo sl_nand_info = {
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -609,6 +609,16 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
    } else if (type == 2) {
        create_v2m(vms, pic);
    }
+
+#ifdef CONFIG_KVM
+    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
+        if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ)) {
+            error_report("KVM with user space irqchip only works when the "
+                         "host kernel supports KVM_CAP_ARM_USER_IRQ");
+            exit(1);
+        }
+    }
+#endif
 }

 static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -788,6 +788,9 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    int csc_bytes = (csc + 1) << d->shift;
    int cnt = d->frame_cnt >> 16;
    int size = d->frame_cnt & 0xffff;
+    if (size < cnt) {
+        return;
+    }
    int left = ((size - cnt + 1) << 2) + d->leftover;
    int transferred = 0;
    int temp = audio_MIN (max, audio_MIN (left, csc_bytes));
@@ -796,7 +799,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    addr += (cnt << 2) + d->leftover;

    if (index == ADC_CHANNEL) {
-        while (temp) {
+        while (temp > 0) {
            int acquired, to_copy;

            to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf));
@@ -814,7 +817,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
    else {
        SWVoiceOut *voice = s->dac_voice[index];

-        while (temp) {
+        while (temp > 0) {
            int copied, to_copy;

            to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf));
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -292,7 +292,7 @@ static void mv88w8618_audio_class_init(ObjectClass *klass, void *data)
    dc->vmsd = &mv88w8618_audio_vmsd;
    dc->props = mv88w8618_audio_properties;
    /* Reason: pointer property "wm8750" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo mv88w8618_audio_info = {
--- a/hw/audio/pcspk.c
+++ b/hw/audio/pcspk.c
@@ -223,7 +223,7 @@ static void pcspk_class_initfn(ObjectClass *klass, void *data)
    dc->vmsd = &vmstate_spk;
    dc->props = pcspk_properties;
    /* Reason: realize sets global pcspk_state */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo pcspk_info = {
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -54,6 +54,12 @@
    } while (0)


+/* Anonymous BlockBackend for empty drive */
+static BlockBackend *blk_create_empty_drive(void)
+{
+    return blk_new(0, BLK_PERM_ALL);
+}
+
 /********************************************************/
 /* qdev floppy bus                                      */

@@ -547,8 +553,7 @@ static int floppy_drive_init(DeviceState *qdev)
    }

    if (!dev->conf.blk) {
-        /* Anonymous BlockBackend for an empty drive */
-        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
+        dev->conf.blk = blk_create_empty_drive();
        ret = blk_attach_dev(dev->conf.blk, qdev);
        assert(ret == 0);
    }
@@ -1358,7 +1363,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit)

 static FDrive *get_cur_drv(FDCtrl *fdctrl)
 {
-    return get_drv(fdctrl, fdctrl->cur_drv);
+    FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv);
+
+    if (!cur_drv->blk) {
+        /*
+         * Kludge: empty drive line selected. Create an anonymous
+         * BlockBackend to avoid NULL deref with various BlockBackend
+         * API calls within this model (CVE-2021-20196).
+         * Due to the controller QOM model limitations, we don't
+         * attach the created to the controller device.
+         */
+        cur_drv->blk = blk_create_empty_drive();
+    }
+    return cur_drv;
 }

 /* Status A register : 0x00 (read-only) */
@@ -1710,6 +1727,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
        int tmp;
        fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
        tmp = (fdctrl->fifo[6] - ks + 1);
+        if (tmp < 0) {
+            FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
+            fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+            fdctrl->fifo[3] = kt;
+            fdctrl->fifo[4] = kh;
+            fdctrl->fifo[5] = ks;
+            return;
+        }
        if (fdctrl->fifo[0] & 0x80)
            tmp += fdctrl->fifo[6];
        fdctrl->data_len *= tmp;
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -34,6 +34,24 @@

 static void nvme_process_sq(void *opaque);

+static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
+{
+    PCIDevice *pci_dev = &n->parent_obj;
+    hwaddr regs_hi, regs_lo, msix_hi, msix_lo;
+
+    /*
+     * The purpose of this check is to guard against invalid "local" access
+     * to the iomem (i.e. controller registers, MSIX-related space).
+     */
+    regs_lo = n->iomem.addr;
+    regs_hi = regs_lo + int128_get64(n->iomem.size);
+    msix_lo = pci_dev->msix_exclusive_bar.addr;
+    msix_hi = msix_lo + int128_get64(pci_dev->msix_exclusive_bar.size);
+
+    return (addr >= regs_lo && addr < regs_hi) ||
+           (addr >= msix_lo && addr < msix_hi);
+}
+
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
    return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
@@ -90,6 +108,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
        return NVME_INVALID_FIELD | NVME_DNR;
    }

+    if (nvme_addr_is_iomem(n, prp1)) {
+        return NVME_DATA_TRAS_ERROR;
+    }
    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
    qemu_sglist_add(qsg, prp1, trans_len);
    len -= trans_len;
@@ -126,6 +147,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
                }

                trans_len = MIN(len, n->page_size);
+                if (nvme_addr_is_iomem(n, prp_ent)) {
+                    return NVME_DATA_TRAS_ERROR;
+                }
                qemu_sglist_add(qsg, prp_ent, trans_len);
                len -= trans_len;
                i++;
@@ -134,6 +158,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
            if (prp2 & (n->page_size - 1)) {
                goto unmap;
            }
+            if (nvme_addr_is_iomem(n, prp2)) {
+                return NVME_DATA_TRAS_ERROR;
+            }
            qemu_sglist_add(qsg, prp2, len);
        }
    }
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -34,29 +34,9 @@

 /* ------------------------------------------------------------- */

-static int batch_maps   = 0;
-
-static int max_requests = 32;
-
-/* ------------------------------------------------------------- */
-
 #define BLOCK_SIZE  512
 #define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

-struct PersistentGrant {
-    void *page;
-    struct XenBlkDev *blkdev;
-};
-
-typedef struct PersistentGrant PersistentGrant;
-
-struct PersistentRegion {
-    void *addr;
-    int num;
-};
-
-typedef struct PersistentRegion PersistentRegion;
-
 struct ioreq {
    blkif_request_t     req;
    int16_t             status;
@@ -64,16 +44,9 @@ struct ioreq {
    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
+    void                *buf;
+    size_t              size;
    int                 presync;
-    uint8_t             mapped;
-
-    /* grant mapping */
-    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int                 prot;
-    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    void                *pages;
-    int                 num_unmap;

    /* aio status */
    int                 aio_inflight;
@@ -84,6 +57,8 @@ struct ioreq {
    BlockAcctCookie     acct;
 };

+#define MAX_RING_PAGE_ORDER 4
+
 struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
@@ -94,14 +69,14 @@ struct XenBlkDev {
    bool                directiosafe;
    const char          *fileproto;
    const char          *filename;
-    int                 ring_ref;
+    unsigned int        ring_ref[1 << MAX_RING_PAGE_ORDER];
+    unsigned int        nr_ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
-    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
@@ -110,17 +85,10 @@ struct XenBlkDev {
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;
+    unsigned int        max_requests;

-    /* Persistent grants extension */
+    gboolean            cache_unsafe;
    gboolean            feature_discard;
-    gboolean            feature_persistent;
-    GTree               *persistent_gnts;
-    GSList              *persistent_regions;
-    unsigned int        persistent_gnt_count;
-    unsigned int        max_grants;
-
-    /* Grant copy */
-    gboolean            feature_grant_copy;

    /* qemu block driver */
    DriveInfo           *dinfo;
@@ -135,14 +103,8 @@ static void ioreq_reset(struct ioreq *ioreq)
    memset(&ioreq->req, 0, sizeof(ioreq->req));
    ioreq->status = 0;
    ioreq->start = 0;
+    ioreq->size = 0;
    ioreq->presync = 0;
-    ioreq->mapped = 0;
-
-    memset(ioreq->domids, 0, sizeof(ioreq->domids));
-    memset(ioreq->refs, 0, sizeof(ioreq->refs));
-    ioreq->prot = 0;
-    memset(ioreq->page, 0, sizeof(ioreq->page));
-    ioreq->pages = NULL;

    ioreq->aio_inflight = 0;
    ioreq->aio_errors = 0;
@@ -154,59 +116,20 @@ static void ioreq_reset(struct ioreq *ioreq)
    qemu_iovec_reset(&ioreq->v);
 }

-static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
-{
-    uint ua = GPOINTER_TO_UINT(a);
-    uint ub = GPOINTER_TO_UINT(b);
-    return (ua > ub) - (ua < ub);
-}
-
-static void destroy_grant(gpointer pgnt)
-{
-    PersistentGrant *grant = pgnt;
-    xengnttab_handle *gnt = grant->blkdev->xendev.gnttabdev;
-
-    if (xengnttab_unmap(gnt, grant->page, 1) != 0) {
-        xen_pv_printf(&grant->blkdev->xendev, 0,
-                      "xengnttab_unmap failed: %s\n",
-                      strerror(errno));
-    }
-    grant->blkdev->persistent_gnt_count--;
-    xen_pv_printf(&grant->blkdev->xendev, 3,
-                  "unmapped grant %p\n", grant->page);
-    g_free(grant);
-}
-
-static void remove_persistent_region(gpointer data, gpointer dev)
-{
-    PersistentRegion *region = data;
-    struct XenBlkDev *blkdev = dev;
-    xengnttab_handle *gnt = blkdev->xendev.gnttabdev;
-
-    if (xengnttab_unmap(gnt, region->addr, region->num) != 0) {
-        xen_pv_printf(&blkdev->xendev, 0,
-                      "xengnttab_unmap region %p failed: %s\n",
-                      region->addr, strerror(errno));
-    }
-    xen_pv_printf(&blkdev->xendev, 3,
-                  "unmapped grant region %p with %d pages\n",
-                  region->addr, region->num);
-    g_free(region);
-}
-
 static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
 {
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
-        if (blkdev->requests_total >= max_requests) {
+        if (blkdev->requests_total >= blkdev->max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = g_malloc0(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
+        ioreq->buf = qemu_memalign(XC_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);
        blkdev->requests_total++;
-        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+        qemu_iovec_init(&ioreq->v, 1);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
@@ -251,7 +174,6 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
 static int ioreq_parse(struct ioreq *ioreq)
 {
    struct XenBlkDev *blkdev = ioreq->blkdev;
-    uintptr_t mem;
    size_t len;
    int i;

@@ -261,7 +183,6 @@ static int ioreq_parse(struct ioreq *ioreq)
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
-        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_FLUSH_DISKCACHE:
        ioreq->presync = 1;
@@ -270,7 +191,6 @@ static int ioreq_parse(struct ioreq *ioreq)
        }
        /* fall through */
    case BLKIF_OP_WRITE:
-        ioreq->prot = PROT_READ; /* from memory */
        break;
    case BLKIF_OP_DISCARD:
        return 0;
@@ -300,14 +220,10 @@ static int ioreq_parse(struct ioreq *ioreq)
            goto err;
        }

-        ioreq->domids[i] = blkdev->xendev.dom;
-        ioreq->refs[i]   = ioreq->req.seg[i].gref;
-
-        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
-        qemu_iovec_add(&ioreq->v, (void*)mem, len);
+        ioreq->size += len;
    }
-    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
+    if (ioreq->start + ioreq->size > blkdev->file_size) {
        xen_pv_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
@@ -318,243 +234,38 @@ err:
    return -1;
 }

-static void ioreq_unmap(struct ioreq *ioreq)
-{
-    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
-    int i;
-
-    if (ioreq->num_unmap == 0 || ioreq->mapped == 0) {
-        return;
-    }
-    if (batch_maps) {
-        if (!ioreq->pages) {
-            return;
-        }
-        if (xengnttab_unmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
-            xen_pv_printf(&ioreq->blkdev->xendev, 0,
-                          "xengnttab_unmap failed: %s\n",
-                          strerror(errno));
-        }
-        ioreq->blkdev->cnt_map -= ioreq->num_unmap;
-        ioreq->pages = NULL;
-    } else {
-        for (i = 0; i < ioreq->num_unmap; i++) {
-            if (!ioreq->page[i]) {
-                continue;
-            }
-            if (xengnttab_unmap(gnt, ioreq->page[i], 1) != 0) {
-                xen_pv_printf(&ioreq->blkdev->xendev, 0,
-                              "xengnttab_unmap failed: %s\n",
-                              strerror(errno));
-            }
-            ioreq->blkdev->cnt_map--;
-            ioreq->page[i] = NULL;
-        }
-    }
-    ioreq->mapped = 0;
-}
-
-static int ioreq_map(struct ioreq *ioreq)
-{
-    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
-    uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int i, j, new_maps = 0;
-    PersistentGrant *grant;
-    PersistentRegion *region;
-    /* domids and refs variables will contain the information necessary
-     * to map the grants that are needed to fulfill this request.
-     *
-     * After mapping the needed grants, the page array will contain the
-     * memory address of each granted page in the order specified in ioreq
-     * (disregarding if it's a persistent grant or not).
-     */
-
-    if (ioreq->v.niov == 0 || ioreq->mapped == 1) {
-        return 0;
-    }
-    if (ioreq->blkdev->feature_persistent) {
-        for (i = 0; i < ioreq->v.niov; i++) {
-            grant = g_tree_lookup(ioreq->blkdev->persistent_gnts,
-                                    GUINT_TO_POINTER(ioreq->refs[i]));
-
-            if (grant != NULL) {
-                page[i] = grant->page;
-                xen_pv_printf(&ioreq->blkdev->xendev, 3,
-                              "using persistent-grant %" PRIu32 "\n",
-                              ioreq->refs[i]);
-            } else {
-                    /* Add the grant to the list of grants that
-                     * should be mapped
-                     */
-                    domids[new_maps] = ioreq->domids[i];
-                    refs[new_maps] = ioreq->refs[i];
-                    page[i] = NULL;
-                    new_maps++;
-            }
-        }
-        /* Set the protection to RW, since grants may be reused later
-         * with a different protection than the one needed for this request
-         */
-        ioreq->prot = PROT_WRITE | PROT_READ;
-    } else {
-        /* All grants in the request should be mapped */
-        memcpy(refs, ioreq->refs, sizeof(refs));
-        memcpy(domids, ioreq->domids, sizeof(domids));
-        memset(page, 0, sizeof(page));
-        new_maps = ioreq->v.niov;
-    }
-
-    if (batch_maps && new_maps) {
-        ioreq->pages = xengnttab_map_grant_refs
-            (gnt, new_maps, domids, refs, ioreq->prot);
-        if (ioreq->pages == NULL) {
-            xen_pv_printf(&ioreq->blkdev->xendev, 0,
-                          "can't map %d grant refs (%s, %d maps)\n",
-                          new_maps, strerror(errno), ioreq->blkdev->cnt_map);
-            return -1;
-        }
-        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
-            if (page[i] == NULL) {
-                page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE;
-            }
-        }
-        ioreq->blkdev->cnt_map += new_maps;
-    } else if (new_maps)  {
-        for (i = 0; i < new_maps; i++) {
-            ioreq->page[i] = xengnttab_map_grant_ref
-                (gnt, domids[i], refs[i], ioreq->prot);
-            if (ioreq->page[i] == NULL) {
-                xen_pv_printf(&ioreq->blkdev->xendev, 0,
-                              "can't map grant ref %d (%s, %d maps)\n",
-                              refs[i], strerror(errno), ioreq->blkdev->cnt_map);
-                ioreq->mapped = 1;
-                ioreq_unmap(ioreq);
-                return -1;
-            }
-            ioreq->blkdev->cnt_map++;
-        }
-        for (i = 0, j = 0; i < ioreq->v.niov; i++) {
-            if (page[i] == NULL) {
-                page[i] = ioreq->page[j++];
-            }
-        }
-    }
-    if (ioreq->blkdev->feature_persistent && new_maps != 0 &&
-        (!batch_maps || (ioreq->blkdev->persistent_gnt_count + new_maps <=
-        ioreq->blkdev->max_grants))) {
-        /*
-         * If we are using persistent grants and batch mappings only
-         * add the new maps to the list of persistent grants if the whole
-         * area can be persistently mapped.
-         */
-        if (batch_maps) {
-            region = g_malloc0(sizeof(*region));
-            region->addr = ioreq->pages;
-            region->num = new_maps;
-            ioreq->blkdev->persistent_regions = g_slist_append(
-                                            ioreq->blkdev->persistent_regions,
-                                            region);
-        }
-        while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants)
-              && new_maps) {
-            /* Go through the list of newly mapped grants and add as many
-             * as possible to the list of persistently mapped grants.
-             *
-             * Since we start at the end of ioreq->page(s), we only need
-             * to decrease new_maps to prevent this granted pages from
-             * being unmapped in ioreq_unmap.
-             */
-            grant = g_malloc0(sizeof(*grant));
-            new_maps--;
-            if (batch_maps) {
-                grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE;
-            } else {
-                grant->page = ioreq->page[new_maps];
-            }
-            grant->blkdev = ioreq->blkdev;
-            xen_pv_printf(&ioreq->blkdev->xendev, 3,
-                          "adding grant %" PRIu32 " page: %p\n",
-                          refs[new_maps], grant->page);
-            g_tree_insert(ioreq->blkdev->persistent_gnts,
-                          GUINT_TO_POINTER(refs[new_maps]),
-                          grant);
-            ioreq->blkdev->persistent_gnt_count++;
-        }
-        assert(!batch_maps || new_maps == 0);
-    }
-    for (i = 0; i < ioreq->v.niov; i++) {
-        ioreq->v.iov[i].iov_base += (uintptr_t)page[i];
-    }
-    ioreq->mapped = 1;
-    ioreq->num_unmap = new_maps;
-    return 0;
-}
-
-#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480
-
-static void ioreq_free_copy_buffers(struct ioreq *ioreq)
-{
-    int i;
-
-    for (i = 0; i < ioreq->v.niov; i++) {
-        ioreq->page[i] = NULL;
-    }
-
-    qemu_vfree(ioreq->pages);
-}
-
-static int ioreq_init_copy_buffers(struct ioreq *ioreq)
-{
-    int i;
-
-    if (ioreq->v.niov == 0) {
-        return 0;
-    }
-
-    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
-
-    for (i = 0; i < ioreq->v.niov; i++) {
-        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
-        ioreq->v.iov[i].iov_base = ioreq->page[i];
-    }
-
-    return 0;
-}
-
 static int ioreq_grant_copy(struct ioreq *ioreq)
 {
-    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
-    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+    struct XenDevice *xendev = &blkdev->xendev;
+    XenGrantCopySegment segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int i, count, rc;
    int64_t file_blk = ioreq->blkdev->file_blk;
+    bool to_domain = (ioreq->req.operation == BLKIF_OP_READ);
+    void *virt = ioreq->buf;

-    if (ioreq->v.niov == 0) {
+    if (ioreq->req.nr_segments == 0) {
        return 0;
    }

-    count = ioreq->v.niov;
+    count = ioreq->req.nr_segments;

    for (i = 0; i < count; i++) {
-        if (ioreq->req.operation == BLKIF_OP_READ) {
-            segs[i].flags = GNTCOPY_dest_gref;
-            segs[i].dest.foreign.ref = ioreq->refs[i];
-            segs[i].dest.foreign.domid = ioreq->domids[i];
+        if (to_domain) {
+            segs[i].dest.foreign.ref = ioreq->req.seg[i].gref;
            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
-            segs[i].source.virt = ioreq->v.iov[i].iov_base;
+            segs[i].source.virt = virt;
        } else {
-            segs[i].flags = GNTCOPY_source_gref;
-            segs[i].source.foreign.ref = ioreq->refs[i];
-            segs[i].source.foreign.domid = ioreq->domids[i];
+            segs[i].source.foreign.ref = ioreq->req.seg[i].gref;
            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
-            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
+            segs[i].dest.virt = virt;
        }
        segs[i].len = (ioreq->req.seg[i].last_sect
                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
+        virt += segs[i].len;
    }

-    rc = xengnttab_grant_copy(gnt, count, segs);
+    rc = xen_be_copy_grant_refs(xendev, to_domain, segs, count);

    if (rc) {
        xen_pv_printf(&ioreq->blkdev->xendev, 0,
@@ -563,34 +274,8 @@ static int ioreq_grant_copy(struct ioreq *ioreq)
        return -1;
    }

-    for (i = 0; i < count; i++) {
-        if (segs[i].status != GNTST_okay) {
-            xen_pv_printf(&ioreq->blkdev->xendev, 3,
-                          "failed to copy data %d for gref %d, domid %d\n",
-                          segs[i].status, ioreq->refs[i], ioreq->domids[i]);
-            ioreq->aio_errors++;
-            rc = -1;
-        }
-    }
-
    return rc;
 }
-#else
-static void ioreq_free_copy_buffers(struct ioreq *ioreq)
-{
-    abort();
-}
-
-static int ioreq_init_copy_buffers(struct ioreq *ioreq)
-{
-    abort();
-}
-
-static int ioreq_grant_copy(struct ioreq *ioreq)
-{
-    abort();
-}
-#endif

 static int ioreq_runio_qemu_aio(struct ioreq *ioreq);

@@ -614,32 +299,26 @@ static void qemu_aio_complete(void *opaque, int ret)
        return;
    }

-    if (ioreq->blkdev->feature_grant_copy) {
-        switch (ioreq->req.operation) {
-        case BLKIF_OP_READ:
-            /* in case of failure ioreq->aio_errors is increased */
-            if (ret == 0) {
-                ioreq_grant_copy(ioreq);
-            }
-            ioreq_free_copy_buffers(ioreq);
-            break;
-        case BLKIF_OP_WRITE:
-        case BLKIF_OP_FLUSH_DISKCACHE:
-            if (!ioreq->req.nr_segments) {
-                break;
-            }
-            ioreq_free_copy_buffers(ioreq);
-            break;
-        default:
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_READ:
+        /* in case of failure ioreq->aio_errors is increased */
+        if (ret == 0) {
+            ioreq_grant_copy(ioreq);
+        }
+        break;
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_FLUSH_DISKCACHE:
+        if (!ioreq->req.nr_segments) {
            break;
        }
+        break;
+    default:
+        break;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-    if (!ioreq->blkdev->feature_grant_copy) {
-        ioreq_unmap(ioreq);
-    }
    ioreq_finish(ioreq);
+
    switch (ioreq->req.operation) {
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
@@ -696,18 +375,11 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
    struct XenBlkDev *blkdev = ioreq->blkdev;

-    if (ioreq->blkdev->feature_grant_copy) {
-        ioreq_init_copy_buffers(ioreq);
-        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
-            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
-            ioreq_grant_copy(ioreq)) {
-                ioreq_free_copy_buffers(ioreq);
-                goto err;
-        }
-    } else {
-        if (ioreq->req.nr_segments && ioreq_map(ioreq)) {
-            goto err;
-        }
+    if (ioreq->req.nr_segments &&
+        (ioreq->req.operation == BLKIF_OP_WRITE ||
+         ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+        ioreq_grant_copy(ioreq)) {
+        goto err;
    }

    ioreq->aio_inflight++;
@@ -718,6 +390,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
+        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size, BLOCK_ACCT_READ);
        ioreq->aio_inflight++;
@@ -730,6 +403,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
            break;
        }

+        qemu_iovec_add(&ioreq->v, ioreq->buf, ioreq->size);
        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
                         ioreq->v.size,
                         ioreq->req.operation == BLKIF_OP_WRITE ?
@@ -748,9 +422,6 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
-        if (!ioreq->blkdev->feature_grant_copy) {
-            ioreq_unmap(ioreq);
-        }
        goto err;
    }

@@ -769,31 +440,30 @@ static int blk_send_response_one(struct ioreq *ioreq)
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
-    blkif_response_t  resp;
-    void              *dst;
-
-    resp.id        = ioreq->req.id;
-    resp.operation = ioreq->req.operation;
-    resp.status    = ioreq->status;
+    blkif_response_t  *resp;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
-        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.native,
+                                 blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
-        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
-                                blkdev->rings.x86_32_part.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
+                                 blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
-        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
-                                blkdev->rings.x86_64_part.rsp_prod_pvt);
+        resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
+                                 blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
-        dst = NULL;
        return 0;
    }
-    memcpy(dst, &resp, sizeof(resp));
+
+    resp->id        = ioreq->req.id;
+    resp->operation = ioreq->req.operation;
+    resp->status    = ioreq->status;
+
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
@@ -905,7 +575,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
        ioreq_runio_qemu_aio(ioreq);
    }

-    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
+    if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
 }
@@ -918,15 +588,6 @@ static void blk_bh(void *opaque)
    blk_handle_requests(blkdev);
 }

-/*
- * We need to account for the grant allocations requiring contiguous
- * chunks; the worst case number would be
- *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
- * but in order to keep things simple just use
- *     2 * max_req * max_seg.
- */
-#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))
-
 static void blk_alloc(struct XenDevice *xendev)
 {
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
@@ -935,14 +596,6 @@ static void blk_alloc(struct XenDevice *xendev)
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
-    if (xen_mode != XEN_EMULATE) {
-        batch_maps = 1;
-    }
-    if (xengnttab_set_max_grants(xendev->gnttabdev,
-            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
-        xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
-                      strerror(errno));
-    }
 }

 static void blk_parse_discard(struct XenBlkDev *blkdev)
@@ -960,6 +613,16 @@ static void blk_parse_discard(struct XenBlkDev *blkdev)
    }
 }

+static void blk_parse_cache_unsafe(struct XenBlkDev *blkdev)
+{
+    int enable;
+
+    blkdev->cache_unsafe = false;
+
+    if (xenstore_read_be_int(&blkdev->xendev, "suse-diskcache-disable-flush", &enable) == 0)
+	    blkdev->cache_unsafe = !!enable;
+}
+
 static int blk_init(struct XenDevice *xendev)
 {
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
@@ -1027,10 +690,13 @@ static int blk_init(struct XenDevice *xendev)
     * blk_connect supplies sector-size and sectors
     */
    xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
-    xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
    xenstore_write_be_int(&blkdev->xendev, "info", info);

+    xenstore_write_be_int(&blkdev->xendev, "max-ring-page-order",
+                          MAX_RING_PAGE_ORDER);
+
    blk_parse_discard(blkdev);
+    blk_parse_cache_unsafe(blkdev);

    g_free(directiosafe);
    return 0;
@@ -1054,9 +720,13 @@ out_error:
 static int blk_connect(struct XenDevice *xendev)
 {
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
-    int pers, index, qflags;
+    int index, qflags;
    bool readonly = true;
    bool writethrough = true;
+    Error *errp = NULL;
+    int order, ring_ref;
+    unsigned int ring_size, max_grants;
+    unsigned int i;

    /* read-only ? */
    if (blkdev->directiosafe) {
@@ -1073,6 +743,9 @@ static int blk_connect(struct XenDevice *xendev)
        qflags |= BDRV_O_UNMAP;
    }

+    if (blkdev->cache_unsafe)
+        qflags |= BDRV_O_NO_FLUSH;
+
    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
@@ -1082,7 +755,7 @@ static int blk_connect(struct XenDevice *xendev)

        if (strcmp(blkdev->fileproto, "<unset>")) {
            options = qdict_new();
-            qdict_put(options, "driver", qstring_from_str(blkdev->fileproto));
+            qdict_put_str(options, "driver", blkdev->fileproto);
        }

        /* setup via xenbus -> create new block driver instance */
@@ -1111,6 +784,13 @@ static int blk_connect(struct XenDevice *xendev)
        blk_ref(blkdev->blk);
    }
    blk_attach_dev_legacy(blkdev->blk, blkdev);
+    if (!monitor_add_blk(blkdev->blk, g_strdup(blkdev->dev), &errp)) {
+        xen_pv_printf(&blkdev->xendev, 0, "error: %s\n",
+                      error_get_pretty(errp));
+        error_free(errp);
+        return -1;
+    }
+
    blkdev->file_size = blk_getlength(blkdev->blk);
    if (blkdev->file_size < 0) {
        BlockDriverState *bs = blk_bs(blkdev->blk);
@@ -1131,18 +811,46 @@ static int blk_connect(struct XenDevice *xendev)
    xenstore_write_be_int64(&blkdev->xendev, "sectors",
                            blkdev->file_size / blkdev->file_blk);

-    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
+    if (xenstore_read_fe_int(&blkdev->xendev, "ring-page-order",
+                             &order) == -1) {
+        blkdev->nr_ring_ref = 1;
+
+        if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref",
+                                 &ring_ref) == -1) {
+            return -1;
+        }
+        blkdev->ring_ref[0] = ring_ref;
+
+    } else if (order >= 0 && order <= MAX_RING_PAGE_ORDER) {
+        blkdev->nr_ring_ref = 1 << order;
+
+        for (i = 0; i < blkdev->nr_ring_ref; i++) {
+            char *key;
+
+            key = g_strdup_printf("ring-ref%u", i);
+            if (!key) {
+                return -1;
+            }
+
+            if (xenstore_read_fe_int(&blkdev->xendev, key,
+                                     &ring_ref) == -1) {
+                g_free(key);
+                return -1;
+            }
+            blkdev->ring_ref[i] = ring_ref;
+
+            g_free(key);
+        }
+    } else {
+        xen_pv_printf(xendev, 0, "invalid ring-page-order: %d\n",
+                      order);
        return -1;
    }
+
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }
-    if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) {
-        blkdev->feature_persistent = FALSE;
-    } else {
-        blkdev->feature_persistent = !!pers;
-    }

    if (!blkdev->xendev.protocol) {
        blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
@@ -1156,61 +864,67 @@ static int blk_connect(struct XenDevice *xendev)
        blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    }

-    blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev,
-                                            blkdev->xendev.dom,
-                                            blkdev->ring_ref,
-                                            PROT_READ | PROT_WRITE);
+    ring_size = XC_PAGE_SIZE * blkdev->nr_ring_ref;
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+    {
+        blkdev->max_requests = __CONST_RING_SIZE(blkif, ring_size);
+        break;
+    }
+    case BLKIF_PROTOCOL_X86_32:
+    {
+        blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size);
+        break;
+    }
+    case BLKIF_PROTOCOL_X86_64:
+    {
+        blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size);
+        break;
+    }
+    default:
+        return -1;
+    }
+
+    /* Add on the number needed for the ring pages */
+    max_grants = blkdev->nr_ring_ref;
+
+    xen_be_set_max_grant_refs(xendev, max_grants);
+
+    blkdev->sring = xen_be_map_grant_refs(xendev, blkdev->ring_ref,
+                                          blkdev->nr_ring_ref,
+                                          PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
-    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
-        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
+        BACK_RING_INIT(&blkdev->rings.native, sring_native, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

-        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
+        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

-        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
+        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, ring_size);
        break;
    }
    }

-    if (blkdev->feature_persistent) {
-        /* Init persistent grants */
-        blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
-                                             NULL, NULL,
-                                             batch_maps ?
-                                             (GDestroyNotify)g_free :
-                                             (GDestroyNotify)destroy_grant);
-        blkdev->persistent_regions = NULL;
-        blkdev->persistent_gnt_count = 0;
-    }
-
    xen_be_bind_evtchn(&blkdev->xendev);

-    blkdev->feature_grant_copy =
-                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
-
-    xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
-                  blkdev->feature_grant_copy ? "enabled" : "disabled");
-
-    xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
+    xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, nr-ring-ref %u, "
                  "remote port %d, local port %d\n",
-                  blkdev->xendev.protocol, blkdev->ring_ref,
+                  blkdev->xendev.protocol, blkdev->nr_ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
 }
@@ -1221,36 +935,17 @@ static void blk_disconnect(struct XenDevice *xendev)

    if (blkdev->blk) {
        blk_detach_dev(blkdev->blk, blkdev);
+        monitor_remove_blk(blkdev->blk);
        blk_unref(blkdev->blk);
        blkdev->blk = NULL;
    }
    xen_pv_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
-        xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
-        blkdev->cnt_map--;
+        xen_be_unmap_grant_refs(xendev, blkdev->sring,
+                                blkdev->nr_ring_ref);
        blkdev->sring = NULL;
    }
-
-    /*
-     * Unmap persistent grants before switching to the closed state
-     * so the frontend can free them.
-     *
-     * In the !batch_maps case g_tree_destroy will take care of unmapping
-     * the grant, but in the batch_maps case we need to iterate over every
-     * region in persistent_regions and unmap it.
-     */
-    if (blkdev->feature_persistent) {
-        g_tree_destroy(blkdev->persistent_gnts);
-        assert(batch_maps || blkdev->persistent_gnt_count == 0);
-        if (batch_maps) {
-            blkdev->persistent_gnt_count = 0;
-            g_slist_foreach(blkdev->persistent_regions,
-                            (GFunc)remove_persistent_region, blkdev);
-            g_slist_free(blkdev->persistent_regions);
-        }
-        blkdev->feature_persistent = false;
-    }
 }

 static int blk_free(struct XenDevice *xendev)
@@ -1258,14 +953,13 @@ static int blk_free(struct XenDevice *xendev)
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

-    if (blkdev->blk || blkdev->sring) {
-        blk_disconnect(xendev);
-    }
+    blk_disconnect(xendev);

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
+        qemu_vfree(ioreq->buf);
        g_free(ioreq);
    }

@@ -1285,12 +979,22 @@ static void blk_event(struct XenDevice *xendev)
    qemu_bh_schedule(blkdev->bh);
 }

+extern void xen_blk_resize_update(void *dev);
+void xen_blk_resize_update(void *dev)
+{
+    struct XenBlkDev *blkdev = dev;
+    blkdev->file_size = blk_getlength(blkdev->blk);
+    xenstore_write_be_int64(&blkdev->xendev, "sectors",
+                            blkdev->file_size / blkdev->file_blk);
+    xen_be_set_state(&blkdev->xendev, blkdev->xendev.be_state);
+}
+
 struct XenDevOps xen_blkdev_ops = {
-    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
+    .size       = sizeof(struct XenBlkDev),
    .alloc      = blk_alloc,
    .init       = blk_init,
-    .initialise    = blk_connect,
+    .initialise = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
--- a/hw/char/bcm2835_aux.c
+++ b/hw/char/bcm2835_aux.c
@@ -244,7 +244,9 @@ static const MemoryRegionOps bcm2835_aux_ops = {
    .read = bcm2835_aux_read,
    .write = bcm2835_aux_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
-    .valid.min_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 1,
    .valid.max_access_size = 4,
 };

--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -261,15 +261,20 @@ static void serial_xmit(SerialState *s)
        if (s->mcr & UART_MCR_LOOP) {
            /* in loopback mode, say that we just received a char */
            serial_receive1(s, &s->tsr, 1);
-        } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 &&
-                   s->tsr_retry < MAX_XMIT_RETRY) {
-            assert(s->watch_tag == 0);
-            s->watch_tag =
-                qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
-                                      serial_watch_cb, s);
-            if (s->watch_tag > 0) {
-                s->tsr_retry++;
-                return;
+        } else {
+            int rc = qemu_chr_fe_write(&s->chr, &s->tsr, 1);
+
+            if ((rc == 0 ||
+                 (rc == -1 && errno == EAGAIN)) &&
+                s->tsr_retry < MAX_XMIT_RETRY) {
+                assert(s->watch_tag == 0);
+                s->watch_tag =
+                    qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
+                                          serial_watch_cb, s);
+                if (s->watch_tag > 0) {
+                    s->tsr_retry++;
+                    return;
+                }
            }
        }
        s->tsr_retry = 0;
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -1121,6 +1121,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
        timer_free(vser->post_load->timer);
        g_free(vser->post_load);
    }
+
+    qbus_set_hotplug_handler(BUS(&vser->bus), NULL, errp);
+
    virtio_cleanup(vdev);
 }

--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -234,12 +234,11 @@ static int con_initialise(struct XenDevice *xendev)
    if (!xendev->dev) {
        xen_pfn_t mfn = con->ring_ref;
        con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
-                                          PROT_READ|PROT_WRITE,
+                                          PROT_READ | PROT_WRITE,
                                          1, &mfn, NULL);
    } else {
-        con->sring = xengnttab_map_grant_ref(xendev->gnttabdev, con->xendev.dom,
-                                             con->ring_ref,
-                                             PROT_READ|PROT_WRITE);
+        con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
+                                          PROT_READ | PROT_WRITE);
    }
    if (!con->sring)
 	return -1;
@@ -268,7 +267,7 @@ static void con_disconnect(struct XenDevice *xendev)
        if (!xendev->dev) {
            xenforeignmemory_unmap(xen_fmem, con->sring, 1);
        } else {
-            xengnttab_unmap(xendev->gnttabdev, con->sring, 1);
+            xen_be_unmap_grant_ref(xendev, con->sring);
        }
        con->sring = NULL;
    }
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -55,6 +55,7 @@
 #include "exec/address-spaces.h"
 #include "hw/boards.h"
 #include "qemu/cutils.h"
+#include "hw/xen/xen.h"

 #include <zlib.h>

@@ -866,7 +867,10 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro)
    void *data;

    rom->mr = g_malloc(sizeof(*rom->mr));
-    memory_region_init_resizeable_ram(rom->mr, owner, name,
+    if (xen_enabled())
+        memory_region_init_ram(rom->mr, owner, name, rom->datasize, &error_fatal);
+    else
+        memory_region_init_resizeable_ram(rom->mr, owner, name,
                                      rom->datasize, rom->romsize,
                                      fw_cfg_resized,
                                      &error_fatal);
@@ -1184,7 +1188,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size)
        if (rom->addr + rom->romsize < addr) {
            continue;
        }
-        if (rom->addr > end) {
+        if (rom->addr > end || rom->addr < addr) {
            break;
        }

--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c
@@ -91,7 +91,7 @@ static void or_irq_class_init(ObjectClass *klass, void *data)
    dc->vmsd = &vmstate_or_irq;

    /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo or_irq_type_info = {
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -1159,6 +1159,7 @@ static void device_class_init(ObjectClass *class, void *data)
     * should override it in their class_init()
     */
    dc->hotpluggable = true;
+    dc->user_creatable = true;
 }

 void device_reset(DeviceState *dev)
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -288,7 +288,7 @@ static void register_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    /* Reason: needs to be wired up to work */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo register_info = {
--- a/hw/cpu/core.c
+++ b/hw/cpu/core.c
@@ -33,6 +33,11 @@ static void core_prop_set_core_id(Object *obj, Visitor *v, const char *name,
        return;
    }

+    if (value < 0) {
+        error_setg(errp, "Invalid core id %"PRId64, value);
+        return;
+    }
+
    core->core_id = value;
 }

--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2038,15 +2038,14 @@ static void cirrus_mem_writeb_mode4and5_8bpp(CirrusVGAState * s,
    unsigned val = mem_value;
    uint8_t *dst;

-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
    for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + x) & s->cirrus_addr_mask);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	} else if (mode == 5) {
 	    *dst = s->cirrus_shadow_gr0;
 	}
 	val <<= 1;
-	dst++;
    }
    memory_region_set_dirty(&s->vga.vram, offset, 8);
 }
@@ -2060,8 +2059,8 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
    unsigned val = mem_value;
    uint8_t *dst;

-    dst = s->vga.vram_ptr + (offset &= s->cirrus_addr_mask);
    for (x = 0; x < 8; x++) {
+        dst = s->vga.vram_ptr + ((offset + 2 * x) & s->cirrus_addr_mask & ~1);
 	if (val & 0x80) {
 	    *dst = s->cirrus_shadow_gr1;
 	    *(dst + 1) = s->vga.gr[0x11];
@@ -2070,7 +2069,6 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
 	    *(dst + 1) = s->vga.gr[0x10];
 	}
 	val <<= 1;
-	dst += 2;
    }
    memory_region_set_dirty(&s->vga.vram, offset, 16);
 }
--- a/hw/display/jazz_led.c
+++ b/hw/display/jazz_led.c
@@ -227,13 +227,13 @@ static void jazz_led_invalidate_display(void *opaque)
 static void jazz_led_text_update(void *opaque, console_ch_t *chardata)
 {
    LedState *s = opaque;
-    char buf[2];
+    char buf[3];

    dpy_text_cursor(s->con, -1, -1);
    qemu_console_resize(s->con, 2, 1);

    /* TODO: draw the segments */
-    snprintf(buf, 2, "%02hhx\n", s->segments);
+    snprintf(buf, 3, "%02hhx", s->segments);
    console_write_ch(chardata++, ATTR2CHTYPE(buf[0], QEMU_COLOR_BLUE,
                                             QEMU_COLOR_BLACK, 1));
    console_write_ch(chardata++, ATTR2CHTYPE(buf[1], QEMU_COLOR_BLUE,
--- a/hw/display/qxl-logger.c
+++ b/hw/display/qxl-logger.c
@@ -106,7 +106,7 @@ static int qxl_log_image(PCIQXLDevice *qxl, QXLPHYSICAL addr, int group_id)
    QXLImage *image;
    QXLImageDescriptor *desc;

-    image = qxl_phys2virt(qxl, addr, group_id);
+    image = qxl_phys2virt(qxl, addr, group_id, sizeof(QXLImage));
    if (!image) {
        return 1;
    }
@@ -216,7 +216,8 @@ int qxl_log_cmd_cursor(PCIQXLDevice *qxl, QXLCursorCmd *cmd, int group_id)
                cmd->u.set.position.y,
                cmd->u.set.visible ? "yes" : "no",
                cmd->u.set.shape);
-        cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id);
+        cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id,
+                               sizeof(QXLCursor));
        if (!cursor) {
            return 1;
        }
@@ -238,6 +239,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
 {
    bool compat = ext->flags & QXL_COMMAND_FLAG_COMPAT;
    void *data;
+    size_t datasz;
    int ret;

    if (!qxl->cmdlog) {
@@ -249,7 +251,20 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
            qxl_name(qxl_type, ext->cmd.type),
            compat ? "(compat)" : "");

-    data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+    switch (ext->cmd.type) {
+    case QXL_CMD_DRAW:
+        datasz = compat ? sizeof(QXLCompatDrawable) : sizeof(QXLDrawable);
+        break;
+    case QXL_CMD_SURFACE:
+        datasz = sizeof(QXLSurfaceCmd);
+        break;
+    case QXL_CMD_CURSOR:
+        datasz = sizeof(QXLCursorCmd);
+        break;
+    default:
+        goto out;
+    }
+    data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, datasz);
    if (!data) {
        return 1;
    }
@@ -271,6 +286,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
        qxl_log_cmd_cursor(qxl, data, ext->group_id);
        break;
    }
+out:
    fprintf(stderr, "\n");
    return 0;
 }
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -98,21 +98,25 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
 {
    VGACommonState *vga = &qxl->vga;
    DisplaySurface *surface;
+    int width = qxl->guest_head0_width ?: qxl->guest_primary.surface.width;
+    int height = qxl->guest_head0_height ?: qxl->guest_primary.surface.height;
    int i;

    if (qxl->guest_primary.resized) {
        qxl->guest_primary.resized = 0;
        qxl->guest_primary.data = qxl_phys2virt(qxl,
                                                qxl->guest_primary.surface.mem,
-                                                MEMSLOT_GROUP_GUEST);
+                                                MEMSLOT_GROUP_GUEST,
+                                                qxl->guest_primary.abs_stride
+                                                * height);
        if (!qxl->guest_primary.data) {
            return;
        }
        qxl_set_rect_to_surface(qxl, &qxl->dirty[0]);
        qxl->num_dirty_rects = 1;
        trace_qxl_render_guest_primary_resized(
-               qxl->guest_primary.surface.width,
-               qxl->guest_primary.surface.height,
+               width,
+               height,
               qxl->guest_primary.qxl_stride,
               qxl->guest_primary.bytes_pp,
               qxl->guest_primary.bits_pp);
@@ -120,15 +124,15 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
            pixman_format_code_t format =
                qemu_default_pixman_format(qxl->guest_primary.bits_pp, true);
            surface = qemu_create_displaysurface_from
-                (qxl->guest_primary.surface.width,
-                 qxl->guest_primary.surface.height,
+                (width,
+                 height,
                 format,
                 qxl->guest_primary.abs_stride,
                 qxl->guest_primary.data);
        } else {
            surface = qemu_create_displaysurface
-                (qxl->guest_primary.surface.width,
-                 qxl->guest_primary.surface.height);
+                (width,
+                 height);
        }
        dpy_gfx_replace_surface(vga->con, surface);
    }
@@ -144,8 +148,8 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
            qxl->dirty[i].top < 0 ||
            qxl->dirty[i].left > qxl->dirty[i].right ||
            qxl->dirty[i].top > qxl->dirty[i].bottom ||
-            qxl->dirty[i].right > qxl->guest_primary.surface.width ||
-            qxl->dirty[i].bottom > qxl->guest_primary.surface.height) {
+            qxl->dirty[i].right > width ||
+            qxl->dirty[i].bottom > height) {
            continue;
        }
        qxl_blit(qxl, qxl->dirty+i);
@@ -211,11 +215,18 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor)
    size_t size;

    c = cursor_alloc(cursor->header.width, cursor->header.height);
+
+    if (!c) {
+        qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__,
+                cursor->header.width, cursor->header.height);
+        goto fail;
+    }
+
    c->hot_x = cursor->header.hot_spot_x;
    c->hot_y = cursor->header.hot_spot_y;
    switch (cursor->header.type) {
    case SPICE_CURSOR_TYPE_ALPHA:
-        size = sizeof(uint32_t) * cursor->header.width * cursor->header.height;
+        size = sizeof(uint32_t) * c->width * c->height;
        memcpy(c->data, cursor->chunk.data, size);
        if (qxl->debug > 2) {
            cursor_print_ascii_art(c, "qxl/alpha");
@@ -245,7 +256,8 @@ fail:
 /* called from spice server thread context only */
 int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext)
 {
-    QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+    QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+                                      sizeof(QXLCursorCmd));
    QXLCursor *cursor;
    QEMUCursor *c;

@@ -264,7 +276,15 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext)
    }
    switch (cmd->type) {
    case QXL_CURSOR_SET:
-        cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id);
+        /* First read the QXLCursor to get QXLDataChunk::data_size ... */
+        cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id,
+                               sizeof(QXLCursor));
+        if (!cursor) {
+            return 1;
+        }
+        /* Then read including the chunked data following QXLCursor. */
+        cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id,
+                               sizeof(QXLCursor) + cursor->chunk.data_size);
        if (!cursor) {
            return 1;
        }
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -257,6 +257,8 @@ static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl, qxl_async_io async)

 static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay)
 {
+    QXLMonitorsConfig *cfg;
+
    trace_qxl_spice_monitors_config(qxl->id);
    if (replay) {
        /*
@@ -284,6 +286,17 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay)
                (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_IO,
                                          QXL_IO_MONITORS_CONFIG_ASYNC));
    }
+
+    cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST,
+                        sizeof(QXLMonitorsConfig));
+    if (cfg->count == 1) {
+        qxl->guest_primary.resized = 1;
+        qxl->guest_head0_width  = cfg->heads[0].width;
+        qxl->guest_head0_height = cfg->heads[0].height;
+    } else {
+        qxl->guest_head0_width  = 0;
+        qxl->guest_head0_height = 0;
+    }
 }

 void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
@@ -434,7 +447,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
    switch (le32_to_cpu(ext->cmd.type)) {
    case QXL_CMD_SURFACE:
    {
-        QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+        QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+                                           sizeof(QXLSurfaceCmd));

        if (!cmd) {
            return 1;
@@ -468,7 +482,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
    }
    case QXL_CMD_CURSOR:
    {
-        QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+        QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+                                          sizeof(QXLCursorCmd));

        if (!cmd) {
            return 1;
@@ -649,7 +664,8 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext)
             *
             * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa
             */
-            void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+            void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+                                      sizeof(QXLSurfaceCmd));
            if (msg != NULL && (
                    msg < (void *)qxl->vga.vram_ptr ||
                    msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) {
@@ -741,6 +757,9 @@ static void interface_release_resource(QXLInstance *sin,
    QXLReleaseRing *ring;
    uint64_t *item, id;

+    if (!ext.info) {
+        return;
+    }
    if (ext.group_id == MEMSLOT_GROUP_HOST) {
        /* host group -> vga mode update request */
        QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id);
@@ -1377,6 +1396,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
        qxl_set_guest_bug(d, "%s: pci_region = %d", __func__, pci_region);
        return 1;
    }
+    assert(guest_end - pci_start <= memory_region_size(mr));

    virt_start = (intptr_t)memory_region_get_ram_ptr(mr);
    memslot.slot_id = slot_id;
@@ -1417,11 +1437,13 @@ static void qxl_reset_surfaces(PCIQXLDevice *d)

 /* can be also called from spice server thread context */
 static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
-                                      uint32_t *s, uint64_t *o)
+                                      uint32_t *s, uint64_t *o,
+                                      size_t size_requested)
 {
    uint64_t phys   = le64_to_cpu(pqxl);
    uint32_t slot   = (phys >> (64 -  8)) & 0xff;
    uint64_t offset = phys & 0xffffffffffff;
+    uint64_t size_available;

    if (slot >= NUM_MEMSLOTS) {
        qxl_set_guest_bug(qxl, "slot too large %d >= %d", slot,
@@ -1445,6 +1467,23 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
                          slot, offset, qxl->guest_slots[slot].size);
        return false;
    }
+    size_available = memory_region_size(qxl->guest_slots[slot].mr);
+    if (qxl->guest_slots[slot].offset + offset >= size_available) {
+        qxl_set_guest_bug(qxl,
+                          "slot %d offset %"PRIu64" > region size %"PRIu64"\n",
+                          slot, qxl->guest_slots[slot].offset + offset,
+                          size_available);
+        return false;
+    }
+    size_available -= qxl->guest_slots[slot].offset + offset;
+    if (size_requested > size_available) {
+        qxl_set_guest_bug(qxl,
+                          "slot %d offset %"PRIu64" size %zu: "
+                          "overrun by %"PRIu64" bytes\n",
+                          slot, offset, size_requested,
+                          size_requested - size_available);
+        return false;
+    }

    *s = slot;
    *o = offset;
@@ -1452,7 +1491,8 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
 }

 /* can be also called from spice server thread context */
-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id)
+void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id,
+                    size_t size)
 {
    uint64_t offset;
    uint32_t slot;
@@ -1463,7 +1503,7 @@ void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id)
        offset = le64_to_cpu(pqxl) & 0xffffffffffff;
        return (void *)(intptr_t)offset;
    case MEMSLOT_GROUP_GUEST:
-        if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset)) {
+        if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size)) {
            return NULL;
        }
        ptr = memory_region_get_ram_ptr(qxl->guest_slots[slot].mr);
@@ -1891,9 +1931,9 @@ static void qxl_dirty_one_surface(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
    uint32_t slot;
    bool rc;

-    rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset);
-    assert(rc == true);
    size = (uint64_t)height * abs(stride);
+    rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size);
+    assert(rc == true);
    trace_qxl_surfaces_dirty(qxl->id, offset, size);
    qxl_set_dirty(qxl->guest_slots[slot].mr,
                  qxl->guest_slots[slot].offset + offset,
@@ -1922,7 +1962,7 @@ static void qxl_dirty_surfaces(PCIQXLDevice *qxl)
        }

        cmd = qxl_phys2virt(qxl, qxl->guest_surfaces.cmds[i],
-                            MEMSLOT_GROUP_GUEST);
+                            MEMSLOT_GROUP_GUEST, sizeof(QXLSurfaceCmd));
        assert(cmd);
        assert(cmd->type == QXL_SURFACE_CMD_CREATE);
        qxl_dirty_one_surface(qxl, cmd->u.surface_create.data,
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -80,6 +80,8 @@ typedef struct PCIQXLDevice {
    QXLPHYSICAL        guest_cursor;

    QXLPHYSICAL        guest_monitors_config;
+    uint32_t           guest_head0_width;
+    uint32_t           guest_head0_height;

    QemuMutex          track_lock;

@@ -146,7 +148,28 @@ typedef struct PCIQXLDevice {
 #define QXL_DEFAULT_REVISION QXL_REVISION_STABLE_V12

 /* qxl.c */
-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
+/**
+ * qxl_phys2virt: Get a pointer within a PCI VRAM memory region.
+ *
+ * @qxl: QXL device
+ * @phys: physical offset of buffer within the VRAM
+ * @group_id: memory slot group
+ * @size: size of the buffer
+ *
+ * Returns a host pointer to a buffer placed at offset @phys within the
+ * active slot @group_id of the PCI VGA RAM memory region associated with
+ * the @qxl device. If the slot is inactive, or the offset + size are out
+ * of the memory region, returns NULL.
+ *
+ * Use with care; by the time this function returns, the returned pointer is
+ * not protected by RCU anymore.  If the caller is not within an RCU critical
+ * section and does not hold the iothread lock, it must have other means of
+ * protecting the pointer, such as a reference to the region that includes
+ * the incoming ram_addr_t.
+ *
+ */
+void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id,
+                    size_t size);
 void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
    GCC_FMT_ATTR(2, 3);

--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -2,6 +2,7 @@
 * QEMU SM501 Device
 *
 * Copyright (c) 2008 Shin-ichiro KAWASAKI
+ * Copyright (c) 2016 BALATON Zoltan
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +24,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -34,14 +36,18 @@
 #include "qemu/range.h"
 #include "ui/pixel_ops.h"
 #include "exec/address-spaces.h"
+#include "qemu/bswap.h"

 /*
 * Status: 2010/05/07
 *   - Minimum implementation for Linux console : mmio regs and CRT layer.
 *   - 2D grapihcs acceleration partially supported : only fill rectangle.
 *
- * TODO:
+ * Status: 2016/12/04
+ *   - Misc fixes: endianness, hardware cursor
 *   - Panel support
+ *
+ * TODO:
 *   - Touch panel support
 *   - USB support
 *   - UART support
@@ -548,6 +554,29 @@ static uint32_t get_local_mem_size_index(uint32_t size)
    return index;
 }

+static inline int get_width(SM501State *s, int crt)
+{
+    int width = crt ? s->dc_crt_h_total : s->dc_panel_h_total;
+    return (width & 0x00000FFF) + 1;
+}
+
+static inline int get_height(SM501State *s, int crt)
+{
+    int height = crt ? s->dc_crt_v_total : s->dc_panel_v_total;
+    return (height & 0x00000FFF) + 1;
+}
+
+static inline int get_bpp(SM501State *s, int crt)
+{
+    int bpp = crt ? s->dc_crt_control : s->dc_panel_control;
+    return 1 << (bpp & 3);
+}
+
+static ram_addr_t get_fb_addr(SM501State *s, int crt)
+{
+    return (crt ? s->dc_crt_fb_addr : s->dc_panel_fb_addr) & 0x3FFFFF0;
+}
+
 /**
 * Check the availability of hardware cursor.
 * @param crt  0 for PANEL, 1 for CRT.
@@ -562,10 +591,10 @@ static inline int is_hwc_enabled(SM501State *state, int crt)
 * Get the address which holds cursor pattern data.
 * @param crt  0 for PANEL, 1 for CRT.
 */
-static inline uint32_t get_hwc_address(SM501State *state, int crt)
+static inline uint8_t *get_hwc_address(SM501State *state, int crt)
 {
    uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr;
-    return (addr & 0x03FFFFF0)/* >> 4*/;
+    return state->local_mem + (addr & 0x03FFFFF0);
 }

 /**
@@ -591,142 +620,218 @@ static inline uint32_t get_hwc_x(SM501State *state, int crt)
 }

 /**
- * Get the cursor position in x coordinate.
+ * Get the hardware cursor palette.
 * @param crt  0 for PANEL, 1 for CRT.
- * @param index  0, 1, 2 or 3 which specifies color of corsor dot.
+ * @param palette  pointer to a [3 * 3] array to store color values in
 */
-static inline uint16_t get_hwc_color(SM501State *state, int crt, int index)
+static inline void get_hwc_palette(SM501State *state, int crt, uint8_t *palette)
 {
-    uint32_t color_reg = 0;
-    uint16_t color_565 = 0;
+    int i;
+    uint32_t color_reg;
+    uint16_t rgb565;

-    if (index == 0) {
-        return 0;
-    }
+    for (i = 0; i < 3; i++) {
+        if (i + 1 == 3) {
+            color_reg = crt ? state->dc_crt_hwc_color_3
+                            : state->dc_panel_hwc_color_3;
+        } else {
+            color_reg = crt ? state->dc_crt_hwc_color_1_2
+                            : state->dc_panel_hwc_color_1_2;
+        }

-    switch (index) {
-    case 1:
-    case 2:
-        color_reg = crt ? state->dc_crt_hwc_color_1_2
-                        : state->dc_panel_hwc_color_1_2;
-        break;
-    case 3:
-        color_reg = crt ? state->dc_crt_hwc_color_3
-                        : state->dc_panel_hwc_color_3;
-        break;
-    default:
-        printf("invalid hw cursor color.\n");
-        abort();
+        if (i + 1 == 2) {
+            rgb565 = (color_reg >> 16) & 0xFFFF;
+        } else {
+            rgb565 = color_reg & 0xFFFF;
+        }
+        palette[i * 3 + 0] = (rgb565 << 3) & 0xf8; /* red */
+        palette[i * 3 + 1] = (rgb565 >> 3) & 0xfc; /* green */
+        palette[i * 3 + 2] = (rgb565 >> 8) & 0xf8; /* blue */
    }
-
-    switch (index) {
-    case 1:
-    case 3:
-        color_565 = (uint16_t)(color_reg & 0xFFFF);
-        break;
-    case 2:
-        color_565 = (uint16_t)((color_reg >> 16) & 0xFFFF);
-        break;
-    }
-    return color_565;
 }

-static int within_hwc_y_range(SM501State *state, int y, int crt)
+static inline void hwc_invalidate(SM501State *s, int crt)
 {
-    int hwc_y = get_hwc_y(state, crt);
-    return (hwc_y <= y && y < hwc_y + SM501_HWC_HEIGHT);
+    int w = get_width(s, crt);
+    int h = get_height(s, crt);
+    int bpp = get_bpp(s, crt);
+    int start = get_hwc_y(s, crt);
+    int end = MIN(h, start + SM501_HWC_HEIGHT) + 1;
+
+    start *= w * bpp;
+    end *= w * bpp;
+
+    memory_region_set_dirty(&s->local_mem_region, start, end - start);
 }

 static void sm501_2d_operation(SM501State * s)
 {
-    /* obtain operation parameters */
-    int operation = (s->twoD_control >> 16) & 0x1f;
-    int rtl = s->twoD_control & 0x8000000;
-    int src_x = (s->twoD_source >> 16) & 0x01FFF;
-    int src_y = s->twoD_source & 0xFFFF;
-    int dst_x = (s->twoD_destination >> 16) & 0x01FFF;
-    int dst_y = s->twoD_destination & 0xFFFF;
-    int operation_width = (s->twoD_dimension >> 16) & 0x1FFF;
-    int operation_height = s->twoD_dimension & 0xFFFF;
-    uint32_t color = s->twoD_foreground;
-    int format_flags = (s->twoD_stretch >> 20) & 0x3;
-    int addressing = (s->twoD_stretch >> 16) & 0xF;
+    int cmd = (s->twoD_control >> 16) & 0x1F;
+    int rtl = s->twoD_control & BIT(27);
+    int format = (s->twoD_stretch >> 20) & 0x3;
+    int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */
+    /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */
+    int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1;
+    int rop = s->twoD_control & 0xFF;
+    unsigned int dst_x = (s->twoD_destination >> 16) & 0x01FFF;
+    unsigned int dst_y = s->twoD_destination & 0xFFFF;
+    unsigned int width = (s->twoD_dimension >> 16) & 0x1FFF;
+    unsigned int height = s->twoD_dimension & 0xFFFF;
+    uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF;
+    unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF;
+    int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
+    int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt);

-    /* get frame buffer info */
-    uint8_t * src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF);
-    uint8_t * dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF);
-    int src_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-    int dst_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-
-    if (addressing != 0x0) {
-        printf("%s: only XY addressing is supported.\n", __func__);
-        abort();
+    if ((s->twoD_stretch >> 16) & 0xF) {
+        qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n");
+        return;
    }

-    if ((s->twoD_source_base & 0x08000000) ||
-        (s->twoD_destination_base & 0x08000000)) {
-        printf("%s: only local memory is supported.\n", __func__);
-        abort();
+    if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) {
+        qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n");
+        return;
    }

-    switch (operation) {
-    case 0x00: /* copy area */
-#define COPY_AREA(_bpp, _pixel_type, rtl) {                                 \
-        int y, x, index_d, index_s;                                         \
-        for (y = 0; y < operation_height; y++) {                            \
-            for (x = 0; x < operation_width; x++) {                         \
-                if (rtl) {                                                  \
-                    index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \
-                    index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \
-                } else {                                                    \
-                    index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \
-                    index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \
-                }                                                           \
-                *(_pixel_type*)&dst[index_d] = *(_pixel_type*)&src[index_s];\
-            }                                                               \
-        }                                                                   \
+    if (!dst_pitch) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero dest pitch.\n");
+        return;
    }
-        switch (format_flags) {
-        case 0:
-            COPY_AREA(1, uint8_t, rtl);
-            break;
-        case 1:
-            COPY_AREA(2, uint16_t, rtl);
-            break;
-        case 2:
-            COPY_AREA(4, uint32_t, rtl);
-            break;
+
+    if (!width || !height) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero size 2D op.\n");
+        return;
+    }
+
+    if (rtl) {
+        dst_x -= width - 1;
+        dst_y -= height - 1;
+    }
+
+    if (dst_base >= get_local_mem_size(s) || dst_base +
+        (dst_x + width + (dst_y + height) * (dst_pitch + width)) *
+        (1 << format) >= get_local_mem_size(s)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501: 2D op dest is outside vram.\n");
+        return;
+    }
+
+    switch (cmd) {
+    case 0: /* BitBlt */
+    {
+        unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF;
+        unsigned int src_y = s->twoD_source & 0xFFFF;
+        uint32_t src_base = s->twoD_source_base & 0x03FFFFFF;
+        unsigned int src_pitch = s->twoD_pitch & 0x1FFF;
+
+        if (!src_pitch) {
+            qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero src pitch.\n");
+            return;
+        }
+
+        if (rtl) {
+            src_x -= width - 1;
+            src_y -= height - 1;
+        }
+
+        if (src_base >= get_local_mem_size(s) || src_base +
+            (src_x + width + (src_y + height) * (src_pitch + width)) *
+            (1 << format) >= get_local_mem_size(s)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "sm501: 2D op src is outside vram.\n");
+            return;
+        }
+
+        if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) {
+            /* Invert dest, is there a way to do this with pixman? */
+            unsigned int x, y, i;
+            uint8_t *d = s->local_mem + dst_base;
+
+            for (y = 0; y < height; y++) {
+                i = (dst_x + (dst_y + y) * dst_pitch) * (1 << format);
+                for (x = 0; x < width; x++, i += (1 << format)) {
+                    switch (format) {
+                    case 0:
+                        d[i] = ~d[i];
+                        break;
+                    case 1:
+                        *(uint16_t *)&d[i] = ~*(uint16_t *)&d[i];
+                        break;
+                    case 2:
+                        *(uint32_t *)&d[i] = ~*(uint32_t *)&d[i];
+                        break;
+                    }
+                }
+            }
+        } else {
+            /* Do copy src for unimplemented ops, better than unpainted area */
+            if ((rop_mode && (rop != 0xc || rop2_source_is_pattern)) ||
+                (!rop_mode && rop != 0xcc)) {
+                qemu_log_mask(LOG_UNIMP,
+                              "sm501: rop%d op %x%s not implemented\n",
+                              (rop_mode ? 2 : 3), rop,
+                              (rop2_source_is_pattern ?
+                                  " with pattern source" : ""));
+            }
+            /* Check for overlaps, this could be made more exact */
+            uint32_t sb, se, db, de;
+            sb = src_base + src_x + src_y * (width + src_pitch);
+            se = sb + width + height * (width + src_pitch);
+            db = dst_base + dst_x + dst_y * (width + dst_pitch);
+            de = db + width + height * (width + dst_pitch);
+            if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) {
+                /* regions may overlap: copy via temporary */
+                int llb = width * (1 << format);
+                int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
+                uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
+                                         height);
+                pixman_blt((uint32_t *)&s->local_mem[src_base], tmp,
+                           src_pitch * (1 << format) / sizeof(uint32_t),
+                           tmp_stride, 8 * (1 << format), 8 * (1 << format),
+                           src_x, src_y, 0, 0, width, height);
+                pixman_blt(tmp, (uint32_t *)&s->local_mem[dst_base],
+                           tmp_stride,
+                           dst_pitch * (1 << format) / sizeof(uint32_t),
+                           8 * (1 << format), 8 * (1 << format),
+                           0, 0, dst_x, dst_y, width, height);
+                g_free(tmp);
+            } else {
+                pixman_blt((uint32_t *)&s->local_mem[src_base],
+                           (uint32_t *)&s->local_mem[dst_base],
+                           src_pitch * (1 << format) / sizeof(uint32_t),
+                           dst_pitch * (1 << format) / sizeof(uint32_t),
+                           8 * (1 << format), 8 * (1 << format),
+                           src_x, src_y, dst_x, dst_y, width, height);
+            }
        }
        break;
-
-    case 0x01: /* fill rectangle */
-#define FILL_RECT(_bpp, _pixel_type) {                                      \
-        int y, x;                                                           \
-        for (y = 0; y < operation_height; y++) {                            \
-            for (x = 0; x < operation_width; x++) {                         \
-                int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp;   \
-                *(_pixel_type*)&dst[index] = (_pixel_type)color;            \
-            }                                                               \
-        }                                                                   \
    }
+    case 1: /* Rectangle Fill */
+    {
+        uint32_t color = s->twoD_foreground;

-        switch (format_flags) {
-        case 0:
-            FILL_RECT(1, uint8_t);
-            break;
-        case 1:
-            FILL_RECT(2, uint16_t);
-            break;
-        case 2:
-            FILL_RECT(4, uint32_t);
-            break;
+        if (format == 2) {
+            color = cpu_to_le32(color);
+        } else if (format == 1) {
+            color = cpu_to_le16(color);
        }
-        break;

+        pixman_fill((uint32_t *)&s->local_mem[dst_base],
+                    dst_pitch * (1 << format) / sizeof(uint32_t),
+                    8 * (1 << format), dst_x, dst_y, width, height, color);
+        break;
+    }
    default:
-        printf("non-implemented SM501 2D operation. %d\n", operation);
-        abort();
-        break;
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n",
+                      cmd);
+        return;
+    }
+
+    if (dst_base >= get_fb_addr(s, crt) &&
+        dst_base <= get_fb_addr(s, crt) + fb_len) {
+        int dst_len = MIN(fb_len, ((dst_y + height - 1) * dst_pitch +
+                          dst_x + width) * (1 << format));
+        if (dst_len) {
+            memory_region_set_dirty(&s->local_mem_region, dst_base, dst_len);
+        }
    }
 }

@@ -756,6 +861,8 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
    case SM501_DRAM_CONTROL:
 	ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13;
 	break;
+    case SM501_COMMAND_LIST_STATUS:
+        ret = 0x00180002; /* FIFOs are empty, everything idle */
    case SM501_IRQ_MASK:
 	ret = s->irq_mask;
 	break;
@@ -773,11 +880,13 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
    case SM501_POWER_MODE_CONTROL:
 	ret = s->power_mode_control;
 	break;
+    case SM501_ENDIAN_CONTROL:
+        ret = 0; /* Only default little endian mode is supported */
+        break;

    default:
-	printf("sm501 system config : not implemented register read."
-	       " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config"
+                      "register read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -823,11 +932,17 @@ static void sm501_system_config_write(void *opaque, hwaddr addr,
    case SM501_POWER_MODE_CONTROL:
 	s->power_mode_control = value & 0x00000003;
 	break;
+    case SM501_ENDIAN_CONTROL:
+        if (value & 0x00000001) {
+            printf("sm501 system config : big endian mode not implemented.\n");
+            abort();
+        }
+        break;

    default:
-	printf("sm501 system config : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (uint32_t)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config"
+                      "register write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -882,6 +997,9 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
    case SM501_DC_PANEL_PANNING_CONTROL:
 	ret = s->dc_panel_panning_control;
 	break;
+    case SM501_DC_PANEL_COLOR_KEY:
+        /* Not implemented yet */
+        break;
    case SM501_DC_PANEL_FB_ADDR:
 	ret = s->dc_panel_fb_addr;
 	break;
@@ -954,9 +1072,8 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
        break;

    default:
-	printf("sm501 disp ctrl : not implemented register read."
-	       " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register "
+                      "read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -976,8 +1093,14 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
    case SM501_DC_PANEL_PANNING_CONTROL:
 	s->dc_panel_panning_control = value & 0xFF3FFF3F;
 	break;
+    case SM501_DC_PANEL_COLOR_KEY:
+        /* Not implemented yet */
+        break;
    case SM501_DC_PANEL_FB_ADDR:
 	s->dc_panel_fb_addr = value & 0x8FFFFFF0;
+        if (value & 0x8000000) {
+            qemu_log_mask(LOG_UNIMP, "Panel external memory not supported\n");
+        }
 	break;
    case SM501_DC_PANEL_FB_OFFSET:
 	s->dc_panel_fb_offset = value & 0x3FF03FF0;
@@ -1009,11 +1132,19 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;

    case SM501_DC_PANEL_HWC_ADDR:
-	s->dc_panel_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_panel_hwc_addr) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_addr = value;
+        }
+        break;
    case SM501_DC_PANEL_HWC_LOC:
-	s->dc_panel_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_panel_hwc_location) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_location = value;
+        }
+        break;
    case SM501_DC_PANEL_HWC_COLOR_1_2:
 	s->dc_panel_hwc_color_1_2 = value;
 	break;
@@ -1026,6 +1157,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;
    case SM501_DC_CRT_FB_ADDR:
 	s->dc_crt_fb_addr = value & 0x8FFFFFF0;
+        if (value & 0x8000000) {
+            qemu_log_mask(LOG_UNIMP, "CRT external memory not supported\n");
+        }
 	break;
    case SM501_DC_CRT_FB_OFFSET:
 	s->dc_crt_fb_offset = value & 0x3FF03FF0;
@@ -1044,11 +1178,19 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
 	break;

    case SM501_DC_CRT_HWC_ADDR:
-	s->dc_crt_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_crt_hwc_addr) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_addr = value;
+        }
+        break;
    case SM501_DC_CRT_HWC_LOC:
-	s->dc_crt_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_crt_hwc_location) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_location = value;
+        }
+        break;
    case SM501_DC_CRT_HWC_COLOR_1_2:
 	s->dc_crt_hwc_color_1_2 = value;
 	break;
@@ -1061,9 +1203,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
        break;

    default:
-	printf("sm501 disp ctrl : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (unsigned)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register "
+                      "write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -1089,9 +1231,8 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
        ret = s->twoD_source_base;
        break;
    default:
-        printf("sm501 disp ctrl : not implemented register read."
-               " addr=%x\n", (int)addr);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register "
+                      "read. addr=%" HWADDR_PRIx "\n", addr);
    }

    return ret;
@@ -1149,9 +1290,9 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr,
        s->twoD_destination_base = value;
        break;
    default:
-        printf("sm501 2d engine : not implemented register write."
-               " addr=%x, val=%x\n", (int)addr, (unsigned)value);
-        abort();
+        qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register "
+                      "write. addr=%" HWADDR_PRIx
+                      ", val=%" PRIx64 "\n", addr, value);
    }
 }

@@ -1170,8 +1311,9 @@ static const MemoryRegionOps sm501_2d_engine_ops = {
 typedef void draw_line_func(uint8_t *d, const uint8_t *s,
 			    int width, const uint32_t *pal);

-typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette,
-                                int c_y, uint8_t *d, int width);
+typedef void draw_hwc_line_func(uint8_t *d, const uint8_t *s,
+                                int width, const uint8_t *palette,
+                                int c_x, int c_y);

 #define DEPTH 8
 #include "sm501_template.h"
@@ -1256,19 +1398,16 @@ static inline int get_depth_index(DisplaySurface *surface)
    }
 }

-static void sm501_draw_crt(SM501State * s)
+static void sm501_update_display(void *opaque)
 {
+    SM501State *s = (SM501State *)opaque;
    DisplaySurface *surface = qemu_console_surface(s->con);
-    int y;
-    int width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-    int height = (s->dc_crt_v_total & 0x00000FFF) + 1;
-
-    uint8_t  * src = s->local_mem;
-    int src_bpp = 0;
+    int y, c_x = 0, c_y = 0;
+    int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
+    int width = get_width(s, crt);
+    int height = get_height(s, crt);
+    int src_bpp = get_bpp(s, crt);
    int dst_bpp = surface_bytes_per_pixel(surface);
-    uint32_t * palette = (uint32_t *)&s->dc_palette[SM501_DC_CRT_PALETTE
-						    - SM501_DC_PANEL_PALETTE];
-    uint8_t hwc_palette[3 * 3];
    int ds_depth_index = get_depth_index(surface);
    draw_line_func * draw_line = NULL;
    draw_hwc_line_func * draw_hwc_line = NULL;
@@ -1276,43 +1415,46 @@ static void sm501_draw_crt(SM501State * s)
    int y_start = -1;
    ram_addr_t page_min = ~0l;
    ram_addr_t page_max = 0l;
-    ram_addr_t offset = 0;
+    ram_addr_t offset;
+    uint32_t *palette;
+    uint8_t hwc_palette[3 * 3];
+    uint8_t *hwc_src = NULL;
+
+    if (!((crt ? s->dc_crt_control : s->dc_panel_control)
+          & SM501_DC_CRT_CONTROL_ENABLE)) {
+        return;
+    }
+
+    palette = (uint32_t *)(crt ? &s->dc_palette[SM501_DC_CRT_PALETTE -
+                                                SM501_DC_PANEL_PALETTE]
+                               : &s->dc_palette[0]);

    /* choose draw_line function */
-    switch (s->dc_crt_control & 3) {
-    case SM501_DC_CRT_CONTROL_8BPP:
-	src_bpp = 1;
-	draw_line = draw_line8_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_16BPP:
-	src_bpp = 2;
-	draw_line = draw_line16_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_32BPP:
-	src_bpp = 4;
-	draw_line = draw_line32_funcs[ds_depth_index];
-	break;
+    switch (src_bpp) {
+    case 1:
+        draw_line = draw_line8_funcs[ds_depth_index];
+        break;
+    case 2:
+        draw_line = draw_line16_funcs[ds_depth_index];
+        break;
+    case 4:
+        draw_line = draw_line32_funcs[ds_depth_index];
+        break;
    default:
-	printf("sm501 draw crt : invalid DC_CRT_CONTROL=%x.\n",
-	       s->dc_crt_control);
-        abort();
+        qemu_log_mask(LOG_GUEST_ERROR, "sm501 draw crt"
+		      "invalid DC_CRT_CONTROL=%x.\n",
+	       	      s->dc_crt_control);
 	break;
    }

    /* set up to draw hardware cursor */
-    if (is_hwc_enabled(s, 1)) {
-        int i;
-
-        /* get cursor palette */
-        for (i = 0; i < 3; i++) {
-            uint16_t rgb565 = get_hwc_color(s, 1, i + 1);
-            hwc_palette[i * 3 + 0] = (rgb565 & 0xf800) >> 8; /* red */
-            hwc_palette[i * 3 + 1] = (rgb565 & 0x07e0) >> 3; /* green */
-            hwc_palette[i * 3 + 2] = (rgb565 & 0x001f) << 3; /* blue */
-        }
-
+    if (is_hwc_enabled(s, crt)) {
        /* choose cursor draw line function */
        draw_hwc_line = draw_hwc_line_funcs[ds_depth_index];
+        hwc_src = get_hwc_address(s, crt);
+        c_x = get_hwc_x(s, crt);
+        c_y = get_hwc_y(s, crt);
+        get_hwc_palette(s, crt, hwc_palette);
    }

    /* adjust console size */
@@ -1326,15 +1468,18 @@ static void sm501_draw_crt(SM501State * s)

    /* draw each line according to conditions */
    memory_region_sync_dirty_bitmap(&s->local_mem_region);
-    for (y = 0; y < height; y++) {
-	int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0;
-	int update = full_update || update_hwc;
+    offset = get_fb_addr(s, crt);
+    for (y = 0; y < height; y++, offset += width * src_bpp) {
+        int update, update_hwc;
        ram_addr_t page0 = offset;
        ram_addr_t page1 = offset + width * src_bpp - 1;

-	/* check dirty flags for each line */
-        update = memory_region_get_dirty(&s->local_mem_region, page0,
-                                         page1 - page0, DIRTY_MEMORY_VGA);
+        /* check if hardware cursor is enabled and we're within its range */
+        update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT;
+        update = full_update || update_hwc;
+        /* check dirty flags for each line */
+        update |= memory_region_get_dirty(&s->local_mem_region, page0,
+                                          page1 - page0, DIRTY_MEMORY_VGA);

 	/* draw line and change status */
 	if (update) {
@@ -1342,11 +1487,11 @@ static void sm501_draw_crt(SM501State * s)
            d +=  y * width * dst_bpp;

            /* draw graphics layer */
-            draw_line(d, src, width, palette);
+            draw_line(d, s->local_mem + offset, width, palette);

            /* draw haredware cursor */
            if (update_hwc) {
-                draw_hwc_line(s, 1, hwc_palette, y - get_hwc_y(s, 1), d, width);
+                draw_hwc_line(d, hwc_src, width, hwc_palette, c_x, y - c_y);
            }

 	    if (y_start < 0)
@@ -1362,9 +1507,6 @@ static void sm501_draw_crt(SM501State * s)
 		y_start = -1;
 	    }
 	}
-
-	src += width * src_bpp;
-	offset += width * src_bpp;
    }

    /* complete flush to display */
@@ -1379,14 +1521,6 @@ static void sm501_draw_crt(SM501State * s)
    }
 }

-static void sm501_update_display(void *opaque)
-{
-    SM501State * s = (SM501State *)opaque;
-
-    if (s->dc_crt_control & SM501_DC_CRT_CONTROL_ENABLE)
-	sm501_draw_crt(s);
-}
-
 static const GraphicHwOps sm501_ops = {
    .gfx_update  = sm501_update_display,
 };
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -99,29 +99,24 @@ static void glue(draw_line32_, PIXEL_NAME)(
 /**
 * Draw hardware cursor image on the given line.
 */
-static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
-                         uint8_t * palette, int c_y, uint8_t *d, int width)
+static void glue(draw_hwc_line_, PIXEL_NAME)(uint8_t *d, const uint8_t *s,
+                 int width, const uint8_t *palette, int c_x, int c_y)
 {
-    int x, i;
+    int i;
    uint8_t bitset = 0;

-    /* get hardware cursor pattern */
-    uint32_t cursor_addr = get_hwc_address(s, crt);
-    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
-    cursor_addr += 64 * c_y / 4;  /* 4 pixels per byte */
-    cursor_addr += s->base;
-
    /* get cursor position */
-    x = get_hwc_x(s, crt);
-    d += x * BPP;
+    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
+    s += SM501_HWC_WIDTH * c_y / 4;  /* 4 pixels per byte */
+    d += c_x * BPP;

-    for (i = 0; i < SM501_HWC_WIDTH && x + i < width; i++) {
+    for (i = 0; i < SM501_HWC_WIDTH && c_x + i < width; i++) {
        uint8_t v;

        /* get pixel value */
        if (i % 4 == 0) {
-            bitset = ldub_phys(&address_space_memory, cursor_addr);
-            cursor_addr++;
+            bitset = ldub_p(s);
+            s++;
        }
        v = bitset & 3;
        bitset >>= 2;
--- a/hw/display/vga-helpers.h
+++ b/hw/display/vga-helpers.h
@@ -95,20 +95,46 @@ static void vga_draw_glyph9(uint8_t *d, int linesize,
    } while (--h);
 }

+static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr)
+{
+    return vga->vram_ptr[addr & vga->vbe_size_mask];
+}
+
+static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~1;
+    uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset);
+    return lduw_le_p(ptr);
+}
+
+static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~1;
+    uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset);
+    return lduw_be_p(ptr);
+}
+
+static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr)
+{
+    uint32_t offset = addr & vga->vbe_size_mask & ~3;
+    uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset);
+    return ldl_le_p(ptr);
+}
+
 /*
 * 4 color mode
 */
-static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line2(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t plane_mask, *palette, data, v;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand2[GET_PLANE(data, 0)];
        v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -124,7 +150,7 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
        ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf];
        ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf];
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

@@ -134,17 +160,17 @@ static void vga_draw_line2(VGACommonState *s1, uint8_t *d,
 /*
 * 4 color mode, dup2 horizontal
 */
-static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line2d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t plane_mask, *palette, data, v;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand2[GET_PLANE(data, 0)];
        v |= expand2[GET_PLANE(data, 2)] << 2;
@@ -160,24 +186,24 @@ static void vga_draw_line2d2(VGACommonState *s1, uint8_t *d,
        PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
        PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
        d += 64;
-        s += 4;
+        addr += 4;
    }
 }

 /*
 * 16 color mode
 */
-static void vga_draw_line4(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line4(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t plane_mask, data, v, *palette;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand4[GET_PLANE(data, 0)];
        v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -192,24 +218,24 @@ static void vga_draw_line4(VGACommonState *s1, uint8_t *d,
        ((uint32_t *)d)[6] = palette[(v >> 4) & 0xf];
        ((uint32_t *)d)[7] = palette[(v >> 0) & 0xf];
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

 /*
 * 16 color mode, dup2 horizontal
 */
-static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line4d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t plane_mask, data, v, *palette;
    int x;

-    palette = s1->last_palette;
-    plane_mask = mask16[s1->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
+    palette = vga->last_palette;
+    plane_mask = mask16[vga->ar[VGA_ATC_PLANE_ENABLE] & 0xf];
    width >>= 3;
    for(x = 0; x < width; x++) {
-        data = ((uint32_t *)s)[0];
+        data = vga_read_dword_le(vga, addr);
        data &= plane_mask;
        v = expand4[GET_PLANE(data, 0)];
        v |= expand4[GET_PLANE(data, 1)] << 1;
@@ -224,7 +250,7 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
        PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
        PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
        d += 64;
-        s += 4;
+        addr += 4;
    }
 }

@@ -233,21 +259,21 @@ static void vga_draw_line4d2(VGACommonState *s1, uint8_t *d,
 *
 * XXX: add plane_mask support (never used in standard VGA modes)
 */
-static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d,
-                             const uint8_t *s, int width)
+static void vga_draw_line8d2(VGACommonState *vga, uint8_t *d,
+                             uint32_t addr, int width)
 {
    uint32_t *palette;
    int x;

-    palette = s1->last_palette;
+    palette = vga->last_palette;
    width >>= 3;
    for(x = 0; x < width; x++) {
-        PUT_PIXEL2(d, 0, palette[s[0]]);
-        PUT_PIXEL2(d, 1, palette[s[1]]);
-        PUT_PIXEL2(d, 2, palette[s[2]]);
-        PUT_PIXEL2(d, 3, palette[s[3]]);
+        PUT_PIXEL2(d, 0, palette[vga_read_byte(vga, addr + 0)]);
+        PUT_PIXEL2(d, 1, palette[vga_read_byte(vga, addr + 1)]);
+        PUT_PIXEL2(d, 2, palette[vga_read_byte(vga, addr + 2)]);
+        PUT_PIXEL2(d, 3, palette[vga_read_byte(vga, addr + 3)]);
        d += 32;
-        s += 4;
+        addr += 4;
    }
 }

@@ -256,63 +282,63 @@ static void vga_draw_line8d2(VGACommonState *s1, uint8_t *d,
 *
 * XXX: add plane_mask support (never used in standard VGA modes)
 */
-static void vga_draw_line8(VGACommonState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
+static void vga_draw_line8(VGACommonState *vga, uint8_t *d,
+                           uint32_t addr, int width)
 {
    uint32_t *palette;
    int x;

-    palette = s1->last_palette;
+    palette = vga->last_palette;
    width >>= 3;
    for(x = 0; x < width; x++) {
-        ((uint32_t *)d)[0] = palette[s[0]];
-        ((uint32_t *)d)[1] = palette[s[1]];
-        ((uint32_t *)d)[2] = palette[s[2]];
-        ((uint32_t *)d)[3] = palette[s[3]];
-        ((uint32_t *)d)[4] = palette[s[4]];
-        ((uint32_t *)d)[5] = palette[s[5]];
-        ((uint32_t *)d)[6] = palette[s[6]];
-        ((uint32_t *)d)[7] = palette[s[7]];
+        ((uint32_t *)d)[0] = palette[vga_read_byte(vga, addr + 0)];
+        ((uint32_t *)d)[1] = palette[vga_read_byte(vga, addr + 1)];
+        ((uint32_t *)d)[2] = palette[vga_read_byte(vga, addr + 2)];
+        ((uint32_t *)d)[3] = palette[vga_read_byte(vga, addr + 3)];
+        ((uint32_t *)d)[4] = palette[vga_read_byte(vga, addr + 4)];
+        ((uint32_t *)d)[5] = palette[vga_read_byte(vga, addr + 5)];
+        ((uint32_t *)d)[6] = palette[vga_read_byte(vga, addr + 6)];
+        ((uint32_t *)d)[7] = palette[vga_read_byte(vga, addr + 7)];
        d += 32;
-        s += 8;
+        addr += 8;
    }
 }

 /*
 * 15 bit color
 */
-static void vga_draw_line15_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line15_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_le_p((void *)s);
+        v = vga_read_word_le(vga, addr);
        r = (v >> 7) & 0xf8;
        g = (v >> 2) & 0xf8;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line15_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_be_p((void *)s);
+        v = vga_read_word_be(vga, addr);
        r = (v >> 7) & 0xf8;
        g = (v >> 2) & 0xf8;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }
@@ -320,38 +346,38 @@ static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d,
 /*
 * 16 bit color
 */
-static void vga_draw_line16_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line16_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_le_p((void *)s);
+        v = vga_read_word_le(vga, addr);
        r = (v >> 8) & 0xf8;
        g = (v >> 3) & 0xfc;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line16_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t v, r, g, b;

    w = width;
    do {
-        v = lduw_be_p((void *)s);
+        v = vga_read_word_be(vga, addr);
        r = (v >> 8) & 0xf8;
        g = (v >> 3) & 0xfc;
        b = (v << 3) & 0xf8;
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 2;
+        addr += 2;
        d += 4;
    } while (--w != 0);
 }
@@ -359,36 +385,36 @@ static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d,
 /*
 * 24 bit color
 */
-static void vga_draw_line24_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line24_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        b = s[0];
-        g = s[1];
-        r = s[2];
+        b = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        r = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 3;
+        addr += 3;
        d += 4;
    } while (--w != 0);
 }

-static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line24_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        r = s[0];
-        g = s[1];
-        b = s[2];
+        r = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        b = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 3;
+        addr += 3;
        d += 4;
    } while (--w != 0);
 }
@@ -396,44 +422,36 @@ static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d,
 /*
 * 32 bit color
 */
-static void vga_draw_line32_le(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line32_le(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
-#ifndef HOST_WORDS_BIGENDIAN
-    memcpy(d, s, width * 4);
-#else
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        b = s[0];
-        g = s[1];
-        r = s[2];
+        b = vga_read_byte(vga, addr + 0);
+        g = vga_read_byte(vga, addr + 1);
+        r = vga_read_byte(vga, addr + 2);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 4;
+        addr += 4;
        d += 4;
    } while (--w != 0);
-#endif
 }

-static void vga_draw_line32_be(VGACommonState *s1, uint8_t *d,
-                               const uint8_t *s, int width)
+static void vga_draw_line32_be(VGACommonState *vga, uint8_t *d,
+                               uint32_t addr, int width)
 {
-#ifdef HOST_WORDS_BIGENDIAN
-    memcpy(d, s, width * 4);
-#else
    int w;
    uint32_t r, g, b;

    w = width;
    do {
-        r = s[1];
-        g = s[2];
-        b = s[3];
+        r = vga_read_byte(vga, addr + 1);
+        g = vga_read_byte(vga, addr + 2);
+        b = vga_read_byte(vga, addr + 3);
        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
-        s += 4;
+        addr += 4;
        d += 4;
    } while (--w != 0);
-#endif
 }
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1005,7 +1005,7 @@ void vga_mem_writeb(VGACommonState *s, hwaddr addr, uint32_t val)
 }

 typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d,
-                                const uint8_t *s, int width);
+                                uint32_t srcaddr, int width);

 #include "vga-helpers.h"

@@ -1280,6 +1280,9 @@ static void vga_draw_text(VGACommonState *s, int full_update)
        cx_min = width;
        cx_max = -1;
        for(cx = 0; cx < width; cx++) {
+            if (src + sizeof(uint16_t) > s->vram_ptr + s->vram_size) {
+                break;
+            }
            ch_attr = *(uint16_t *)src;
            if (full_update || ch_attr != *ch_attr_ptr || src == cursor_ptr) {
                if (cx < cx_min)
@@ -1434,6 +1437,14 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2)
    }
 }

+static bool vga_scanline_invalidated(VGACommonState *s, int y)
+{
+    if (y >= VGA_MAX_HEIGHT) {
+        return false;
+    }
+    return s->invalidated_y_table[y >> 5] & (1 << (y & 0x1f));
+}
+
 void vga_sync_dirty_bitmap(VGACommonState *s)
 {
    memory_region_sync_dirty_bitmap(&s->vram);
@@ -1456,13 +1467,14 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 {
    DisplaySurface *surface = qemu_console_surface(s->con);
    int y1, y, update, linesize, y_start, double_scan, mask, depth;
-    int width, height, shift_control, line_offset, bwidth, bits;
-    ram_addr_t page0, page1, page_min, page_max;
+    int width, height, shift_control, bwidth, bits;
+    ram_addr_t page0, page1, region_start, region_end;
+    DirtyBitmapSnapshot *snap = NULL;
    int disp_width, multi_scan, multi_run;
    uint8_t *d;
    uint32_t v, addr1, addr;
    vga_draw_line_func *vga_draw_line = NULL;
-    bool share_surface;
+    bool share_surface, force_shadow = false;
    pixman_format_code_t format;
 #ifdef HOST_WORDS_BIGENDIAN
    bool byteswap = !s->big_endian_fb;
@@ -1472,11 +1484,34 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)

    full_update |= update_basic_params(s);

-    if (!full_update)
-        vga_sync_dirty_bitmap(s);
-
    s->get_resolution(s, &width, &height);
    disp_width = width;
+    depth = s->get_bpp(s);
+
+    region_start = (s->start_addr * 4);
+    region_end = region_start + (ram_addr_t)s->line_offset * height;
+    region_end += width * depth / 8; /* scanline length */
+    region_end -= s->line_offset;
+    if (region_end > s->vbe_size || depth == 0 || depth == 15) {
+        /*
+         * We land here on:
+         *  - wraps around (can happen with cirrus vbe modes)
+         *  - depth == 0 (256 color palette video mode)
+         *  - depth == 15
+         *
+         * Take the safe and slow route:
+         *   - create a dirty bitmap snapshot for all vga memory.
+         *   - force shadowing (so all vga memory access goes
+         *     through vga_read_*() helpers).
+         *
+         * Given this affects only vga features which are pretty much
+         * unused by modern guests there should be no performance
+         * impact.
+         */
+        region_start = 0;
+        region_end = s->vbe_size;
+        force_shadow = true;
+    }

    shift_control = (s->gr[VGA_GFX_MODE] >> 5) & 3;
    double_scan = (s->cr[VGA_CRTC_MAX_SCAN] >> 7);
@@ -1506,8 +1541,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        }
    }

-    depth = s->get_bpp(s);
-
    /*
     * Check whether we can share the surface with the backend
     * or whether we need a shadow surface. We share native
@@ -1517,7 +1550,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
    format = qemu_default_pixman_format(depth, !byteswap);
    if (format) {
        share_surface = dpy_gfx_check_format(s->con, format)
-            && !s->force_shadow;
+            && !s->force_shadow && !force_shadow;
    } else {
        share_surface = false;
    }
@@ -1608,20 +1641,29 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        s->cursor_invalidate(s);
    }

-    line_offset = s->line_offset;
 #if 0
    printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
           width, height, v, line_offset, s->cr[9], s->cr[VGA_CRTC_MODE],
           s->line_compare, sr(s, VGA_SEQ_CLOCK_MODE));
 #endif
    addr1 = (s->start_addr * 4);
-    bwidth = (width * bits + 7) / 8;
+    bwidth = DIV_ROUND_UP(width * bits, 8);
    y_start = -1;
-    page_min = -1;
-    page_max = 0;
    d = surface_data(surface);
    linesize = surface_stride(surface);
    y1 = 0;
+
+    if (!full_update) {
+        vga_sync_dirty_bitmap(s);
+        if (s->line_compare < height) {
+            /* split screen mode */
+            region_start = 0;
+        }
+        snap = memory_region_snapshot_and_clear_dirty(&s->vram, region_start,
+                                                      region_end - region_start,
+                                                      DIRTY_MEMORY_VGA);
+    }
+
    for(y = 0; y < height; y++) {
        addr = addr1;
        if (!(s->cr[VGA_CRTC_MODE] & 1)) {
@@ -1634,21 +1676,28 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
            addr = (addr & ~0x8000) | ((y1 & 2) << 14);
        }
        update = full_update;
-        page0 = addr;
-        page1 = addr + bwidth - 1;
-        update |= memory_region_get_dirty(&s->vram, page0, page1 - page0,
-                                          DIRTY_MEMORY_VGA);
-        /* explicit invalidation for the hardware cursor */
-        update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
+        page0 = addr & s->vbe_size_mask;
+        page1 = (addr + bwidth - 1) & s->vbe_size_mask;
+        if (full_update) {
+            update = 1;
+        } else if (page1 < page0) {
+            /* scanline wraps from end of video memory to the start */
+            assert(force_shadow);
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, s->vbe_size - page0);
+            update |= memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                       0, page1);
+        } else {
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, page1 - page0);
+        }
+        /* explicit invalidation for the hardware cursor (cirrus only) */
+        update |= vga_scanline_invalidated(s, y);
        if (update) {
            if (y_start < 0)
                y_start = y;
-            if (page0 < page_min)
-                page_min = page0;
-            if (page1 > page_max)
-                page_max = page1;
            if (!(is_buffer_shared(surface))) {
-                vga_draw_line(s, d, s->vram_ptr + addr, width);
+                vga_draw_line(s, d, addr, width);
                if (s->cursor_draw_line)
                    s->cursor_draw_line(s, d, y);
            }
@@ -1663,7 +1712,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        if (!multi_run) {
            mask = (s->cr[VGA_CRTC_MODE] & 3) ^ 3;
            if ((y1 & mask) == mask)
-                addr1 += line_offset;
+                addr1 += s->line_offset;
            y1++;
            multi_run = multi_scan;
        } else {
@@ -1679,14 +1728,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
        dpy_gfx_update(s->con, 0, y_start,
                       disp_width, y - y_start);
    }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        memory_region_reset_dirty(&s->vram,
-                                  page_min,
-                                  page_max - page_min,
-                                  DIRTY_MEMORY_VGA);
-    }
-    memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4);
+    g_free(snap);
+    memset(s->invalidated_y_table, 0, sizeof(s->invalidated_y_table));
 }

 static void vga_draw_blank(VGACommonState *s, int full_update)
@@ -2158,6 +2201,7 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
    if (!s->vbe_size) {
        s->vbe_size = s->vram_size;
    }
+    s->vbe_size_mask = s->vbe_size - 1;

    s->is_vbe_vmstate = 1;
    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size,
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -94,6 +94,7 @@ typedef struct VGACommonState {
    uint32_t vram_size;
    uint32_t vram_size_mb; /* property */
    uint32_t vbe_size;
+    uint32_t vbe_size_mask;
    uint32_t latch;
    bool has_chain4_alias;
    MemoryRegion chain4_alias;
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -505,6 +505,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s,
    int i, pixels;

    qc = cursor_alloc(c->width, c->height);
+    assert(qc != NULL);
+
    qc->hot_x = c->hot_x;
    qc->hot_y = c->hot_y;
    switch (c->bpp) {
--- a/hw/dma/i8257.c
+++ b/hw/dma/i8257.c
@@ -601,7 +601,7 @@ static void i8257_class_init(ObjectClass *klass, void *data)
    idc->schedule = i8257_dma_schedule;
    idc->register_channel = i8257_dma_register_channel;
    /* Reason: needs to be wired up by isa_bus_dma() to work */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo i8257_info = {
--- a/hw/dma/sparc32_dma.c
+++ b/hw/dma/sparc32_dma.c
@@ -305,7 +305,7 @@ static void sparc32_dma_class_init(ObjectClass *klass, void *data)
    dc->vmsd = &vmstate_dma;
    dc->props = sparc32_dma_properties;
    /* Reason: pointer property "iommu_opaque" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo sparc32_dma_info = {
--- a/hw/gpio/omap_gpio.c
+++ b/hw/gpio/omap_gpio.c
@@ -773,7 +773,7 @@ static void omap_gpio_class_init(ObjectClass *klass, void *data)
    dc->reset = omap_gpif_reset;
    dc->props = omap_gpio_properties;
    /* Reason: pointer property "clk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo omap_gpio_info = {
@@ -804,7 +804,7 @@ static void omap2_gpio_class_init(ObjectClass *klass, void *data)
    dc->reset = omap2_gpif_reset;
    dc->props = omap2_gpio_properties;
    /* Reason: pointer properties "iclk", "fclk0", ..., "fclk5" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo omap2_gpio_info = {
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -246,7 +246,7 @@ static int i2c_ddc_rx(I2CSlave *i2c)
    I2CDDCState *s = I2CDDC(i2c);

    int value;
-    value = s->edid_blob[s->reg];
+    value = s->edid_blob[s->reg % sizeof(s->edid_blob)];
    s->reg++;
    return value;
 }
--- a/hw/i2c/omap_i2c.c
+++ b/hw/i2c/omap_i2c.c
@@ -491,7 +491,7 @@ static void omap_i2c_class_init(ObjectClass *klass, void *data)
    dc->props = omap_i2c_properties;
    dc->reset = omap_i2c_reset;
    /* Reason: pointer properties "iclk", "fclk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
    dc->realize = omap_i2c_realize;
 }

--- a/hw/i2c/smbus_eeprom.c
+++ b/hw/i2c/smbus_eeprom.c
@@ -123,7 +123,7 @@ static void smbus_eeprom_class_initfn(ObjectClass *klass, void *data)
    sc->read_data = eeprom_read_data;
    dc->props = smbus_eeprom_properties;
    /* Reason: pointer property "data" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 static const TypeInfo smbus_eeprom_info = {
--- a/hw/i2c/smbus_ich9.c
+++ b/hw/i2c/smbus_ich9.c
@@ -103,7 +103,7 @@ static void ich9_smb_class_init(ObjectClass *klass, void *data)
     * Reason: part of ICH9 southbridge, needs to be wired up by
     * pc_q35_init()
     */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }

 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base)
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -457,8 +457,10 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 {
    unsigned *bsel_alloc = opaque;
    unsigned *bus_bsel;
+    int64_t bsel = object_property_get_int(OBJECT(bus),
+                                           ACPI_PCIHP_PROP_BSEL, NULL);

-    if (qbus_is_hotpluggable(BUS(bus))) {
+    if (qbus_is_hotpluggable(BUS(bus)) && bsel < 0) {
        bus_bsel = g_malloc(sizeof *bus_bsel);

        *bus_bsel = (*bsel_alloc)++;
@@ -1818,9 +1820,9 @@ static Aml *build_q35_osc_method(void)

    /*
     * Always allow native PME, AER (no dependencies)
-     * Never allow SHPC (no SHPC controller in this system)
+     * Allow SHPC (PCI bridges can have SHPC controller)
     */
-    aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1D), a_ctrl));
+    aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl));

    if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
    /* Unknown revision */
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -19,6 +19,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hw_accel.h"
 #include "kvm_i386.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
@@ -61,7 +62,7 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
 {
    CPUState *cpu = first_cpu;
    CPUX86State *env = cpu->env_ptr;
-    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    hwaddr kvmclock_struct_pa;
    uint64_t migration_tsc = env->tsc;
    struct pvclock_vcpu_time_info time;
    uint64_t delta;
@@ -69,11 +70,14 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
    uint64_t nsec_hi;
    uint64_t nsec;

+    cpu_synchronize_state(cpu);
+
    if (!(env->system_time_msr & 1ULL)) {
        /* KVM clock not active */
        return 0;
    }

+    kvmclock_struct_pa = env->system_time_msr & ~1ULL;
    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));

    assert(time.tsc_timestamp <= migration_tsc);
--- a/hw/i386/multiboot.c
+++ b/hw/i386/multiboot.c
@@ -221,15 +221,38 @@ int load_multiboot(FWCfgState *fw_cfg,
        uint32_t mh_header_addr = ldl_p(header+i+12);
        uint32_t mh_load_end_addr = ldl_p(header+i+20);
        uint32_t mh_bss_end_addr = ldl_p(header+i+24);
+
        mh_load_addr = ldl_p(header+i+16);
+        if (mh_header_addr < mh_load_addr) {
+            fprintf(stderr, "invalid mh_load_addr address\n");
+            exit(1);
+        }
+        if (mh_load_end_addr > mh_bss_end_addr) {
+            fprintf(stderr, "invalid mh_load_end_addr address\n");
+            exit(1);
+        }
+
        uint32_t mb_kernel_text_offset = i - (mh_header_addr - mh_load_addr);
        uint32_t mb_load_size = 0;
        mh_entry_addr = ldl_p(header+i+28);

        if (mh_load_end_addr) {
+            if (mh_bss_end_addr < mh_load_addr) {
+                fprintf(stderr, "invalid mh_bss_end_addr address\n");
+                exit(1);
+            }
            mb_kernel_size = mh_bss_end_addr - mh_load_addr;
+
+            if (mh_load_end_addr < mh_load_addr) {
+                fprintf(stderr, "invalid mh_load_end_addr address\n");
+                exit(1);
+            }
            mb_load_size = mh_load_end_addr - mh_load_addr;
        } else {
+            if (kernel_file_size < mb_kernel_text_offset) {
+                fprintf(stderr, "invalid kernel_file_size\n");
+                exit(1);
+            }
            mb_kernel_size = kernel_file_size - mb_kernel_text_offset;
            mb_load_size = mb_kernel_size;
        }
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .9.0
 .9.1