hw/input/hid.c Fix capslock hid code

When ever USB keyboard is used, e.g. '-usbdevice keyboard' pressing caps lock key send 0x32 hid code, which is treated as backslash. Instead it should be 0x39 code. This affects sending uppercase keys, as they typed whith caps lock active. While on x86 this can be workarounded by using ps/2 protocol. On Power it is crusial as we don't have anything else than USB. This is fixes guest automation tasts over vnc. Signed-off-by: Dinar Valeev <dvaleev@suse.com> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
hid: handle full ptr queues in post_load
2015-01-22 12:19:48 +01:00 · 2015-01-22 12:19:48 +01:00 · 2015-01-22 12:19:48 +01:00 · 2015-01-20 16:19:58 +00:00 · 2015-01-20 15:19:35 +00:00 · 2015-01-20 15:19:35 +00:00
880 changed files with 46819 additions and 15291 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,4 @@ cscope.*
 tags
 TAGS
 *~
+/tests/qemu-iotests/common.env
--- a/2
+++ b/2
@@ -11,7 +11,7 @@ option) any later version.

 As of July 2013, contributions under version 2 of the GNU General Public
 License (and no later version) are only accepted for the following files
-or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.
+or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*.

 3) The Tiny Code Generator (TCG) is released under the BSD license
   (see license headers in files).
--- a/128
+++ b/128
@@ -51,6 +51,7 @@ Descriptions of section entries:
 General Project Administration
 ------------------------------
 M: Anthony Liguori <aliguori@amazon.com>
+M: Peter Maydell <peter.maydell@linaro.org>

 Responsible Disclosure, Reporting Security Issues
 ------------------------------
@@ -61,11 +62,23 @@ L: secalert@redhat.com

 Guest CPU cores (TCG):
 ----------------------
+Overall
+L: qemu-devel@nongnu.org
+S: Odd fixes
+F: cpu-exec.c
+F: cputlb.c
+F: softmmu_template.h
+F: translate-all.c
+F: include/exec/cpu_ldst.h
+F: include/exec/cpu_ldst_template.h
+F: include/exec/helper*.h
+
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: target-alpha/
 F: hw/alpha/
+F: tests/tcg/alpha/

 ARM
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -79,13 +92,19 @@ M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
 F: target-cris/
 F: hw/cris/
+F: tests/tcg/cris/

 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
 F: target-lm32/
+F: disas/lm32.c
 F: hw/lm32/
-F: hw/char/lm32_*
+F: hw/*/lm32_*
+F: hw/*/milkymist-*
+F: include/hw/char/lm32_juart.h
+F: include/hw/lm32/
+F: tests/tcg/lm32/

 M68K
 S: Orphan
@@ -100,9 +119,11 @@ F: hw/microblaze/

 MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
-S: Odd Fixes
+M: Leon Alrae <leon.alrae@imgtec.com>
+S: Maintained
 F: target-mips/
 F: hw/mips/
+F: tests/tcg/mips/

 Moxie
 M: Anthony Green <green@moxielogic.com>
@@ -114,6 +135,7 @@ M: Jia Liu <proljc@gmail.com>
 S: Maintained
 F: target-openrisc/
 F: hw/openrisc/
+F: tests/tcg/openrisc/

 PowerPC
 M: Alexander Graf <agraf@suse.de>
@@ -149,7 +171,8 @@ F: target-unicore32/
 F: hw/unicore32/

 X86
-M: qemu-devel@nongnu.org
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Richard Henderson <rth@twiddle.net>
 S: Odd Fixes
 F: target-i386/
 F: hw/i386/
@@ -160,6 +183,7 @@ W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
 F: target-xtensa/
 F: hw/xtensa/
+F: tests/tcg/xtensa/

 TriCore
 M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
@@ -198,9 +222,12 @@ M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target-s390x/kvm.c
-F: hw/intc/s390_flic.[hc]
+F: hw/intc/s390_flic.c
+F: hw/intc/s390_flic_kvm.c
+F: include/hw/s390x/s390_flic.h

 X86
+M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
@@ -266,7 +293,7 @@ F: include/hw/arm/digic.h
 F: hw/*/digic*

 Gumstix
-M: qemu-devel@nongnu.org
+L: qemu-devel@nongnu.org
 S: Orphan
 F: hw/arm/gumstix.c

@@ -282,7 +309,7 @@ S: Maintained
 F: hw/arm/integratorcp.c

 Mainstone
-M: qemu-devel@nongnu.org
+L: qemu-devel@nongnu.org
 S: Orphan
 F: hw/arm/mainstone.c

@@ -388,7 +415,7 @@ S: Maintained
 F: hw/mips/mips_malta.c

 Mipssim
-M: qemu-devel@nongnu.org
+L: qemu-devel@nongnu.org
 S: Orphan
 F: hw/mips/mips_mipssim.c

@@ -511,6 +538,7 @@ S390 Virtio
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: hw/s390x/s390-*.c
+X: hw/s390x/*pci*.[hc]

 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -521,6 +549,9 @@ F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
 F: hw/s390x/sclp*.[hc]
 F: hw/s390x/ipl*.[hc]
+F: hw/s390x/*pci*.[hc]
+F: include/hw/s390x/
+F: pc-bios/s390-ccw/
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr

 UniCore32 Machines
@@ -558,12 +589,13 @@ Xtensa Machines
 sim
 M: Max Filippov <jcmvbkbc@gmail.com>
 S: Maintained
-F: hw/xtensa/xtensa_sim.c
+F: hw/xtensa/sim.c

-Avnet LX60
+XTFPGA (LX60, LX200, ML605, KC705)
 M: Max Filippov <jcmvbkbc@gmail.com>
 S: Maintained
-F: hw/xtensa/xtensa_lx60.c
+F: hw/xtensa/xtfpga.c
+F: hw/net/opencores_eth.c

 Devices
 -------
@@ -622,10 +654,16 @@ S: Maintained
 F: hw/usb/*
 F: tests/usb-*-test.c

+USB (serial adapter)
+M: Gerd Hoffmann <kraxel@redhat.com>
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
+S: Maintained
+F: hw/usb/dev-serial.c
+
 VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
 S: Supported
-F: hw/misc/vfio.c
+F: hw/vfio/*

 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
@@ -664,6 +702,14 @@ M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
+F: include/hw/virtio/virtio-serial.h
+
+virtio-rng
+M: Amit Shah <amit.shah@redhat.com>
+S: Supported
+F: hw/virtio/virtio-rng.c
+F: include/hw/virtio/virtio-rng.h
+F: backends/rng*.c

 nvme
 M: Keith Busch <keith.busch@intel.com>
@@ -706,18 +752,31 @@ Block
 M: Kevin Wolf <kwolf@redhat.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
+F: async.c
+F: aio-*.c
 F: block*
 F: block/
 F: hw/block/
+F: migration/block*
 F: qemu-img*
 F: qemu-io*
+F: tests/image-fuzzer/
+F: tests/qemu-iotests/
 T: git git://repo.or.cz/qemu/kevin.git block
 T: git git://github.com/stefanha/qemu.git block

 Character Devices
 M: Anthony Liguori <aliguori@amazon.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: qemu-char.c
+F: backends/msmouse.c
+F: backends/testdev.c
+
+Character Devices (Braille)
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
+S: Maintained
+F: backends/baum.c

 CPU
 M: Andreas Färber <afaerber@suse.de>
@@ -739,7 +798,7 @@ S: Maintained
 F: device_tree.[ch]

 GDB stub
-M: qemu-devel@nongnu.org
+L: qemu-devel@nongnu.org
 S: Odd Fixes
 F: gdbstub*
 F: gdb-xml/
@@ -776,7 +835,11 @@ F: ui/cocoa.m

 Main loop
 M: Anthony Liguori <aliguori@amazon.com>
-S: Supported
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: cpus.c
+F: main-loop.c
+F: qemu-timer.c
 F: vl.c

 Human Monitor (HMP)
@@ -815,6 +878,7 @@ M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
 S: Maintained
 F: qapi/
+F: tests/qapi-schema/
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 QAPI Schema
@@ -825,6 +889,18 @@ S: Supported
 F: qapi-schema.json
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

+QObject
+M: Luiz Capitulino <lcapitulino@redhat.com>
+S: Maintained
+F: qobject/
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
+
+QEMU Guest Agent
+M: Michael Roth <mdroth@linux.vnet.ibm.com>
+S: Maintained
+F: qga/
+T: git git://github.com/mdroth/qemu.git qga
+
 QOM
 M: Anthony Liguori <aliguori@amazon.com>
 M: Andreas Färber <afaerber@suse.de>
@@ -865,6 +941,17 @@ M: Blue Swirl <blauwirbel@gmail.com>
 S: Odd Fixes
 F: scripts/checkpatch.pl

+Migration
+M: Juan Quintela <quintela@redhat.com>
+M: Amit Shah <amit.shah@redhat.com>
+S: Maintained
+F: include/migration/
+F: migration/
+F: savevm.c
+F: arch_init.c
+F: scripts/vmstate-static-checker.py
+F: tests/vmstate-static-checker-data/
+
 Seccomp
 M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
 S: Supported
@@ -873,6 +960,12 @@ F: include/sysemu/seccomp.h

 Usermode Emulation
 ------------------
+Overall
+M: Riku Voipio <riku.voipio@iki.fi>
+S: Maintained
+F: thunk.c
+F: user-exec.c
+
 BSD user
 M: Blue Swirl <blauwirbel@gmail.com>
 S: Maintained
@@ -886,7 +979,6 @@ F: linux-user/
 Tiny Code Generator (TCG)
 -------------------------
 Common code
-M: qemu-devel@nongnu.org
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: tcg/
@@ -903,7 +995,7 @@ S: Maintained
 F: tcg/arm/

 i386 target
-M: qemu-devel@nongnu.org
+L: qemu-devel@nongnu.org
 S: Maintained
 F: tcg/i386/

@@ -1014,7 +1106,11 @@ S: Supported
 F: block/ssh.c

 ARCHIPELAGO
-M: Chrysostomos Nanakos <cnanakos@grnet.gr>
 M: Chrysostomos Nanakos <chris@include.gr>
 S: Maintained
 F: block/archipelago.c
+
+Bootdevice
+M: Gonglei <arei.gonglei@huawei.com>
+S: Maintained
+F: bootdevice.c
--- a/4
+++ b/4
@@ -313,8 +313,8 @@ qemu-%.tar.bz2:

 distclean: clean
 	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
-	rm -f config-all-devices.mak config-all-disas.mak
-	rm -f po/*.mo
+	rm -f config-all-devices.mak config-all-disas.mak config.status
+	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
 	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,15 +48,9 @@ common-obj-$(CONFIG_POSIX) += os-posix.o

 common-obj-$(CONFIG_LINUX) += fsdev/

-common-obj-y += migration.o migration-tcp.o
-common-obj-y += vmstate.o
-common-obj-y += qemu-file.o
-common-obj-$(CONFIG_RDMA) += migration-rdma.o
+common-obj-y += migration/
 common-obj-y += qemu-char.o #aio.o
-common-obj-y += block-migration.o
-common-obj-y += page_cache.o xbzrle.o
-
-common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o
+common-obj-y += page_cache.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

--- a/2
+++ b/2
@@ -1 +1 @@
-2.1.50
+2.2.50
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -73,7 +73,7 @@ void aio_set_fd_handler(AioContext *ctx,
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_malloc0(sizeof(AioHandler));
+            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

--- a/aio-win32.c
+++ b/aio-win32.c
@@ -67,7 +67,7 @@ void aio_set_fd_handler(AioContext *ctx,

        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_malloc0(sizeof(AioHandler));
+            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
        }
@@ -129,7 +129,7 @@ void aio_set_event_notifier(AioContext *ctx,
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_malloc0(sizeof(AioHandler));
+            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
--- a/arch_init.c
+++ b/arch_init.c
@@ -346,7 +346,8 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr)

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
-    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+                 bitmap_sync_count);
 }

 #define ENCODING_FLAG_XBZRLE 0x1
@@ -358,10 +359,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
    int encoded_len = 0, bytes_sent = -1;
    uint8_t *prev_cached_page;

-    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
-            if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) {
+            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
@@ -486,15 +488,23 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)


 /* Needs iothread lock! */
+/* Fix me: there are too many global variables used in migration process. */
+static int64_t start_time;
+static int64_t bytes_xfer_prev;
+static int64_t num_dirty_pages_period;
+
+static void migration_bitmap_sync_init(void)
+{
+    start_time = 0;
+    bytes_xfer_prev = 0;
+    num_dirty_pages_period = 0;
+}

 static void migration_bitmap_sync(void)
 {
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
-    static int64_t start_time;
-    static int64_t bytes_xfer_prev;
-    static int64_t num_dirty_pages_period;
    int64_t end_time;
    int64_t bytes_xfer_now;
    static uint64_t xbzrle_cache_miss_prev;
@@ -514,7 +524,7 @@ static void migration_bitmap_sync(void)
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
+        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
@@ -660,7 +670,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
            offset >= last_offset) {
            break;
        }
-        if (offset >= block->length) {
+        if (offset >= block->used_length) {
            offset = 0;
            block = QTAILQ_NEXT(block, next);
            if (!block) {
@@ -719,7 +729,7 @@ uint64_t ram_bytes_total(void)
    uint64_t total = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
-        total += block->length;
+        total += block->used_length;

    return total;
 }
@@ -774,6 +784,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;
    bitmap_sync_count = 0;
+    migration_bitmap_sync_init();

    if (migrate_use_xbzrle()) {
        XBZRLE_cache_lock();
@@ -822,7 +833,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        uint64_t block_pages;

-        block_pages = block->length >> TARGET_PAGE_BITS;
+        block_pages = block->used_length >> TARGET_PAGE_BITS;
        migration_dirty_pages += block_pages;
    }

@@ -835,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->length);
+        qemu_put_be64(f, block->used_length);
    }

    qemu_mutex_unlock_ramlist();
@@ -1006,7 +1017,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
-        if (!block) {
+        if (!block || block->max_length <= offset) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
@@ -1019,8 +1030,10 @@ static inline void *host_from_stream_offset(QEMUFile *f,
    id[len] = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (!strncmp(id, block->idstr, sizeof(id)))
+        if (!strncmp(id, block->idstr, sizeof(id)) &&
+            block->max_length > offset) {
            return memory_region_get_ram_ptr(block->mr) + offset;
+        }
    }

    error_report("Can't find block %s!", id);
@@ -1040,8 +1053,7 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)

 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
-    ram_addr_t addr;
-    int flags, ret = 0;
+    int flags = 0, ret = 0;
    static uint64_t seq_iter;

    seq_iter++;
@@ -1050,21 +1062,24 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
        ret = -EINVAL;
    }

-    while (!ret) {
-        addr = qemu_get_be64(f);
+    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
+        ram_addr_t addr, total_ram_bytes;
+        void *host;
+        uint8_t ch;

+        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

-        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
+        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
+        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
-            char id[256];
-            ram_addr_t length;
-            ram_addr_t total_ram_bytes = addr;
-
-            while (total_ram_bytes) {
+            total_ram_bytes = addr;
+            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                uint8_t len;
+                char id[256];
+                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
@@ -1073,11 +1088,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)

                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
-                        if (block->length != length) {
-                            error_report("Length mismatch: %s: 0x" RAM_ADDR_FMT
-                                         " in != 0x" RAM_ADDR_FMT, id, length,
-                                         block->length);
-                            ret =  -EINVAL;
+                        if (length != block->used_length) {
+                            Error *local_err = NULL;
+
+                            ret = qemu_ram_resize(block->offset, length, &local_err);
+                            if (local_err) {
+                                error_report("%s", error_get_pretty(local_err));
+                                error_free(local_err);
+                            }
                        }
                        break;
                    }
@@ -1088,16 +1106,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                                 "accept migration", id);
                    ret = -EINVAL;
                }
-                if (ret) {
-                    break;
-                }

                total_ram_bytes -= length;
            }
-        } else if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
-            uint8_t ch;
-
+            break;
+        case RAM_SAVE_FLAG_COMPRESS:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
@@ -1107,9 +1120,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)

            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
-        } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
+            break;
+        case RAM_SAVE_FLAG_PAGE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
@@ -1118,8 +1130,9 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
-        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
-            void *host = host_from_stream_offset(f, addr, flags);
+            break;
+        case RAM_SAVE_FLAG_XBZRLE:
+            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
@@ -1132,17 +1145,22 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                ret = -EINVAL;
                break;
            }
-        } else if (flags & RAM_SAVE_FLAG_HOOK) {
-            ram_control_load_hook(f, flags);
-        } else if (flags & RAM_SAVE_FLAG_EOS) {
+            break;
+        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
-        } else {
-            error_report("Unknown migration flags: %#x", flags);
-            ret = -EINVAL;
-            break;
+        default:
+            if (flags & RAM_SAVE_FLAG_HOOK) {
+                ram_control_load_hook(f, flags);
+            } else {
+                error_report("Unknown combination of migration flags: %#x",
+                             flags);
+                ret = -EINVAL;
+            }
+        }
+        if (!ret) {
+            ret = qemu_file_get_error(f);
        }
-        ret = qemu_file_get_error(f);
    }

    DPRINTF("Completed load of VM with exit code %d seq iteration "
--- a/async.c
+++ b/async.c
@@ -44,10 +44,12 @@ struct QEMUBH {
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
    QEMUBH *bh;
-    bh = g_malloc0(sizeof(QEMUBH));
-    bh->ctx = ctx;
-    bh->cb = cb;
-    bh->opaque = opaque;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
@@ -300,6 +302,7 @@ AioContext *aio_context_new(Error **errp)
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        return NULL;
    }
+    g_source_set_can_recurse(&ctx->source, true);
    aio_set_event_notifier(ctx, &ctx->notifier,
                           (EventNotifierHandler *)
                           event_notifier_test_and_clear);
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -191,9 +191,9 @@ static void glue (audio_pcm_hw_gc_, TYPE) (HW **hwp)
        audio_detach_capture (hw);
 #endif
        QLIST_REMOVE (hw, entries);
+        glue (hw->pcm_ops->fini_, TYPE) (hw);
        glue (s->nb_hw_voices_, TYPE) += 1;
        glue (audio_pcm_hw_free_resources_ ,TYPE) (hw);
-        glue (hw->pcm_ops->fini_, TYPE) (hw);
        g_free (hw);
        *hwp = NULL;
    }
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -88,11 +88,7 @@ static char *rng_random_get_filename(Object *obj, Error **errp)
 {
    RndRandom *s = RNG_RANDOM(obj);

-    if (s->filename) {
-        return g_strdup(s->filename);
-    }
-
-    return NULL;
+    return g_strdup(s->filename);
 }

 static void rng_random_set_filename(Object *obj, const char *filename,
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,11 +5,11 @@ block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += snapshot.o qapi.o
+block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o
+block-obj-y += null.o mirror.o

 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
@@ -23,7 +23,6 @@ block-obj-y += accounting.o

 common-obj-y += stream.o
 common-obj-y += commit.o
-common-obj-y += mirror.o
 common-obj-y += backup.o

 iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -24,6 +24,7 @@

 #include "block/accounting.h"
 #include "block/block_int.h"
+#include "qemu/timer.h"

 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
                      int64_t bytes, enum BlockAcctType type)
@@ -31,7 +32,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
    assert(type < BLOCK_MAX_IOTYPE);

    cookie->bytes = bytes;
-    cookie->start_time_ns = get_clock();
+    cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    cookie->type = type;
 }

@@ -41,7 +42,8 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)

    stats->nr_bytes[cookie->type] += cookie->bytes;
    stats->nr_ops[cookie->type]++;
-    stats->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+    stats->total_time_ns[cookie->type] +=
+        qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns;
 }


--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -86,7 +86,7 @@ typedef enum {
 } ARCHIPCmd;

 typedef struct ArchipelagoAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
    struct BDRVArchipelagoState *s;
    QEMUIOVector *qiov;
@@ -856,13 +856,13 @@ err_exit:
    return ret;
 }

-static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
-                                                 int64_t sector_num,
-                                                 QEMUIOVector *qiov,
-                                                 int nb_sectors,
-                                                 BlockDriverCompletionFunc *cb,
-                                                 void *opaque,
-                                                 int op)
+static BlockAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
+                                           int64_t sector_num,
+                                           QEMUIOVector *qiov,
+                                           int nb_sectors,
+                                           BlockCompletionFunc *cb,
+                                           void *opaque,
+                                           int op)
 {
    ArchipelagoAIOCB *aio_cb;
    BDRVArchipelagoState *s = bs->opaque;
@@ -894,17 +894,17 @@ err_exit:
    return NULL;
 }

-static BlockDriverAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
                                   opaque, ARCHIP_OP_READ);
 }

-static BlockDriverAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
+static BlockAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
                                   opaque, ARCHIP_OP_WRITE);
@@ -1052,8 +1052,8 @@ static QemuOptsList qemu_archipelago_create_opts = {
    }
 };

-static BlockDriverAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
+        BlockCompletionFunc *cb, void *opaque)
 {
    return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
                                   ARCHIP_OP_FLUSH);
--- a/block/backup.c
+++ b/block/backup.c
@@ -227,9 +227,25 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
    }
 }

+typedef struct {
+    int ret;
+} BackupCompleteData;
+
+static void backup_complete(BlockJob *job, void *opaque)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    BackupCompleteData *data = opaque;
+
+    bdrv_unref(s->target);
+
+    block_job_completed(job, data->ret);
+    g_free(data);
+}
+
 static void coroutine_fn backup_run(void *opaque)
 {
    BackupBlockJob *job = opaque;
+    BackupCompleteData *data;
    BlockDriverState *bs = job->common.bs;
    BlockDriverState *target = job->target;
    BlockdevOnError on_target_error = job->on_target_error;
@@ -344,16 +360,18 @@ static void coroutine_fn backup_run(void *opaque)
    hbitmap_free(job->bitmap);

    bdrv_iostatus_disable(target);
-    bdrv_unref(target);
+    bdrv_op_unblock_all(target, job->common.blocker);

-    block_job_completed(&job->common, ret);
+    data = g_malloc(sizeof(*data));
+    data->ret = ret;
+    block_job_defer_to_main_loop(&job->common, backup_complete, data);
 }

 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode sync_mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  BlockDriverCompletionFunc *cb, void *opaque,
+                  BlockCompletionFunc *cb, void *opaque,
                  Error **errp)
 {
    int64_t len;
@@ -362,6 +380,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    assert(target);
    assert(cb);

+    if (bs == target) {
+        error_setg(errp, "Source and target cannot be the same");
+        return;
+    }
+
    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
@@ -369,6 +392,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

+    if (!bdrv_is_inserted(bs)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(bs));
+        return;
+    }
+
+    if (!bdrv_is_inserted(target)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(target));
+        return;
+    }
+
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+        return;
+    }
+
+    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+        return;
+    }
+
    len = bdrv_getlength(bs);
    if (len < 0) {
        error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -382,6 +425,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

+    bdrv_op_block_all(target, job->common.blocker);
+
    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->target = target;
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -41,7 +41,7 @@ typedef struct BDRVBlkdebugState {
 } BDRVBlkdebugState;

 typedef struct BlkdebugAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
    int ret;
 } BlkdebugAIOCB;
@@ -195,6 +195,8 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
    [BLKDBG_PWRITEV]                        = "pwritev",
    [BLKDBG_PWRITEV_ZERO]                   = "pwritev_zero",
    [BLKDBG_PWRITEV_DONE]                   = "pwritev_done",
+
+    [BLKDBG_EMPTY_IMAGE_PREPARE]            = "empty_image_prepare",
 };

 static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -463,8 +465,8 @@ static void error_callback_bh(void *opaque)
    qemu_aio_unref(acb);
 }

-static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
-    BlockDriverCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static BlockAIOCB *inject_error(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
@@ -489,9 +491,9 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockDriverCompletionFunc *cb, void *opaque)
+    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -511,9 +513,9 @@ static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }

-static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockDriverCompletionFunc *cb, void *opaque)
+    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -533,8 +535,8 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }

-static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockDriverCompletionFunc *cb, void *opaque)
+static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -719,93 +721,50 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)

 static void blkdebug_refresh_filename(BlockDriverState *bs)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    struct BlkdebugRule *rule;
    QDict *opts;
-    QList *inject_error_list = NULL, *set_state_list = NULL;
-    QList *suspend_list = NULL;
-    int event;
+    const QDictEntry *e;
+    bool force_json = false;

-    if (!bs->file->full_open_options) {
+    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+        if (strcmp(qdict_entry_key(e), "config") &&
+            strcmp(qdict_entry_key(e), "x-image") &&
+            strcmp(qdict_entry_key(e), "image") &&
+            strncmp(qdict_entry_key(e), "image.", strlen("image.")))
+        {
+            force_json = true;
+            break;
+        }
+    }
+
+    if (force_json && !bs->file->full_open_options) {
        /* The config file cannot be recreated, so creating a plain filename
         * is impossible */
        return;
    }

+    if (!force_json && bs->file->exact_filename[0]) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "blkdebug:%s:%s",
+                 qdict_get_try_str(bs->options, "config") ?: "",
+                 bs->file->exact_filename);
+    }
+
    opts = qdict_new();
    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));

    QINCREF(bs->file->full_open_options);
    qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));

-    for (event = 0; event < BLKDBG_EVENT_MAX; event++) {
-        QLIST_FOREACH(rule, &s->rules[event], next) {
-            if (rule->action == ACTION_INJECT_ERROR) {
-                QDict *inject_error = qdict_new();
-
-                qdict_put_obj(inject_error, "event", QOBJECT(qstring_from_str(
-                              BlkdebugEvent_lookup[rule->event])));
-                qdict_put_obj(inject_error, "state",
-                              QOBJECT(qint_from_int(rule->state)));
-                qdict_put_obj(inject_error, "errno", QOBJECT(qint_from_int(
-                              rule->options.inject.error)));
-                qdict_put_obj(inject_error, "sector", QOBJECT(qint_from_int(
-                              rule->options.inject.sector)));
-                qdict_put_obj(inject_error, "once", QOBJECT(qbool_from_int(
-                              rule->options.inject.once)));
-                qdict_put_obj(inject_error, "immediately",
-                              QOBJECT(qbool_from_int(
-                              rule->options.inject.immediately)));
-
-                if (!inject_error_list) {
-                    inject_error_list = qlist_new();
-                }
-
-                qlist_append_obj(inject_error_list, QOBJECT(inject_error));
-            } else if (rule->action == ACTION_SET_STATE) {
-                QDict *set_state = qdict_new();
-
-                qdict_put_obj(set_state, "event", QOBJECT(qstring_from_str(
-                              BlkdebugEvent_lookup[rule->event])));
-                qdict_put_obj(set_state, "state",
-                              QOBJECT(qint_from_int(rule->state)));
-                qdict_put_obj(set_state, "new_state", QOBJECT(qint_from_int(
-                              rule->options.set_state.new_state)));
-
-                if (!set_state_list) {
-                    set_state_list = qlist_new();
-                }
-
-                qlist_append_obj(set_state_list, QOBJECT(set_state));
-            } else if (rule->action == ACTION_SUSPEND) {
-                QDict *suspend = qdict_new();
-
-                qdict_put_obj(suspend, "event", QOBJECT(qstring_from_str(
-                              BlkdebugEvent_lookup[rule->event])));
-                qdict_put_obj(suspend, "state",
-                              QOBJECT(qint_from_int(rule->state)));
-                qdict_put_obj(suspend, "tag", QOBJECT(qstring_from_str(
-                              rule->options.suspend.tag)));
-
-                if (!suspend_list) {
-                    suspend_list = qlist_new();
-                }
-
-                qlist_append_obj(suspend_list, QOBJECT(suspend));
-            }
+    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+        if (strcmp(qdict_entry_key(e), "x-image") &&
+            strcmp(qdict_entry_key(e), "image") &&
+            strncmp(qdict_entry_key(e), "image.", strlen("image.")))
+        {
+            qobject_incref(qdict_entry_value(e));
+            qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
        }
    }

-    if (inject_error_list) {
-        qdict_put_obj(opts, "inject-error", QOBJECT(inject_error_list));
-    }
-    if (set_state_list) {
-        qdict_put_obj(opts, "set-state", QOBJECT(set_state_list));
-    }
-    if (suspend_list) {
-        qdict_put_obj(opts, "suspend", QOBJECT(suspend_list));
-    }
-
    bs->full_open_options = opts;
 }

--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,7 +19,7 @@ typedef struct {

 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
 struct BlkverifyAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;

    /* Request metadata */
@@ -165,7 +165,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
-                                         BlockDriverCompletionFunc *cb,
+                                         BlockCompletionFunc *cb,
                                         void *opaque)
 {
    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
@@ -229,9 +229,9 @@ static void blkverify_verify_readv(BlkverifyAIOCB *acb)
    }
 }

-static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
@@ -249,9 +249,9 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
@@ -264,9 +264,9 @@ static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                             BlockDriverCompletionFunc *cb,
-                                             void *opaque)
+static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+                                       BlockCompletionFunc *cb,
+                                       void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;

--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -0,0 +1,665 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+#include "sysemu/blockdev.h"
+#include "qapi-event.h"
+
+/* Number of coroutines to reserve per attached device model */
+#define COROUTINE_POOL_RESERVATION 64
+
+struct BlockBackend {
+    char *name;
+    int refcnt;
+    BlockDriverState *bs;
+    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
+    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+
+    void *dev;                  /* attached device model, if any */
+    /* TODO change to DeviceState when all users are qdevified */
+    const BlockDevOps *dev_ops;
+    void *dev_opaque;
+};
+
+static void drive_info_del(DriveInfo *dinfo);
+
+/* All the BlockBackends (except for hidden ones) */
+static QTAILQ_HEAD(, BlockBackend) blk_backends =
+    QTAILQ_HEAD_INITIALIZER(blk_backends);
+
+/*
+ * Create a new BlockBackend with @name, with a reference count of one.
+ * @name must not be null or empty.
+ * Fail if a BlockBackend with this name already exists.
+ * Store an error through @errp on failure, unless it's null.
+ * Return the new BlockBackend on success, null on failure.
+ */
+BlockBackend *blk_new(const char *name, Error **errp)
+{
+    BlockBackend *blk;
+
+    assert(name && name[0]);
+    if (!id_wellformed(name)) {
+        error_setg(errp, "Invalid device name");
+        return NULL;
+    }
+    if (blk_by_name(name)) {
+        error_setg(errp, "Device with id '%s' already exists", name);
+        return NULL;
+    }
+    if (bdrv_find_node(name)) {
+        error_setg(errp,
+                   "Device name '%s' conflicts with an existing node name",
+                   name);
+        return NULL;
+    }
+
+    blk = g_new0(BlockBackend, 1);
+    blk->name = g_strdup(name);
+    blk->refcnt = 1;
+    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+    return blk;
+}
+
+/*
+ * Create a new BlockBackend with a new BlockDriverState attached.
+ * Otherwise just like blk_new(), which see.
+ */
+BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+
+    blk = blk_new(name, errp);
+    if (!blk) {
+        return NULL;
+    }
+
+    bs = bdrv_new_root();
+    blk->bs = bs;
+    bs->blk = blk;
+    return blk;
+}
+
+static void blk_delete(BlockBackend *blk)
+{
+    assert(!blk->refcnt);
+    assert(!blk->dev);
+    if (blk->bs) {
+        assert(blk->bs->blk == blk);
+        blk->bs->blk = NULL;
+        bdrv_unref(blk->bs);
+        blk->bs = NULL;
+    }
+    /* Avoid double-remove after blk_hide_on_behalf_of_do_drive_del() */
+    if (blk->name[0]) {
+        QTAILQ_REMOVE(&blk_backends, blk, link);
+    }
+    g_free(blk->name);
+    drive_info_del(blk->legacy_dinfo);
+    g_free(blk);
+}
+
+static void drive_info_del(DriveInfo *dinfo)
+{
+    if (!dinfo) {
+        return;
+    }
+    qemu_opts_del(dinfo->opts);
+    g_free(dinfo->serial);
+    g_free(dinfo);
+}
+
+/*
+ * Increment @blk's reference count.
+ * @blk must not be null.
+ */
+void blk_ref(BlockBackend *blk)
+{
+    blk->refcnt++;
+}
+
+/*
+ * Decrement @blk's reference count.
+ * If this drops it to zero, destroy @blk.
+ * For convenience, do nothing if @blk is null.
+ */
+void blk_unref(BlockBackend *blk)
+{
+    if (blk) {
+        assert(blk->refcnt > 0);
+        if (!--blk->refcnt) {
+            blk_delete(blk);
+        }
+    }
+}
+
+/*
+ * Return the BlockBackend after @blk.
+ * If @blk is null, return the first one.
+ * Else, return @blk's next sibling, which may be null.
+ *
+ * To iterate over all BlockBackends, do
+ * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+ *     ...
+ * }
+ */
+BlockBackend *blk_next(BlockBackend *blk)
+{
+    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+}
+
+/*
+ * Return @blk's name, a non-null string.
+ * Wart: the name is empty iff @blk has been hidden with
+ * blk_hide_on_behalf_of_do_drive_del().
+ */
+const char *blk_name(BlockBackend *blk)
+{
+    return blk->name;
+}
+
+/*
+ * Return the BlockBackend with name @name if it exists, else null.
+ * @name must not be null.
+ */
+BlockBackend *blk_by_name(const char *name)
+{
+    BlockBackend *blk;
+
+    assert(name);
+    QTAILQ_FOREACH(blk, &blk_backends, link) {
+        if (!strcmp(name, blk->name)) {
+            return blk;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Return the BlockDriverState attached to @blk if any, else null.
+ */
+BlockDriverState *blk_bs(BlockBackend *blk)
+{
+    return blk->bs;
+}
+
+/*
+ * Return @blk's DriveInfo if any, else null.
+ */
+DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
+{
+    return blk->legacy_dinfo;
+}
+
+/*
+ * Set @blk's DriveInfo to @dinfo, and return it.
+ * @blk must not have a DriveInfo set already.
+ * No other BlockBackend may have the same DriveInfo set.
+ */
+DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
+{
+    assert(!blk->legacy_dinfo);
+    return blk->legacy_dinfo = dinfo;
+}
+
+/*
+ * Return the BlockBackend with DriveInfo @dinfo.
+ * It must exist.
+ */
+BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
+{
+    BlockBackend *blk;
+
+    QTAILQ_FOREACH(blk, &blk_backends, link) {
+        if (blk->legacy_dinfo == dinfo) {
+            return blk;
+        }
+    }
+    abort();
+}
+
+/*
+ * Hide @blk.
+ * @blk must not have been hidden already.
+ * Make attached BlockDriverState, if any, anonymous.
+ * Once hidden, @blk is invisible to all functions that don't receive
+ * it as argument.  For example, blk_by_name() won't return it.
+ * Strictly for use by do_drive_del().
+ * TODO get rid of it!
+ */
+void blk_hide_on_behalf_of_do_drive_del(BlockBackend *blk)
+{
+    QTAILQ_REMOVE(&blk_backends, blk, link);
+    blk->name[0] = 0;
+    if (blk->bs) {
+        bdrv_make_anon(blk->bs);
+    }
+}
+
+/*
+ * Attach device model @dev to @blk.
+ * Return 0 on success, -EBUSY when a device model is attached already.
+ */
+int blk_attach_dev(BlockBackend *blk, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+    if (blk->dev) {
+        return -EBUSY;
+    }
+    blk_ref(blk);
+    blk->dev = dev;
+    bdrv_iostatus_reset(blk->bs);
+    return 0;
+}
+
+/*
+ * Attach device model @dev to @blk.
+ * @blk must not have a device model attached already.
+ * TODO qdevified devices don't use this, remove when devices are qdevified
+ */
+void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
+{
+    if (blk_attach_dev(blk, dev) < 0) {
+        abort();
+    }
+}
+
+/*
+ * Detach device model @dev from @blk.
+ * @dev must be currently attached to @blk.
+ */
+void blk_detach_dev(BlockBackend *blk, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+    assert(blk->dev == dev);
+    blk->dev = NULL;
+    blk->dev_ops = NULL;
+    blk->dev_opaque = NULL;
+    bdrv_set_guest_block_size(blk->bs, 512);
+    blk_unref(blk);
+}
+
+/*
+ * Return the device model attached to @blk if any, else null.
+ */
+void *blk_get_attached_dev(BlockBackend *blk)
+/* TODO change to return DeviceState * when all users are qdevified */
+{
+    return blk->dev;
+}
+
+/*
+ * Set @blk's device model callbacks to @ops.
+ * @opaque is the opaque argument to pass to the callbacks.
+ * This is for use by device models.
+ */
+void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
+                     void *opaque)
+{
+    blk->dev_ops = ops;
+    blk->dev_opaque = opaque;
+}
+
+/*
+ * Notify @blk's attached device model of media change.
+ * If @load is true, notify of media load.
+ * Else, notify of media eject.
+ * Also send DEVICE_TRAY_MOVED events as appropriate.
+ */
+void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+{
+    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
+        bool tray_was_closed = !blk_dev_is_tray_open(blk);
+
+        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+        if (tray_was_closed) {
+            /* tray open */
+            qapi_event_send_device_tray_moved(blk_name(blk),
+                                              true, &error_abort);
+        }
+        if (load) {
+            /* tray close */
+            qapi_event_send_device_tray_moved(blk_name(blk),
+                                              false, &error_abort);
+        }
+    }
+}
+
+/*
+ * Does @blk's attached device model have removable media?
+ * %true if no device model is attached.
+ */
+bool blk_dev_has_removable_media(BlockBackend *blk)
+{
+    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
+}
+
+/*
+ * Notify @blk's attached device model of a media eject request.
+ * If @force is true, the medium is about to be yanked out forcefully.
+ */
+void blk_dev_eject_request(BlockBackend *blk, bool force)
+{
+    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
+        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
+    }
+}
+
+/*
+ * Does @blk's attached device model have a tray, and is it open?
+ */
+bool blk_dev_is_tray_open(BlockBackend *blk)
+{
+    if (blk->dev_ops && blk->dev_ops->is_tray_open) {
+        return blk->dev_ops->is_tray_open(blk->dev_opaque);
+    }
+    return false;
+}
+
+/*
+ * Does @blk's attached device model have the medium locked?
+ * %false if the device model has no such lock.
+ */
+bool blk_dev_is_medium_locked(BlockBackend *blk)
+{
+    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
+        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
+    }
+    return false;
+}
+
+/*
+ * Notify @blk's attached device model of a backend size change.
+ */
+void blk_dev_resize_cb(BlockBackend *blk)
+{
+    if (blk->dev_ops && blk->dev_ops->resize_cb) {
+        blk->dev_ops->resize_cb(blk->dev_opaque);
+    }
+}
+
+void blk_iostatus_enable(BlockBackend *blk)
+{
+    bdrv_iostatus_enable(blk->bs);
+}
+
+int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+             int nb_sectors)
+{
+    return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
+}
+
+int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+                         int nb_sectors)
+{
+    return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
+}
+
+int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
+              int nb_sectors)
+{
+    return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
+}
+
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
+                                 int nb_sectors, BdrvRequestFlags flags,
+                                 BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
+                                 cb, opaque);
+}
+
+int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
+{
+    return bdrv_pread(blk->bs, offset, buf, count);
+}
+
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+{
+    return bdrv_pwrite(blk->bs, offset, buf, count);
+}
+
+int64_t blk_getlength(BlockBackend *blk)
+{
+    return bdrv_getlength(blk->bs);
+}
+
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
+{
+    bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+}
+
+BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
+                          QEMUIOVector *iov, int nb_sectors,
+                          BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+}
+
+BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
+                           QEMUIOVector *iov, int nb_sectors,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+}
+
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+                          BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_flush(blk->bs, cb, opaque);
+}
+
+BlockAIOCB *blk_aio_discard(BlockBackend *blk,
+                            int64_t sector_num, int nb_sectors,
+                            BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
+}
+
+void blk_aio_cancel(BlockAIOCB *acb)
+{
+    bdrv_aio_cancel(acb);
+}
+
+void blk_aio_cancel_async(BlockAIOCB *acb)
+{
+    bdrv_aio_cancel_async(acb);
+}
+
+int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
+{
+    return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
+}
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+{
+    return bdrv_ioctl(blk->bs, req, buf);
+}
+
+BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
+                          BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
+}
+
+int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
+{
+    return bdrv_co_discard(blk->bs, sector_num, nb_sectors);
+}
+
+int blk_co_flush(BlockBackend *blk)
+{
+    return bdrv_co_flush(blk->bs);
+}
+
+int blk_flush(BlockBackend *blk)
+{
+    return bdrv_flush(blk->bs);
+}
+
+int blk_flush_all(void)
+{
+    return bdrv_flush_all();
+}
+
+void blk_drain_all(void)
+{
+    bdrv_drain_all();
+}
+
+BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
+{
+    return bdrv_get_on_error(blk->bs, is_read);
+}
+
+BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
+                                      int error)
+{
+    return bdrv_get_error_action(blk->bs, is_read, error);
+}
+
+void blk_error_action(BlockBackend *blk, BlockErrorAction action,
+                      bool is_read, int error)
+{
+    bdrv_error_action(blk->bs, action, is_read, error);
+}
+
+int blk_is_read_only(BlockBackend *blk)
+{
+    return bdrv_is_read_only(blk->bs);
+}
+
+int blk_is_sg(BlockBackend *blk)
+{
+    return bdrv_is_sg(blk->bs);
+}
+
+int blk_enable_write_cache(BlockBackend *blk)
+{
+    return bdrv_enable_write_cache(blk->bs);
+}
+
+void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
+{
+    bdrv_set_enable_write_cache(blk->bs, wce);
+}
+
+void blk_invalidate_cache(BlockBackend *blk, Error **errp)
+{
+    bdrv_invalidate_cache(blk->bs, errp);
+}
+
+int blk_is_inserted(BlockBackend *blk)
+{
+    return bdrv_is_inserted(blk->bs);
+}
+
+void blk_lock_medium(BlockBackend *blk, bool locked)
+{
+    bdrv_lock_medium(blk->bs, locked);
+}
+
+void blk_eject(BlockBackend *blk, bool eject_flag)
+{
+    bdrv_eject(blk->bs, eject_flag);
+}
+
+int blk_get_flags(BlockBackend *blk)
+{
+    return bdrv_get_flags(blk->bs);
+}
+
+void blk_set_guest_block_size(BlockBackend *blk, int align)
+{
+    bdrv_set_guest_block_size(blk->bs, align);
+}
+
+void *blk_blockalign(BlockBackend *blk, size_t size)
+{
+    return qemu_blockalign(blk ? blk->bs : NULL, size);
+}
+
+bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
+{
+    return bdrv_op_is_blocked(blk->bs, op, errp);
+}
+
+void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
+{
+    bdrv_op_unblock(blk->bs, op, reason);
+}
+
+void blk_op_block_all(BlockBackend *blk, Error *reason)
+{
+    bdrv_op_block_all(blk->bs, reason);
+}
+
+void blk_op_unblock_all(BlockBackend *blk, Error *reason)
+{
+    bdrv_op_unblock_all(blk->bs, reason);
+}
+
+AioContext *blk_get_aio_context(BlockBackend *blk)
+{
+    return bdrv_get_aio_context(blk->bs);
+}
+
+void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
+{
+    bdrv_set_aio_context(blk->bs, new_context);
+}
+
+void blk_add_aio_context_notifier(BlockBackend *blk,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque)
+{
+    bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+                                  detach_aio_context, opaque);
+}
+
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+                                     void (*attached_aio_context)(AioContext *,
+                                                                  void *),
+                                     void (*detach_aio_context)(void *),
+                                     void *opaque)
+{
+    bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+                                     detach_aio_context, opaque);
+}
+
+void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
+{
+    bdrv_add_close_notifier(blk->bs, notify);
+}
+
+void blk_io_plug(BlockBackend *blk)
+{
+    bdrv_io_plug(blk->bs);
+}
+
+void blk_io_unplug(BlockBackend *blk)
+{
+    bdrv_io_unplug(blk->bs);
+}
+
+BlockAcctStats *blk_get_stats(BlockBackend *blk)
+{
+    return bdrv_get_stats(blk->bs);
+}
+
+void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
+                  BlockCompletionFunc *cb, void *opaque)
+{
+    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
+}
--- a/block/commit.c
+++ b/block/commit.c
@@ -60,17 +60,50 @@ static int coroutine_fn commit_populate(BlockDriverState *bs,
    return 0;
 }

-static void coroutine_fn commit_run(void *opaque)
+typedef struct {
+    int ret;
+} CommitCompleteData;
+
+static void commit_complete(BlockJob *job, void *opaque)
 {
-    CommitBlockJob *s = opaque;
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+    CommitCompleteData *data = opaque;
    BlockDriverState *active = s->active;
    BlockDriverState *top = s->top;
    BlockDriverState *base = s->base;
    BlockDriverState *overlay_bs;
+    int ret = data->ret;
+
+    if (!block_job_is_cancelled(&s->common) && ret == 0) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+    }
+
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    overlay_bs = bdrv_find_overlay(active, top);
+    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+    g_free(s->backing_file_str);
+    block_job_completed(&s->common, ret);
+    g_free(data);
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    CommitCompleteData *data;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
    int64_t sector_num, end;
    int ret = 0;
    int n = 0;
-    void *buf;
+    void *buf = NULL;
    int bytes_written = 0;
    int64_t base_len;

@@ -78,18 +111,18 @@ static void coroutine_fn commit_run(void *opaque)


    if (s->common.len < 0) {
-        goto exit_restore_reopen;
+        goto out;
    }

    ret = base_len = bdrv_getlength(base);
    if (base_len < 0) {
-        goto exit_restore_reopen;
+        goto out;
    }

    if (base_len < s->common.len) {
        ret = bdrv_truncate(base, s->common.len);
        if (ret) {
-            goto exit_restore_reopen;
+            goto out;
        }
    }

@@ -128,7 +161,7 @@ wait:
            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
-                goto exit_free_buf;
+                goto out;
            } else {
                n = 0;
                continue;
@@ -140,27 +173,12 @@ wait:

    ret = 0;

-    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
-        /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
-    }
-
-exit_free_buf:
+out:
    qemu_vfree(buf);

-exit_restore_reopen:
-    /* restore base open flags here if appropriate (e.g., change the base back
-     * to r/o). These reopens do not need to be atomic, since we won't abort
-     * even on failure here */
-    if (s->base_flags != bdrv_get_flags(base)) {
-        bdrv_reopen(base, s->base_flags, NULL);
-    }
-    overlay_bs = bdrv_find_overlay(active, top);
-    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
-        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
-    }
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, ret);
+    data = g_malloc(sizeof(*data));
+    data->ret = ret;
+    block_job_defer_to_main_loop(&s->common, commit_complete, data);
 }

 static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -182,7 +200,7 @@ static const BlockJobDriver commit_job_driver = {

 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                  BlockdevOnError on_error, BlockCompletionFunc *cb,
                  void *opaque, const char *backing_file_str, Error **errp)
 {
    CommitBlockJob *s;
--- a/block/curl.c
+++ b/block/curl.c
@@ -64,6 +64,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
 #define CURL_TIMEOUT_DEFAULT 5
+#define CURL_TIMEOUT_MAX 10000

 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
@@ -78,7 +79,7 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
    QEMUIOVector *qiov;

@@ -112,7 +113,7 @@ typedef struct BDRVCURLState {
    char *url;
    size_t readahead_size;
    bool sslverify;
-    int timeout;
+    uint64_t timeout;
    char *cookie;
    bool accept_range;
    AioContext *aio_context;
@@ -390,7 +391,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
        if (s->cookie) {
            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
        }
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, s->timeout);
+        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
        curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
                         (void *)curl_read_cb);
        curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -546,6 +547,10 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,

    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
                                     CURL_TIMEOUT_DEFAULT);
+    if (s->timeout > CURL_TIMEOUT_MAX) {
+        error_setg(errp, "timeout parameter is too large or negative");
+        goto out_noclean;
+    }

    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);

@@ -680,9 +685,9 @@ static void curl_readv_bh_cb(void *p)
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 }

-static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    CURLAIOCB *acb;

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -80,7 +80,7 @@ typedef struct IscsiTask {
 } IscsiTask;

 typedef struct IscsiAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUIOVector *qiov;
    QEMUBH *bh;
    IscsiLun *iscsilun;
@@ -227,7 +227,7 @@ iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 }

 static void
-iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
+iscsi_aio_cancel(BlockAIOCB *blockacb)
 {
    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
    IscsiLun *iscsilun = acb->iscsilun;
@@ -362,6 +362,12 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
        return -EINVAL;
    }

+    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
+        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
+                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
+        return -EINVAL;
+    }
+
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
@@ -529,6 +535,12 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
        return -EINVAL;
    }

+    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
+        error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
+                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
+        return -EINVAL;
+    }
+
    if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int64_t ret;
@@ -663,9 +675,9 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
    iscsi_schedule_bh(acb);
 }

-static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
+static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = iscsilun->iscsi;
@@ -1219,6 +1231,40 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
 }

+static bool iscsi_is_write_protected(IscsiLun *iscsilun)
+{
+    struct scsi_task *task;
+    struct scsi_mode_sense *ms = NULL;
+    bool wrprotected = false;
+
+    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
+                                 1, SCSI_MODESENSE_PC_CURRENT,
+                                 0x3F, 0, 255);
+    if (task == NULL) {
+        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
+                     iscsi_get_error(iscsilun->iscsi));
+        goto out;
+    }
+
+    if (task->status != SCSI_STATUS_GOOD) {
+        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
+        goto out;
+    }
+    ms = scsi_datain_unmarshall(task);
+    if (!ms) {
+        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
+                     iscsi_get_error(iscsilun->iscsi));
+        goto out;
+    }
+    wrprotected = ms->device_specific_parameter & 0x80;
+
+out:
+    if (task) {
+        scsi_free_scsi_task(task);
+    }
+    return wrprotected;
+}
+
 /*
 * We support iscsi url's on the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1240,7 +1286,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
-    int i, ret;
+    int i, ret = 0;

    if ((BDRV_SECTOR_SIZE % 512) != 0) {
        error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
@@ -1339,6 +1385,14 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    scsi_free_scsi_task(task);
    task = NULL;

+    /* Check the write protect flag of the LUN if we want to write */
+    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
+        iscsi_is_write_protected(iscsilun)) {
+        error_setg(errp, "Cannot open a write protected LUN as read-write");
+        ret = -EACCES;
+        goto out;
+    }
+
    iscsi_readcapacity_sync(iscsilun, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -1447,31 +1501,44 @@ static void iscsi_close(BlockDriverState *bs)
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

+static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
+{
+    return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
+}
+
 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    IscsiLun *iscsilun = bs->opaque;
-
    /* We don't actually refresh here, but just return data queried in
     * iscsi_open(): iscsi targets don't change their limits. */
+
+    IscsiLun *iscsilun = bs->opaque;
+    uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+
+    if (iscsilun->bl.max_xfer_len) {
+        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
+    }
+
+    bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
+
    if (iscsilun->lbp.lbpu) {
        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
-                                                 iscsilun);
+            bs->bl.max_discard =
+                sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
        }
-        bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                   iscsilun);
+        bs->bl.discard_alignment =
+            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
    }

    if (iscsilun->bl.max_ws_len < 0xffffffff) {
-        bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
-                                                  iscsilun);
+        bs->bl.max_write_zeroes =
+            sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
    }
    if (iscsilun->lbp.lbpws) {
-        bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                        iscsilun);
+        bs->bl.write_zeroes_alignment =
+            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
    }
-    bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
-                                                 iscsilun);
+    bs->bl.opt_transfer_length =
+        sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
 }

 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
@@ -1519,7 +1586,7 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    IscsiLun *iscsilun = NULL;
    QDict *bs_options;

-    bs = bdrv_new("", &error_abort);
+    bs = bdrv_new();

    /* Read out options */
    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -28,21 +28,21 @@
 #define MAX_QUEUED_IO  128

 struct qemu_laiocb {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    struct qemu_laio_state *ctx;
    struct iocb iocb;
    ssize_t ret;
    size_t nbytes;
    QEMUIOVector *qiov;
    bool is_read;
-    QLIST_ENTRY(qemu_laiocb) node;
+    QSIMPLEQ_ENTRY(qemu_laiocb) next;
 };

 typedef struct {
-    struct iocb *iocbs[MAX_QUEUED_IO];
    int plugged;
-    unsigned int size;
-    unsigned int idx;
+    unsigned int n;
+    bool blocked;
+    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;

 struct qemu_laio_state {
@@ -59,6 +59,8 @@ struct qemu_laio_state {
    int event_max;
 };

+static void ioq_submit(struct qemu_laio_state *s);
+
 static inline ssize_t io_event_ret(struct io_event *ev)
 {
    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
@@ -135,6 +137,10 @@ static void qemu_laio_completion_bh(void *opaque)

        qemu_laio_process_completion(s, laiocb);
    }
+
+    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+        ioq_submit(s);
+    }
 }

 static void qemu_laio_completion_cb(EventNotifier *e)
@@ -146,7 +152,7 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }

-static void laio_cancel(BlockDriverAIOCB *blockacb)
+static void laio_cancel(BlockAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
@@ -172,50 +178,41 @@ static const AIOCBInfo laio_aiocb_info = {

 static void ioq_init(LaioQueue *io_q)
 {
-    io_q->size = MAX_QUEUED_IO;
-    io_q->idx = 0;
+    QSIMPLEQ_INIT(&io_q->pending);
    io_q->plugged = 0;
+    io_q->n = 0;
+    io_q->blocked = false;
 }

-static int ioq_submit(struct qemu_laio_state *s)
+static void ioq_submit(struct qemu_laio_state *s)
 {
-    int ret, i = 0;
-    int len = s->io_q.idx;
+    int ret, len;
+    struct qemu_laiocb *aiocb;
+    struct iocb *iocbs[MAX_QUEUED_IO];
+    QSIMPLEQ_HEAD(, qemu_laiocb) completed;

    do {
-        ret = io_submit(s->ctx, len, s->io_q.iocbs);
-    } while (i++ < 3 && ret == -EAGAIN);
+        len = 0;
+        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
+            iocbs[len++] = &aiocb->iocb;
+            if (len == MAX_QUEUED_IO) {
+                break;
+            }
+        }

-    /* empty io queue */
-    s->io_q.idx = 0;
+        ret = io_submit(s->ctx, len, iocbs);
+        if (ret == -EAGAIN) {
+            break;
+        }
+        if (ret < 0) {
+            abort();
+        }

-    if (ret < 0) {
-        i = 0;
-    } else {
-        i = ret;
-    }
-
-    for (; i < len; i++) {
-        struct qemu_laiocb *laiocb =
-            container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
-
-        laiocb->ret = (ret < 0) ? ret : -EIO;
-        qemu_laio_process_completion(s, laiocb);
-    }
-    return ret;
-}
-
-static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
-{
-    unsigned int idx = s->io_q.idx;
-
-    s->io_q.iocbs[idx++] = iocb;
-    s->io_q.idx = idx;
-
-    /* submit immediately if queue is full */
-    if (idx == s->io_q.size) {
-        ioq_submit(s);
-    }
+        s->io_q.n -= ret;
+        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
+        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
+    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
+    s->io_q.blocked = (s->io_q.n > 0);
 }

 void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
@@ -225,27 +222,24 @@ void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
    s->io_q.plugged++;
 }

-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
 {
    struct qemu_laio_state *s = aio_ctx;
-    int ret = 0;

    assert(s->io_q.plugged > 0 || !unplug);

    if (unplug && --s->io_q.plugged > 0) {
-        return 0;
+        return;
    }

-    if (s->io_q.idx > 0) {
-        ret = ioq_submit(s);
+    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+        ioq_submit(s);
    }
-
-    return ret;
 }

-BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
+        BlockCompletionFunc *cb, void *opaque, int type)
 {
    struct qemu_laio_state *s = aio_ctx;
    struct qemu_laiocb *laiocb;
@@ -276,12 +270,11 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

-    if (!s->io_q.plugged) {
-        if (io_submit(s->ctx, 1, &iocbs) < 0) {
-            goto out_free_aiocb;
-        }
-    } else {
-        ioq_enqueue(s, iocbs);
+    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
+    s->io_q.n++;
+    if (!s->io_q.blocked &&
+        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
+        ioq_submit(s);
    }
    return &laiocb->common;

--- a/block/mirror.c
+++ b/block/mirror.c
@@ -45,6 +45,7 @@ typedef struct MirrorBlockJob {
    int64_t sector_num;
    int64_t granularity;
    size_t buf_size;
+    int64_t bdev_length;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
    HBitmapIter hbi;
@@ -54,6 +55,7 @@ typedef struct MirrorBlockJob {

    unsigned long *in_flight_bitmap;
    int in_flight;
+    int sectors_in_flight;
    int ret;
 } MirrorBlockJob;

@@ -87,6 +89,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);

    s->in_flight--;
+    s->sectors_in_flight -= op->nb_sectors;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
@@ -98,8 +101,11 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    chunk_num = op->sector_num / sectors_per_chunk;
    nb_chunks = op->nb_sectors / sectors_per_chunk;
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
-    if (s->cow_bitmap && ret >= 0) {
-        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+    if (ret >= 0) {
+        if (s->cow_bitmap) {
+            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+        }
+        s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
    }

    qemu_iovec_destroy(&op->qiov);
@@ -122,7 +128,8 @@ static void mirror_write_complete(void *opaque, int ret)
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
        action = mirror_error_action(s, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -139,7 +146,8 @@ static void mirror_read_complete(void *opaque, int ret)
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
        action = mirror_error_action(s, true, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -172,7 +180,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    end = s->common.len >> BDRV_SECTOR_BITS;
+    end = s->bdev_length / BDRV_SECTOR_SIZE;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
@@ -280,10 +288,12 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        next_sector += sectors_per_chunk;
    }

-    bdrv_reset_dirty(source, sector_num, nb_sectors);
+    bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num,
+                            nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
@@ -314,9 +324,56 @@ static void mirror_drain(MirrorBlockJob *s)
    }
 }

+typedef struct {
+    int ret;
+} MirrorExitData;
+
+static void mirror_exit(BlockJob *job, void *opaque)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    MirrorExitData *data = opaque;
+    AioContext *replace_aio_context = NULL;
+
+    if (s->to_replace) {
+        replace_aio_context = bdrv_get_aio_context(s->to_replace);
+        aio_context_acquire(replace_aio_context);
+    }
+
+    if (s->should_complete && data->ret == 0) {
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
+        }
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
+        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
+            /* drop the bs loop chain formed by the swap: break the loop then
+             * trigger the unref from the top one */
+            BlockDriverState *p = s->base->backing_hd;
+            bdrv_set_backing_hd(s->base, NULL);
+            bdrv_unref(p);
+        }
+    }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
+    if (replace_aio_context) {
+        aio_context_release(replace_aio_context);
+    }
+    g_free(s->replaces);
+    bdrv_unref(s->target);
+    block_job_completed(&s->common, data->ret);
+    g_free(data);
+}
+
 static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
+    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
@@ -329,11 +386,11 @@ static void coroutine_fn mirror_run(void *opaque)
        goto immediate_exit;
    }

-    s->common.len = bdrv_getlength(bs);
-    if (s->common.len < 0) {
-        ret = s->common.len;
+    s->bdev_length = bdrv_getlength(bs);
+    if (s->bdev_length < 0) {
+        ret = s->bdev_length;
        goto immediate_exit;
-    } else if (s->common.len == 0) {
+    } else if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
@@ -344,7 +401,7 @@ static void coroutine_fn mirror_run(void *opaque)
        goto immediate_exit;
    }

-    length = DIV_ROUND_UP(s->common.len, s->granularity);
+    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
@@ -364,7 +421,7 @@ static void coroutine_fn mirror_run(void *opaque)
        }
    }

-    end = s->common.len >> BDRV_SECTOR_BITS;
+    end = s->bdev_length / BDRV_SECTOR_SIZE;
    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
@@ -388,7 +445,7 @@ static void coroutine_fn mirror_run(void *opaque)

            assert(n > 0);
            if (ret == 1) {
-                bdrv_set_dirty(bs, sector_num, n);
+                bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
@@ -409,6 +466,12 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+        /* s->common.offset contains the number of bytes already processed so
+         * far, cnt is the number of dirty sectors remaining and
+         * s->sectors_in_flight is the number of sectors currently being
+         * processed; together those are the current total operation length */
+        s->common.len = s->common.offset +
+                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
@@ -445,7 +508,6 @@ static void coroutine_fn mirror_run(void *opaque)
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
-                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_event_ready(&s->common);
                    s->synced = true;
@@ -467,15 +529,13 @@ static void coroutine_fn mirror_run(void *opaque)
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_drain_all();
+            bdrv_drain(bs);
            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
-            /* Publish progress */
-            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
@@ -510,31 +570,10 @@ immediate_exit:
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);
-    if (s->should_complete && ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
-        if (s->to_replace) {
-            to_replace = s->to_replace;
-        }
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
-        }
-        bdrv_swap(s->target, to_replace);
-        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-            /* drop the bs loop chain formed by the swap: break the loop then
-             * trigger the unref from the top one */
-            BlockDriverState *p = s->base->backing_hd;
-            bdrv_set_backing_hd(s->base, NULL);
-            bdrv_unref(p);
-        }
-    }
-    if (s->to_replace) {
-        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
-        error_free(s->replace_blocker);
-        bdrv_unref(s->to_replace);
-    }
-    g_free(s->replaces);
-    bdrv_unref(s->target);
-    block_job_completed(&s->common, ret);
+
+    data = g_malloc(sizeof(*data));
+    data->ret = ret;
+    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
 }

 static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -567,22 +606,30 @@ static void mirror_complete(BlockJob *job, Error **errp)
        return;
    }
    if (!s->synced) {
-        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY,
+                  bdrv_get_device_name(job->bs));
        return;
    }

    /* check the target bs is not blocked and block all operations on it */
    if (s->replaces) {
+        AioContext *replace_aio_context;
+
        s->to_replace = check_to_replace_node(s->replaces, &local_err);
        if (!s->to_replace) {
            error_propagate(errp, local_err);
            return;
        }

+        replace_aio_context = bdrv_get_aio_context(s->to_replace);
+        aio_context_acquire(replace_aio_context);
+
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
        bdrv_ref(s->to_replace);
+
+        aio_context_release(replace_aio_context);
    }

    s->should_complete = true;
@@ -612,7 +659,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                             int64_t buf_size,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
-                             BlockDriverCompletionFunc *cb,
+                             BlockCompletionFunc *cb,
                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base)
@@ -672,7 +719,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, int64_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  BlockDriverCompletionFunc *cb,
+                  BlockCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    bool is_none_mode;
@@ -689,7 +736,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                         int64_t speed,
                         BlockdevOnError on_error,
-                         BlockDriverCompletionFunc *cb,
+                         BlockCompletionFunc *cb,
                         void *opaque, Error **errp)
 {
    int64_t length, base_length;
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -342,30 +342,44 @@ static void nbd_attach_aio_context(BlockDriverState *bs,

 static void nbd_refresh_filename(BlockDriverState *bs)
 {
-    BDRVNBDState *s = bs->opaque;
    QDict *opts = qdict_new();
-    const char *path   = qemu_opt_get(s->socket_opts, "path");
-    const char *host   = qemu_opt_get(s->socket_opts, "host");
-    const char *port   = qemu_opt_get(s->socket_opts, "port");
-    const char *export = qemu_opt_get(s->socket_opts, "export");
+    const char *path   = qdict_get_try_str(bs->options, "path");
+    const char *host   = qdict_get_try_str(bs->options, "host");
+    const char *port   = qdict_get_try_str(bs->options, "port");
+    const char *export = qdict_get_try_str(bs->options, "export");

    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));

+    if (path && export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd+unix:///%s?socket=%s", export, path);
+    } else if (path && !export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd+unix://?socket=%s", path);
+    } else if (!path && export && port) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s:%s/%s", host, port, export);
+    } else if (!path && export && !port) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s/%s", host, export);
+    } else if (!path && !export && port) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s:%s", host, port);
+    } else if (!path && !export && !port) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s", host);
+    }
+
    if (path) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:%s", path);
        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
-    } else if (export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd:%s:%s/%s", host, port, export);
-        qdict_put_obj(opts, "host",   QOBJECT(qstring_from_str(host)));
-        qdict_put_obj(opts, "port",   QOBJECT(qstring_from_str(port)));
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
-    } else {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd:%s:%s", host, port);
+    } else if (port) {
        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
+    } else {
+        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
+    }
+    if (export) {
+        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
    }

    bs->full_open_options = opts;
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -409,6 +409,19 @@ out:
    return ret;
 }

+static QemuOptsList nfs_create_opts = {
+    .name = "nfs-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        { /* end of list */ }
+    }
+};
+
 static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
 {
    int ret = 0;
@@ -470,6 +483,8 @@ static BlockDriver bdrv_nfs = {

    .instance_size                  = sizeof(NFSClient),
    .bdrv_needs_filename            = true,
+    .create_opts                    = &nfs_create_opts,
+
    .bdrv_has_zero_init             = nfs_has_zero_init,
    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
    .bdrv_truncate                  = nfs_file_truncate,
--- a/block/null.c
+++ b/block/null.c
@@ -78,7 +78,7 @@ static coroutine_fn int null_co_flush(BlockDriverState *bs)
 }

 typedef struct {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
 } NullAIOCB;

@@ -94,9 +94,9 @@ static void null_bh_cb(void *opaque)
    qemu_aio_unref(acb);
 }

-static inline BlockDriverAIOCB *null_aio_common(BlockDriverState *bs,
-                                                BlockDriverCompletionFunc *cb,
-                                                void *opaque)
+static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
+                                          BlockCompletionFunc *cb,
+                                          void *opaque)
 {
    NullAIOCB *acb;

@@ -106,27 +106,27 @@ static inline BlockDriverAIOCB *null_aio_common(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockDriverAIOCB *null_aio_readv(BlockDriverState *bs,
-                                        int64_t sector_num, QEMUIOVector *qiov,
-                                        int nb_sectors,
-                                        BlockDriverCompletionFunc *cb,
-                                        void *opaque)
+static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
+                                  int64_t sector_num, QEMUIOVector *qiov,
+                                  int nb_sectors,
+                                  BlockCompletionFunc *cb,
+                                  void *opaque)
 {
    return null_aio_common(bs, cb, opaque);
 }

-static BlockDriverAIOCB *null_aio_writev(BlockDriverState *bs,
-                                         int64_t sector_num, QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BlockDriverCompletionFunc *cb,
-                                         void *opaque)
+static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb,
+                                   void *opaque)
 {
    return null_aio_common(bs, cb, opaque);
 }

-static BlockDriverAIOCB *null_aio_flush(BlockDriverState *bs,
-                                        BlockDriverCompletionFunc *cb,
-                                        void *opaque)
+static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
+                                  BlockCompletionFunc *cb,
+                                  void *opaque)
 {
    return null_aio_common(bs, cb, opaque);
 }
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -155,7 +155,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
    offset = sector_num % s->tracks;

    /* not allocated */
-    if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
+    if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0))
        return -1;
    return
        ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512;
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -28,13 +28,7 @@
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
-#endif
-#endif
+#include "sysemu/block-backend.h"

 BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 {
@@ -46,6 +40,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
    info->encrypted              = bs->encrypted;
    info->encryption_key_missing = bdrv_key_required(bs);

+    info->cache = g_new(BlockdevCacheInfo, 1);
+    *info->cache = (BlockdevCacheInfo) {
+        .writeback      = bdrv_enable_write_cache(bs),
+        .direct         = !!(bs->open_flags & BDRV_O_NOCACHE),
+        .no_flush       = !!(bs->open_flags & BDRV_O_NO_FLUSH),
+    };
+
    if (bs->node_name[0]) {
        info->has_node_name = true;
        info->node_name = g_strdup(bs->node_name);
@@ -179,9 +180,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
    int ret;
    Error *err = NULL;
    ImageInfo *info;
-#ifdef __linux__
-    int fd, attr;
-#endif

    size = bdrv_getlength(bs);
    if (size < 0) {
@@ -211,24 +209,17 @@ void bdrv_query_image_info(BlockDriverState *bs,
    info->format_specific     = bdrv_get_specific_info(bs);
    info->has_format_specific = info->format_specific != NULL;

-#ifdef __linux__
-    /* get NOCOW info */
-    fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK);
-    if (fd >= 0) {
-        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) {
-            info->has_nocow = true;
-            info->nocow = true;
-        }
-        qemu_close(fd);
-    }
-#endif
-
    backing_filename = bs->backing_file;
    if (backing_filename[0] != '\0') {
        info->backing_filename = g_strdup(backing_filename);
        info->has_backing_filename = true;
        bdrv_get_full_backing_filename(bs, backing_filename2,
-                                       sizeof(backing_filename2));
+                                       sizeof(backing_filename2), &err);
+        if (err) {
+            error_propagate(errp, err);
+            qapi_free_ImageInfo(info);
+            return;
+        }

        if (strcmp(backing_filename, backing_filename2) != 0) {
            info->full_backing_filename =
@@ -264,22 +255,22 @@ void bdrv_query_image_info(BlockDriverState *bs,
 }

 /* @p_info will be set only on success. */
-void bdrv_query_info(BlockDriverState *bs,
-                     BlockInfo **p_info,
-                     Error **errp)
+static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
+                            Error **errp)
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
+    BlockDriverState *bs = blk_bs(blk);
    BlockDriverState *bs0;
    ImageInfo **p_image_info;
    Error *local_err = NULL;
-    info->device = g_strdup(bs->device_name);
+    info->device = g_strdup(blk_name(blk));
    info->type = g_strdup("unknown");
-    info->locked = bdrv_dev_is_medium_locked(bs);
-    info->removable = bdrv_dev_has_removable_media(bs);
+    info->locked = blk_dev_is_medium_locked(blk);
+    info->removable = blk_dev_has_removable_media(blk);

-    if (bdrv_dev_has_removable_media(bs)) {
+    if (blk_dev_has_removable_media(blk)) {
        info->has_tray_open = true;
-        info->tray_open = bdrv_dev_is_tray_open(bs);
+        info->tray_open = blk_dev_is_tray_open(blk);
    }

    if (bdrv_iostatus_is_enabled(bs)) {
@@ -321,15 +312,21 @@ void bdrv_query_info(BlockDriverState *bs,
    qapi_free_BlockInfo(info);
 }

-static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
+                                    bool query_backing)
 {
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

-    if (bs->device_name[0]) {
+    if (bdrv_get_device_name(bs)[0]) {
        s->has_device = true;
-        s->device = g_strdup(bs->device_name);
+        s->device = g_strdup(bdrv_get_device_name(bs));
+    }
+
+    if (bdrv_get_node_name(bs)[0]) {
+        s->has_node_name = true;
+        s->node_name = g_strdup(bdrv_get_node_name(bs));
    }

    s->stats = g_malloc0(sizeof(*s->stats));
@@ -346,12 +343,12 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs)

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file);
+        s->parent = bdrv_query_stats(bs->file, query_backing);
    }

-    if (bs->backing_hd) {
+    if (query_backing && bs->backing_hd) {
        s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing_hd);
+        s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
    }

    return s;
@@ -360,12 +357,12 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 BlockInfoList *qmp_query_block(Error **errp)
 {
    BlockInfoList *head = NULL, **p_next = &head;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk;
    Error *local_err = NULL;

-     while ((bs = bdrv_next(bs))) {
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
        BlockInfoList *info = g_malloc0(sizeof(*info));
-        bdrv_query_info(bs, &info->value, &local_err);
+        bdrv_query_info(blk, &info->value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            goto err;
@@ -382,17 +379,22 @@ BlockInfoList *qmp_query_block(Error **errp)
    return NULL;
 }

-BlockStatsList *qmp_query_blockstats(Error **errp)
+BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
+                                     bool query_nodes,
+                                     Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
    BlockDriverState *bs = NULL;

-     while ((bs = bdrv_next(bs))) {
+    /* Just to be safe if query_nodes is not always initialized */
+    query_nodes = has_query_nodes && query_nodes;
+
+    while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        AioContext *ctx = bdrv_get_aio_context(bs);

        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(bs);
+        info->value = bdrv_query_stats(bs, !query_nodes);
        aio_context_release(ctx);

        *p_next = info;
@@ -654,8 +656,4 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
        func_fprintf(f, "Format specific information:\n");
        bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
    }
-
-    if (info->has_nocow && info->nocow) {
-        func_fprintf(f, "NOCOW flag: set\n");
-    }
 }
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -124,7 +124,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        snprintf(version, sizeof(version), "QCOW version %" PRIu32,
                 header.version);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bs->device_name, "qcow", version);
+                  bdrv_get_device_name(bs), "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -231,7 +231,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    /* Disable migration when qcow images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "qcow", bs->device_name, "live migration");
+              "qcow", bdrv_get_device_name(bs), "live migration");
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -164,12 +164,14 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
 {
    BDRVQcowState *s = bs->opaque;
-    uint64_t buf[L1_ENTRIES_PER_SECTOR];
+    uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
    int l1_start_index;
    int i, ret;

    l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
-    for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
+    for (i = 0; i < L1_ENTRIES_PER_SECTOR && l1_start_index + i < s->l1_size;
+         i++)
+    {
        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
    }

@@ -1261,7 +1263,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,

 again:
    start = offset;
-    remaining = *num << BDRV_SECTOR_BITS;
+    remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
    cluster_offset = 0;
    *host_offset = 0;
    cur_bytes = 0;
@@ -1412,7 +1414,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 * clusters.
 */
 static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-    unsigned int nb_clusters, enum qcow2_discard_type type)
+    unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t *l2_table;
@@ -1434,23 +1436,30 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);

        /*
-         * Make sure that a discarded area reads back as zeroes for v3 images
-         * (we cannot do it for v2 without actually writing a zero-filled
-         * buffer). We can skip the operation if the cluster is already marked
-         * as zero, or if it's unallocated and we don't have a backing file.
+         * If full_discard is false, make sure that a discarded area reads back
+         * as zeroes for v3 images (we cannot do it for v2 without actually
+         * writing a zero-filled buffer). We can skip the operation if the
+         * cluster is already marked as zero, or if it's unallocated and we
+         * don't have a backing file.
         *
         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
         * holding s->lock, so that doesn't work today.
+         *
+         * If full_discard is true, the sector should not read back as zeroes,
+         * but rather fall through to the backing file.
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
            case QCOW2_CLUSTER_UNALLOCATED:
-                if (!bs->backing_hd) {
+                if (full_discard || !bs->backing_hd) {
                    continue;
                }
                break;

            case QCOW2_CLUSTER_ZERO:
-                continue;
+                if (!full_discard) {
+                    continue;
+                }
+                break;

            case QCOW2_CLUSTER_NORMAL:
            case QCOW2_CLUSTER_COMPRESSED:
@@ -1462,7 +1471,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,

        /* First remove L2 entries */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-        if (s->qcow_version >= 3) {
+        if (!full_discard && s->qcow_version >= 3) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
        } else {
            l2_table[l2_index + i] = cpu_to_be64(0);
@@ -1481,7 +1490,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
 }

 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type)
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t end_offset;
@@ -1504,7 +1513,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type);
+        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
        if (ret < 0) {
            goto fail;
        }
@@ -1604,15 +1613,14 @@ fail:
 * Expands all zero clusters in a specific L1 table (or deallocates them, for
 * non-backed non-pre-allocated zero clusters).
 *
- * expanded_clusters is a bitmap where every bit corresponds to one cluster in
- * the image file; a bit gets set if the corresponding cluster has been used for
- * zero expansion (i.e., has been filled with zeroes and is referenced from an
- * L2 table). nb_clusters contains the total cluster count of the image file,
- * i.e., the number of bits in expanded_clusters.
+ * l1_entries and *visited_l1_entries are used to keep track of progress for
+ * status_cb(). l1_entries contains the total number of L1 entries and
+ * *visited_l1_entries counts all visited L1 entries.
 */
 static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, uint8_t **expanded_clusters,
-                                      uint64_t *nb_clusters)
+                                      int l1_size, int64_t *visited_l1_entries,
+                                      int64_t l1_entries,
+                                      BlockDriverAmendStatusCB *status_cb)
 {
    BDRVQcowState *s = bs->opaque;
    bool is_active_l1 = (l1_table == s->l1_table);
@@ -1632,9 +1640,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
    for (i = 0; i < l1_size; i++) {
        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
        bool l2_dirty = false;
+        int l2_refcount;

        if (!l2_offset) {
            /* unallocated */
+            (*visited_l1_entries)++;
+            if (status_cb) {
+                status_cb(bs, *visited_l1_entries, l1_entries);
+            }
            continue;
        }

@@ -1651,33 +1664,19 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
            goto fail;
        }

+        l2_refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
+        if (l2_refcount < 0) {
+            ret = l2_refcount;
+            goto fail;
+        }
+
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
+            int64_t offset = l2_entry & L2E_OFFSET_MASK;
            int cluster_type = qcow2_get_cluster_type(l2_entry);
            bool preallocated = offset != 0;

-            if (cluster_type == QCOW2_CLUSTER_NORMAL) {
-                cluster_index = offset >> s->cluster_bits;
-                assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
-                if ((*expanded_clusters)[cluster_index / 8] &
-                    (1 << (cluster_index % 8))) {
-                    /* Probably a shared L2 table; this cluster was a zero
-                     * cluster which has been expanded, its refcount
-                     * therefore most likely requires an update. */
-                    ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
-                                                        QCOW2_DISCARD_NEVER);
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                    /* Since we just increased the refcount, the COPIED flag may
-                     * no longer be set. */
-                    l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
-                    l2_dirty = true;
-                }
-                continue;
-            }
-            else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO) {
                continue;
            }

@@ -1695,6 +1694,19 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    ret = offset;
                    goto fail;
                }
+
+                if (l2_refcount > 1) {
+                    /* For shared L2 tables, set the refcount accordingly (it is
+                     * already 1 and needs to be l2_refcount) */
+                    ret = qcow2_update_cluster_refcount(bs,
+                            offset >> s->cluster_bits, l2_refcount - 1,
+                            QCOW2_DISCARD_OTHER);
+                    if (ret < 0) {
+                        qcow2_free_clusters(bs, offset, s->cluster_size,
+                                            QCOW2_DISCARD_OTHER);
+                        goto fail;
+                    }
+                }
            }

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
@@ -1716,29 +1728,12 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                goto fail;
            }

-            l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
-            l2_dirty = true;
-
-            cluster_index = offset >> s->cluster_bits;
-
-            if (cluster_index >= *nb_clusters) {
-                uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
-                uint64_t new_bitmap_size;
-                /* The offset may lie beyond the old end of the underlying image
-                 * file for growable files only */
-                assert(bs->file->growable);
-                *nb_clusters = size_to_clusters(s, bs->file->total_sectors *
-                                                BDRV_SECTOR_SIZE);
-                new_bitmap_size = (*nb_clusters + 7) / 8;
-                *expanded_clusters = g_realloc(*expanded_clusters,
-                                               new_bitmap_size);
-                /* clear the newly allocated space */
-                memset(&(*expanded_clusters)[old_bitmap_size], 0,
-                       new_bitmap_size - old_bitmap_size);
+            if (l2_refcount == 1) {
+                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+            } else {
+                l2_table[j] = cpu_to_be64(offset);
            }
-
-            assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
-            (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
+            l2_dirty = true;
        }

        if (is_active_l1) {
@@ -1767,6 +1762,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                }
            }
        }
+
+        (*visited_l1_entries)++;
+        if (status_cb) {
+            status_cb(bs, *visited_l1_entries, l1_entries);
+        }
    }

    ret = 0;
@@ -1793,25 +1793,25 @@ fail:
 * allocation for pre-allocated ones). This is important for downgrading to a
 * qcow2 version which doesn't yet support metadata zero clusters.
 */
-int qcow2_expand_zero_clusters(BlockDriverState *bs)
+int qcow2_expand_zero_clusters(BlockDriverState *bs,
+                               BlockDriverAmendStatusCB *status_cb)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table = NULL;
-    uint64_t nb_clusters;
-    uint8_t *expanded_clusters;
+    int64_t l1_entries = 0, visited_l1_entries = 0;
    int ret;
    int i, j;

-    nb_clusters = size_to_clusters(s, bs->file->total_sectors *
-                                   BDRV_SECTOR_SIZE);
-    expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
-    if (expanded_clusters == NULL) {
-        ret = -ENOMEM;
-        goto fail;
+    if (status_cb) {
+        l1_entries = s->l1_size;
+        for (i = 0; i < s->nb_snapshots; i++) {
+            l1_entries += s->snapshots[i].l1_size;
+        }
    }

    ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
-                                     &expanded_clusters, &nb_clusters);
+                                     &visited_l1_entries, l1_entries,
+                                     status_cb);
    if (ret < 0) {
        goto fail;
    }
@@ -1845,7 +1845,8 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
        }

        ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
-                                         &expanded_clusters, &nb_clusters);
+                                         &visited_l1_entries, l1_entries,
+                                         status_cb);
        if (ret < 0) {
            goto fail;
        }
@@ -1854,7 +1855,6 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
    ret = 0;

 fail:
-    g_free(expanded_clusters);
    g_free(l1_table);
    return ret;
 }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -441,7 +441,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
                           align_offset(sn->vm_state_size, s->cluster_size)
                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER);
+                           QCOW2_DISCARD_NEVER, false);

 #ifdef DEBUG_ALLOC
    {
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -117,7 +117,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 #ifdef DEBUG_EXT
        printf("ext.magic = 0x%x\n", ext.magic);
 #endif
-        if (ext.len > end_offset - offset) {
+        if (offset > end_offset || ext.len > end_offset - offset) {
            error_setg(errp, "Header extension too large");
            return -EINVAL;
        }
@@ -206,8 +206,8 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
    vsnprintf(msg, sizeof(msg), fmt, ap);
    va_end(ap);

-    error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "qcow2",
-              msg);
+    error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+              bdrv_get_device_name(bs), "qcow2", msg);
 }

 static void report_unsupported_feature(BlockDriverState *bs,
@@ -698,6 +698,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
+    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
+    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
+    s->refcount_block_size = 1 << s->refcount_block_bits;
    bs->total_sectors = header.size / 512;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
@@ -907,7 +910,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

-        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not repair dirty image");
            goto fail;
@@ -1425,10 +1428,23 @@ static void qcow2_close(BlockDriverState *bs)
    s->l1_table = NULL;

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
-        qcow2_cache_flush(bs, s->l2_table_cache);
-        qcow2_cache_flush(bs, s->refcount_block_cache);
+        int ret1, ret2;

-        qcow2_mark_clean(bs);
+        ret1 = qcow2_cache_flush(bs, s->l2_table_cache);
+        ret2 = qcow2_cache_flush(bs, s->refcount_block_cache);
+
+        if (ret1) {
+            error_report("Failed to flush the L2 table cache: %s",
+                         strerror(-ret1));
+        }
+        if (ret2) {
+            error_report("Failed to flush the refcount block cache: %s",
+                         strerror(-ret2));
+        }
+
+        if (!ret1 && !ret2) {
+            qcow2_mark_clean(bs);
+        }
    }

    qcow2_cache_destroy(bs, s->l2_table_cache);
@@ -1871,7 +1887,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        .l1_size                    = cpu_to_be32(0),
        .refcount_table_offset      = cpu_to_be64(cluster_size),
        .refcount_table_clusters    = cpu_to_be32(1),
-        .refcount_order             = cpu_to_be32(3 + REFCOUNT_SHIFT),
+        .refcount_order             = cpu_to_be32(4),
        .header_length              = cpu_to_be32(sizeof(*header)),
    };

@@ -1912,10 +1928,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     * refcount of the cluster that is occupied by the header and the refcount
     * table)
     */
-    BlockDriver* drv = bdrv_find_format("qcow2");
-    assert(drv != NULL);
    ret = bdrv_open(&bs, filename, NULL, NULL,
-        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err);
+                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
+                    &bdrv_qcow2, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
@@ -1967,7 +1982,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    ret = bdrv_open(&bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
-                    drv, &local_err);
+                    &bdrv_qcow2, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
@@ -2086,7 +2101,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,

    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-        nb_sectors, QCOW2_DISCARD_REQUEST);
+        nb_sectors, QCOW2_DISCARD_REQUEST, false);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
 }
@@ -2147,8 +2162,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file);
-        bdrv_truncate(bs->file, cluster_offset);
-        return 0;
+        return bdrv_truncate(bs->file, cluster_offset);
    }

    if (nb_sectors != s->cluster_sectors) {
@@ -2227,6 +2241,195 @@ fail:
    return ret;
 }

+static int make_completely_empty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret, l1_clusters;
+    int64_t offset;
+    uint64_t *new_reftable = NULL;
+    uint64_t rt_entry, l1_size2;
+    struct {
+        uint64_t l1_offset;
+        uint64_t reftable_offset;
+        uint32_t reftable_clusters;
+    } QEMU_PACKED l1_ofs_rt_ofs_cls;
+
+    ret = qcow2_cache_empty(bs, s->l2_table_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Refcounts will be broken utterly */
+    ret = qcow2_mark_dirty(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+
+    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
+    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);
+
+    /* After this call, neither the in-memory nor the on-disk refcount
+     * information accurately describe the actual references */
+
+    ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+                            l1_clusters * s->cluster_sectors, 0);
+    if (ret < 0) {
+        goto fail_broken_refcounts;
+    }
+    memset(s->l1_table, 0, l1_size2);
+
+    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
+
+    /* Overwrite enough clusters at the beginning of the sectors to place
+     * the refcount table, a refcount block and the L1 table in; this may
+     * overwrite parts of the existing refcount and L1 table, which is not
+     * an issue because the dirty flag is set, complete data loss is in fact
+     * desired and partial data loss is consequently fine as well */
+    ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+                            (2 + l1_clusters) * s->cluster_size /
+                            BDRV_SECTOR_SIZE, 0);
+    /* This call (even if it failed overall) may have overwritten on-disk
+     * refcount structures; in that case, the in-memory refcount information
+     * will probably differ from the on-disk information which makes the BDS
+     * unusable */
+    if (ret < 0) {
+        goto fail_broken_refcounts;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
+
+    /* "Create" an empty reftable (one cluster) directly after the image
+     * header and an empty L1 table three clusters after the image header;
+     * the cluster between those two will be used as the first refblock */
+    cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
+    cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
+    cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
+    if (ret < 0) {
+        goto fail_broken_refcounts;
+    }
+
+    s->l1_table_offset = 3 * s->cluster_size;
+
+    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
+    if (!new_reftable) {
+        ret = -ENOMEM;
+        goto fail_broken_refcounts;
+    }
+
+    s->refcount_table_offset = s->cluster_size;
+    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
+
+    g_free(s->refcount_table);
+    s->refcount_table = new_reftable;
+    new_reftable = NULL;
+
+    /* Now the in-memory refcount information again corresponds to the on-disk
+     * information (reftable is empty and no refblocks (the refblock cache is
+     * empty)); however, this means some clusters (e.g. the image header) are
+     * referenced, but not refcounted, but the normal qcow2 code assumes that
+     * the in-memory information is always correct */
+
+    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+    /* Enter the first refblock into the reftable */
+    rt_entry = cpu_to_be64(2 * s->cluster_size);
+    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+                           &rt_entry, sizeof(rt_entry));
+    if (ret < 0) {
+        goto fail_broken_refcounts;
+    }
+    s->refcount_table[0] = 2 * s->cluster_size;
+
+    s->free_cluster_index = 0;
+    assert(3 + l1_clusters <= s->refcount_block_size);
+    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
+    if (offset < 0) {
+        ret = offset;
+        goto fail_broken_refcounts;
+    } else if (offset > 0) {
+        error_report("First cluster in emptied image is in use");
+        abort();
+    }
+
+    /* Now finally the in-memory information corresponds to the on-disk
+     * structures and is correct */
+    ret = qcow2_mark_clean(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    return 0;
+
+fail_broken_refcounts:
+    /* The BDS is unusable at this point. If we wanted to make it usable, we
+     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
+     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
+     * again. However, because the functions which could have caused this error
+     * path to be taken are used by those functions as well, it's very likely
+     * that that sequence will fail as well. Therefore, just eject the BDS. */
+    bs->drv = NULL;
+
+fail:
+    g_free(new_reftable);
+    return ret;
+}
+
+static int qcow2_make_empty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t start_sector;
+    int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
+    int l1_clusters, ret = 0;
+
+    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
+
+    if (s->qcow_version >= 3 && !s->snapshots &&
+        3 + l1_clusters <= s->refcount_block_size) {
+        /* The following function only works for qcow2 v3 images (it requires
+         * the dirty flag) and only as long as there are no snapshots (because
+         * it completely empties the image). Furthermore, the L1 table and three
+         * additional clusters (image header, refcount table, one refcount
+         * block) have to fit inside one refcount block. */
+        return make_completely_empty(bs);
+    }
+
+    /* This fallback code simply discards every active cluster; this is slow,
+     * but works in all cases */
+    for (start_sector = 0; start_sector < bs->total_sectors;
+         start_sector += sector_step)
+    {
+        /* As this function is generally used after committing an external
+         * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
+         * default action for this kind of discard is to pass the discard,
+         * which will ideally result in an actually smaller image file, as
+         * is probably desired. */
+        ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
+                                     MIN(sector_step,
+                                         bs->total_sectors - start_sector),
+                                     QCOW2_DISCARD_SNAPSHOT, true);
+        if (ret < 0) {
+            break;
+        }
+    }
+
+    return ret;
+}
+
 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;
@@ -2358,7 +2561,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 * Downgrades an image's version. To achieve this, any incompatible features
 * have to be removed.
 */
-static int qcow2_downgrade(BlockDriverState *bs, int target_version)
+static int qcow2_downgrade(BlockDriverState *bs, int target_version,
+                           BlockDriverAmendStatusCB *status_cb)
 {
    BDRVQcowState *s = bs->opaque;
    int current_version = s->qcow_version;
@@ -2407,7 +2611,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version)
    /* clearing autoclear features is trivial */
    s->autoclear_features = 0;

-    ret = qcow2_expand_zero_clusters(bs);
+    ret = qcow2_expand_zero_clusters(bs, status_cb);
    if (ret < 0) {
        return ret;
    }
@@ -2421,7 +2625,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version)
    return 0;
 }

-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
+static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
+                               BlockDriverAmendStatusCB *status_cb)
 {
    BDRVQcowState *s = bs->opaque;
    int old_version = s->qcow_version, new_version = old_version;
@@ -2499,7 +2704,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
                return ret;
            }
        } else {
-            ret = qcow2_downgrade(bs, new_version);
+            ret = qcow2_downgrade(bs, new_version, status_cb);
            if (ret < 0) {
                return ret;
            }
@@ -2653,7 +2858,7 @@ static QemuOptsList qcow2_create_opts = {
    }
 };

-static BlockDriver bdrv_qcow2 = {
+BlockDriver bdrv_qcow2 = {
    .format_name        = "qcow2",
    .instance_size      = sizeof(BDRVQcowState),
    .bdrv_probe         = qcow2_probe,
@@ -2673,6 +2878,7 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_co_discard        = qcow2_co_discard,
    .bdrv_truncate          = qcow2_truncate,
    .bdrv_write_compressed  = qcow2_write_compressed,
+    .bdrv_make_empty        = qcow2_make_empty,

    .bdrv_snapshot_create   = qcow2_snapshot_create,
    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -59,8 +59,6 @@
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1ULL << 0)

-#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
-
 #define MIN_CLUSTER_BITS 9
 #define MAX_CLUSTER_BITS 21

@@ -223,6 +221,8 @@ typedef struct BDRVQcowState {
    int l2_size;
    int l1_size;
    int l1_vm_state_index;
+    int refcount_block_bits;
+    int refcount_block_size;
    int csize_shift;
    int csize_mask;
    uint64_t cluster_offset_mask;
@@ -487,6 +487,8 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
 int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);

+int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index);
+
 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
                                  int addend, enum qcow2_discard_type type);

@@ -534,10 +536,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type);
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);

-int qcow2_expand_zero_clusters(BlockDriverState *bs);
+int qcow2_expand_zero_clusters(BlockDriverState *bs,
+                               BlockDriverAmendStatusCB *status_cb);

 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -13,7 +13,7 @@

 #include "qed.h"

-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
+void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
 {
    GenericCB *gencb = g_malloc(len);
    gencb->cb = cb;
@@ -24,7 +24,7 @@ void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
 void gencb_complete(void *opaque, int ret)
 {
    GenericCB *gencb = opaque;
-    BlockDriverCompletionFunc *cb = gencb->cb;
+    BlockCompletionFunc *cb = gencb->cb;
    void *user_opaque = gencb->opaque;

    g_free(gencb);
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -49,7 +49,7 @@ out:
 }

 static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockDriverCompletionFunc *cb, void *opaque)
+                           BlockCompletionFunc *cb, void *opaque)
 {
    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
                                                cb, opaque);
@@ -119,7 +119,7 @@ out:
 */
 static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush,
-                            BlockDriverCompletionFunc *cb, void *opaque)
+                            BlockCompletionFunc *cb, void *opaque)
 {
    QEDWriteTableCB *write_table_cb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
@@ -180,7 +180,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
 }

 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockDriverCompletionFunc *cb, void *opaque)
+                        BlockCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
    qed_write_table(s, s->header.l1_table_offset,
@@ -235,7 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
 }

 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockDriverCompletionFunc *cb, void *opaque)
+                       BlockCompletionFunc *cb, void *opaque)
 {
    QEDReadL2TableCB *read_l2_table_cb;

@@ -275,7 +275,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset

 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockDriverCompletionFunc *cb, void *opaque)
+                        BlockCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
    qed_write_table(s, request->l2_table->offset,
--- a/block/qed.c
+++ b/block/qed.c
@@ -130,7 +130,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
-static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
+static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
                             void *opaque)
 {
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
@@ -408,7 +408,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bs->device_name, "QED", buf);
+            bdrv_get_device_name(bs), "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -759,7 +759,7 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
                                  QEMUIOVector **backing_qiov,
-                                  BlockDriverCompletionFunc *cb, void *opaque)
+                                  BlockCompletionFunc *cb, void *opaque)
 {
    uint64_t backing_length = 0;
    size_t size;
@@ -851,7 +851,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
 */
 static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
                                       uint64_t len, uint64_t offset,
-                                       BlockDriverCompletionFunc *cb,
+                                       BlockCompletionFunc *cb,
                                       void *opaque)
 {
    CopyFromBackingFileCB *copy_cb;
@@ -902,7 +902,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
 static void qed_aio_complete_bh(void *opaque)
 {
    QEDAIOCB *acb = opaque;
-    BlockDriverCompletionFunc *cb = acb->common.cb;
+    BlockCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;

@@ -1064,7 +1064,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos);
-    BlockDriverCompletionFunc *next_fn;
+    BlockCompletionFunc *next_fn;

    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);

@@ -1164,7 +1164,7 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
 static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
    BDRVQEDState *s = acb_to_s(acb);
-    BlockDriverCompletionFunc *cb;
+    BlockCompletionFunc *cb;

    /* Cancel timer when the first allocating request comes in */
    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1365,11 +1365,11 @@ static void qed_aio_next_io(void *opaque, int ret)
                      io_fn, acb);
 }

-static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       BlockDriverCompletionFunc *cb,
-                                       void *opaque, int flags)
+static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
+                                 int64_t sector_num,
+                                 QEMUIOVector *qiov, int nb_sectors,
+                                 BlockCompletionFunc *cb,
+                                 void *opaque, int flags)
 {
    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);

@@ -1390,20 +1390,20 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *qiov, int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque)
+static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+                                      int64_t sector_num,
+                                      QEMUIOVector *qiov, int nb_sectors,
+                                      BlockCompletionFunc *cb,
+                                      void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 }

-static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             QEMUIOVector *qiov, int nb_sectors,
-                                             BlockDriverCompletionFunc *cb,
-                                             void *opaque)
+static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov, int nb_sectors,
+                                       BlockCompletionFunc *cb,
+                                       void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
                         opaque, QED_AIOCB_WRITE);
@@ -1431,7 +1431,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
                                                 int nb_sectors,
                                                 BdrvRequestFlags flags)
 {
-    BlockDriverAIOCB *blockacb;
+    BlockAIOCB *blockacb;
    BDRVQEDState *s = bs->opaque;
    QEDWriteZeroesCB cb = { .done = false };
    QEMUIOVector qiov;
--- a/block/qed.h
+++ b/block/qed.h
@@ -128,7 +128,7 @@ enum {
 };

 typedef struct QEDAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
    int bh_ret;                     /* final return status for completion bh */
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
@@ -203,11 +203,11 @@ typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t l
 * Generic callback for chaining async callbacks
 */
 typedef struct {
-    BlockDriverCompletionFunc *cb;
+    BlockCompletionFunc *cb;
    void *opaque;
 } GenericCB;

-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
+void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
 void gencb_complete(void *opaque, int ret);

 /**
@@ -230,16 +230,16 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 */
 int qed_read_l1_table_sync(BDRVQEDState *s);
 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockDriverCompletionFunc *cb, void *opaque);
+                        BlockCompletionFunc *cb, void *opaque);
 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockDriverCompletionFunc *cb, void *opaque);
+                       BlockCompletionFunc *cb, void *opaque);
 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockDriverCompletionFunc *cb, void *opaque);
+                        BlockCompletionFunc *cb, void *opaque);
 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);

--- a/block/quorum.c
+++ b/block/quorum.c
@@ -92,7 +92,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockDriverAIOCB *aiocb;
+    BlockAIOCB *aiocb;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -105,7 +105,7 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;

    /* Request metadata */
    uint64_t sector_num;
@@ -130,7 +130,7 @@ struct QuorumAIOCB {

 static bool quorum_vote(QuorumAIOCB *acb);

-static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
+static void quorum_aio_cancel(BlockAIOCB *blockacb)
 {
    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
    BDRVQuorumState *s = acb->common.bs->opaque;
@@ -186,7 +186,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
                                   QEMUIOVector *qiov,
                                   uint64_t sector_num,
                                   int nb_sectors,
-                                   BlockDriverCompletionFunc *cb,
+                                   BlockCompletionFunc *cb,
                                   void *opaque)
 {
    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
@@ -226,8 +226,8 @@ static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = acb->common.bs->device_name[0] ?
-                            acb->common.bs->device_name :
+    const char *reference = bdrv_get_device_name(acb->common.bs)[0] ?
+                            bdrv_get_device_name(acb->common.bs) :
                            acb->common.bs->node_name;

    qapi_event_send_quorum_failure(reference, acb->sector_num,
@@ -264,7 +264,7 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret)
    quorum_aio_finalize(acb);
 }

-static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb);
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);

 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -640,7 +640,7 @@ free_exit:
    return rewrite;
 }

-static BlockDriverAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
 {
    BDRVQuorumState *s = acb->common.bs->opaque;
    int i;
@@ -659,7 +659,7 @@ static BlockDriverAIOCB *read_quorum_children(QuorumAIOCB *acb)
    return &acb->common;
 }

-static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
 {
    BDRVQuorumState *s = acb->common.bs->opaque;

@@ -675,12 +675,12 @@ static BlockDriverAIOCB *read_fifo_child(QuorumAIOCB *acb)
    return &acb->common;
 }

-static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                          int64_t sector_num,
-                                          QEMUIOVector *qiov,
-                                          int nb_sectors,
-                                          BlockDriverCompletionFunc *cb,
-                                          void *opaque)
+static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                    int64_t sector_num,
+                                    QEMUIOVector *qiov,
+                                    int nb_sectors,
+                                    BlockCompletionFunc *cb,
+                                    void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
@@ -696,12 +696,12 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
    return read_fifo_child(acb);
 }

-static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
-                                          int64_t sector_num,
-                                          QEMUIOVector *qiov,
-                                          int nb_sectors,
-                                          BlockDriverCompletionFunc *cb,
-                                          void *opaque)
+static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                     int64_t sector_num,
+                                     QEMUIOVector *qiov,
+                                     int nb_sectors,
+                                     BlockCompletionFunc *cb,
+                                     void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,13 +35,13 @@
 #ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
 void laio_cleanup(void *s);
-BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
+        BlockCompletionFunc *cb, void *opaque, int type);
 void laio_detach_aio_context(void *s, AioContext *old_context);
 void laio_attach_aio_context(void *s, AioContext *new_context);
 void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
+void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
 #endif

 #ifdef _WIN32
@@ -49,10 +49,10 @@ typedef struct QEMUWin32AIOState QEMUWin32AIOState;
 QEMUWin32AIOState *win32_aio_init(void);
 void win32_aio_cleanup(QEMUWin32AIOState *aio);
 int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
+        BlockCompletionFunc *cb, void *opaque, int type);
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context);
 void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -60,9 +60,6 @@
 #define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
 #endif
 #endif
-#ifdef CONFIG_FIEMAP
-#include <linux/fiemap.h>
-#endif
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 #include <linux/falloc.h>
 #endif
@@ -150,9 +147,7 @@ typedef struct BDRVRawState {
    bool has_discard:1;
    bool has_write_zeroes:1;
    bool discard_zeroes:1;
-#ifdef CONFIG_FIEMAP
-    bool skip_fiemap;
-#endif
+    bool needs_alignment;
 } BDRVRawState;

 typedef struct BDRVRawReopenState {
@@ -230,7 +225,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)

    /* For /dev/sg devices the alignment is not really used.
       With buffered I/O, we don't have any restrictions. */
-    if (bs->sg || !(s->open_flags & O_DIRECT)) {
+    if (bs->sg || !s->needs_alignment) {
        bs->request_alignment = 1;
        s->buf_align = 1;
        return;
@@ -446,8 +441,12 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,

    s->has_discard = true;
    s->has_write_zeroes = true;
+    if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
+        s->needs_alignment = true;
+    }

    if (fstat(s->fd, &st) < 0) {
+        ret = -errno;
        error_setg_errno(errp, errno, "Could not stat file");
        goto fail;
    }
@@ -472,6 +471,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
        }
 #endif
    }
+#ifdef __FreeBSD__
+    if (S_ISCHR(st.st_mode)) {
+        /*
+         * The file is a char device (disk), which on FreeBSD isn't behind
+         * a pager, so force all requests to be aligned. This is needed
+         * so QEMU makes sure all IO operations on the device are aligned
+         * to sector size, or else FreeBSD will reject them with EINVAL.
+         */
+        s->needs_alignment = true;
+    }
+#endif

 #ifdef CONFIG_XFS
    if (platform_test_xfs_fd(s->fd)) {
@@ -1041,9 +1051,9 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    return thread_pool_submit_co(pool, aio_worker, acb);
 }

-static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
+static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
+        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
    ThreadPool *pool;
@@ -1066,9 +1076,9 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }

-static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
+static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
+        BlockCompletionFunc *cb, void *opaque, int type)
 {
    BDRVRawState *s = bs->opaque;

@@ -1076,11 +1086,12 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
        return NULL;

    /*
-     * If O_DIRECT is used the buffer needs to be aligned on a sector
-     * boundary.  Check if this is the case or tell the low-level
-     * driver that it needs to copy the buffer.
+     * Check if the underlying device requires requests to be aligned,
+     * and if the request we are trying to submit is aligned or not.
+     * If this is the case tell the low-level driver that it needs
+     * to copy the buffer.
     */
-    if ((bs->open_flags & BDRV_O_NOCACHE)) {
+    if (s->needs_alignment) {
        if (!bdrv_qiov_is_aligned(bs, qiov)) {
            type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
@@ -1125,24 +1136,24 @@ static void raw_aio_flush_io_queue(BlockDriverState *bs)
 #endif
 }

-static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
                          cb, opaque, QEMU_AIO_READ);
 }

-static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
                          cb, opaque, QEMU_AIO_WRITE);
 }

-static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
+        BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

@@ -1441,9 +1452,16 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
                                 "Could not write to the new file");
                break;
            }
-            left -= num;
+            left -= result;
+        }
+        if (result >= 0) {
+            result = fsync(fd);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not flush new file to disk");
+            }
        }
-        fsync(fd);
        g_free(buf);
        break;
    }
@@ -1465,87 +1483,93 @@ out:
    return result;
 }

-static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
-                          off_t *hole, int nb_sectors, int *pnum)
-{
-#ifdef CONFIG_FIEMAP
-    BDRVRawState *s = bs->opaque;
-    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-    struct {
-        struct fiemap fm;
-        struct fiemap_extent fe;
-    } f;
-
-    if (s->skip_fiemap) {
-        return -ENOTSUP;
-    }
-
-    f.fm.fm_start = start;
-    f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
-    f.fm.fm_flags = 0;
-    f.fm.fm_extent_count = 1;
-    f.fm.fm_reserved = 0;
-    if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
-        s->skip_fiemap = true;
-        return -errno;
-    }
-
-    if (f.fm.fm_mapped_extents == 0) {
-        /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
-         * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
-         */
-        off_t length = lseek(s->fd, 0, SEEK_END);
-        *hole = f.fm.fm_start;
-        *data = MIN(f.fm.fm_start + f.fm.fm_length, length);
-    } else {
-        *data = f.fe.fe_logical;
-        *hole = f.fe.fe_logical + f.fe.fe_length;
-        if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
-            ret |= BDRV_BLOCK_ZERO;
-        }
-    }
-
-    return ret;
-#else
-    return -ENOTSUP;
-#endif
-}
-
-static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
-                             off_t *hole, int *pnum)
+/*
+ * Find allocation range in @bs around offset @start.
+ * May change underlying file descriptor's file offset.
+ * If @start is not in a hole, store @start in @data, and the
+ * beginning of the next hole in @hole, and return 0.
+ * If @start is in a non-trailing hole, store @start in @hole and the
+ * beginning of the next non-hole in @data, and return 0.
+ * If @start is in a trailing hole or beyond EOF, return -ENXIO.
+ * If we can't find out, return a negative errno other than -ENXIO.
+ */
+static int find_allocation(BlockDriverState *bs, off_t start,
+                           off_t *data, off_t *hole)
 {
 #if defined SEEK_HOLE && defined SEEK_DATA
    BDRVRawState *s = bs->opaque;
+    off_t offs;

-    *hole = lseek(s->fd, start, SEEK_HOLE);
-    if (*hole == -1) {
-        /* -ENXIO indicates that sector_num was past the end of the file.
-         * There is a virtual hole there.  */
-        assert(errno != -ENXIO);
+    /*
+     * SEEK_DATA cases:
+     * D1. offs == start: start is in data
+     * D2. offs > start: start is in a hole, next data at offs
+     * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
+     *                              or start is beyond EOF
+     *     If the latter happens, the file has been truncated behind
+     *     our back since we opened it.  All bets are off then.
+     *     Treating like a trailing hole is simplest.
+     * D4. offs < 0, errno != ENXIO: we learned nothing
+     */
+    offs = lseek(s->fd, start, SEEK_DATA);
+    if (offs < 0) {
+        return -errno;          /* D3 or D4 */
+    }
+    assert(offs >= start);

-        return -errno;
+    if (offs > start) {
+        /* D2: in hole, next data at offs */
+        *hole = start;
+        *data = offs;
+        return 0;
    }

-    if (*hole > start) {
+    /* D1: in data, end not yet known */
+
+    /*
+     * SEEK_HOLE cases:
+     * H1. offs == start: start is in a hole
+     *     If this happens here, a hole has been dug behind our back
+     *     since the previous lseek().
+     * H2. offs > start: either start is in data, next hole at offs,
+     *                   or start is in trailing hole, EOF at offs
+     *     Linux treats trailing holes like any other hole: offs ==
+     *     start.  Solaris seeks to EOF instead: offs > start (blech).
+     *     If that happens here, a hole has been dug behind our back
+     *     since the previous lseek().
+     * H3. offs < 0, errno = ENXIO: start is beyond EOF
+     *     If this happens, the file has been truncated behind our
+     *     back since we opened it.  Treat it like a trailing hole.
+     * H4. offs < 0, errno != ENXIO: we learned nothing
+     *     Pretend we know nothing at all, i.e. "forget" about D1.
+     */
+    offs = lseek(s->fd, start, SEEK_HOLE);
+    if (offs < 0) {
+        return -errno;          /* D1 and (H3 or H4) */
+    }
+    assert(offs >= start);
+
+    if (offs > start) {
+        /*
+         * D1 and H2: either in data, next hole at offs, or it was in
+         * data but is now in a trailing hole.  In the latter case,
+         * all bets are off.  Treating it as if it there was data all
+         * the way to EOF is safe, so simply do that.
+         */
        *data = start;
-    } else {
-        /* On a hole.  We need another syscall to find its end.  */
-        *data = lseek(s->fd, start, SEEK_DATA);
-        if (*data == -1) {
-            *data = lseek(s->fd, 0, SEEK_END);
-        }
+        *hole = offs;
+        return 0;
    }

-    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+    /* D1 and H1 */
+    return -EBUSY;
 #else
    return -ENOTSUP;
 #endif
 }

 /*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
+ * Returns the allocation status of the specified sectors.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
@@ -1562,7 +1586,8 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                                    int nb_sectors, int *pnum)
 {
    off_t start, data = 0, hole = 0;
-    int64_t ret;
+    int64_t total_size;
+    int ret;

    ret = fd_open(bs);
    if (ret < 0) {
@@ -1570,34 +1595,41 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
    }

    start = sector_num * BDRV_SECTOR_SIZE;
-
-    ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
-    if (ret < 0) {
-        ret = try_seek_hole(bs, start, &data, &hole, pnum);
-        if (ret < 0) {
-            /* Assume everything is allocated. */
-            data = 0;
-            hole = start + nb_sectors * BDRV_SECTOR_SIZE;
-            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-        }
+    total_size = bdrv_getlength(bs);
+    if (total_size < 0) {
+        return total_size;
+    } else if (start >= total_size) {
+        *pnum = 0;
+        return 0;
+    } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
+        nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
    }

-    if (data <= start) {
+    ret = find_allocation(bs, start, &data, &hole);
+    if (ret == -ENXIO) {
+        /* Trailing hole */
+        *pnum = nb_sectors;
+        ret = BDRV_BLOCK_ZERO;
+    } else if (ret < 0) {
+        /* No info available, so pretend there are no holes */
+        *pnum = nb_sectors;
+        ret = BDRV_BLOCK_DATA;
+    } else if (data == start) {
        /* On a data extent, compute sectors to the end of the extent.  */
        *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
+        ret = BDRV_BLOCK_DATA;
    } else {
        /* On a hole, compute sectors to the beginning of the next extent.  */
+        assert(hole == start);
        *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
-        ret &= ~BDRV_BLOCK_DATA;
-        ret |= BDRV_BLOCK_ZERO;
+        ret = BDRV_BLOCK_ZERO;
    }
-
-    return ret;
+    return ret | BDRV_BLOCK_OFFSET_VALID | start;
 }

-static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
+static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors,
-    BlockDriverCompletionFunc *cb, void *opaque)
+    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

@@ -1653,7 +1685,7 @@ static QemuOptsList raw_create_opts = {
    }
 };

-static BlockDriver bdrv_file = {
+BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
    .instance_size = sizeof(BDRVRawState),
@@ -1891,7 +1923,7 @@ static int fd_open(BlockDriverState *bs)
        return 0;
    last_media_present = (s->fd >= 0);
    if (s->fd >= 0 &&
-        (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
+        (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
        qemu_close(s->fd);
        s->fd = -1;
 #ifdef DEBUG_FLOPPY
@@ -1900,7 +1932,7 @@ static int fd_open(BlockDriverState *bs)
    }
    if (s->fd < 0) {
        if (s->fd_got_error &&
-            (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
+            (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
 #ifdef DEBUG_FLOPPY
            printf("No floppy (open delayed)\n");
 #endif
@@ -1908,7 +1940,7 @@ static int fd_open(BlockDriverState *bs)
        }
        s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
        if (s->fd < 0) {
-            s->fd_error_time = get_clock();
+            s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
            s->fd_got_error = 1;
            if (last_media_present)
                s->fd_media_changed = 1;
@@ -1923,7 +1955,7 @@ static int fd_open(BlockDriverState *bs)
    }
    if (!last_media_present)
        s->fd_media_changed = 1;
-    s->fd_open_time = get_clock();
+    s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    s->fd_got_error = 0;
    return 0;
 }
@@ -1935,9 +1967,9 @@ static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
    return ioctl(s->fd, req, buf);
 }

-static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
+static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    RawPosixAIOData *acb;
@@ -1976,9 +2008,9 @@ static int fd_open(BlockDriverState *bs)

 #endif /* !linux && !FreeBSD */

-static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
+static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors,
-    BlockDriverCompletionFunc *cb, void *opaque)
+    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -138,9 +138,9 @@ static int aio_worker(void *arg)
    return ret;
 }

-static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
+        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
    ThreadPool *pool;
@@ -369,9 +369,9 @@ fail:
    return ret;
 }

-static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                         BlockDriverCompletionFunc *cb, void *opaque)
+                         BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -383,9 +383,9 @@ static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
    }
 }

-static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                          BlockDriverCompletionFunc *cb, void *opaque)
+                          BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -397,8 +397,8 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
    }
 }

-static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
-                         BlockDriverCompletionFunc *cb, void *opaque)
+static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
@@ -540,7 +540,7 @@ static QemuOptsList raw_create_opts = {
    }
 };

-static BlockDriver bdrv_file = {
+BlockDriver bdrv_file = {
    .format_name	= "file",
    .protocol_name	= "file",
    .instance_size	= sizeof(BDRVRawState),
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -58,8 +58,58 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
+    void *buf = NULL;
+    BlockDriver *drv;
+    QEMUIOVector local_qiov;
+    int ret;
+
+    if (bs->probed && sector_num == 0) {
+        /* As long as these conditions are true, we can't get partial writes to
+         * the probe buffer and can just directly check the request. */
+        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
+        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
+
+        if (nb_sectors == 0) {
+            /* qemu_iovec_to_buf() would fail, but we want to return success
+             * instead of -EINVAL in this case. */
+            return 0;
+        }
+
+        buf = qemu_try_blockalign(bs->file, 512);
+        if (!buf) {
+            ret = -ENOMEM;
+            goto fail;
+        }
+
+        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
+        if (ret != 512) {
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        drv = bdrv_probe_all(buf, 512, NULL);
+        if (drv != bs->drv) {
+            ret = -EPERM;
+            goto fail;
+        }
+
+        /* Use the checked buffer, a malicious guest might be overwriting its
+         * original buffer in the background. */
+        qemu_iovec_init(&local_qiov, qiov->niov + 1);
+        qemu_iovec_add(&local_qiov, buf, 512);
+        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
+        qiov = &local_qiov;
+    }
+
    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+    ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+
+fail:
+    if (qiov == &local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+    }
+    qemu_vfree(buf);
+    return ret;
 }

 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
@@ -129,10 +179,10 @@ static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
    return bdrv_ioctl(bs->file, req, buf);
 }

-static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-                                       unsigned long int req, void *buf,
-                                       BlockDriverCompletionFunc *cb,
-                                       void *opaque)
+static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+                                 unsigned long int req, void *buf,
+                                 BlockCompletionFunc *cb,
+                                 void *opaque)
 {
    return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
 }
@@ -158,6 +208,18 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    bs->sg = bs->file->sg;
+
+    if (bs->probed && !bdrv_is_read_only(bs)) {
+        fprintf(stderr,
+                "WARNING: Image format was not specified for '%s' and probing "
+                "guessed raw.\n"
+                "         Automatically detecting the format is dangerous for "
+                "raw images, write operations on block 0 will be restricted.\n"
+                "         Specify the 'raw' format explicitly to remove the "
+                "restrictions.\n",
+                bs->file->filename);
+    }
+
    return 0;
 }

@@ -173,7 +235,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
    return 1;
 }

-static BlockDriver bdrv_raw = {
+BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -68,7 +68,7 @@ typedef enum {
 } RBDAIOCmd;

 typedef struct RBDAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    QEMUBH *bh;
    int64_t ret;
    QEMUIOVector *qiov;
@@ -459,7 +459,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
-        error_setg(&local_err, "error initializing");
+        error_setg(errp, "error initializing");
        goto failed_opts;
    }

@@ -495,19 +495,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,

    r = rados_connect(s->cluster);
    if (r < 0) {
-        error_setg(&local_err, "error connecting");
+        error_setg(errp, "error connecting");
        goto failed_shutdown;
    }

    r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
    if (r < 0) {
-        error_setg(&local_err, "error opening pool %s", pool);
+        error_setg(errp, "error opening pool %s", pool);
        goto failed_shutdown;
    }

    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
-        error_setg(&local_err, "error reading header from %s", s->name);
+        error_setg(errp, "error reading header from %s", s->name);
        goto failed_open;
    }

@@ -589,13 +589,13 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
 #endif
 }

-static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov,
-                                       int nb_sectors,
-                                       BlockDriverCompletionFunc *cb,
-                                       void *opaque,
-                                       RBDAIOCmd cmd)
+static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
+                                 int64_t sector_num,
+                                 QEMUIOVector *qiov,
+                                 int nb_sectors,
+                                 BlockCompletionFunc *cb,
+                                 void *opaque,
+                                 RBDAIOCmd cmd)
 {
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
@@ -675,32 +675,32 @@ failed:
    return NULL;
 }

-static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *qiov,
-                                            int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque)
+static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
+                                      int64_t sector_num,
+                                      QEMUIOVector *qiov,
+                                      int nb_sectors,
+                                      BlockCompletionFunc *cb,
+                                      void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_READ);
 }

-static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             QEMUIOVector *qiov,
-                                             int nb_sectors,
-                                             BlockDriverCompletionFunc *cb,
-                                             void *opaque)
+static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov,
+                                       int nb_sectors,
+                                       BlockCompletionFunc *cb,
+                                       void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_WRITE);
 }

 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque)
+static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
+                                      BlockCompletionFunc *cb,
+                                      void *opaque)
 {
    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
 }
@@ -876,17 +876,29 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 }

 #ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
-                                              int64_t sector_num,
-                                              int nb_sectors,
-                                              BlockDriverCompletionFunc *cb,
-                                              void *opaque)
+static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        int nb_sectors,
+                                        BlockCompletionFunc *cb,
+                                        void *opaque)
 {
    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif

+#ifdef LIBRBD_SUPPORTS_INVALIDATE
+static void qemu_rbd_invalidate_cache(BlockDriverState *bs,
+                                      Error **errp)
+{
+    BDRVRBDState *s = bs->opaque;
+    int r = rbd_invalidate_cache(s->image);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "Failed to invalidate the cache");
+    }
+}
+#endif
+
 static QemuOptsList qemu_rbd_create_opts = {
    .name = "rbd-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
@@ -936,6 +948,9 @@ static BlockDriver bdrv_rbd = {
    .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
    .bdrv_snapshot_list     = qemu_rbd_snap_list,
    .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
+#ifdef LIBRBD_SUPPORTS_INVALIDATE
+    .bdrv_invalidate_cache  = qemu_rbd_invalidate_cache,
+#endif
 };

 static void bdrv_rbd_init(void)
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -301,7 +301,7 @@ enum AIOCBState {
 };

 struct SheepdogAIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;

    QEMUIOVector *qiov;

@@ -473,7 +473,7 @@ static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
    return true;
 }

-static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
+static void sd_aio_cancel(BlockAIOCB *blockacb)
 {
    SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
    BDRVSheepdogState *s = acb->common.bs->opaque;
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -236,6 +236,10 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
        error_setg(errp, "snapshot_id and name are both NULL");
        return -EINVAL;
    }
+
+    /* drain all pending i/o before deleting snapshot */
+    bdrv_drain_all();
+
    if (drv->bdrv_snapshot_delete) {
        return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
    }
--- a/block/stream.c
+++ b/block/stream.c
@@ -79,9 +79,39 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
    bdrv_refresh_limits(top, NULL);
 }

+typedef struct {
+    int ret;
+    bool reached_end;
+} StreamCompleteData;
+
+static void stream_complete(BlockJob *job, void *opaque)
+{
+    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
+    StreamCompleteData *data = opaque;
+    BlockDriverState *base = s->base;
+
+    if (!block_job_is_cancelled(&s->common) && data->reached_end &&
+        data->ret == 0) {
+        const char *base_id = NULL, *base_fmt = NULL;
+        if (base) {
+            base_id = s->backing_file_str;
+            if (base->drv) {
+                base_fmt = base->drv->format_name;
+            }
+        }
+        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
+        close_unused_images(job->bs, base, base_id);
+    }
+
+    g_free(s->backing_file_str);
+    block_job_completed(&s->common, data->ret);
+    g_free(data);
+}
+
 static void coroutine_fn stream_run(void *opaque)
 {
    StreamBlockJob *s = opaque;
+    StreamCompleteData *data;
    BlockDriverState *bs = s->common.bs;
    BlockDriverState *base = s->base;
    int64_t sector_num, end;
@@ -183,21 +213,13 @@ wait:
    /* Do not remove the backing file if an error was there but ignored.  */
    ret = error;

-    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
-        const char *base_id = NULL, *base_fmt = NULL;
-        if (base) {
-            base_id = s->backing_file_str;
-            if (base->drv) {
-                base_fmt = base->drv->format_name;
-            }
-        }
-        ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-        close_unused_images(bs, base, base_id);
-    }
-
    qemu_vfree(buf);
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, ret);
+
+    /* Modify backing chain and close BDSes in main loop */
+    data = g_malloc(sizeof(*data));
+    data->ret = ret;
+    data->reached_end = sector_num == end;
+    block_job_defer_to_main_loop(&s->common, stream_complete, data);
 }

 static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -220,7 +242,7 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
                  const char *backing_file_str, int64_t speed,
                  BlockdevOnError on_error,
-                  BlockDriverCompletionFunc *cb,
+                  BlockCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    StreamBlockJob *s;
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -120,8 +120,18 @@ typedef unsigned char uuid_t[16];

 #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)

-/* max blocks in image is (0xffffffff / 4) */
-#define VDI_BLOCKS_IN_IMAGE_MAX  0x3fffffff
+/* The bmap will take up VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) bytes; since
+ * the bmap is read and written in a single operation, its size needs to be
+ * limited to INT_MAX; furthermore, when opening an image, the bmap size is
+ * rounded up to be aligned on BDRV_SECTOR_SIZE.
+ * Therefore this should satisfy the following:
+ * VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) + BDRV_SECTOR_SIZE == INT_MAX + 1
+ * (INT_MAX + 1 is the first value not representable as an int)
+ * This guarantees that any value below or equal to the constant will, when
+ * multiplied by sizeof(uint32_t) and rounded up to a BDRV_SECTOR_SIZE boundary,
+ * still be below or equal to INT_MAX. */
+#define VDI_BLOCKS_IN_IMAGE_MAX \
+    ((unsigned)((INT_MAX + 1u - BDRV_SECTOR_SIZE) / sizeof(uint32_t)))
 #define VDI_DISK_SIZE_MAX        ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
                                  (uint64_t)DEFAULT_CLUSTER_SIZE)

@@ -137,12 +147,14 @@ static inline int uuid_is_null(const uuid_t uu)
    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
 }

+# if defined(CONFIG_VDI_DEBUG)
 static inline void uuid_unparse(const uuid_t uu, char *out)
 {
    snprintf(out, 37, UUID_FMT,
            uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
            uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
 }
+# endif
 #endif

 typedef struct {
@@ -407,8 +419,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
           We accept them but round the disk size to the next multiple of
           SECTOR_SIZE. */
        logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
-        header.disk_size += SECTOR_SIZE - 1;
-        header.disk_size &= ~(SECTOR_SIZE - 1);
+        header.disk_size = ROUND_UP(header.disk_size, SECTOR_SIZE);
    }

    if (header.signature != VDI_SIGNATURE) {
@@ -475,7 +486,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    s->header = header;

    bmap_size = header.blocks_in_image * sizeof(uint32_t);
-    bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
+    bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
    s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
    if (s->bmap == NULL) {
        ret = -ENOMEM;
@@ -490,7 +501,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    /* Disable migration when vdi images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vdi", bs->device_name, "live migration");
+              "vdi", bdrv_get_device_name(bs), "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
@@ -736,10 +747,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)

    /* We need enough blocks to store the given disk size,
       so always round up. */
-    blocks = (bytes + block_size - 1) / block_size;
+    blocks = DIV_ROUND_UP(bytes, block_size);

    bmap_size = blocks * sizeof(uint32_t);
-    bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1));
+    bmap_size = ROUND_UP(bmap_size, SECTOR_SIZE);

    memset(&header, 0, sizeof(header));
    pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
@@ -841,11 +852,6 @@ static QemuOptsList vdi_create_opts = {
            .def_value_str = "off"
        },
 #endif
-        {
-            .name = BLOCK_OPT_NOCOW,
-            .type = QEMU_OPT_BOOL,
-            .help = "Turn off copy-on-write (valid only on btrfs)"
-        },
        /* TODO: An additional option to set UUID values might be useful. */
        { /* end of list */ }
    }
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1004,7 +1004,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    /* Disable migration when VHDX images are used */
    error_set(&s->migration_blocker,
            QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-            "vhdx", bs->device_name, "live migration");
+            "vhdx", bdrv_get_device_name(bs), "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
@@ -1109,8 +1109,9 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
            /* check the payload block state */
            switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNDEFINED:   /* fall through */
-            case PAYLOAD_BLOCK_UNMAPPED:    /* fall through */
+            case PAYLOAD_BLOCK_UNDEFINED:
+            case PAYLOAD_BLOCK_UNMAPPED:
+            case PAYLOAD_BLOCK_UNMAPPED_v095:
            case PAYLOAD_BLOCK_ZERO:
                /* return zero */
                qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
@@ -1277,11 +1278,11 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                        sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
                    }
                }
-
                /* fall through */
            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNMAPPED:    /* fall through */
-            case PAYLOAD_BLOCK_UNDEFINED:   /* fall through */
+            case PAYLOAD_BLOCK_UNMAPPED:
+            case PAYLOAD_BLOCK_UNMAPPED_v095:
+            case PAYLOAD_BLOCK_UNDEFINED:
                bat_prior_offset = sinfo.file_offset;
                ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
                if (ret < 0) {
@@ -1773,7 +1774,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    log_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
    block_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
    type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
-    use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, false);
+    use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, true);

    if (image_size > VHDX_MAX_IMAGE_SIZE) {
        error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
@@ -1935,7 +1936,9 @@ static QemuOptsList vhdx_create_opts = {
       {
           .name = VHDX_BLOCK_OPT_ZERO,
           .type = QEMU_OPT_BOOL,
-           .help = "Force use of payload blocks of type 'ZERO'.  Non-standard."
+           .help = "Force use of payload blocks of type 'ZERO'. "\
+                   "Non-standard, but default.  Do not set to 'off' when "\
+                   "using 'qemu-img convert' with subformat=dynamic."
       },
       { NULL }
    }
@@ -1953,6 +1956,7 @@ static BlockDriver bdrv_vhdx = {
    .bdrv_create            = vhdx_create,
    .bdrv_get_info          = vhdx_get_info,
    .bdrv_check             = vhdx_check,
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,

    .create_opts            = &vhdx_create_opts,
 };
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -226,7 +226,8 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
 #define PAYLOAD_BLOCK_NOT_PRESENT       0
 #define PAYLOAD_BLOCK_UNDEFINED         1
 #define PAYLOAD_BLOCK_ZERO              2
-#define PAYLOAD_BLOCK_UNMAPPED          5
+#define PAYLOAD_BLOCK_UNMAPPED          3
+#define PAYLOAD_BLOCK_UNMAPPED_v095     5
 #define PAYLOAD_BLOCK_FULLY_PRESENT     6
 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7

--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -28,6 +28,7 @@
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include <zlib.h>
+#include <glib.h>

 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
@@ -556,8 +557,16 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
        return NULL;
    }

-    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
-    buf = g_malloc0(size + 1);
+    if (size < 4) {
+        /* Both descriptor file and sparse image must be much larger than 4
+         * bytes, also callers of vmdk_read_desc want to compare the first 4
+         * bytes with VMDK4_MAGIC, let's error out if less is read. */
+        error_setg(errp, "File is too small, not a valid image");
+        return NULL;
+    }
+
+    size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
+    buf = g_malloc(size + 1);

    ret = bdrv_pread(file, desc_offset, buf, size);
    if (ret < 0) {
@@ -565,6 +574,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
        g_free(buf);
        return NULL;
    }
+    buf[ret] = 0;

    return buf;
 }
@@ -635,6 +645,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            bs->file->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to read footer");
            return ret;
        }

@@ -646,6 +657,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            le32_to_cpu(footer.eos_marker.size) != 0  ||
            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
        {
+            error_setg(errp, "Invalid footer");
            return -EINVAL;
        }

@@ -657,7 +669,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
                 le32_to_cpu(header.version));
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bs->device_name, "vmdk", buf);
+                  bdrv_get_device_name(bs), "vmdk", buf);
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
        /* VMware KB 2064959 explains that version 3 added support for
@@ -676,6 +688,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
+        error_setg(errp, "L1 entry size is invalid");
        return -EINVAL;
    }
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
@@ -784,10 +797,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
    VmdkExtent *extent;

    while (*p) {
-        /* parse extent line:
+        /* parse extent line in one of below formats:
+         *
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
-         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
+         * RW [size in sectors] VMFS "file-name.vmdk"
+         * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
         */
        flat_offset = -1;
        ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
@@ -818,6 +833,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            goto next_line;
        }

+        if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
+            !desc_file_path[0])
+        {
+            error_setg(errp, "Cannot use relative extent paths with VMDK "
+                       "descriptor file '%s'", bs->file->filename);
+            return -EINVAL;
+        }
+
        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
        extent_file = NULL;
@@ -894,7 +917,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
    }
    s->create_type = g_strdup(ct);
    s->desc_offset = 0;
-    ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
+    ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, errp);
 exit:
    return ret;
 }
@@ -902,7 +925,7 @@ exit:
 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
-    char *buf = NULL;
+    char *buf;
    int ret;
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
@@ -939,7 +962,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    /* Disable migration when VMDK images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vmdk", bs->device_name, "live migration");
+              "vmdk", bdrv_get_device_name(bs), "live migration");
    migrate_add_blocker(s->migration_blocker);
    g_free(buf);
    return 0;
@@ -1538,7 +1561,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
        /* update CID on the first write every time the virtual disk is
         * opened */
        if (!s->cid_updated) {
-            ret = vmdk_write_cid(bs, time(NULL));
+            ret = vmdk_write_cid(bs, g_random_int());
            if (ret < 0) {
                return ret;
            }
@@ -1868,8 +1891,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    }
    if (backing_file) {
        BlockDriverState *bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+        char *full_backing = g_new0(char, PATH_MAX);
+        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                     full_backing, PATH_MAX,
+                                                     &local_err);
+        if (local_err) {
+            g_free(full_backing);
+            error_propagate(errp, local_err);
+            ret = -ENOENT;
+            goto exit;
+        }
+        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
                        errp);
+        g_free(full_backing);
        if (ret != 0) {
            goto exit;
        }
@@ -1922,7 +1956,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    }
    /* generate descriptor file */
    desc = g_strdup_printf(desc_template,
-                           (uint32_t)time(NULL),
+                           g_random_int(),
                           parent_cid,
                           fmt,
                           parent_desc_line,
@@ -2137,23 +2171,29 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
    return spec_info;
 }

+static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
+{
+    return a->flat == b->flat &&
+           a->compressed == b->compressed &&
+           (a->flat || a->cluster_sectors == b->cluster_sectors);
+}
+
 static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    int i;
    BDRVVmdkState *s = bs->opaque;
    assert(s->num_extents);
+
+    /* See if we have multiple extents but they have different cases */
+    for (i = 1; i < s->num_extents; i++) {
+        if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
+            return -ENOTSUP;
+        }
+    }
    bdi->needs_compressed_writes = s->extents[0].compressed;
    if (!s->extents[0].flat) {
        bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
    }
-    /* See if we have multiple extents but they have different cases */
-    for (i = 1; i < s->num_extents; i++) {
-        if (bdi->needs_compressed_writes != s->extents[i].compressed ||
-            (bdi->cluster_size && bdi->cluster_size !=
-                s->extents[i].cluster_sectors << BDRV_SECTOR_BITS)) {
-            return -ENOTSUP;
-        }
-    }
    return 0;
 }

--- a/block/vpc.c
+++ b/block/vpc.c
@@ -320,7 +320,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    /* Disable migration when VHD images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vpc", bs->device_name, "live migration");
+              "vpc", bdrv_get_device_name(bs), "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
@@ -893,11 +893,6 @@ static QemuOptsList vpc_create_opts = {
                "Type of virtual hard disk format. Supported formats are "
                "{dynamic (default) | fixed} "
        },
-        {
-            .name = BLOCK_OPT_NOCOW,
-            .type = QEMU_OPT_BOOL,
-            .help = "Turn off copy-on-write (valid only on btrfs)"
-        },
        { /* end of list */ }
    }
 };
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1182,7 +1182,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    if (s->qcow) {
        error_set(&s->migration_blocker,
                  QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  "vvfat (rw)", bs->device_name, "live migration");
+                  "vvfat (rw)", bdrv_get_device_name(bs), "live migration");
        migrate_add_blocker(s->migration_blocker);
    }

@@ -2917,6 +2917,12 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    }

    bdrv_qcow = bdrv_find_format("qcow");
+    if (!bdrv_qcow) {
+        error_setg(errp, "Failed to locate qcow driver");
+        ret = -ENOENT;
+        goto err;
+    }
+
    opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512);
    qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:");
@@ -2939,7 +2945,7 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    unlink(s->qcow_filename);
 #endif

-    bdrv_set_backing_hd(s->bs, bdrv_new("", &error_abort));
+    bdrv_set_backing_hd(s->bs, bdrv_new());
    s->bs->backing_hd->drv = &vvfat_write_target;
    s->bs->backing_hd->opaque = g_new(void *, 1);
    *(void**)s->bs->backing_hd->opaque = s;
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -44,7 +44,7 @@ struct QEMUWin32AIOState {
 };

 typedef struct QEMUWin32AIOCB {
-    BlockDriverAIOCB common;
+    BlockAIOCB common;
    struct QEMUWin32AIOState *ctx;
    int nbytes;
    OVERLAPPED ov;
@@ -110,10 +110,10 @@ static const AIOCBInfo win32_aiocb_info = {
    .aiocb_size         = sizeof(QEMUWin32AIOCB),
 };

-BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type)
+        BlockCompletionFunc *cb, void *opaque, int type)
 {
    struct QEMUWin32AIOCB *waiocb;
    uint64_t offset = sector_num * 512;
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -10,6 +10,7 @@
 */

 #include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
 #include "hw/block/block.h"
 #include "monitor/monitor.h"
 #include "qapi/qmp/qerror.h"
@@ -73,7 +74,7 @@ static void nbd_close_notifier(Notifier *n, void *data)
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
                        Error **errp)
 {
-    BlockDriverState *bs;
+    BlockBackend *blk;
    NBDExport *exp;
    NBDCloseNotifier *n;

@@ -87,12 +88,12 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
        return;
    }

-    bs = bdrv_find(device);
-    if (!bs) {
+    blk = blk_by_name(device);
+    if (!blk) {
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
        return;
    }
-    if (!bdrv_is_inserted(bs)) {
+    if (!blk_is_inserted(blk)) {
        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
        return;
    }
@@ -100,18 +101,18 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
    if (!has_writable) {
        writable = false;
    }
-    if (bdrv_is_read_only(bs)) {
+    if (blk_is_read_only(blk)) {
        writable = false;
    }

-    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
+    exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);

    nbd_export_set_name(exp, device);

    n = g_new0(NBDCloseNotifier, 1);
    n->n.notify = nbd_close_notifier;
    n->exp = exp;
-    bdrv_add_close_notifier(bs, &n->n);
+    blk_add_close_notifier(blk, &n->n);
    QTAILQ_INSERT_TAIL(&close_notifiers, n, next);
 }

--- a/blockdev.c
+++ b/blockdev.c
--- a/blockjob.c
+++ b/blockjob.c
@@ -36,7 +36,7 @@
 #include "qapi-event.h"

 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       int64_t speed, BlockCompletionFunc *cb,
                       void *opaque, Error **errp)
 {
    BlockJob *job;
@@ -50,6 +50,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
    bdrv_op_block_all(bs, job->blocker);
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
    job->bs            = bs;
@@ -107,7 +108,8 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
    if (job->paused || job->cancelled || !job->driver->complete) {
-        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY,
+                  bdrv_get_device_name(job->bs));
        return;
    }

@@ -152,27 +154,30 @@ void block_job_iostatus_reset(BlockJob *job)
    }
 }

-struct BlockCancelData {
+struct BlockFinishData {
    BlockJob *job;
-    BlockDriverCompletionFunc *cb;
+    BlockCompletionFunc *cb;
    void *opaque;
    bool cancelled;
    int ret;
 };

-static void block_job_cancel_cb(void *opaque, int ret)
+static void block_job_finish_cb(void *opaque, int ret)
 {
-    struct BlockCancelData *data = opaque;
+    struct BlockFinishData *data = opaque;

    data->cancelled = block_job_is_cancelled(data->job);
    data->ret = ret;
    data->cb(data->opaque, ret);
 }

-int block_job_cancel_sync(BlockJob *job)
+static int block_job_finish_sync(BlockJob *job,
+                                 void (*finish)(BlockJob *, Error **errp),
+                                 Error **errp)
 {
-    struct BlockCancelData data;
+    struct BlockFinishData data;
    BlockDriverState *bs = job->bs;
+    Error *local_err = NULL;

    assert(bs->job == job);

@@ -183,15 +188,37 @@ int block_job_cancel_sync(BlockJob *job)
    data.cb = job->cb;
    data.opaque = job->opaque;
    data.ret = -EINPROGRESS;
-    job->cb = block_job_cancel_cb;
+    job->cb = block_job_finish_cb;
    job->opaque = &data;
-    block_job_cancel(job);
+    finish(job, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EBUSY;
+    }
    while (data.ret == -EINPROGRESS) {
        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }

+/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
+ * used with block_job_finish_sync() without the need for (rather nasty)
+ * function pointer casts there. */
+static void block_job_cancel_err(BlockJob *job, Error **errp)
+{
+    block_job_cancel(job);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+    return block_job_finish_sync(job, &block_job_cancel_err, NULL);
+}
+
+int block_job_complete_sync(BlockJob *job, Error **errp)
+{
+    return block_job_finish_sync(job, &block_job_complete, errp);
+}
+
 void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
 {
    assert(job->busy);
@@ -235,6 +262,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
    info->offset    = job->offset;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
+    info->ready     = job->ready;
    return info;
 }

@@ -270,6 +298,8 @@ void block_job_event_completed(BlockJob *job, const char *msg)

 void block_job_event_ready(BlockJob *job)
 {
+    job->ready = true;
+
    qapi_event_send_block_job_ready(job->driver->job_type,
                                    bdrv_get_device_name(job->bs),
                                    job->len,
@@ -313,3 +343,48 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
    }
    return action;
 }
+
+typedef struct {
+    BlockJob *job;
+    QEMUBH *bh;
+    AioContext *aio_context;
+    BlockJobDeferToMainLoopFn *fn;
+    void *opaque;
+} BlockJobDeferToMainLoopData;
+
+static void block_job_defer_to_main_loop_bh(void *opaque)
+{
+    BlockJobDeferToMainLoopData *data = opaque;
+    AioContext *aio_context;
+
+    qemu_bh_delete(data->bh);
+
+    /* Prevent race with block_job_defer_to_main_loop() */
+    aio_context_acquire(data->aio_context);
+
+    /* Fetch BDS AioContext again, in case it has changed */
+    aio_context = bdrv_get_aio_context(data->job->bs);
+    aio_context_acquire(aio_context);
+
+    data->fn(data->job, data->opaque);
+
+    aio_context_release(aio_context);
+
+    aio_context_release(data->aio_context);
+
+    g_free(data);
+}
+
+void block_job_defer_to_main_loop(BlockJob *job,
+                                  BlockJobDeferToMainLoopFn *fn,
+                                  void *opaque)
+{
+    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
+    data->job = job;
+    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
+    data->aio_context = bdrv_get_aio_context(job->bs);
+    data->fn = fn;
+    data->opaque = opaque;
+
+    qemu_bh_schedule(data->bh);
+}
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -25,6 +25,7 @@
 #include "sysemu/sysemu.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
+#include "hw/hw.h"

 typedef struct FWBootEntry FWBootEntry;

@@ -37,6 +38,78 @@ struct FWBootEntry {

 static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
    QTAILQ_HEAD_INITIALIZER(fw_boot_order);
+static QEMUBootSetHandler *boot_set_handler;
+static void *boot_set_opaque;
+
+void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
+{
+    boot_set_handler = func;
+    boot_set_opaque = opaque;
+}
+
+void qemu_boot_set(const char *boot_order, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!boot_set_handler) {
+        error_setg(errp, "no function defined to set boot device list for"
+                         " this architecture");
+        return;
+    }
+
+    validate_bootdevices(boot_order, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    boot_set_handler(boot_set_opaque, boot_order, errp);
+}
+
+void validate_bootdevices(const char *devices, Error **errp)
+{
+    /* We just do some generic consistency checks */
+    const char *p;
+    int bitmap = 0;
+
+    for (p = devices; *p != '\0'; p++) {
+        /* Allowed boot devices are:
+         * a-b: floppy disk drives
+         * c-f: IDE disk drives
+         * g-m: machine implementation dependent drives
+         * n-p: network devices
+         * It's up to each machine implementation to check if the given boot
+         * devices match the actual hardware implementation and firmware
+         * features.
+         */
+        if (*p < 'a' || *p > 'p') {
+            error_setg(errp, "Invalid boot device '%c'", *p);
+            return;
+        }
+        if (bitmap & (1 << (*p - 'a'))) {
+            error_setg(errp, "Boot device '%c' was given twice", *p);
+            return;
+        }
+        bitmap |= 1 << (*p - 'a');
+    }
+}
+
+void restore_boot_order(void *opaque)
+{
+    char *normal_boot_order = opaque;
+    static int first = 1;
+
+    /* Restore boot order and remove ourselves after the first boot */
+    if (first) {
+        first = 0;
+        return;
+    }
+
+    qemu_boot_set(normal_boot_order, NULL);
+
+    qemu_unregister_reset(restore_boot_order, normal_boot_order);
+    g_free(normal_boot_order);
+}

 void check_boot_index(int32_t bootindex, Error **errp)
 {
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -351,8 +351,10 @@ static inline void init_thread(struct target_pt_regs *_regs, struct image_info *

    _regs->gpr[1] = infop->start_stack;
 #if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
-    entry = ldq_raw(infop->entry) + infop->load_addr;
-    toc = ldq_raw(infop->entry + 8) + infop->load_addr;
+    get_user_u64(entry, infop->entry);
+    entry += infop->load_addr;
+    get_user_u64(toc, infop->entry + 8);
+    toc += infop->load_addr;
    _regs->gpr[2] = toc;
    infop->entry = entry;
 #endif
@@ -365,8 +367,9 @@ static inline void init_thread(struct target_pt_regs *_regs, struct image_info *
    get_user_ual(_regs->gpr[3], pos);
    pos += sizeof(abi_ulong);
    _regs->gpr[4] = pos;
-    for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong))
-        tmp = ldl(pos);
+    for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong)) {
+        get_user_ual(tmp, pos);
+    }
    _regs->gpr[5] = pos;
 }

--- a/43
+++ b/43
@@ -1823,13 +1823,14 @@ fi
 # libseccomp check

 if test "$seccomp" != "no" ; then
-    if $pkg_config --atleast-version=2.1.0 libseccomp; then
+    if test "$cpu" = "i386" || test "$cpu" = "x86_64" &&
+        $pkg_config --atleast-version=2.1.1 libseccomp; then
        libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`"
        QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`"
 	seccomp="yes"
    else
 	if test "$seccomp" = "yes"; then
-            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.0"
+            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1"
 	fi
 	seccomp="no"
    fi
@@ -1868,6 +1869,32 @@ EOF
 #if !defined(HVM_MAX_VCPUS)
 # error HVM_MAX_VCPUS not defined
 #endif
+int main(void) {
+  xc_interface *xc;
+  xs_daemon_open();
+  xc = xc_interface_open(0, 0, 0);
+  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+  xc_gnttab_open(NULL, 0);
+  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+  xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
+  return 0;
+}
+EOF
+      compile_prog "" "$xen_libs"
+    then
+    xen_ctrl_version=450
+    xen=yes
+
+  elif
+      cat > $TMPC <<EOF &&
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
 int main(void) {
  xc_interface *xc;
  xs_daemon_open();
@@ -2726,7 +2753,7 @@ fi
 if test "$modules" = yes; then
    shacmd_probe="sha1sum sha1 shasum"
    for c in $shacmd_probe; do
-        if which $c >/dev/null 2>&1; then
+        if has $c; then
            shacmd="$c"
            break
        fi
@@ -4211,9 +4238,9 @@ EOF
  fi
 fi

-# add pixman flags after all config tests are done
-QEMU_CFLAGS="$QEMU_CFLAGS $pixman_cflags $fdt_cflags"
-libs_softmmu="$libs_softmmu $pixman_libs"
+# prepend pixman and ftd flags after all config tests are done
+QEMU_CFLAGS="$pixman_cflags $fdt_cflags $QEMU_CFLAGS"
+libs_softmmu="$pixman_libs $libs_softmmu"

 echo "Install prefix    $prefix"
 echo "BIOS directory    `eval echo $qemu_datadir`"
@@ -4282,6 +4309,9 @@ if test -n "$sparc_cpu"; then
    echo "Target Sparc Arch $sparc_cpu"
 fi
 echo "xen support       $xen"
+if test "$xen" = "yes" ; then
+  echo "xen ctrl version  $xen_ctrl_version"
+fi
 echo "brlapi support    $brlapi"
 echo "bluez  support    $bluez"
 echo "Documentation     $docs"
@@ -4908,6 +4938,7 @@ echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
 echo "QEMU_INCLUDES=$QEMU_INCLUDES" >> $config_host_mak
 if test "$sparse" = "yes" ; then
  echo "CC           := REAL_CC=\"\$(CC)\" cgcc"       >> $config_host_mak
+  echo "CXX          := REAL_CC=\"\$(CXX)\" cgcc"      >> $config_host_mak
  echo "HOST_CC      := REAL_CC=\"\$(HOST_CC)\" cgcc"  >> $config_host_mak
  echo "QEMU_CFLAGS  += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
 fi
--- a/coroutine-sigaltstack.c
+++ b/coroutine-sigaltstack.c
@@ -155,7 +155,7 @@ Coroutine *qemu_coroutine_new(void)
    stack_t oss;
    sigset_t sigs;
    sigset_t osigs;
-    jmp_buf old_env;
+    sigjmp_buf old_env;

    /* The way to manipulate stack is with the sigaltstack function. We
     * prepare a stack, with it delivering a signal to ourselves and then
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -25,7 +25,6 @@
 #include <stdlib.h>
 #include <setjmp.h>
 #include <stdint.h>
-#include <pthread.h>
 #include <ucontext.h>
 #include "qemu-common.h"
 #include "block/coroutine_int.h"
@@ -48,15 +47,8 @@ typedef struct {
 /**
 * Per-thread coroutine bookkeeping
 */
-typedef struct {
-    /** Currently executing coroutine */
-    Coroutine *current;
-
-    /** The default coroutine */
-    CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;

 /*
 * va_args to makecontext() must be type 'int', so passing
@@ -68,36 +60,6 @@ union cc_arg {
    int i[2];
 };

-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    if (!s) {
-        s = g_malloc0(sizeof(*s));
-        s->current = &s->leader.base;
-        pthread_setspecific(thread_state_key, s);
-    }
-    return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
-    CoroutineThreadState *s = opaque;
-
-    g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
-    int ret;
-
-    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
-    if (ret != 0) {
-        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
-        abort();
-    }
-}
-
 static void coroutine_trampoline(int i0, int i1)
 {
    union cc_arg arg;
@@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_)
    g_free(co);
 }

-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                                      CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
 {
    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
-    CoroutineThreadState *s = coroutine_get_thread_state();
    int ret;

-    s->current = to_;
+    current = to_;

    ret = sigsetjmp(from->env, 0);
    if (ret == 0) {
@@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,

 Coroutine *qemu_coroutine_self(void)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
-
-    return s->current;
+    if (!current) {
+        current = &leader.base;
+    }
+    return current;
 }

 bool qemu_in_coroutine(void)
 {
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    return s && s->current->caller;
+    return current && current->caller;
 }
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -168,7 +168,9 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
    }
 #endif /* DEBUG_DISAS */

+    cpu->can_do_io = 0;
    next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+    cpu->can_do_io = 1;
    trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
                       next_tb & TB_EXIT_MASK);

@@ -202,14 +204,19 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
 {
    CPUState *cpu = ENV_GET_CPU(env);
    TranslationBlock *tb;
+    target_ulong pc = orig_tb->pc;
+    target_ulong cs_base = orig_tb->cs_base;
+    uint64_t flags = orig_tb->flags;

    /* Should never happen.
       We only end up here when an existing TB is too long.  */
    if (max_cycles > CF_COUNT_MASK)
        max_cycles = CF_COUNT_MASK;

-    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
-                     max_cycles);
+    /* tb_gen_code can flush our orig_tb, invalidate it now */
+    tb_phys_invalidate(orig_tb, -1);
+    tb = tb_gen_code(cpu, pc, cs_base, flags,
+                     max_cycles | CF_NOCACHE);
    cpu->current_tb = tb;
    /* execute the generated code */
    trace_exec_tb_nocache(tb, tb->pc);
@@ -353,7 +360,6 @@ int cpu_exec(CPUArchState *env)
    }

    cc->cpu_exec_enter(cpu);
-    cpu->exception_index = -1;

    /* Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
@@ -373,6 +379,7 @@ int cpu_exec(CPUArchState *env)
                    if (ret == EXCP_DEBUG) {
                        cpu_handle_debug_exception(env);
                    }
+                    cpu->exception_index = -1;
                    break;
                } else {
 #if defined(CONFIG_USER_ONLY)
@@ -383,6 +390,7 @@ int cpu_exec(CPUArchState *env)
                    cc->do_interrupt(cpu);
 #endif
                    ret = cpu->exception_index;
+                    cpu->exception_index = -1;
                    break;
 #else
                    cc->do_interrupt(cpu);
@@ -537,6 +545,7 @@ int cpu_exec(CPUArchState *env)
            cpu = current_cpu;
            env = cpu->env_ptr;
            cc = CPU_GET_CLASS(cpu);
+            cpu->can_do_io = 1;
 #ifdef TARGET_I386
            x86_cpu = X86_CPU(cpu);
 #endif
--- a/cpus.c
+++ b/cpus.c
@@ -136,8 +136,7 @@ typedef struct TimersState {

 static TimersState timers_state;

-/* Return the virtual CPU time, based on the instruction counter.  */
-static int64_t cpu_get_icount_locked(void)
+int64_t cpu_get_icount_raw(void)
 {
    int64_t icount;
    CPUState *cpu = current_cpu;
@@ -145,10 +144,18 @@ static int64_t cpu_get_icount_locked(void)
    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
-            fprintf(stderr, "Bad clock read\n");
+            fprintf(stderr, "Bad icount read\n");
+            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
+    return icount;
+}
+
+/* Return the virtual CPU time, based on the instruction counter.  */
+static int64_t cpu_get_icount_locked(void)
+{
+    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
 }

@@ -317,7 +324,7 @@ static void icount_adjust(void)
 static void icount_adjust_rt(void *opaque)
 {
    timer_mod(icount_rt_timer,
-                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
 }

@@ -345,7 +352,7 @@ static void icount_warp_rt(void *opaque)

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
-        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
@@ -354,9 +361,8 @@ static void icount_warp_rt(void *opaque)
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
-            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
-            int64_t delta = cur_time - cur_icount;
+            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
@@ -419,7 +425,7 @@ void qemu_clock_warp(QEMUClockType type)
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
-    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
@@ -437,8 +443,8 @@ void qemu_clock_warp(QEMUClockType type)
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
-         * after some e"real" time, (related to the time left until the next
-         * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
+         * after some "real" time, (related to the time left until the next
+         * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
         * This avoids that the warps are visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
@@ -512,8 +518,8 @@ void configure_icount(QemuOpts *opts, Error **errp)
        return;
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
-    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
-                                          icount_warp_rt, NULL);
+    icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
@@ -537,10 +543,10 @@ void configure_icount(QemuOpts *opts, Error **errp)
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
-    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
-                                        icount_adjust_rt, NULL);
+    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
+                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
-                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
+                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
@@ -934,6 +940,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
@@ -974,6 +981,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
@@ -1016,6 +1024,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
+        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

--- a/cputlb.c
+++ b/cputlb.c
@@ -270,7 +270,8 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
    assert(sz >= TARGET_PAGE_SIZE);

 #if defined(DEBUG_TLB)
-    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+    qemu_log_mask(CPU_LOG_MMU,
+           "tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
           " prot=%x idx=%d\n",
           vaddr, paddr, prot, mmu_idx);
 #endif
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -32,6 +32,5 @@ CONFIG_G364FB=y
 CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
-CONFIG_VT82C686=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_EMPTY_SLOT=y
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -32,6 +32,5 @@ CONFIG_G364FB=y
 CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
-CONFIG_VT82C686=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_EMPTY_SLOT=y
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -32,6 +32,5 @@ CONFIG_G364FB=y
 CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
-CONFIG_VT82C686=y
 CONFIG_ISA_TESTDEV=y
 CONFIG_EMPTY_SLOT=y
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -30,3 +30,5 @@ CONFIG_IPACK=y
 CONFIG_WDT_IB6300ESB=y
 CONFIG_PCI_TESTDEV=y
 CONFIG_NVME_PCI=y
+CONFIG_SD=y
+CONFIG_SDHCI=y
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,3 +1,4 @@
+include pci.mak
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
 CONFIG_S390_FLIC=y
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -24,6 +24,7 @@

 #include "hw/hw.h"
 #include "hw/boards.h"
+#include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "qemu/config-file.h"
 #include "sysemu/sysemu.h"
@@ -76,6 +77,6 @@ void drive_hot_add(Monitor *mon, const QDict *qdict)

 err:
    if (dinfo) {
-        drive_del(dinfo);
+        blk_unref(blk_by_legacy_dinfo(dinfo));
    }
 }
--- a/device_tree.c
+++ b/device_tree.c
@@ -324,6 +324,7 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
    uint64_t value;
    int cellnum, vnum, ncells;
    uint32_t hival;
+    int ret;

    propcells = g_new0(uint32_t, numvalues * 2);

@@ -331,18 +332,23 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
    for (vnum = 0; vnum < numvalues; vnum++) {
        ncells = values[vnum * 2];
        if (ncells != 1 && ncells != 2) {
-            return -1;
+            ret = -1;
+            goto out;
        }
        value = values[vnum * 2 + 1];
        hival = cpu_to_be32(value >> 32);
        if (ncells > 1) {
            propcells[cellnum++] = hival;
        } else if (hival != 0) {
-            return -1;
+            ret = -1;
+            goto out;
        }
        propcells[cellnum++] = cpu_to_be32(value);
    }

-    return qemu_fdt_setprop(fdt, node_path, property, propcells,
-                            cellnum * sizeof(uint32_t));
+    ret = qemu_fdt_setprop(fdt, node_path, property, propcells,
+                           cellnum * sizeof(uint32_t));
+out:
+    g_free(propcells);
+    return ret;
 }
--- a/disas/arm-a64.cc
+++ b/disas/arm-a64.cc
@@ -39,7 +39,7 @@ public:
    ~QEMUDisassembler() { }

 protected:
-    void ProcessOutput(Instruction *instr) {
+    virtual void ProcessOutput(const Instruction *instr) {
        fprintf(stream_, "%08" PRIx32 "      %s",
                instr->InstructionBits(), GetOutput());
    }
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -2,7 +2,7 @@
 The code in this directory is a subset of libvixl:
 https://github.com/armvixl/vixl
 (specifically, it is the set of files needed for disassembly only,
-taken from libvixl 1.5).
+taken from libvixl 1.6).
 Bugfixes should preferably be sent upstream initially.

 The disassembler does not currently support the entire A64 instruction
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
@@ -32,6 +32,7 @@

 #include "globals.h"
 #include "utils.h"
+#include "code-buffer.h"
 #include "a64/instructions-a64.h"

 namespace vixl {
@@ -168,6 +169,11 @@ class CPURegister {
    return type_ == kFPRegister;
  }

+  bool IsW() const { return IsValidRegister() && Is32Bits(); }
+  bool IsX() const { return IsValidRegister() && Is64Bits(); }
+  bool IsS() const { return IsValidFPRegister() && Is32Bits(); }
+  bool IsD() const { return IsValidFPRegister() && Is64Bits(); }
+
  const Register& W() const;
  const Register& X() const;
  const FPRegister& S() const;
@@ -191,12 +197,12 @@ class CPURegister {

 class Register : public CPURegister {
 public:
-  explicit Register() : CPURegister() {}
+  Register() : CPURegister() {}
  inline explicit Register(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()) {
    VIXL_ASSERT(IsValidRegister());
  }
-  explicit Register(unsigned code, unsigned size)
+  Register(unsigned code, unsigned size)
      : CPURegister(code, size, kRegister) {}

  bool IsValid() const {
@@ -536,7 +542,7 @@ class Operand {
 class MemOperand {
 public:
  explicit MemOperand(Register base,
-                      ptrdiff_t offset = 0,
+                      int64_t offset = 0,
                      AddrMode addrmode = Offset);
  explicit MemOperand(Register base,
                      Register regoffset,
@@ -552,7 +558,7 @@ class MemOperand {

  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
-  ptrdiff_t offset() const { return offset_; }
+  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
@@ -565,7 +571,7 @@ class MemOperand {
 private:
  Register base_;
  Register regoffset_;
-  ptrdiff_t offset_;
+  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
@@ -680,32 +686,80 @@ class Label {
 };


-// TODO: Obtain better values for these, based on real-world data.
-const int kLiteralPoolCheckInterval = 4 * KBytes;
-const int kRecommendedLiteralPoolRange = 2 * kLiteralPoolCheckInterval;
+// A literal is a 32-bit or 64-bit piece of data stored in the instruction
+// stream and loaded through a pc relative load. The same literal can be
+// referred to by multiple instructions but a literal can only reside at one
+// place in memory. A literal can be used by a load before or after being
+// placed in memory.
+//
+// Internally an offset of 0 is associated with a literal which has been
+// neither used nor placed. Then two possibilities arise:
+//  1) the label is placed, the offset (stored as offset + 1) is used to
+//     resolve any subsequent load using the label.
+//  2) the label is not placed and offset is the offset of the last load using
+//     the literal (stored as -offset -1). If multiple loads refer to this
+//     literal then the last load holds the offset of the preceding load and
+//     all loads form a chain. Once the offset is placed all the loads in the
+//     chain are resolved and future loads fall back to possibility 1.
+class RawLiteral {
+ public:
+  RawLiteral() : size_(0), offset_(0), raw_value_(0) {}

+  size_t size() {
+    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
+    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
+    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes));
+    return size_;
+  }
+  uint64_t raw_value64() {
+    VIXL_ASSERT(size_ == kXRegSizeInBytes);
+    return raw_value_;
+  }
+  uint32_t raw_value32() {
+    VIXL_ASSERT(size_ == kWRegSizeInBytes);
+    VIXL_ASSERT(is_uint32(raw_value_) || is_int32(raw_value_));
+    return static_cast<uint32_t>(raw_value_);
+  }
+  bool IsUsed() { return offset_ < 0; }
+  bool IsPlaced() { return offset_ > 0; }

-// Control whether a branch over the literal pool should also be emitted. This
-// is needed if the literal pool has to be emitted in the middle of the JITted
-// code.
-enum LiteralPoolEmitOption {
-  JumpRequired,
-  NoJumpRequired
+ protected:
+  ptrdiff_t offset() {
+    VIXL_ASSERT(IsPlaced());
+    return offset_ - 1;
+  }
+  void set_offset(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = offset + 1;
+  }
+  ptrdiff_t last_use() {
+    VIXL_ASSERT(IsUsed());
+    return -offset_ - 1;
+  }
+  void set_last_use(ptrdiff_t offset) {
+    VIXL_ASSERT(offset >= 0);
+    VIXL_ASSERT(IsWordAligned(offset));
+    VIXL_ASSERT(!IsPlaced());
+    offset_ = -offset - 1;
+  }
+
+  size_t size_;
+  ptrdiff_t offset_;
+  uint64_t raw_value_;
+
+  friend class Assembler;
 };


-// Literal pool entry.
-class Literal {
+template <typename T>
+class Literal : public RawLiteral {
 public:
-  Literal(Instruction* pc, uint64_t imm, unsigned size)
-      : pc_(pc), value_(imm), size_(size) {}
-
- private:
-  Instruction* pc_;
-  int64_t value_;
-  unsigned size_;
-
-  friend class Assembler;
+  explicit Literal(T value) {
+    size_ = sizeof(value);
+    memcpy(&raw_value_, &value, sizeof(value));
+  }
 };


@@ -750,7 +804,9 @@ enum LoadStoreScalingOption {
 // Assembler.
 class Assembler {
 public:
-  Assembler(byte* buffer, unsigned buffer_size,
+  Assembler(size_t capacity,
+            PositionIndependentCodeOption pic = PositionIndependentCode);
+  Assembler(byte* buffer, size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode);

  // The destructor asserts that one of the following is true:
@@ -763,9 +819,6 @@ class Assembler {

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
-  //
-  // In order to avoid any accidental transfer of state, Reset ASSERTs that the
-  // constant pool is not blocked.
  void Reset();

  // Finalize a code buffer of generated instructions. This function must be
@@ -776,13 +829,47 @@ class Assembler {
  // Bind a label to the current PC.
  void bind(Label* label);

+  // Bind a label to a specified offset from the start of the buffer.
+  void BindToOffset(Label* label, ptrdiff_t offset);
+
+  // Place a literal at the current PC.
+  void place(RawLiteral* literal);
+
+  ptrdiff_t CursorOffset() const {
+    return buffer_->CursorOffset();
+  }
+
+  ptrdiff_t BufferEndOffset() const {
+    return static_cast<ptrdiff_t>(buffer_->capacity());
+  }
+
+  // Return the address of an offset in the buffer.
+  template <typename T>
+  inline T GetOffsetAddress(ptrdiff_t offset) {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return buffer_->GetOffsetAddress<T>(offset);
+  }
+
  // Return the address of a bound label.
  template <typename T>
  inline T GetLabelAddress(const Label * label) {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
-    VIXL_STATIC_ASSERT(sizeof(*buffer_) == 1);
-    return reinterpret_cast<T>(buffer_ + label->location());
+    return GetOffsetAddress<T>(label->location());
+  }
+
+  // Return the address of the cursor.
+  template <typename T>
+  inline T GetCursorAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(CursorOffset());
+  }
+
+  // Return the address of the start of the buffer.
+  template <typename T>
+  inline T GetStartAddress() {
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    return GetOffsetAddress<T>(0);
  }

  // Instruction set functions.
@@ -1324,14 +1411,17 @@ class Assembler {
  void stnp(const CPURegister& rt, const CPURegister& rt2,
            const MemOperand& dst);

-  // Load literal to register.
-  void ldr(const Register& rt, uint64_t imm);
+  // Load integer or FP register from literal pool.
+  void ldr(const CPURegister& rt, RawLiteral* literal);

-  // Load double precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, double imm);
+  // Load word with sign extension from literal pool.
+  void ldrsw(const Register& rt, RawLiteral* literal);

-  // Load single precision floating point literal to FP register.
-  void ldr(const FPRegister& ft, float imm);
+  // Load integer or FP register from pc + imm19 << 2.
+  void ldr(const CPURegister& rt, int imm19);
+
+  // Load word with sign extension from pc + imm19 << 2.
+  void ldrsw(const Register& rt, int imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
@@ -1618,25 +1708,26 @@ class Assembler {
  inline void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
-  inline void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc32(uint32_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(data);
+  }

  // Emit 64 bits of data into the instruction stream.
-  inline void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
+  inline void dc64(uint64_t data) {
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit64(data);
+  }

  // Copy a string into the instruction stream, including the terminating NULL
-  // character. The instruction pointer (pc_) is then aligned correctly for
+  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
-  void EmitStringData(const char * string) {
+  void EmitString(const char * string) {
    VIXL_ASSERT(string != NULL);
+    VIXL_ASSERT(buffer_monitor_ > 0);

-    size_t len = strlen(string) + 1;
-    EmitData(string, len);
-
-    // Pad with NULL characters until pc_ is aligned.
-    const char pad[] = {'\0', '\0', '\0', '\0'};
-    VIXL_STATIC_ASSERT(sizeof(pad) == kInstructionSize);
-    Instruction* next_pc = AlignUp(pc_, kInstructionSize);
-    EmitData(&pad, next_pc - pc_);
+    buffer_->EmitString(string);
+    buffer_->Align();
  }

  // Code generation helpers.
@@ -1912,43 +2003,39 @@ class Assembler {
    return scale << FPScale_offset;
  }

-  // Size of the code generated in bytes
-  size_t SizeOfCodeGenerated() const {
-    VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
-    return pc_ - buffer_;
-  }
-
  // Size of the code generated since label to the current position.
  size_t SizeOfCodeGeneratedSince(Label* label) const {
-    size_t pc_offset = SizeOfCodeGenerated();
-
    VIXL_ASSERT(label->IsBound());
-    VIXL_ASSERT(pc_offset >= static_cast<size_t>(label->location()));
-    VIXL_ASSERT(pc_offset < buffer_size_);
-
-    return pc_offset - label->location();
+    return buffer_->OffsetFrom(label->location());
  }

+  size_t BufferCapacity() const { return buffer_->capacity(); }

-  inline void BlockLiteralPool() {
-    literal_pool_monitor_++;
-  }
+  size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); }

-  inline void ReleaseLiteralPool() {
-    if (--literal_pool_monitor_ == 0) {
-      // Has the literal pool been blocked for too long?
-      VIXL_ASSERT(literals_.empty() ||
-             (pc_ < (literals_.back()->pc_ + kMaxLoadLiteralRange)));
+  void EnsureSpaceFor(size_t amount) {
+    if (buffer_->RemainingBytes() < amount) {
+      size_t capacity = buffer_->capacity();
+      size_t size = buffer_->CursorOffset();
+      do {
+        // TODO(all): refine.
+        capacity *= 2;
+      } while ((capacity - size) <  amount);
+      buffer_->Grow(capacity);
    }
  }

-  inline bool IsLiteralPoolBlocked() {
-    return literal_pool_monitor_ != 0;
+#ifdef DEBUG
+  void AcquireBuffer() {
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+    buffer_monitor_++;
  }

-  void CheckLiteralPool(LiteralPoolEmitOption option = JumpRequired);
-  void EmitLiteralPool(LiteralPoolEmitOption option = NoJumpRequired);
-  size_t LiteralPoolSize();
+  void ReleaseBuffer() {
+    buffer_monitor_--;
+    VIXL_ASSERT(buffer_monitor_ >= 0);
+  }
+#endif

  inline PositionIndependentCodeOption pic() {
    return pic_;
@@ -1959,22 +2046,30 @@ class Assembler {
           (pic() == PositionDependentCode);
  }

- protected:
-  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const {
+  static inline const Register& AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? xzr : wzr;
  }


+ protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);
-  static bool IsImmLSUnscaled(ptrdiff_t offset);
-  static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
+  static bool IsImmLSUnscaled(int64_t offset);
+  static bool IsImmLSScaled(int64_t offset, LSDataSize size);

+  void LoadStorePair(const CPURegister& rt,
+                     const CPURegister& rt2,
+                     const MemOperand& addr,
+                     LoadStorePairOp op);
+  static bool IsImmLSPair(int64_t offset, LSDataSize size);
+
+  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
+  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
-               const Operand& operand,
+               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
@@ -2035,6 +2130,7 @@ class Assembler {
    const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
    const CPURegister& rt, const CPURegister& rt2);
+  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
@@ -2053,10 +2149,6 @@ class Assembler {
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
-  void LoadStorePair(const CPURegister& rt,
-                     const CPURegister& rt2,
-                     const MemOperand& addr,
-                     LoadStorePairOp op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
@@ -2088,8 +2180,6 @@ class Assembler {
                               const FPRegister& fa,
                               FPDataProcessing3SourceOp op);

-  void RecordLiteral(int64_t imm, unsigned size);
-
  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
@@ -2098,79 +2188,102 @@ class Assembler {
  ptrdiff_t LinkAndGetPageOffsetTo(Label * label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
-  template <int element_size>
+  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

-  // Emit the instruction at pc_.
+  // Literal load offset are in words (32-bit).
+  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
+
+  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    VIXL_ASSERT((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    memcpy(pc_, &instruction, sizeof(instruction));
-    pc_ += sizeof(instruction);
-    CheckBufferSpace();
+    VIXL_ASSERT(buffer_monitor_ > 0);
+    buffer_->Emit32(instruction);
  }

-  // Emit data inline in the instruction stream.
-  void EmitData(void const * data, unsigned size) {
-    VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
-    VIXL_ASSERT((pc_ + size) <= (buffer_ + buffer_size_));
-
-#ifdef DEBUG
-    finalized_ = false;
-#endif
-
-    // TODO: Record this 'instruction' as data, so that it can be disassembled
-    // correctly.
-    memcpy(pc_, data, size);
-    pc_ += size;
-    CheckBufferSpace();
-  }
-
-  inline void CheckBufferSpace() {
-    VIXL_ASSERT(pc_ < (buffer_ + buffer_size_));
-    if (pc_ > next_literal_pool_check_) {
-      CheckLiteralPool();
-    }
-  }
-
-  // The buffer into which code and relocation info are generated.
-  Instruction* buffer_;
-  // Buffer size, in bytes.
-  size_t buffer_size_;
-  Instruction* pc_;
-  std::list<Literal*> literals_;
-  Instruction* next_literal_pool_check_;
-  unsigned literal_pool_monitor_;
-
+  // Buffer where the code is emitted.
+  CodeBuffer* buffer_;
  PositionIndependentCodeOption pic_;

-  friend class Label;
-  friend class BlockLiteralPoolScope;
-
 #ifdef DEBUG
-  bool finalized_;
+  int64_t buffer_monitor_;
 #endif
 };

-class BlockLiteralPoolScope {
+
+// All Assembler emits MUST acquire/release the underlying code buffer. The
+// helper scope below will do so and optionally ensure the buffer is big enough
+// to receive the emit. It is possible to request the scope not to perform any
+// checks (kNoCheck) if for example it is known in advance the buffer size is
+// adequate or there is some other size checking mechanism in place.
+class CodeBufferCheckScope {
 public:
-  explicit BlockLiteralPoolScope(Assembler* assm) : assm_(assm) {
-    assm_->BlockLiteralPool();
+  // Tell whether or not the scope needs to ensure the associated CodeBuffer
+  // has enough space for the requested size.
+  enum CheckPolicy {
+    kNoCheck,
+    kCheck
+  };
+
+  // Tell whether or not the scope should assert the amount of code emitted
+  // within the scope is consistent with the requested amount.
+  enum AssertPolicy {
+    kNoAssert,    // No assert required.
+    kExactSize,   // The code emitted must be exactly size bytes.
+    kMaximumSize  // The code emitted must be at most size bytes.
+  };
+
+  CodeBufferCheckScope(Assembler* assm,
+                       size_t size,
+                       CheckPolicy check_policy = kCheck,
+                       AssertPolicy assert_policy = kMaximumSize)
+      : assm_(assm) {
+    if (check_policy == kCheck) assm->EnsureSpaceFor(size);
+#ifdef DEBUG
+    assm->bind(&start_);
+    size_ = size;
+    assert_policy_ = assert_policy;
+    assm->AcquireBuffer();
+#else
+    USE(assert_policy);
+#endif
  }

-  ~BlockLiteralPoolScope() {
-    assm_->ReleaseLiteralPool();
+  // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
+  explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) {
+#ifdef DEBUG
+    size_ = 0;
+    assert_policy_ = kNoAssert;
+    assm->AcquireBuffer();
+#endif
  }

- private:
+  ~CodeBufferCheckScope() {
+#ifdef DEBUG
+    assm_->ReleaseBuffer();
+    switch (assert_policy_) {
+      case kNoAssert: break;
+      case kExactSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_);
+        break;
+      case kMaximumSize:
+        VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_);
+        break;
+      default:
+        VIXL_UNREACHABLE();
+    }
+#endif
+  }
+
+ protected:
  Assembler* assm_;
+#ifdef DEBUG
+  Label start_;
+  size_t size_;
+  AssertPolicy assert_policy_;
+#endif
 };
+
 }  // namespace vixl

 #endif  // VIXL_A64_ASSEMBLER_A64_H_
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -29,8 +29,8 @@
 #include "a64/decoder-a64.h"

 namespace vixl {
-// Top-level instruction decode function.
-void Decoder::Decode(Instruction *instr) {
+
+void Decoder::DecodeInstruction(const Instruction *instr) {
  if (instr->Bits(28, 27) == 0) {
    VisitUnallocated(instr);
  } else {
@@ -109,20 +109,17 @@ void Decoder::Decode(Instruction *instr) {
 }

 void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_front(new_visitor);
+  visitors_.push_back(new_visitor);
 }


 void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
-  visitors_.remove(new_visitor);
-  visitors_.push_back(new_visitor);
+  visitors_.push_front(new_visitor);
 }


 void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
                                  DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
  std::list<DecoderVisitor*>::iterator it;
  for (it = visitors_.begin(); it != visitors_.end(); it++) {
    if (*it == registered_visitor) {
@@ -139,7 +136,6 @@ void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,

 void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
                                 DecoderVisitor* registered_visitor) {
-  visitors_.remove(new_visitor);
  std::list<DecoderVisitor*>::iterator it;
  for (it = visitors_.begin(); it != visitors_.end(); it++) {
    if (*it == registered_visitor) {
@@ -160,7 +156,7 @@ void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
 }


-void Decoder::DecodePCRelAddressing(Instruction* instr) {
+void Decoder::DecodePCRelAddressing(const Instruction* instr) {
  VIXL_ASSERT(instr->Bits(27, 24) == 0x0);
  // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
  // decode.
@@ -169,7 +165,7 @@ void Decoder::DecodePCRelAddressing(Instruction* instr) {
 }


-void Decoder::DecodeBranchSystemException(Instruction* instr) {
+void Decoder::DecodeBranchSystemException(const Instruction* instr) {
  VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
              (instr->Bits(27, 24) == 0x5) ||
              (instr->Bits(27, 24) == 0x6) ||
@@ -270,7 +266,7 @@ void Decoder::DecodeBranchSystemException(Instruction* instr) {
 }


-void Decoder::DecodeLoadStore(Instruction* instr) {
+void Decoder::DecodeLoadStore(const Instruction* instr) {
  VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
              (instr->Bits(27, 24) == 0x9) ||
              (instr->Bits(27, 24) == 0xC) ||
@@ -388,7 +384,7 @@ void Decoder::DecodeLoadStore(Instruction* instr) {
 }


-void Decoder::DecodeLogical(Instruction* instr) {
+void Decoder::DecodeLogical(const Instruction* instr) {
  VIXL_ASSERT(instr->Bits(27, 24) == 0x2);

  if (instr->Mask(0x80400000) == 0x00400000) {
@@ -407,7 +403,7 @@ void Decoder::DecodeLogical(Instruction* instr) {
 }


-void Decoder::DecodeBitfieldExtract(Instruction* instr) {
+void Decoder::DecodeBitfieldExtract(const Instruction* instr) {
  VIXL_ASSERT(instr->Bits(27, 24) == 0x3);

  if ((instr->Mask(0x80400000) == 0x80000000) ||
@@ -432,7 +428,7 @@ void Decoder::DecodeBitfieldExtract(Instruction* instr) {
 }


-void Decoder::DecodeAddSubImmediate(Instruction* instr) {
+void Decoder::DecodeAddSubImmediate(const Instruction* instr) {
  VIXL_ASSERT(instr->Bits(27, 24) == 0x1);
  if (instr->Bit(23) == 1) {
    VisitUnallocated(instr);
@@ -442,7 +438,7 @@ void Decoder::DecodeAddSubImmediate(Instruction* instr) {
 }


-void Decoder::DecodeDataProcessing(Instruction* instr) {
+void Decoder::DecodeDataProcessing(const Instruction* instr) {
  VIXL_ASSERT((instr->Bits(27, 24) == 0xA) ||
              (instr->Bits(27, 24) == 0xB));

@@ -557,7 +553,7 @@ void Decoder::DecodeDataProcessing(Instruction* instr) {
 }


-void Decoder::DecodeFP(Instruction* instr) {
+void Decoder::DecodeFP(const Instruction* instr) {
  VIXL_ASSERT((instr->Bits(27, 24) == 0xE) ||
              (instr->Bits(27, 24) == 0xF));

@@ -684,14 +680,14 @@ void Decoder::DecodeFP(Instruction* instr) {
 }


-void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
+void Decoder::DecodeAdvSIMDLoadStore(const Instruction* instr) {
  // TODO: Implement Advanced SIMD load/store instruction decode.
  VIXL_ASSERT(instr->Bits(29, 25) == 0x6);
  VisitUnimplemented(instr);
 }


-void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
+void Decoder::DecodeAdvSIMDDataProcessing(const Instruction* instr) {
  // TODO: Implement Advanced SIMD data processing instruction decode.
  VIXL_ASSERT(instr->Bits(27, 25) == 0x7);
  VisitUnimplemented(instr);
@@ -699,7 +695,7 @@ void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {


 #define DEFINE_VISITOR_CALLERS(A)                                              \
-  void Decoder::Visit##A(Instruction *instr) {                                 \
+  void Decoder::Visit##A(const Instruction *instr) {                           \
    VIXL_ASSERT(instr->Mask(A##FMask) == A##Fixed);                            \
    std::list<DecoderVisitor*>::iterator it;                                   \
    for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -88,112 +88,152 @@ namespace vixl {
 // must provide implementations for all of these functions.
 class DecoderVisitor {
 public:
-  #define DECLARE(A) virtual void Visit##A(Instruction* instr) = 0;
-  VISITOR_LIST(DECLARE)
-  #undef DECLARE
+  enum VisitorConstness {
+    kConstVisitor,
+    kNonConstVisitor
+  };
+  explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
+      : constness_(constness) {}

  virtual ~DecoderVisitor() {}

- private:
-  // Visitors are registered in a list.
-  std::list<DecoderVisitor*> visitors_;
+  #define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE

-  friend class Decoder;
+  bool IsConstVisitor() const { return constness_ == kConstVisitor; }
+  Instruction* MutableInstruction(const Instruction* instr) {
+    VIXL_ASSERT(!IsConstVisitor());
+    return const_cast<Instruction*>(instr);
+  }
+
+ private:
+  VisitorConstness constness_;
 };


-class Decoder: public DecoderVisitor {
+class Decoder {
 public:
  Decoder() {}

-  // Top-level instruction decoder function. Decodes an instruction and calls
-  // the visitor functions registered with the Decoder class.
-  void Decode(Instruction *instr);
+  // Top-level wrappers around the actual decoding function.
+  void Decode(const Instruction* instr) {
+    std::list<DecoderVisitor*>::iterator it;
+    for (it = visitors_.begin(); it != visitors_.end(); it++) {
+      VIXL_ASSERT((*it)->IsConstVisitor());
+    }
+    DecodeInstruction(instr);
+  }
+  void Decode(Instruction* instr) {
+    DecodeInstruction(const_cast<const Instruction*>(instr));
+  }

  // Register a new visitor class with the decoder.
  // Decode() will call the corresponding visitor method from all registered
  // visitor classes when decoding reaches the leaf node of the instruction
  // decode tree.
-  // Visitors are called in the order.
-  // A visitor can only be registered once.
-  // Registering an already registered visitor will update its position.
+  // Visitors are called in order.
+  // A visitor can be registered multiple times.
  //
  //   d.AppendVisitor(V1);
  //   d.AppendVisitor(V2);
-  //   d.PrependVisitor(V2);            // Move V2 at the start of the list.
-  //   d.InsertVisitorBefore(V3, V2);
-  //   d.AppendVisitor(V4);
-  //   d.AppendVisitor(V4);             // No effect.
+  //   d.PrependVisitor(V2);
+  //   d.AppendVisitor(V3);
  //
  //   d.Decode(i);
  //
-  // will call in order visitor methods in V3, V2, V1, V4.
+  // will call in order visitor methods in V2, V1, V2, V3.
  void AppendVisitor(DecoderVisitor* visitor);
  void PrependVisitor(DecoderVisitor* visitor);
+  // These helpers register `new_visitor` before or after the first instance of
+  // `registered_visiter` in the list.
+  // So if
+  //   V1, V2, V1, V2
+  // are registered in this order in the decoder, calls to
+  //   d.InsertVisitorAfter(V3, V1);
+  //   d.InsertVisitorBefore(V4, V2);
+  // will yield the order
+  //   V1, V3, V4, V2, V1, V2
+  //
+  // For more complex modifications of the order of registered visitors, one can
+  // directly access and modify the list of visitors via the `visitors()'
+  // accessor.
  void InsertVisitorBefore(DecoderVisitor* new_visitor,
                           DecoderVisitor* registered_visitor);
  void InsertVisitorAfter(DecoderVisitor* new_visitor,
                          DecoderVisitor* registered_visitor);

-  // Remove a previously registered visitor class from the list of visitors
-  // stored by the decoder.
+  // Remove all instances of a previously registered visitor class from the list
+  // of visitors stored by the decoder.
  void RemoveVisitor(DecoderVisitor* visitor);

-  #define DECLARE(A) void Visit##A(Instruction* instr);
+  #define DECLARE(A) void Visit##A(const Instruction* instr);
  VISITOR_LIST(DECLARE)
  #undef DECLARE

+
+  std::list<DecoderVisitor*>* visitors() { return &visitors_; }
+
 private:
+  // Decodes an instruction and calls the visitor functions registered with the
+  // Decoder class.
+  void DecodeInstruction(const Instruction* instr);
+
  // Decode the PC relative addressing instruction, and call the corresponding
  // visitors.
  // On entry, instruction bits 27:24 = 0x0.
-  void DecodePCRelAddressing(Instruction* instr);
+  void DecodePCRelAddressing(const Instruction* instr);

  // Decode the add/subtract immediate instruction, and call the correspoding
  // visitors.
  // On entry, instruction bits 27:24 = 0x1.
-  void DecodeAddSubImmediate(Instruction* instr);
+  void DecodeAddSubImmediate(const Instruction* instr);

  // Decode the branch, system command, and exception generation parts of
  // the instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
-  void DecodeBranchSystemException(Instruction* instr);
+  void DecodeBranchSystemException(const Instruction* instr);

  // Decode the load and store parts of the instruction tree, and call
  // the corresponding visitors.
  // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
-  void DecodeLoadStore(Instruction* instr);
+  void DecodeLoadStore(const Instruction* instr);

  // Decode the logical immediate and move wide immediate parts of the
  // instruction tree, and call the corresponding visitors.
  // On entry, instruction bits 27:24 = 0x2.
-  void DecodeLogical(Instruction* instr);
+  void DecodeLogical(const Instruction* instr);

  // Decode the bitfield and extraction parts of the instruction tree,
  // and call the corresponding visitors.
  // On entry, instruction bits 27:24 = 0x3.
-  void DecodeBitfieldExtract(Instruction* instr);
+  void DecodeBitfieldExtract(const Instruction* instr);

  // Decode the data processing parts of the instruction tree, and call the
  // corresponding visitors.
  // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
-  void DecodeDataProcessing(Instruction* instr);
+  void DecodeDataProcessing(const Instruction* instr);

  // Decode the floating point parts of the instruction tree, and call the
  // corresponding visitors.
  // On entry, instruction bits 27:24 = {0xE, 0xF}.
-  void DecodeFP(Instruction* instr);
+  void DecodeFP(const Instruction* instr);

  // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
  // and call the corresponding visitors.
  // On entry, instruction bits 29:25 = 0x6.
-  void DecodeAdvSIMDLoadStore(Instruction* instr);
+  void DecodeAdvSIMDLoadStore(const Instruction* instr);

  // Decode the Advanced SIMD (NEON) data processing part of the instruction
  // tree, and call the corresponding visitors.
  // On entry, instruction bits 27:25 = 0x7.
-  void DecodeAdvSIMDDataProcessing(Instruction* instr);
+  void DecodeAdvSIMDDataProcessing(const Instruction* instr);
+
+ private:
+  // Visitors are registered in a list.
+  std::list<DecoderVisitor*> visitors_;
 };
+
 }  // namespace vixl

 #endif  // VIXL_A64_DECODER_A64_H_
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -57,7 +57,7 @@ char* Disassembler::GetOutput() {
 }


-void Disassembler::VisitAddSubImmediate(Instruction* instr) {
+void Disassembler::VisitAddSubImmediate(const Instruction* instr) {
  bool rd_is_zr = RdIsZROrSP(instr);
  bool stack_op = (rd_is_zr || RnIsZROrSP(instr)) &&
                  (instr->ImmAddSub() == 0) ? true : false;
@@ -102,7 +102,7 @@ void Disassembler::VisitAddSubImmediate(Instruction* instr) {
 }


-void Disassembler::VisitAddSubShifted(Instruction* instr) {
+void Disassembler::VisitAddSubShifted(const Instruction* instr) {
  bool rd_is_zr = RdIsZROrSP(instr);
  bool rn_is_zr = RnIsZROrSP(instr);
  const char *mnemonic = "";
@@ -149,7 +149,7 @@ void Disassembler::VisitAddSubShifted(Instruction* instr) {
 }


-void Disassembler::VisitAddSubExtended(Instruction* instr) {
+void Disassembler::VisitAddSubExtended(const Instruction* instr) {
  bool rd_is_zr = RdIsZROrSP(instr);
  const char *mnemonic = "";
  Extend mode = static_cast<Extend>(instr->ExtendMode());
@@ -187,7 +187,7 @@ void Disassembler::VisitAddSubExtended(Instruction* instr) {
 }


-void Disassembler::VisitAddSubWithCarry(Instruction* instr) {
+void Disassembler::VisitAddSubWithCarry(const Instruction* instr) {
  bool rn_is_zr = RnIsZROrSP(instr);
  const char *mnemonic = "";
  const char *form = "'Rd, 'Rn, 'Rm";
@@ -222,7 +222,7 @@ void Disassembler::VisitAddSubWithCarry(Instruction* instr) {
 }


-void Disassembler::VisitLogicalImmediate(Instruction* instr) {
+void Disassembler::VisitLogicalImmediate(const Instruction* instr) {
  bool rd_is_zr = RdIsZROrSP(instr);
  bool rn_is_zr = RnIsZROrSP(instr);
  const char *mnemonic = "";
@@ -294,7 +294,7 @@ bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) {
 }


-void Disassembler::VisitLogicalShifted(Instruction* instr) {
+void Disassembler::VisitLogicalShifted(const Instruction* instr) {
  bool rd_is_zr = RdIsZROrSP(instr);
  bool rn_is_zr = RnIsZROrSP(instr);
  const char *mnemonic = "";
@@ -345,7 +345,7 @@ void Disassembler::VisitLogicalShifted(Instruction* instr) {
 }


-void Disassembler::VisitConditionalCompareRegister(Instruction* instr) {
+void Disassembler::VisitConditionalCompareRegister(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";

@@ -360,7 +360,7 @@ void Disassembler::VisitConditionalCompareRegister(Instruction* instr) {
 }


-void Disassembler::VisitConditionalCompareImmediate(Instruction* instr) {
+void Disassembler::VisitConditionalCompareImmediate(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rn, 'IP, 'INzcv, 'Cond";

@@ -375,7 +375,7 @@ void Disassembler::VisitConditionalCompareImmediate(Instruction* instr) {
 }


-void Disassembler::VisitConditionalSelect(Instruction* instr) {
+void Disassembler::VisitConditionalSelect(const Instruction* instr) {
  bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr));
  bool rn_is_rm = (instr->Rn() == instr->Rm());
  const char *mnemonic = "";
@@ -428,7 +428,7 @@ void Disassembler::VisitConditionalSelect(Instruction* instr) {
 }


-void Disassembler::VisitBitfield(Instruction* instr) {
+void Disassembler::VisitBitfield(const Instruction* instr) {
  unsigned s = instr->ImmS();
  unsigned r = instr->ImmR();
  unsigned rd_size_minus_1 =
@@ -506,7 +506,7 @@ void Disassembler::VisitBitfield(Instruction* instr) {
 }


-void Disassembler::VisitExtract(Instruction* instr) {
+void Disassembler::VisitExtract(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rd, 'Rn, 'Rm, 'IExtract";

@@ -527,7 +527,7 @@ void Disassembler::VisitExtract(Instruction* instr) {
 }


-void Disassembler::VisitPCRelAddressing(Instruction* instr) {
+void Disassembler::VisitPCRelAddressing(const Instruction* instr) {
  switch (instr->Mask(PCRelAddressingMask)) {
    case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
    case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
@@ -536,7 +536,7 @@ void Disassembler::VisitPCRelAddressing(Instruction* instr) {
 }


-void Disassembler::VisitConditionalBranch(Instruction* instr) {
+void Disassembler::VisitConditionalBranch(const Instruction* instr) {
  switch (instr->Mask(ConditionalBranchMask)) {
    case B_cond: Format(instr, "b.'CBrn", "'BImmCond"); break;
    default: VIXL_UNREACHABLE();
@@ -544,7 +544,8 @@ void Disassembler::VisitConditionalBranch(Instruction* instr) {
 }


-void Disassembler::VisitUnconditionalBranchToRegister(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranchToRegister(
+    const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'Xn";

@@ -564,7 +565,7 @@ void Disassembler::VisitUnconditionalBranchToRegister(Instruction* instr) {
 }


-void Disassembler::VisitUnconditionalBranch(Instruction* instr) {
+void Disassembler::VisitUnconditionalBranch(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'BImmUncn";

@@ -577,7 +578,7 @@ void Disassembler::VisitUnconditionalBranch(Instruction* instr) {
 }


-void Disassembler::VisitDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing1Source(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rd, 'Rn";

@@ -598,7 +599,7 @@ void Disassembler::VisitDataProcessing1Source(Instruction* instr) {
 }


-void Disassembler::VisitDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing2Source(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'Rd, 'Rn, 'Rm";

@@ -619,7 +620,7 @@ void Disassembler::VisitDataProcessing2Source(Instruction* instr) {
 }


-void Disassembler::VisitDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitDataProcessing3Source(const Instruction* instr) {
  bool ra_is_zr = RaIsZROrSP(instr);
  const char *mnemonic = "";
  const char *form = "'Xd, 'Wn, 'Wm, 'Xa";
@@ -697,7 +698,7 @@ void Disassembler::VisitDataProcessing3Source(Instruction* instr) {
 }


-void Disassembler::VisitCompareBranch(Instruction* instr) {
+void Disassembler::VisitCompareBranch(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rt, 'BImmCmpa";

@@ -712,7 +713,7 @@ void Disassembler::VisitCompareBranch(Instruction* instr) {
 }


-void Disassembler::VisitTestBranch(Instruction* instr) {
+void Disassembler::VisitTestBranch(const Instruction* instr) {
  const char *mnemonic = "";
  // If the top bit of the immediate is clear, the tested register is
  // disassembled as Wt, otherwise Xt. As the top bit of the immediate is
@@ -729,7 +730,7 @@ void Disassembler::VisitTestBranch(Instruction* instr) {
 }


-void Disassembler::VisitMoveWideImmediate(Instruction* instr) {
+void Disassembler::VisitMoveWideImmediate(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rd, 'IMoveImm";

@@ -768,7 +769,7 @@ void Disassembler::VisitMoveWideImmediate(Instruction* instr) {
  V(LDR_s, "ldr", "'St")      \
  V(LDR_d, "ldr", "'Dt")

-void Disassembler::VisitLoadStorePreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePreIndex(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStorePreIndex)";

@@ -782,7 +783,7 @@ void Disassembler::VisitLoadStorePreIndex(Instruction* instr) {
 }


-void Disassembler::VisitLoadStorePostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePostIndex(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStorePostIndex)";

@@ -796,7 +797,7 @@ void Disassembler::VisitLoadStorePostIndex(Instruction* instr) {
 }


-void Disassembler::VisitLoadStoreUnsignedOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStoreUnsignedOffset)";

@@ -811,7 +812,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(Instruction* instr) {
 }


-void Disassembler::VisitLoadStoreRegisterOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreRegisterOffset(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStoreRegisterOffset)";

@@ -826,7 +827,7 @@ void Disassembler::VisitLoadStoreRegisterOffset(Instruction* instr) {
 }


-void Disassembler::VisitLoadStoreUnscaledOffset(Instruction* instr) {
+void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'Wt, ['Xns'ILS]";
  const char *form_x = "'Xt, ['Xns'ILS]";
@@ -857,7 +858,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(Instruction* instr) {
 }


-void Disassembler::VisitLoadLiteral(Instruction* instr) {
+void Disassembler::VisitLoadLiteral(const Instruction* instr) {
  const char *mnemonic = "ldr";
  const char *form = "(LoadLiteral)";

@@ -866,6 +867,11 @@ void Disassembler::VisitLoadLiteral(Instruction* instr) {
    case LDR_x_lit: form = "'Xt, 'ILLiteral 'LValue"; break;
    case LDR_s_lit: form = "'St, 'ILLiteral 'LValue"; break;
    case LDR_d_lit: form = "'Dt, 'ILLiteral 'LValue"; break;
+    case LDRSW_x_lit: {
+      mnemonic = "ldrsw";
+      form = "'Xt, 'ILLiteral 'LValue";
+      break;
+    }
    default: mnemonic = "unimplemented";
  }
  Format(instr, mnemonic, form);
@@ -883,7 +889,7 @@ void Disassembler::VisitLoadLiteral(Instruction* instr) {
  V(STP_d, "stp", "'Dt, 'Dt2", "8")     \
  V(LDP_d, "ldp", "'Dt, 'Dt2", "8")

-void Disassembler::VisitLoadStorePairPostIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPostIndex(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStorePairPostIndex)";

@@ -897,7 +903,7 @@ void Disassembler::VisitLoadStorePairPostIndex(Instruction* instr) {
 }


-void Disassembler::VisitLoadStorePairPreIndex(Instruction* instr) {
+void Disassembler::VisitLoadStorePairPreIndex(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStorePairPreIndex)";

@@ -911,7 +917,7 @@ void Disassembler::VisitLoadStorePairPreIndex(Instruction* instr) {
 }


-void Disassembler::VisitLoadStorePairOffset(Instruction* instr) {
+void Disassembler::VisitLoadStorePairOffset(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(LoadStorePairOffset)";

@@ -925,7 +931,7 @@ void Disassembler::VisitLoadStorePairOffset(Instruction* instr) {
 }


-void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
+void Disassembler::VisitLoadStorePairNonTemporal(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form;

@@ -944,7 +950,7 @@ void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
 }


-void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
+void Disassembler::VisitLoadStoreExclusive(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form;

@@ -987,7 +993,7 @@ void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
 }


-void Disassembler::VisitFPCompare(Instruction* instr) {
+void Disassembler::VisitFPCompare(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'Fn, 'Fm";
  const char *form_zero = "'Fn, #0.0";
@@ -1003,7 +1009,7 @@ void Disassembler::VisitFPCompare(Instruction* instr) {
 }


-void Disassembler::VisitFPConditionalCompare(Instruction* instr) {
+void Disassembler::VisitFPConditionalCompare(const Instruction* instr) {
  const char *mnemonic = "unmplemented";
  const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";

@@ -1018,7 +1024,7 @@ void Disassembler::VisitFPConditionalCompare(Instruction* instr) {
 }


-void Disassembler::VisitFPConditionalSelect(Instruction* instr) {
+void Disassembler::VisitFPConditionalSelect(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Fd, 'Fn, 'Fm, 'Cond";

@@ -1031,7 +1037,7 @@ void Disassembler::VisitFPConditionalSelect(Instruction* instr) {
 }


-void Disassembler::VisitFPDataProcessing1Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing1Source(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'Fd, 'Fn";

@@ -1059,7 +1065,7 @@ void Disassembler::VisitFPDataProcessing1Source(Instruction* instr) {
 }


-void Disassembler::VisitFPDataProcessing2Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing2Source(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Fd, 'Fn, 'Fm";

@@ -1083,7 +1089,7 @@ void Disassembler::VisitFPDataProcessing2Source(Instruction* instr) {
 }


-void Disassembler::VisitFPDataProcessing3Source(Instruction* instr) {
+void Disassembler::VisitFPDataProcessing3Source(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Fd, 'Fn, 'Fm, 'Fa";

@@ -1102,7 +1108,7 @@ void Disassembler::VisitFPDataProcessing3Source(Instruction* instr) {
 }


-void Disassembler::VisitFPImmediate(Instruction* instr) {
+void Disassembler::VisitFPImmediate(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "(FPImmediate)";

@@ -1115,7 +1121,7 @@ void Disassembler::VisitFPImmediate(Instruction* instr) {
 }


-void Disassembler::VisitFPIntegerConvert(Instruction* instr) {
+void Disassembler::VisitFPIntegerConvert(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "(FPIntegerConvert)";
  const char *form_rf = "'Rd, 'Fn";
@@ -1171,7 +1177,7 @@ void Disassembler::VisitFPIntegerConvert(Instruction* instr) {
 }


-void Disassembler::VisitFPFixedPointConvert(Instruction* instr) {
+void Disassembler::VisitFPFixedPointConvert(const Instruction* instr) {
  const char *mnemonic = "";
  const char *form = "'Rd, 'Fn, 'IFPFBits";
  const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
@@ -1199,7 +1205,7 @@ void Disassembler::VisitFPFixedPointConvert(Instruction* instr) {
 }


-void Disassembler::VisitSystem(Instruction* instr) {
+void Disassembler::VisitSystem(const Instruction* instr) {
  // Some system instructions hijack their Op and Cp fields to represent a
  // range of immediates instead of indicating a different instruction. This
  // makes the decoding tricky.
@@ -1267,7 +1273,7 @@ void Disassembler::VisitSystem(Instruction* instr) {
 }


-void Disassembler::VisitException(Instruction* instr) {
+void Disassembler::VisitException(const Instruction* instr) {
  const char *mnemonic = "unimplemented";
  const char *form = "'IDebug";

@@ -1286,22 +1292,75 @@ void Disassembler::VisitException(Instruction* instr) {
 }


-void Disassembler::VisitUnimplemented(Instruction* instr) {
+void Disassembler::VisitUnimplemented(const Instruction* instr) {
  Format(instr, "unimplemented", "(Unimplemented)");
 }


-void Disassembler::VisitUnallocated(Instruction* instr) {
+void Disassembler::VisitUnallocated(const Instruction* instr) {
  Format(instr, "unallocated", "(Unallocated)");
 }


-void Disassembler::ProcessOutput(Instruction* /*instr*/) {
+void Disassembler::ProcessOutput(const Instruction* /*instr*/) {
  // The base disasm does nothing more than disassembling into a buffer.
 }


-void Disassembler::Format(Instruction* instr, const char* mnemonic,
+void Disassembler::AppendRegisterNameToOutput(const Instruction* instr,
+                                              const CPURegister& reg) {
+  USE(instr);
+  VIXL_ASSERT(reg.IsValid());
+  char reg_char;
+
+  if (reg.IsRegister()) {
+    reg_char = reg.Is64Bits() ? 'x' : 'w';
+  } else {
+    VIXL_ASSERT(reg.IsFPRegister());
+    reg_char = reg.Is64Bits() ? 'd' : 's';
+  }
+
+  if (reg.IsFPRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) {
+    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
+    AppendToOutput("%c%d", reg_char, reg.code());
+  } else if (reg.Aliases(sp)) {
+    // Disassemble w31/x31 as stack pointer wsp/sp.
+    AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp");
+  } else {
+    // Disassemble w31/x31 as zero register wzr/xzr.
+    AppendToOutput("%czr", reg_char);
+  }
+}
+
+
+void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                                  int64_t offset) {
+  USE(instr);
+  char sign = (offset < 0) ? '-' : '+';
+  AppendToOutput("#%c0x%" PRIx64, sign, std::abs(offset));
+}
+
+
+void Disassembler::AppendAddressToOutput(const Instruction* instr,
+                                         const void* addr) {
+  USE(instr);
+  AppendToOutput("(addr %p)", addr);
+}
+
+
+void Disassembler::AppendCodeAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::AppendDataAddressToOutput(const Instruction* instr,
+                                             const void* addr) {
+  AppendAddressToOutput(instr, addr);
+}
+
+
+void Disassembler::Format(const Instruction* instr, const char* mnemonic,
                          const char* format) {
  VIXL_ASSERT(mnemonic != NULL);
  ResetOutput();
@@ -1315,7 +1374,7 @@ void Disassembler::Format(Instruction* instr, const char* mnemonic,
 }


-void Disassembler::Substitute(Instruction* instr, const char* string) {
+void Disassembler::Substitute(const Instruction* instr, const char* string) {
  char chr = *string++;
  while (chr != '\0') {
    if (chr == '\'') {
@@ -1328,7 +1387,8 @@ void Disassembler::Substitute(Instruction* instr, const char* string) {
 }


-int Disassembler::SubstituteField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteField(const Instruction* instr,
+                                  const char* format) {
  switch (format[0]) {
    case 'R':  // Register. X or W, selected by sf bit.
    case 'F':  // FP Register. S or D, selected by type field.
@@ -1354,7 +1414,7 @@ int Disassembler::SubstituteField(Instruction* instr, const char* format) {
 }


-int Disassembler::SubstituteRegisterField(Instruction* instr,
+int Disassembler::SubstituteRegisterField(const Instruction* instr,
                                          const char* format) {
  unsigned reg_num = 0;
  unsigned field_len = 2;
@@ -1381,34 +1441,47 @@ int Disassembler::SubstituteRegisterField(Instruction* instr,
    field_len = 3;
  }

-  char reg_type;
+  CPURegister::RegisterType reg_type;
+  unsigned reg_size;
+
  if (format[0] == 'R') {
    // Register type is R: use sf bit to choose X and W.
-    reg_type = instr->SixtyFourBits() ? 'x' : 'w';
+    reg_type = CPURegister::kRegister;
+    reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
  } else if (format[0] == 'F') {
    // Floating-point register: use type field to choose S or D.
-    reg_type = ((instr->FPType() & 1) == 0) ? 's' : 'd';
+    reg_type = CPURegister::kFPRegister;
+    reg_size = ((instr->FPType() & 1) == 0) ? kSRegSize : kDRegSize;
  } else {
-    // Register type is specified. Make it lower case.
-    reg_type = format[0] + 0x20;
+    // The register type is specified.
+    switch (format[0]) {
+      case 'W':
+        reg_type = CPURegister::kRegister; reg_size = kWRegSize; break;
+      case 'X':
+        reg_type = CPURegister::kRegister; reg_size = kXRegSize; break;
+      case 'S':
+        reg_type = CPURegister::kFPRegister; reg_size = kSRegSize; break;
+      case 'D':
+        reg_type = CPURegister::kFPRegister; reg_size = kDRegSize; break;
+      default:
+        VIXL_UNREACHABLE();
+        reg_type = CPURegister::kRegister;
+        reg_size = kXRegSize;
+    }
  }

-  if ((reg_num != kZeroRegCode) || (reg_type == 's') || (reg_type == 'd')) {
-    // A normal register: w0 - w30, x0 - x30, s0 - s31, d0 - d31.
-    AppendToOutput("%c%d", reg_type, reg_num);
-  } else if (format[2] == 's') {
-    // Disassemble w31/x31 as stack pointer wsp/sp.
-    AppendToOutput("%s", (reg_type == 'w') ? "wsp" : "sp");
-  } else {
-    // Disassemble w31/x31 as zero register wzr/xzr.
-    AppendToOutput("%czr", reg_type);
+  if ((reg_type == CPURegister::kRegister) &&
+      (reg_num == kZeroRegCode) && (format[2] == 's')) {
+    reg_num = kSPRegInternalCode;
  }

+  AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
+
  return field_len;
 }


-int Disassembler::SubstituteImmediateField(Instruction* instr,
+int Disassembler::SubstituteImmediateField(const Instruction* instr,
                                           const char* format) {
  VIXL_ASSERT(format[0] == 'I');

@@ -1458,8 +1531,7 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
    }
    case 'C': {  // ICondB - Immediate Conditional Branch.
      int64_t offset = instr->ImmCondBranch() << 2;
-      char sign = (offset >= 0) ? '+' : '-';
-      AppendToOutput("#%c0x%" PRIx64, sign, offset);
+      AppendPCRelativeOffsetToOutput(instr, offset);
      return 6;
    }
    case 'A': {  // IAddSub.
@@ -1522,7 +1594,7 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
 }


-int Disassembler::SubstituteBitfieldImmediateField(Instruction* instr,
+int Disassembler::SubstituteBitfieldImmediateField(const Instruction* instr,
                                                   const char* format) {
  VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B'));
  unsigned r = instr->ImmR();
@@ -1557,7 +1629,7 @@ int Disassembler::SubstituteBitfieldImmediateField(Instruction* instr,
 }


-int Disassembler::SubstituteLiteralField(Instruction* instr,
+int Disassembler::SubstituteLiteralField(const Instruction* instr,
                                         const char* format) {
  VIXL_ASSERT(strncmp(format, "LValue", 6) == 0);
  USE(format);
@@ -1565,16 +1637,21 @@ int Disassembler::SubstituteLiteralField(Instruction* instr,
  switch (instr->Mask(LoadLiteralMask)) {
    case LDR_w_lit:
    case LDR_x_lit:
+    case LDRSW_x_lit:
    case LDR_s_lit:
-    case LDR_d_lit: AppendToOutput("(addr %p)", instr->LiteralAddress()); break;
-    default: VIXL_UNREACHABLE();
+    case LDR_d_lit:
+      AppendDataAddressToOutput(instr, instr->LiteralAddress());
+      break;
+    default:
+      VIXL_UNREACHABLE();
  }

  return 6;
 }


-int Disassembler::SubstituteShiftField(Instruction* instr, const char* format) {
+int Disassembler::SubstituteShiftField(const Instruction* instr,
+                                       const char* format) {
  VIXL_ASSERT(format[0] == 'H');
  VIXL_ASSERT(instr->ShiftDP() <= 0x3);

@@ -1597,7 +1674,7 @@ int Disassembler::SubstituteShiftField(Instruction* instr, const char* format) {
 }


-int Disassembler::SubstituteConditionField(Instruction* instr,
+int Disassembler::SubstituteConditionField(const Instruction* instr,
                                           const char* format) {
  VIXL_ASSERT(format[0] == 'C');
  const char* condition_code[] = { "eq", "ne", "hs", "lo",
@@ -1618,27 +1695,28 @@ int Disassembler::SubstituteConditionField(Instruction* instr,
 }


-int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
+int Disassembler::SubstitutePCRelAddressField(const Instruction* instr,
                                              const char* format) {
  VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) ||   // Used by `adr`.
              (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.

  int64_t offset = instr->ImmPCRel();
-  Instruction * base = instr;
+  const Instruction * base = instr;

  if (format[9] == 'P') {
    offset *= kPageSize;
    base = AlignDown(base, kPageSize);
  }

-  char sign = (offset < 0) ? '-' : '+';
-  void * target = reinterpret_cast<void *>(base + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, std::abs(offset), target);
+  const void* target = reinterpret_cast<const void*>(base + offset);
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendAddressToOutput(instr, target);
  return 13;
 }


-int Disassembler::SubstituteBranchTargetField(Instruction* instr,
+int Disassembler::SubstituteBranchTargetField(const Instruction* instr,
                                              const char* format) {
  VIXL_ASSERT(strncmp(format, "BImm", 4) == 0);

@@ -1655,19 +1733,18 @@ int Disassembler::SubstituteBranchTargetField(Instruction* instr,
    default: VIXL_UNIMPLEMENTED();
  }
  offset <<= kInstructionSizeLog2;
-  char sign = '+';
-  if (offset < 0) {
-    offset = -offset;
-    sign = '-';
-  }
+  const void* target_address = reinterpret_cast<const void*>(instr + offset);
  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  void * address = reinterpret_cast<void *>(instr + offset);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, address);
+
+  AppendPCRelativeOffsetToOutput(instr, offset);
+  AppendToOutput(" ");
+  AppendCodeAddressToOutput(instr, target_address);
+
  return 8;
 }


-int Disassembler::SubstituteExtendField(Instruction* instr,
+int Disassembler::SubstituteExtendField(const Instruction* instr,
                                        const char* format) {
  VIXL_ASSERT(strncmp(format, "Ext", 3) == 0);
  VIXL_ASSERT(instr->ExtendMode() <= 7);
@@ -1694,7 +1771,7 @@ int Disassembler::SubstituteExtendField(Instruction* instr,
 }


-int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
+int Disassembler::SubstituteLSRegOffsetField(const Instruction* instr,
                                             const char* format) {
  VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0);
  const char* extend_mode[] = { "undefined", "undefined", "uxtw", "lsl",
@@ -1723,7 +1800,7 @@ int Disassembler::SubstituteLSRegOffsetField(Instruction* instr,
 }


-int Disassembler::SubstitutePrefetchField(Instruction* instr,
+int Disassembler::SubstitutePrefetchField(const Instruction* instr,
                                          const char* format) {
  VIXL_ASSERT(format[0] == 'P');
  USE(format);
@@ -1738,7 +1815,7 @@ int Disassembler::SubstitutePrefetchField(Instruction* instr,
  return 6;
 }

-int Disassembler::SubstituteBarrierField(Instruction* instr,
+int Disassembler::SubstituteBarrierField(const Instruction* instr,
                                         const char* format) {
  VIXL_ASSERT(format[0] == 'M');
  USE(format);
@@ -1770,7 +1847,7 @@ void Disassembler::AppendToOutput(const char* format, ...) {
 }


-void PrintDisassembler::ProcessOutput(Instruction* instr) {
+void PrintDisassembler::ProcessOutput(const Instruction* instr) {
  fprintf(stream_, "0x%016" PRIx64 "  %08" PRIx32 "\t\t%s\n",
          reinterpret_cast<uint64_t>(instr),
          instr->InstructionBits(),
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -31,6 +31,7 @@
 #include "utils.h"
 #include "instructions-a64.h"
 #include "decoder-a64.h"
+#include "assembler-a64.h"

 namespace vixl {

@@ -42,48 +43,83 @@ class Disassembler: public DecoderVisitor {
  char* GetOutput();

  // Declare all Visitor functions.
-  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  #define DECLARE(A)  void Visit##A(const Instruction* instr);
  VISITOR_LIST(DECLARE)
  #undef DECLARE

 protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);
+
+  // Default output functions.  The functions below implement a default way of
+  // printing elements in the disassembly. A sub-class can override these to
+  // customize the disassembly output.
+
+  // Prints the name of a register.
+  virtual void AppendRegisterNameToOutput(const Instruction* instr,
+                                          const CPURegister& reg);
+
+  // Prints a PC-relative offset. This is used for example when disassembling
+  // branches to immediate offsets.
+  virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr,
+                                              int64_t offset);
+
+  // Prints an address, in the general case. It can be code or data. This is
+  // used for example to print the target address of an ADR instruction.
+  virtual void AppendAddressToOutput(const Instruction* instr,
+                                     const void* addr);
+
+  // Prints the address of some code.
+  // This is used for example to print the target address of a branch to an
+  // immediate offset.
+  // A sub-class can for example override this method to lookup the address and
+  // print an appropriate name.
+  virtual void AppendCodeAddressToOutput(const Instruction* instr,
+                                         const void* addr);
+
+  // Prints the address of some data.
+  // This is used for example to print the source address of a load literal
+  // instruction.
+  virtual void AppendDataAddressToOutput(const Instruction* instr,
+                                         const void* addr);

 private:
-  void Format(Instruction* instr, const char* mnemonic, const char* format);
-  void Substitute(Instruction* instr, const char* string);
-  int SubstituteField(Instruction* instr, const char* format);
-  int SubstituteRegisterField(Instruction* instr, const char* format);
-  int SubstituteImmediateField(Instruction* instr, const char* format);
-  int SubstituteLiteralField(Instruction* instr, const char* format);
-  int SubstituteBitfieldImmediateField(Instruction* instr, const char* format);
-  int SubstituteShiftField(Instruction* instr, const char* format);
-  int SubstituteExtendField(Instruction* instr, const char* format);
-  int SubstituteConditionField(Instruction* instr, const char* format);
-  int SubstitutePCRelAddressField(Instruction* instr, const char* format);
-  int SubstituteBranchTargetField(Instruction* instr, const char* format);
-  int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
-  int SubstitutePrefetchField(Instruction* instr, const char* format);
-  int SubstituteBarrierField(Instruction* instr, const char* format);
+  void Format(
+      const Instruction* instr, const char* mnemonic, const char* format);
+  void Substitute(const Instruction* instr, const char* string);
+  int SubstituteField(const Instruction* instr, const char* format);
+  int SubstituteRegisterField(const Instruction* instr, const char* format);
+  int SubstituteImmediateField(const Instruction* instr, const char* format);
+  int SubstituteLiteralField(const Instruction* instr, const char* format);
+  int SubstituteBitfieldImmediateField(
+      const Instruction* instr, const char* format);
+  int SubstituteShiftField(const Instruction* instr, const char* format);
+  int SubstituteExtendField(const Instruction* instr, const char* format);
+  int SubstituteConditionField(const Instruction* instr, const char* format);
+  int SubstitutePCRelAddressField(const Instruction* instr, const char* format);
+  int SubstituteBranchTargetField(const Instruction* instr, const char* format);
+  int SubstituteLSRegOffsetField(const Instruction* instr, const char* format);
+  int SubstitutePrefetchField(const Instruction* instr, const char* format);
+  int SubstituteBarrierField(const Instruction* instr, const char* format);

-  inline bool RdIsZROrSP(Instruction* instr) const {
+  inline bool RdIsZROrSP(const Instruction* instr) const {
    return (instr->Rd() == kZeroRegCode);
  }

-  inline bool RnIsZROrSP(Instruction* instr) const {
+  inline bool RnIsZROrSP(const Instruction* instr) const {
    return (instr->Rn() == kZeroRegCode);
  }

-  inline bool RmIsZROrSP(Instruction* instr) const {
+  inline bool RmIsZROrSP(const Instruction* instr) const {
    return (instr->Rm() == kZeroRegCode);
  }

-  inline bool RaIsZROrSP(Instruction* instr) const {
+  inline bool RaIsZROrSP(const Instruction* instr) const {
    return (instr->Ra() == kZeroRegCode);
  }

  bool IsMovzMovnImm(unsigned reg_size, uint64_t value);

+ protected:
  void ResetOutput();
  void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);

@@ -97,10 +133,10 @@ class Disassembler: public DecoderVisitor {
 class PrintDisassembler: public Disassembler {
 public:
  explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
-  ~PrintDisassembler() { }
+  virtual ~PrintDisassembler() { }

 protected:
-  virtual void ProcessOutput(Instruction* instr);
+  virtual void ProcessOutput(const Instruction* instr);

 private:
  FILE *stream_;
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -57,7 +57,7 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
 // Logical immediates can't encode zero, so a return value of zero is used to
 // indicate a failure case. Specifically, where the constraints on imm_s are
 // not met.
-uint64_t Instruction::ImmLogical() {
+uint64_t Instruction::ImmLogical() const {
  unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
  int64_t n = BitN();
  int64_t imm_s = ImmSetBits();
@@ -108,7 +108,7 @@ uint64_t Instruction::ImmLogical() {
 }


-float Instruction::ImmFP32() {
+float Instruction::ImmFP32() const {
  //  ImmFP: abcdefgh (8 bits)
  // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
  // where B is b ^ 1
@@ -122,7 +122,7 @@ float Instruction::ImmFP32() {
 }


-double Instruction::ImmFP64() {
+double Instruction::ImmFP64() const {
  //  ImmFP: abcdefgh (8 bits)
  // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
  //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
@@ -148,8 +148,8 @@ LSDataSize CalcLSPairDataSize(LoadStorePairOp op) {
 }


-Instruction* Instruction::ImmPCOffsetTarget() {
-  Instruction * base = this;
+const Instruction* Instruction::ImmPCOffsetTarget() const {
+  const Instruction * base = this;
  ptrdiff_t offset;
  if (IsPCRelAddressing()) {
    // ADR and ADRP.
@@ -182,7 +182,7 @@ inline int Instruction::ImmBranch() const {
 }


-void Instruction::SetImmPCOffsetTarget(Instruction* target) {
+void Instruction::SetImmPCOffsetTarget(const Instruction* target) {
  if (IsPCRelAddressing()) {
    SetPCRelImmTarget(target);
  } else {
@@ -191,7 +191,7 @@ void Instruction::SetImmPCOffsetTarget(Instruction* target) {
 }


-void Instruction::SetPCRelImmTarget(Instruction* target) {
+void Instruction::SetPCRelImmTarget(const Instruction* target) {
  int32_t imm21;
  if ((Mask(PCRelAddressingMask) == ADR)) {
    imm21 = target - this;
@@ -207,7 +207,7 @@ void Instruction::SetPCRelImmTarget(Instruction* target) {
 }


-void Instruction::SetBranchImmTarget(Instruction* target) {
+void Instruction::SetBranchImmTarget(const Instruction* target) {
  VIXL_ASSERT(((target - this) & 3) == 0);
  Instr branch_imm = 0;
  uint32_t imm_mask = 0;
@@ -239,9 +239,9 @@ void Instruction::SetBranchImmTarget(Instruction* target) {
 }


-void Instruction::SetImmLLiteral(Instruction* source) {
-  VIXL_ASSERT(((source - this) & 3) == 0);
-  int offset = (source - this) >> kLiteralEntrySizeLog2;
+void Instruction::SetImmLLiteral(const Instruction* source) {
+  VIXL_ASSERT(IsWordAligned(source));
+  ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2;
  Instr imm = Assembler::ImmLLiteral(offset);
  Instr mask = ImmLLiteral_mask;

--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -44,6 +44,7 @@ const unsigned kMaxLoadLiteralRange = 1 * MBytes;
 // This is the nominal page size (as used by the adrp instruction); the actual
 // size of the memory pages allocated by the kernel is likely to differ.
 const unsigned kPageSize = 4 * KBytes;
+const unsigned kPageSizeLog2 = 12;

 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
@@ -95,30 +96,6 @@ const unsigned kDoubleExponentBits = 11;
 const unsigned kFloatMantissaBits = 23;
 const unsigned kFloatExponentBits = 8;

-const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
-const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
-const double kFP64PositiveInfinity =
-    rawbits_to_double(UINT64_C(0x7ff0000000000000));
-const double kFP64NegativeInfinity =
-    rawbits_to_double(UINT64_C(0xfff0000000000000));
-
-// This value is a signalling NaN as both a double and as a float (taking the
-// least-significant word).
-static const double kFP64SignallingNaN =
-    rawbits_to_double(UINT64_C(0x7ff000007f800001));
-static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
-
-// A similar value, but as a quiet NaN.
-static const double kFP64QuietNaN =
-    rawbits_to_double(UINT64_C(0x7ff800007fc00001));
-static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
-
-// The default NaN values (for FPCR.DN=1).
-static const double kFP64DefaultNaN =
-    rawbits_to_double(UINT64_C(0x7ff8000000000000));
-static const float kFP32DefaultNaN = rawbits_to_float(0x7fc00000);
-
-
 enum LSDataSize {
  LSByte        = 0,
  LSHalfword    = 1,
@@ -201,9 +178,9 @@ class Instruction {
    return signed_bitextract_32(width-1, 0, offset);
  }

-  uint64_t ImmLogical();
-  float ImmFP32();
-  double ImmFP64();
+  uint64_t ImmLogical() const;
+  float ImmFP32() const;
+  double ImmFP64() const;

  inline LSDataSize SizeLSPair() const {
    return CalcLSPairDataSize(
@@ -311,46 +288,49 @@ class Instruction {

  // Find the target of this instruction. 'this' may be a branch or a
  // PC-relative addressing instruction.
-  Instruction* ImmPCOffsetTarget();
+  const Instruction* ImmPCOffsetTarget() const;

  // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
  // a PC-relative addressing instruction.
-  void SetImmPCOffsetTarget(Instruction* target);
+  void SetImmPCOffsetTarget(const Instruction* target);
  // Patch a literal load instruction to load from 'source'.
-  void SetImmLLiteral(Instruction* source);
+  void SetImmLLiteral(const Instruction* source);

-  inline uint8_t* LiteralAddress() {
+  inline uint8_t* LiteralAddress() const {
    int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
-    return reinterpret_cast<uint8_t*>(this) + offset;
+    const uint8_t* address = reinterpret_cast<const uint8_t*>(this) + offset;
+    // Note that the result is safely mutable only if the backing buffer is
+    // safely mutable.
+    return const_cast<uint8_t*>(address);
  }

-  inline uint32_t Literal32() {
+  inline uint32_t Literal32() const {
    uint32_t literal;
    memcpy(&literal, LiteralAddress(), sizeof(literal));

    return literal;
  }

-  inline uint64_t Literal64() {
+  inline uint64_t Literal64() const {
    uint64_t literal;
    memcpy(&literal, LiteralAddress(), sizeof(literal));

    return literal;
  }

-  inline float LiteralFP32() {
+  inline float LiteralFP32() const {
    return rawbits_to_float(Literal32());
  }

-  inline double LiteralFP64() {
+  inline double LiteralFP64() const {
    return rawbits_to_double(Literal64());
  }

-  inline Instruction* NextInstruction() {
+  inline const Instruction* NextInstruction() const {
    return this + kInstructionSize;
  }

-  inline Instruction* InstructionAtOffset(int64_t offset) {
+  inline const Instruction* InstructionAtOffset(int64_t offset) const {
    VIXL_ASSERT(IsWordAligned(this + offset));
    return this + offset;
  }
@@ -359,11 +339,15 @@ class Instruction {
    return reinterpret_cast<Instruction*>(src);
  }

+  template<typename T> static inline const Instruction* CastConst(T src) {
+    return reinterpret_cast<const Instruction*>(src);
+  }
+
 private:
  inline int ImmBranch() const;

-  void SetPCRelImmTarget(Instruction* target);
-  void SetBranchImmTarget(Instruction* target);
+  void SetPCRelImmTarget(const Instruction* target);
+  void SetBranchImmTarget(const Instruction* target);
 };
 }  // namespace vixl

--- a/disas/libvixl/code-buffer.h
+++ b/disas/libvixl/code-buffer.h
@@ -0,0 +1,113 @@
+// Copyright 2014, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CODE_BUFFER_H
+#define VIXL_CODE_BUFFER_H
+
+#include <string.h>
+#include "globals.h"
+
+namespace vixl {
+
+class CodeBuffer {
+ public:
+  explicit CodeBuffer(size_t capacity = 4 * KBytes);
+  CodeBuffer(void* buffer, size_t capacity);
+  ~CodeBuffer();
+
+  void Reset();
+
+  ptrdiff_t OffsetFrom(ptrdiff_t offset) const {
+    ptrdiff_t cursor_offset = cursor_ - buffer_;
+    VIXL_ASSERT((offset >= 0) && (offset <= cursor_offset));
+    return cursor_offset - offset;
+  }
+
+  ptrdiff_t CursorOffset() const {
+    return OffsetFrom(0);
+  }
+
+  template <typename T>
+  T GetOffsetAddress(ptrdiff_t offset) const {
+    VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_)));
+    return reinterpret_cast<T>(buffer_ + offset);
+  }
+
+  size_t RemainingBytes() const {
+    VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_)));
+    return (buffer_ + capacity_) - cursor_;
+  }
+
+  // A code buffer can emit:
+  //  * 32-bit data: instruction and constant.
+  //  * 64-bit data: constant.
+  //  * string: debug info.
+  void Emit32(uint32_t data) { Emit(data); }
+
+  void Emit64(uint64_t data) { Emit(data); }
+
+  void EmitString(const char* string);
+
+  // Align to kInstructionSize.
+  void Align();
+
+  size_t capacity() const { return capacity_; }
+
+  bool IsManaged() const { return managed_; }
+
+  void Grow(size_t new_capacity);
+
+  bool IsDirty() const { return dirty_; }
+
+  void SetClean() { dirty_ = false; }
+
+ private:
+  template <typename T>
+  void Emit(T value) {
+    VIXL_ASSERT(RemainingBytes() >= sizeof(value));
+    dirty_ = true;
+    memcpy(cursor_, &value, sizeof(value));
+    cursor_ += sizeof(value);
+  }
+
+  // Backing store of the buffer.
+  byte* buffer_;
+  // If true the backing store is allocated and deallocated by the buffer. The
+  // backing store can then grow on demand. If false the backing store is
+  // provided by the user and cannot be resized internally.
+  bool managed_;
+  // Pointer to the next location to be written.
+  byte* cursor_;
+  // True if there has been any write since the buffer was created or cleaned.
+  bool dirty_;
+  // Capacity in bytes of the backing store.
+  size_t capacity_;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_CODE_BUFFER_H
+
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -134,4 +134,5 @@ uint64_t LowestSetBit(uint64_t value) {
 bool IsPowerOf2(int64_t value) {
  return (value != 0) && ((value & (value - 1)) == 0);
 }
+
 }  // namespace vixl
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -171,7 +171,7 @@ bool IsPowerOf2(int64_t value);
 template<typename T>
 bool IsWordAligned(T pointer) {
  VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
-  return (reinterpret_cast<intptr_t>(pointer) & 3) == 0;
+  return ((intptr_t)(pointer) & 3) == 0;
 }

 // Increment a pointer until it has the specified alignment.
@@ -204,7 +204,6 @@ T AlignDown(T pointer, size_t alignment) {
  return (T)(pointer_raw - align_step);
 }

-
 }  // namespace vixl

 #endif  // VIXL_UTILS_H
--- a/disas/mips.c
+++ b/disas/mips.c
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -7,6 +7,7 @@
 * (GNU GPL), version 2 or later.
 */

+#include "sysemu/block-backend.h"
 #include "sysemu/dma.h"
 #include "trace.h"
 #include "qemu/range.h"
@@ -67,9 +68,9 @@ void qemu_sglist_destroy(QEMUSGList *qsg)
 }

 typedef struct {
-    BlockDriverAIOCB common;
-    BlockDriverState *bs;
-    BlockDriverAIOCB *acb;
+    BlockAIOCB common;
+    BlockBackend *blk;
+    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint64_t sector_num;
    DMADirection dir;
@@ -80,7 +81,7 @@ typedef struct {
    DMAIOFunc *io_func;
 } DMAAIOCB;

-static void dma_bdrv_cb(void *opaque, int ret);
+static void dma_blk_cb(void *opaque, int ret);

 static void reschedule_dma(void *opaque)
 {
@@ -88,7 +89,7 @@ static void reschedule_dma(void *opaque)

    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
-    dma_bdrv_cb(dbs, 0);
+    dma_blk_cb(dbs, 0);
 }

 static void continue_after_map_failure(void *opaque)
@@ -99,7 +100,7 @@ static void continue_after_map_failure(void *opaque)
    qemu_bh_schedule(dbs->bh);
 }

-static void dma_bdrv_unmap(DMAAIOCB *dbs)
+static void dma_blk_unmap(DMAAIOCB *dbs)
 {
    int i;

@@ -115,7 +116,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 {
    trace_dma_complete(dbs, ret, dbs->common.cb);

-    dma_bdrv_unmap(dbs);
+    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
@@ -127,13 +128,13 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
    qemu_aio_unref(dbs);
 }

-static void dma_bdrv_cb(void *opaque, int ret)
+static void dma_blk_cb(void *opaque, int ret)
 {
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

-    trace_dma_bdrv_cb(dbs, ret);
+    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->sector_num += dbs->iov.size / 512;
@@ -142,7 +143,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
        dma_complete(dbs, ret);
        return;
    }
-    dma_bdrv_unmap(dbs);
+    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
@@ -168,19 +169,19 @@ static void dma_bdrv_cb(void *opaque, int ret)
        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
    }

-    dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
-                            dbs->iov.size / 512, dma_bdrv_cb, dbs);
+    dbs->acb = dbs->io_func(dbs->blk, dbs->sector_num, &dbs->iov,
+                            dbs->iov.size / 512, dma_blk_cb, dbs);
    assert(dbs->acb);
 }

-static void dma_aio_cancel(BlockDriverAIOCB *acb)
+static void dma_aio_cancel(BlockAIOCB *acb)
 {
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    if (dbs->acb) {
-        bdrv_aio_cancel_async(dbs->acb);
+        blk_aio_cancel_async(dbs->acb);
    }
 }

@@ -190,17 +191,17 @@ static const AIOCBInfo dma_aiocb_info = {
    .cancel_async       = dma_aio_cancel,
 };

-BlockDriverAIOCB *dma_bdrv_io(
-    BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
-    DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
+BlockAIOCB *dma_blk_io(
+    BlockBackend *blk, QEMUSGList *sg, uint64_t sector_num,
+    DMAIOFunc *io_func, BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
 {
-    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, bs, cb, opaque);
+    DMAAIOCB *dbs = blk_aio_get(&dma_aiocb_info, blk, cb, opaque);

-    trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
+    trace_dma_blk_io(dbs, blk, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
-    dbs->bs = bs;
+    dbs->blk = blk;
    dbs->sg = sg;
    dbs->sector_num = sector_num;
    dbs->sg_cur_index = 0;
@@ -209,25 +210,25 @@ BlockDriverAIOCB *dma_bdrv_io(
    dbs->io_func = io_func;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
-    dma_bdrv_cb(dbs, 0);
+    dma_blk_cb(dbs, 0);
    return &dbs->common;
 }


-BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
-                                QEMUSGList *sg, uint64_t sector,
-                                void (*cb)(void *opaque, int ret), void *opaque)
+BlockAIOCB *dma_blk_read(BlockBackend *blk,
+                         QEMUSGList *sg, uint64_t sector,
+                         void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque,
-                       DMA_DIRECTION_FROM_DEVICE);
+    return dma_blk_io(blk, sg, sector, blk_aio_readv, cb, opaque,
+                      DMA_DIRECTION_FROM_DEVICE);
 }

-BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
-                                 QEMUSGList *sg, uint64_t sector,
-                                 void (*cb)(void *opaque, int ret), void *opaque)
+BlockAIOCB *dma_blk_write(BlockBackend *blk,
+                          QEMUSGList *sg, uint64_t sector,
+                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque,
-                       DMA_DIRECTION_TO_DEVICE);
+    return dma_blk_io(blk, sg, sector, blk_aio_writev, cb, opaque,
+                      DMA_DIRECTION_TO_DEVICE);
 }


@@ -262,8 +263,8 @@ uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
    return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
 }

-void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
+void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
 {
-    block_acct_start(bdrv_get_stats(bs), cookie, sg->size, type);
+    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
 }
--- a/docs/blkdebug.txt
+++ b/docs/blkdebug.txt
@@ -70,10 +70,10 @@ Rules support the following attributes:
  once - (optional, default "off") only execute this action on the first
         matching request

-  immediately - (optional, default "off") return a NULL BlockDriverAIOCB
-                pointer and fail without an errno instead.  This exercises the
-                code path where BlockDriverAIOCB fails and the caller's
-                BlockDriverCompletionFunc is not invoked.
+  immediately - (optional, default "off") return a NULL BlockAIOCB
+                pointer and fail without an errno instead.  This
+                exercises the code path where BlockAIOCB fails and the
+                caller's BlockCompletionFunc is not invoked.

 Events
 ------
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -44,6 +44,8 @@ PCI devices (other than virtio):
 1b36:0002  PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
 1b36:0003  PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
 1b36:0004  PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
+1b36:0005  PCI test device (docs/specs/pci-testdev.txt)
+1b36:0007  PCI SD Card Host Controller Interface (SDHCI)

 All these devices are documented in docs/specs.

--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -110,6 +110,7 @@ in the description of a field.
                    in bits: refcount_bits = 1 << refcount_order). For version 2
                    images, the order is always assumed to be 4
                    (i.e. refcount_bits = 16).
+                    This value may not exceed 6 (i.e. refcount_bits = 64).

        100 - 103:  header_length
                    Length of the header structure in bytes. For version 2
@@ -183,7 +184,7 @@ blocks and are exactly one cluster in size.
 Given a offset into the image file, the refcount of its cluster can be obtained
 as follows:

-    refcount_block_entries = (cluster_size / sizeof(uint16_t))
+    refcount_block_entries = (cluster_size * 8 / refcount_bits)

    refcount_block_index = (offset / cluster_size) % refcount_block_entries
    refcount_table_index = (offset / cluster_size) / refcount_block_entries
--- a/docs/specs/standard-vga.txt
+++ b/docs/specs/standard-vga.txt
@@ -70,3 +70,12 @@ Likewise applies to the pci variant only for obvious reasons.
 0500 - 0515 : bochs dispi interface registers, mapped flat
              without index/data ports.  Use (index << 1)
              as offset for (16bit) register access.
+
+0600 - 0607 : qemu extended registers.  qemu 2.2+ only.
+              The pci revision is 2 (or greater) when
+              these registers are present.  The registers
+              are 32bit.
+  0600      : qemu extended register region size, in bytes.
+  0604      : framebuffer endianness register.
+              - 0xbebebebe indicates big endian.
+              - 0x1e1e1e1e indicates little endian.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -139,12 +139,12 @@ events are not tightly coupled to a specific trace backend, such as LTTng or
 SystemTap.  Support for trace backends can be added by extending the "tracetool"
 script.

-The trace backend is chosen at configure time and only one trace backend can
-be built into the binary:
+The trace backends are chosen at configure time:

-    ./configure --trace-backends=simple
+    ./configure --enable-trace-backends=simple

 For a list of supported trace backends, try ./configure --help or see below.
+If multiple backends are enabled, the trace is sent to them all.

 The following subsections describe the supported trace backends.

--- a/docs/writing-qmp-commands.txt
+++ b/docs/writing-qmp-commands.txt
@@ -365,6 +365,8 @@ documentation for information about the other types.

 === User Defined Types ===

+FIXME This example needs to be redone after commit 6d32717
+
 For this example we will write the query-alarm-clock command, which returns
 information about QEMU's timer alarm. For more information about it, please
 check the "-clock" command-line option.
--- a/docs/xbzrle.txt
+++ b/docs/xbzrle.txt
@@ -71,6 +71,14 @@ encoded buffer:
 encoded length 24
 e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69

+Cache update strategy
+=====================
+Keeping the hot pages in the cache is effective for decreased cache
+misses. XBZRLE uses a counter as the age of each page. The counter will
+increase after each ram dirty bitmap sync. When a cache conflict is
+detected, XBZRLE will only evict pages in the cache that are older than
+a threshold.
+
 Usage
 ======================
 1. Verify the destination QEMU version is able to decode the new format.
--- a/dump.c
+++ b/dump.c
@@ -81,9 +81,10 @@ static int dump_cleanup(DumpState *s)
    return 0;
 }

-static void dump_error(DumpState *s, const char *reason)
+static void dump_error(DumpState *s, const char *reason, Error **errp)
 {
    dump_cleanup(s);
+    error_setg(errp, "%s", reason);
 }

 static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
@@ -99,7 +100,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
    return 0;
 }

-static int write_elf64_header(DumpState *s)
+static void write_elf64_header(DumpState *s, Error **errp)
 {
    Elf64_Ehdr elf_header;
    int ret;
@@ -126,14 +127,11 @@ static int write_elf64_header(DumpState *s)

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write elf header.\n");
-        return -1;
+        dump_error(s, "dump: failed to write elf header", errp);
    }
-
-    return 0;
 }

-static int write_elf32_header(DumpState *s)
+static void write_elf32_header(DumpState *s, Error **errp)
 {
    Elf32_Ehdr elf_header;
    int ret;
@@ -160,16 +158,13 @@ static int write_elf32_header(DumpState *s)

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write elf header.\n");
-        return -1;
+        dump_error(s, "dump: failed to write elf header", errp);
    }
-
-    return 0;
 }

-static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, hwaddr offset,
-                            hwaddr filesz)
+static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
+                             int phdr_index, hwaddr offset,
+                             hwaddr filesz, Error **errp)
 {
    Elf64_Phdr phdr;
    int ret;
@@ -186,16 +181,13 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write program header table.\n");
-        return -1;
+        dump_error(s, "dump: failed to write program header table", errp);
    }
-
-    return 0;
 }

-static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, hwaddr offset,
-                            hwaddr filesz)
+static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
+                             int phdr_index, hwaddr offset,
+                             hwaddr filesz, Error **errp)
 {
    Elf32_Phdr phdr;
    int ret;
@@ -212,14 +204,11 @@ static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write program header table.\n");
-        return -1;
+        dump_error(s, "dump: failed to write program header table", errp);
    }
-
-    return 0;
 }

-static int write_elf64_note(DumpState *s)
+static void write_elf64_note(DumpState *s, Error **errp)
 {
    Elf64_Phdr phdr;
    hwaddr begin = s->memory_offset - s->note_size;
@@ -235,11 +224,8 @@ static int write_elf64_note(DumpState *s)

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write program header table.\n");
-        return -1;
+        dump_error(s, "dump: failed to write program header table", errp);
    }
-
-    return 0;
 }

 static inline int cpu_index(CPUState *cpu)
@@ -247,7 +233,8 @@ static inline int cpu_index(CPUState *cpu)
    return cpu->cpu_index + 1;
 }

-static int write_elf64_notes(WriteCoreDumpFunction f, DumpState *s)
+static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
+                              Error **errp)
 {
    CPUState *cpu;
    int ret;
@@ -257,23 +244,21 @@ static int write_elf64_notes(WriteCoreDumpFunction f, DumpState *s)
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to write elf notes.\n");
-            return -1;
+            dump_error(s, "dump: failed to write elf notes", errp);
+            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to write CPU status.\n");
-            return -1;
+            dump_error(s, "dump: failed to write CPU status", errp);
+            return;
        }
    }
-
-    return 0;
 }

-static int write_elf32_note(DumpState *s)
+static void write_elf32_note(DumpState *s, Error **errp)
 {
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
@@ -289,14 +274,12 @@ static int write_elf32_note(DumpState *s)

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write program header table.\n");
-        return -1;
+        dump_error(s, "dump: failed to write program header table", errp);
    }
-
-    return 0;
 }

-static int write_elf32_notes(WriteCoreDumpFunction f, DumpState *s)
+static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
+                              Error **errp)
 {
    CPUState *cpu;
    int ret;
@@ -306,23 +289,21 @@ static int write_elf32_notes(WriteCoreDumpFunction f, DumpState *s)
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to write elf notes.\n");
-            return -1;
+            dump_error(s, "dump: failed to write elf notes", errp);
+            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to write CPU status.\n");
-            return -1;
+            dump_error(s, "dump: failed to write CPU status", errp);
+            return;
        }
    }
-
-    return 0;
 }

-static int write_elf_section(DumpState *s, int type)
+static void write_elf_section(DumpState *s, int type, Error **errp)
 {
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
@@ -344,50 +325,44 @@ static int write_elf_section(DumpState *s, int type)

    ret = fd_write_vmcore(&shdr, shdr_size, s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write section header table.\n");
-        return -1;
+        dump_error(s, "dump: failed to write section header table", errp);
    }
-
-    return 0;
 }

-static int write_data(DumpState *s, void *buf, int length)
+static void write_data(DumpState *s, void *buf, int length, Error **errp)
 {
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
-        dump_error(s, "dump: failed to save memory.\n");
-        return -1;
+        dump_error(s, "dump: failed to save memory", errp);
    }
-
-    return 0;
 }

-/* write the memroy to vmcore. 1 page per I/O. */
-static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
-                        int64_t size)
+/* write the memory to vmcore. 1 page per I/O. */
+static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
+                         int64_t size, Error **errp)
 {
    int64_t i;
-    int ret;
+    Error *local_err = NULL;

    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
-        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
-                         TARGET_PAGE_SIZE);
-        if (ret < 0) {
-            return ret;
+        write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
+                   TARGET_PAGE_SIZE, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }
    }

    if ((size % TARGET_PAGE_SIZE) != 0) {
-        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
-                         size % TARGET_PAGE_SIZE);
-        if (ret < 0) {
-            return ret;
+        write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
+                   size % TARGET_PAGE_SIZE, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }
    }
-
-    return 0;
 }

 /* get the memory's offset and size in the vmcore */
@@ -452,13 +427,13 @@ static void get_offset_range(hwaddr phys_addr,
    }
 }

-static int write_elf_loads(DumpState *s)
+static void write_elf_loads(DumpState *s, Error **errp)
 {
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
-    int ret;
    uint32_t max_index;
+    Error *local_err = NULL;

    if (s->have_section) {
        max_index = s->sh_info;
@@ -471,29 +446,28 @@ static int write_elf_loads(DumpState *s)
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
-            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
-                                   filesz);
+            write_elf64_load(s, memory_mapping, phdr_index++, offset,
+                             filesz, &local_err);
        } else {
-            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
-                                   filesz);
+            write_elf32_load(s, memory_mapping, phdr_index++, offset,
+                             filesz, &local_err);
        }

-        if (ret < 0) {
-            return -1;
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }
-
-    return 0;
 }

 /* write elf header, PT_NOTE and elf note to vmcore. */
-static int dump_begin(DumpState *s)
+static void dump_begin(DumpState *s, Error **errp)
 {
-    int ret;
+    Error *local_err = NULL;

    /*
     * the vmcore's format is:
@@ -521,69 +495,81 @@ static int dump_begin(DumpState *s)

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
-        ret = write_elf64_header(s);
+        write_elf64_header(s, &local_err);
    } else {
-        ret = write_elf32_header(s);
+        write_elf32_header(s, &local_err);
    }
-    if (ret < 0) {
-        return -1;
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
-        if (write_elf64_note(s) < 0) {
-            return -1;
+        write_elf64_note(s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

        /* write all PT_LOAD to vmcore */
-        if (write_elf_loads(s) < 0) {
-            return -1;
+        write_elf_loads(s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
-            if (write_elf_section(s, 1) < 0) {
-                return -1;
+            write_elf_section(s, 1, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
            }
        }

        /* write notes to vmcore */
-        if (write_elf64_notes(fd_write_vmcore, s) < 0) {
-            return -1;
+        write_elf64_notes(fd_write_vmcore, s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }
-
    } else {
        /* write PT_NOTE to vmcore */
-        if (write_elf32_note(s) < 0) {
-            return -1;
+        write_elf32_note(s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

        /* write all PT_LOAD to vmcore */
-        if (write_elf_loads(s) < 0) {
-            return -1;
+        write_elf_loads(s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
-            if (write_elf_section(s, 0) < 0) {
-                return -1;
+            write_elf_section(s, 0, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
            }
        }

        /* write notes to vmcore */
-        if (write_elf32_notes(fd_write_vmcore, s) < 0) {
-            return -1;
+        write_elf32_notes(fd_write_vmcore, s, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }
    }
-
-    return 0;
 }

-/* write PT_LOAD to vmcore */
-static int dump_completed(DumpState *s)
+static void dump_completed(DumpState *s)
 {
    dump_cleanup(s);
-    return 0;
 }

 static int get_next_block(DumpState *s, GuestPhysBlock *block)
@@ -614,13 +600,13 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block)
 }

 /* write all memory to vmcore */
-static int dump_iterate(DumpState *s)
+static void dump_iterate(DumpState *s, Error **errp)
 {
    GuestPhysBlock *block;
    int64_t size;
-    int ret;
+    Error *local_err = NULL;

-    while (1) {
+    do {
        block = s->next_block;

        size = block->target_end - block->target_start;
@@ -630,34 +616,28 @@ static int dump_iterate(DumpState *s)
                size -= block->target_end - (s->begin + s->length);
            }
        }
-        ret = write_memory(s, block, s->start, size);
-        if (ret == -1) {
-            return ret;
+        write_memory(s, block, s->start, size, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
        }

-        ret = get_next_block(s, block);
-        if (ret == 1) {
-            dump_completed(s);
-            return 0;
-        }
-    }
+    } while (!get_next_block(s, block));
+
+    dump_completed(s);
 }

-static int create_vmcore(DumpState *s)
+static void create_vmcore(DumpState *s, Error **errp)
 {
-    int ret;
+    Error *local_err = NULL;

-    ret = dump_begin(s);
-    if (ret < 0) {
-        return -1;
+    dump_begin(s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

-    ret = dump_iterate(s);
-    if (ret < 0) {
-        return -1;
-    }
-
-    return 0;
+    dump_iterate(s, errp);
 }

 static int write_start_flat_header(int fd)
@@ -738,9 +718,8 @@ static int buf_write_note(const void *buf, size_t size, void *opaque)
 }

 /* write common header, sub header and elf note to vmcore */
-static int create_header32(DumpState *s)
+static void create_header32(DumpState *s, Error **errp)
 {
-    int ret = 0;
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
@@ -749,6 +728,7 @@ static int create_header32(DumpState *s)
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
+    Error *local_err = NULL;

    /* write common header, the version of kdump-compressed format is 6th */
    size = sizeof(DiskDumpHeader32);
@@ -784,8 +764,7 @@ static int create_header32(DumpState *s)
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
-        dump_error(s, "dump: failed to write disk dump header.\n");
-        ret = -1;
+        dump_error(s, "dump: failed to write disk dump header", errp);
        goto out;
    }

@@ -804,8 +783,7 @@ static int create_header32(DumpState *s)

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
-        dump_error(s, "dump: failed to write kdump sub header.\n");
-        ret = -1;
+        dump_error(s, "dump: failed to write kdump sub header", errp);
        goto out;
    }

@@ -814,15 +792,14 @@ static int create_header32(DumpState *s)
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
-    if (write_elf32_notes(buf_write_note, s) < 0) {
-        ret = -1;
+    write_elf32_notes(buf_write_note, s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto out;
    }
-
    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
-        dump_error(s, "dump: failed to write notes");
-        ret = -1;
+        dump_error(s, "dump: failed to write notes", errp);
        goto out;
    }

@@ -838,14 +815,11 @@ out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
-
-    return ret;
 }

 /* write common header, sub header and elf note to vmcore */
-static int create_header64(DumpState *s)
+static void create_header64(DumpState *s, Error **errp)
 {
-    int ret = 0;
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
@@ -854,6 +828,7 @@ static int create_header64(DumpState *s)
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
+    Error *local_err = NULL;

    /* write common header, the version of kdump-compressed format is 6th */
    size = sizeof(DiskDumpHeader64);
@@ -889,8 +864,7 @@ static int create_header64(DumpState *s)
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
-        dump_error(s, "dump: failed to write disk dump header.\n");
-        ret = -1;
+        dump_error(s, "dump: failed to write disk dump header", errp);
        goto out;
    }

@@ -909,8 +883,7 @@ static int create_header64(DumpState *s)

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
-        dump_error(s, "dump: failed to write kdump sub header.\n");
-        ret = -1;
+        dump_error(s, "dump: failed to write kdump sub header", errp);
        goto out;
    }

@@ -919,15 +892,15 @@ static int create_header64(DumpState *s)
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
-    if (write_elf64_notes(buf_write_note, s) < 0) {
-        ret = -1;
+    write_elf64_notes(buf_write_note, s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
-        dump_error(s, "dump: failed to write notes");
-        ret = -1;
+        dump_error(s, "dump: failed to write notes", errp);
        goto out;
    }

@@ -943,16 +916,19 @@ out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
-
-    return ret;
 }

-static int write_dump_header(DumpState *s)
+static void write_dump_header(DumpState *s, Error **errp)
 {
+     Error *local_err = NULL;
+
    if (s->dump_info.d_class == ELFCLASS32) {
-        return create_header32(s);
+        create_header32(s, &local_err);
    } else {
-        return create_header64(s);
+        create_header64(s, &local_err);
+    }
+    if (local_err) {
+        error_propagate(errp, local_err);
    }
 }

@@ -1066,7 +1042,7 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
    return true;
 }

-static int write_dump_bitmap(DumpState *s)
+static void write_dump_bitmap(DumpState *s, Error **errp)
 {
    int ret = 0;
    uint64_t last_pfn, pfn;
@@ -1087,8 +1063,7 @@ static int write_dump_bitmap(DumpState *s)
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to set dump_bitmap.\n");
-            ret = -1;
+            dump_error(s, "dump: failed to set dump_bitmap", errp);
            goto out;
        }

@@ -1105,8 +1080,7 @@ static int write_dump_bitmap(DumpState *s)
        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
-            dump_error(s, "dump: failed to sync dump_bitmap.\n");
-            ret = -1;
+            dump_error(s, "dump: failed to sync dump_bitmap", errp);
            goto out;
        }
    }
@@ -1116,8 +1090,6 @@ static int write_dump_bitmap(DumpState *s)

 out:
    g_free(dump_bitmap_buf);
-
-    return ret;
 }

 static void prepare_data_cache(DataCache *data_cache, DumpState *s,
@@ -1197,7 +1169,7 @@ static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
    return buffer_is_zero(buf, page_size);
 }

-static int write_dump_pages(DumpState *s)
+static void write_dump_pages(DumpState *s, Error **errp)
 {
    int ret = 0;
    DataCache page_desc, page_data;
@@ -1241,7 +1213,7 @@ static int write_dump_pages(DumpState *s)
    ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
    g_free(buf);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write page data(zero page).\n");
+        dump_error(s, "dump: failed to write page data (zero page)", errp);
        goto out;
    }

@@ -1257,7 +1229,7 @@ static int write_dump_pages(DumpState *s)
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
-                dump_error(s, "dump: failed to write page desc.\n");
+                dump_error(s, "dump: failed to write page desc", errp);
                goto out;
            }
        } else {
@@ -1282,7 +1254,7 @@ static int write_dump_pages(DumpState *s)

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
-                    dump_error(s, "dump: failed to write page data.\n");
+                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
 #ifdef CONFIG_LZO
@@ -1295,7 +1267,7 @@ static int write_dump_pages(DumpState *s)

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
-                    dump_error(s, "dump: failed to write page data.\n");
+                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
 #endif
@@ -1309,7 +1281,7 @@ static int write_dump_pages(DumpState *s)

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
-                    dump_error(s, "dump: failed to write page data.\n");
+                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
 #endif
@@ -1324,7 +1296,7 @@ static int write_dump_pages(DumpState *s)

                ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
                if (ret < 0) {
-                    dump_error(s, "dump: failed to write page data.\n");
+                    dump_error(s, "dump: failed to write page data", errp);
                    goto out;
                }
            }
@@ -1336,7 +1308,7 @@ static int write_dump_pages(DumpState *s)

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
-                dump_error(s, "dump: failed to write page desc.\n");
+                dump_error(s, "dump: failed to write page desc", errp);
                goto out;
            }
        }
@@ -1344,12 +1316,12 @@ static int write_dump_pages(DumpState *s)

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
-        dump_error(s, "dump: failed to sync cache for page_desc.\n");
+        dump_error(s, "dump: failed to sync cache for page_desc", errp);
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
-        dump_error(s, "dump: failed to sync cache for page_data.\n");
+        dump_error(s, "dump: failed to sync cache for page_data", errp);
        goto out;
    }

@@ -1362,13 +1334,12 @@ out:
 #endif

    g_free(buf_out);
-
-    return ret;
 }

-static int create_kdump_vmcore(DumpState *s)
+static void create_kdump_vmcore(DumpState *s, Error **errp)
 {
    int ret;
+    Error *local_err = NULL;

    /*
     * the kdump-compressed format is:
@@ -1394,34 +1365,35 @@ static int create_kdump_vmcore(DumpState *s)

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write start flat header.\n");
-        return -1;
+        dump_error(s, "dump: failed to write start flat header", errp);
+        return;
    }

-    ret = write_dump_header(s);
-    if (ret < 0) {
-        return -1;
+    write_dump_header(s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

-    ret = write_dump_bitmap(s);
-    if (ret < 0) {
-        return -1;
+    write_dump_bitmap(s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

-    ret = write_dump_pages(s);
-    if (ret < 0) {
-        return -1;
+    write_dump_pages(s, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
-        dump_error(s, "dump: failed to write end flat header.\n");
-        return -1;
+        dump_error(s, "dump: failed to write end flat header", errp);
+        return;
    }

    dump_completed(s);
-
-    return 0;
 }

 static ram_addr_t get_start_block(DumpState *s)
@@ -1460,9 +1432,9 @@ static void get_max_mapnr(DumpState *s)
    s->max_mapnr = paddr_to_pfn(last_block->target_end);
 }

-static int dump_init(DumpState *s, int fd, bool has_format,
-                     DumpGuestMemoryFormat format, bool paging, bool has_filter,
-                     int64_t begin, int64_t length, Error **errp)
+static void dump_init(DumpState *s, int fd, bool has_format,
+                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
+                      int64_t begin, int64_t length, Error **errp)
 {
    CPUState *cpu;
    int nr_cpus;
@@ -1567,7 +1539,7 @@ static int dump_init(DumpState *s, int fd, bool has_format,
            s->flag_compress = 0;
        }

-        return 0;
+        return;
    }

    if (s->has_filter) {
@@ -1616,11 +1588,10 @@ static int dump_init(DumpState *s, int fd, bool has_format,
        }
    }

-    return 0;
+    return;

 cleanup:
    dump_cleanup(s);
-    return -1;
 }

 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
@@ -1631,7 +1602,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
    const char *p;
    int fd = -1;
    DumpState *s;
-    int ret;
+    Error *local_err = NULL;

    /*
     * kdump-compressed format need the whole memory dumped, so paging or
@@ -1691,21 +1662,18 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,

    s = g_malloc0(sizeof(DumpState));

-    ret = dump_init(s, fd, has_format, format, paging, has_begin,
-                    begin, length, errp);
-    if (ret < 0) {
+    dump_init(s, fd, has_format, format, paging, has_begin,
+              begin, length, &local_err);
+    if (local_err) {
        g_free(s);
+        error_propagate(errp, local_err);
        return;
    }

    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
-        if (create_kdump_vmcore(s) < 0) {
-            error_set(errp, QERR_IO_ERROR);
-        }
+        create_kdump_vmcore(s, errp);
    } else {
-        if (create_vmcore(s) < 0) {
-            error_set(errp, QERR_IO_ERROR);
-        }
+        create_vmcore(s, errp);
    }

    g_free(s);
--- a/exec.c
+++ b/exec.c
@@ -75,6 +75,11 @@ static MemoryRegion io_mem_unassigned;
 /* RAM is mmap-ed with MAP_SHARED */
 #define RAM_SHARED     (1 << 1)

+/* Only a portion of RAM (used_length) is actually used, and migrated.
+ * This used_length size can change across reboots.
+ */
+#define RAM_RESIZEABLE (1 << 2)
+
 #endif

 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -434,7 +439,7 @@ static int cpu_common_pre_load(void *opaque)
 {
    CPUState *cpu = opaque;

-    cpu->exception_index = 0;
+    cpu->exception_index = -1;

    return 0;
 }
@@ -443,7 +448,7 @@ static bool cpu_common_exception_index_needed(void *opaque)
 {
    CPUState *cpu = opaque;

-    return cpu->exception_index != 0;
+    return tcg_enabled() && cpu->exception_index != -1;
 }

 static const VMStateDescription vmstate_cpu_common_exception_index = {
@@ -548,7 +553,6 @@ void cpu_exec_init(CPUArchState *env)
    }
 }

-#if defined(TARGET_HAS_ICE)
 #if defined(CONFIG_USER_ONLY)
 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
@@ -564,7 +568,6 @@ static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
    }
 }
 #endif
-#endif /* TARGET_HAS_ICE */

 #if defined(CONFIG_USER_ONLY)
 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
@@ -684,7 +687,6 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
 {
-#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));
@@ -705,15 +707,11 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
        *breakpoint = bp;
    }
    return 0;
-#else
-    return -ENOSYS;
-#endif
 }

 /* Remove a specific breakpoint.  */
 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 {
-#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
@@ -723,27 +721,21 @@ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
        }
    }
    return -ENOENT;
-#else
-    return -ENOSYS;
-#endif
 }

 /* Remove a specific breakpoint by reference.  */
 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 {
-#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
-#endif
 }

 /* Remove all matching breakpoints. */
 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 {
-#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
@@ -751,14 +743,12 @@ void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
-#endif
 }

 /* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
 void cpu_single_step(CPUState *cpu, int enabled)
 {
-#if defined(TARGET_HAS_ICE)
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
@@ -770,7 +760,6 @@ void cpu_single_step(CPUState *cpu, int enabled)
            tb_flush(env);
        }
    }
-#endif
 }

 void cpu_abort(CPUState *cpu, const char *fmt, ...)
@@ -812,11 +801,11 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
-    if (block && addr - block->offset < block->length) {
+    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
+        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }
@@ -840,7 +829,7 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
-    start1 = (uintptr_t)block->host + (start - block->offset);
+    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
 }

@@ -850,7 +839,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
 {
    if (length == 0)
        return;
-    cpu_physical_memory_clear_dirty_range(start, length, client);
+    cpu_physical_memory_clear_dirty_range_type(start, length, client);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
@@ -909,14 +898,15 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

-static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
+static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
+                               qemu_anon_ram_alloc;

 /*
 * Set a custom physical guest memory alloator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
-void phys_mem_set_alloc(void *(*alloc)(size_t))
+void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
 {
    phys_mem_alloc = alloc;
 }
@@ -1098,6 +1088,7 @@ static void *file_ram_alloc(RAMBlock *block,
        error_propagate(errp, local_err);
        goto error;
    }
+    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
@@ -1184,7 +1175,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

-        end = block->offset + block->length;
+        end = block->offset + block->max_length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
@@ -1212,7 +1203,7 @@ ram_addr_t last_ram_offset(void)
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
-        last = MAX(last, block->offset + block->length);
+        last = MAX(last, block->offset + block->max_length);

    return last;
 }
@@ -1294,6 +1285,49 @@ static int memory_try_enable_merging(void *addr, size_t len)
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }

+/* Only legal before guest might have detected the memory size: e.g. on
+ * incoming migration, or right after reset.
+ *
+ * As memory core doesn't know how is memory accessed, it is up to
+ * resize callback to update device state and/or add assertions to detect
+ * misuse, if necessary.
+ */
+int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
+{
+    RAMBlock *block = find_ram_block(base);
+
+    assert(block);
+
+    if (block->used_length == newsize) {
+        return 0;
+    }
+
+    if (!(block->flags & RAM_RESIZEABLE)) {
+        error_setg_errno(errp, EINVAL,
+                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
+                         " in != 0x" RAM_ADDR_FMT, block->idstr,
+                         newsize, block->used_length);
+        return -EINVAL;
+    }
+
+    if (block->max_length < newsize) {
+        error_setg_errno(errp, EINVAL,
+                         "Length too large: %s: 0x" RAM_ADDR_FMT
+                         " > 0x" RAM_ADDR_FMT, block->idstr,
+                         newsize, block->max_length);
+        return -EINVAL;
+    }
+
+    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
+    block->used_length = newsize;
+    cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
+    memory_region_set_size(block->mr, newsize);
+    if (block->resized) {
+        block->resized(block->idstr, newsize, block->host);
+    }
+    return 0;
+}
+
 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
 {
    RAMBlock *block;
@@ -1303,13 +1337,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
-    new_block->offset = find_ram_offset(new_block->length);
+    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
-            xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
+            xen_ram_alloc(new_block->offset, new_block->max_length,
+                          new_block->mr);
        } else {
-            new_block->host = phys_mem_alloc(new_block->length);
+            new_block->host = phys_mem_alloc(new_block->max_length,
+                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
@@ -1317,13 +1353,13 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
                qemu_mutex_unlock_ramlist();
                return -1;
            }
-            memory_try_enable_merging(new_block->host, new_block->length);
+            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (block->length < new_block->length) {
+        if (block->max_length < new_block->max_length) {
            break;
        }
    }
@@ -1347,14 +1383,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
                                   old_ram_size, new_ram_size);
       }
    }
-    cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
+    cpu_physical_memory_set_dirty_range(new_block->offset,
+                                        new_block->used_length);

-    qemu_ram_setup_dump(new_block->host, new_block->length);
-    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
-    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
+    qemu_ram_setup_dump(new_block->host, new_block->max_length);
+    qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
+    qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);

    if (kvm_enabled()) {
-        kvm_setup_guest_memory(new_block->host, new_block->length);
+        kvm_setup_guest_memory(new_block->host, new_block->max_length);
    }

    return new_block->offset;
@@ -1388,7 +1425,8 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
-    new_block->length = size;
+    new_block->used_length = size;
+    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
@@ -1407,7 +1445,12 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
 }
 #endif

-ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+static
+ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
+                                   void (*resized)(const char*,
+                                                   uint64_t length,
+                                                   void *host),
+                                   void *host, bool resizeable,
                                   MemoryRegion *mr, Error **errp)
 {
    RAMBlock *new_block;
@@ -1415,14 +1458,21 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
    Error *local_err = NULL;

    size = TARGET_PAGE_ALIGN(size);
+    max_size = TARGET_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
-    new_block->length = size;
+    new_block->resized = resized;
+    new_block->used_length = size;
+    new_block->max_length = max_size;
+    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
+    if (resizeable) {
+        new_block->flags |= RAM_RESIZEABLE;
+    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
@@ -1432,9 +1482,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
    return addr;
 }

+ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+                                   MemoryRegion *mr, Error **errp)
+{
+    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
 {
-    return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
+    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
+}
+
+ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
+                                     void (*resized)(const char*,
+                                                     uint64_t length,
+                                                     void *host),
+                                     MemoryRegion *mr, Error **errp)
+{
+    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
 }

 void qemu_ram_free_from_ptr(ram_addr_t addr)
@@ -1472,11 +1537,11 @@ void qemu_ram_free(ram_addr_t addr)
                xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
            } else if (block->fd >= 0) {
-                munmap(block->host, block->length);
+                munmap(block->host, block->max_length);
                close(block->fd);
 #endif
            } else {
-                qemu_anon_ram_free(block->host, block->length);
+                qemu_anon_ram_free(block->host, block->max_length);
            }
            g_free(block);
            break;
@@ -1496,8 +1561,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
-        if (offset < block->length) {
-            vaddr = block->host + offset;
+        if (offset < block->max_length) {
+            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
@@ -1548,7 +1613,7 @@ void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
 {
    RAMBlock *block = qemu_get_ram_block(addr);

-    return block->host;
+    return ramblock_ptr(block, 0);
 }

 /* Return a host pointer to ram allocated with qemu_ram_alloc.
@@ -1572,10 +1637,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
-                xen_map_cache(block->offset, block->length, 1);
+                xen_map_cache(block->offset, block->max_length, 1);
        }
    }
-    return block->host + (addr - block->offset);
+    return ramblock_ptr(block, addr - block->offset);
 }

 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
@@ -1591,10 +1656,10 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-            if (addr - block->offset < block->length) {
-                if (addr - block->offset + *size > block->length)
-                    *size = block->length - addr + block->offset;
-                return block->host + (addr - block->offset);
+            if (addr - block->offset < block->max_length) {
+                if (addr - block->offset + *size > block->max_length)
+                    *size = block->max_length - addr + block->offset;
+                return ramblock_ptr(block, addr - block->offset);
            }
        }

@@ -1616,7 +1681,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
    }

    block = ram_list.mru_block;
-    if (block && block->host && host - block->host < block->length) {
+    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

@@ -1625,7 +1690,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
        if (block->host == NULL) {
            continue;
        }
-        if (host - block->host < block->length) {
+        if (host - block->host < block->max_length) {
            goto found;
        }
    }
@@ -1765,7 +1830,7 @@ static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
 {
    subpage_t *subpage = opaque;
-    uint8_t buf[4];
+    uint8_t buf[8];

 #if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
@@ -1779,6 +1844,8 @@ static uint64_t subpage_read(void *opaque, hwaddr addr,
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
+    case 8:
+        return ldq_p(buf);
    default:
        abort();
    }
@@ -1788,7 +1855,7 @@ static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
 {
    subpage_t *subpage = opaque;
-    uint8_t buf[4];
+    uint8_t buf[8];

 #if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
@@ -1805,6 +1872,9 @@ static void subpage_write(void *opaque, hwaddr addr,
    case 4:
        stl_p(buf, value);
        break;
+    case 8:
+        stq_p(buf, value);
+        break;
    default:
        abort();
    }
@@ -1827,6 +1897,10 @@ static bool subpage_accepts(void *opaque, hwaddr addr,
 static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
 };
@@ -2066,10 +2140,8 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
 static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
 {
-    if (cpu_physical_memory_is_clean(addr)) {
-        /* invalidate code */
-        tb_invalidate_phys_page_range(addr, addr + length, 0);
-        /* set dirty bit */
+    if (cpu_physical_memory_range_includes_clean(addr, length)) {
+        tb_invalidate_phys_range(addr, addr + length, 0);
        cpu_physical_memory_set_dirty_range_nocode(addr, length);
    }
    xen_modified_memory(addr, length);
@@ -2872,7 +2944,7 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        func(block->host, block->offset, block->length, opaque);
+        func(block->host, block->offset, block->used_length, opaque);
    }
 }
 #endif
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -7240,13 +7240,17 @@ int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
+ *
+ * minnummag() and maxnummag() functions correspond to minNumMag()
+ * and minNumMag() from the IEEE-754 2008.
 */
 #define MINMAX(s)                                                       \
 static inline float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
-                                        int ismin, int isieee STATUS_PARAM) \
+                                               int ismin, int isieee,   \
+                                               int ismag STATUS_PARAM)  \
 {                                                                       \
    flag aSign, bSign;                                                  \
-    uint ## s ## _t av, bv;                                             \
+    uint ## s ## _t av, bv, aav, abv;                                   \
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
    if (float ## s ## _is_any_nan(a) ||                                 \
@@ -7266,6 +7270,17 @@ static inline float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
    bSign = extractFloat ## s ## Sign(b);                               \
    av = float ## s ## _val(a);                                         \
    bv = float ## s ## _val(b);                                         \
+    if (ismag) {                                                        \
+        aav = float ## s ## _abs(av);                                   \
+        abv = float ## s ## _abs(bv);                                   \
+        if (aav != abv) {                                               \
+            if (ismin) {                                                \
+                return (aav < abv) ? a : b;                             \
+            } else {                                                    \
+                return (aav < abv) ? b : a;                             \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
    if (aSign != bSign) {                                               \
        if (ismin) {                                                    \
            return aSign ? a : b;                                       \
@@ -7283,22 +7298,32 @@ static inline float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
                                                                        \
 float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
 {                                                                       \
-    return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR);                \
+    return float ## s ## _minmax(a, b, 1, 0, 0 STATUS_VAR);             \
 }                                                                       \
                                                                        \
 float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
 {                                                                       \
-    return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR);                \
+    return float ## s ## _minmax(a, b, 0, 0, 0 STATUS_VAR);             \
 }                                                                       \
                                                                        \
 float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
 {                                                                       \
-    return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR);                \
+    return float ## s ## _minmax(a, b, 1, 1, 0 STATUS_VAR);             \
 }                                                                       \
                                                                        \
 float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
 {                                                                       \
-    return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR);                \
+    return float ## s ## _minmax(a, b, 0, 1, 0 STATUS_VAR);             \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _minnummag(float ## s a, float ## s b STATUS_PARAM) \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 1, 1, 1 STATUS_VAR);             \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _maxnummag(float ## s a, float ## s b STATUS_PARAM) \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 0, 1, 1 STATUS_VAR);             \
 }

 MINMAX(32)
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -317,6 +317,8 @@ static GDBState *gdbserver_state;

 bool gdb_has_xml;

+int semihosting_target = SEMIHOSTING_TARGET_AUTO;
+
 #ifdef CONFIG_USER_ONLY
 /* XXX: This is not thread safe.  Do we care?  */
 static int gdbserver_fd = -1;
@@ -351,10 +353,19 @@ static enum {
    GDB_SYS_DISABLED,
 } gdb_syscall_mode;

-/* If gdb is connected when the first semihosting syscall occurs then use
-   remote gdb syscalls.  Otherwise use native file IO.  */
+/* Decide if either remote gdb syscalls or native file IO should be used. */
 int use_gdb_syscalls(void)
 {
+    if (semihosting_target == SEMIHOSTING_TARGET_NATIVE) {
+        /* -semihosting-config target=native */
+        return false;
+    } else if (semihosting_target == SEMIHOSTING_TARGET_GDB) {
+        /* -semihosting-config target=gdb */
+        return true;
+    }
+
+    /* -semihosting-config target=auto */
+    /* On the first call check if gdb is connected and remember. */
    if (gdb_syscall_mode == GDB_SYS_UNKNOWN) {
        gdb_syscall_mode = (gdbserver_state ? GDB_SYS_ENABLED
                                            : GDB_SYS_DISABLED);
@@ -823,7 +834,10 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
                action = *p++;
                signal = 0;
                if (action == 'C' || action == 'S') {
-                    signal = strtoul(p, (char **)&p, 16);
+                    signal = gdb_signal_to_target(strtoul(p, (char **)&p, 16));
+                    if (signal == -1) {
+                        signal = 0;
+                    }
                } else if (action != 'c' && action != 's') {
                    res = 0;
                    break;
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1748,8 +1748,6 @@ show information about active capturing
 show list of VM snapshots
@item info status
 show the current VM status (running|paused)
-@item info pcmcia
-show guest PCMCIA status
@item info mice
 show which guest mouse is receiving events
@item info vnc
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .1.50
 .2.50