ui/sdl2 : initial port to SDL 2.0 (v2.0)

I've ported the SDL1.2 code over, and rewritten it to use the SDL2 interface. The biggest changes were in the input handling, where SDL2 has done a major overhaul, and I've had to include a generated translation file to get from SDL2 codes back to qemu compatible ones. I'm still not sure how the keyboard layout code works in qemu, so there may be further work if someone can point me a test case that works with SDL1.2 and doesn't with SDL2. Some SDL env vars we used to set are no longer used by SDL2, Windows, OSX support is untested, I don't think we can link to SDL1.2 and SDL2 at the same time, so I felt using --with-sdlabi=2.0 to select the new code should be fine, like how gtk does it. v1.1: fix keys in text console v1.2: fix shutdown, cleanups a bit of code, support ARGB cursor v2.0: merge the SDL multihead patch into this, g_new the number of consoles needed, wrap DCL inside per-console structure. Signed-off-by: Dave Airlie <airlied@redhat.com> Fixes & improvements by kraxel: * baum build fix * remove text console logic * adapt to new input core * codestyle fixups Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
console: add QemuUIInfo
2014-03-05 09:52:05 +01:00 · 2014-03-05 09:52:04 +01:00 · 2014-03-05 09:52:04 +01:00 · 2014-03-05 09:52:04 +01:00 · 2014-03-05 09:52:04 +01:00 · 2014-03-05 09:52:04 +01:00
952 changed files with 58390 additions and 16062 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,69 +1,74 @@
-config-devices.*
-config-all-devices.*
-config-all-disas.*
-config-host.*
-config-target.*
-config.status
-trace/generated-tracers.h
-trace/generated-tracers.c
-trace/generated-tracers-dtrace.h
-trace/generated-tracers.dtrace
-trace/generated-events.h
-trace/generated-events.c
-libcacard/trace/generated-tracers.c
+/config-devices.*
+/config-all-devices.*
+/config-all-disas.*
+/config-host.*
+/config-target.*
+/config.status
+/trace/generated-tracers.h
+/trace/generated-tracers.c
+/trace/generated-tracers-dtrace.h
+/trace/generated-tracers.dtrace
+/trace/generated-events.h
+/trace/generated-events.c
+/trace/generated-ust-provider.h
+/trace/generated-ust.c
+/libcacard/trace/generated-tracers.c
 *-timestamp
-*-softmmu
-*-darwin-user
-*-linux-user
-*-bsd-user
+/*-softmmu
+/*-darwin-user
+/*-linux-user
+/*-bsd-user
 libdis*
 libuser
-linux-headers/asm
-qapi-generated
-qapi-types.[ch]
-qapi-visit.[ch]
-qmp-commands.h
-qmp-marshal.c
-qemu-doc.html
-qemu-tech.html
-qemu-doc.info
-qemu-tech.info
-qemu.1
-qemu.pod
-qemu-img.1
-qemu-img.pod
-qemu-img
-qemu-nbd
-qemu-nbd.8
-qemu-nbd.pod
-qemu-options.def
-qemu-options.texi
-qemu-img-cmds.texi
-qemu-img-cmds.h
-qemu-io
-qemu-ga
-qemu-bridge-helper
-qemu-monitor.texi
-vscclient
-qmp-commands.txt
-test-bitops
-test-coroutine
-test-int128
-test-opts-visitor
-test-qmp-input-visitor
-test-qmp-output-visitor
-test-string-input-visitor
-test-string-output-visitor
-test-visitor-serialization
-fsdev/virtfs-proxy-helper
-fsdev/virtfs-proxy-helper.1
-fsdev/virtfs-proxy-helper.pod
-.gdbinit
+/linux-headers/asm
+/qapi-generated
+/qapi-types.[ch]
+/qapi-visit.[ch]
+/qmp-commands.h
+/qmp-marshal.c
+/qemu-doc.html
+/qemu-tech.html
+/qemu-doc.info
+/qemu-tech.info
+/qemu.1
+/qemu.pod
+/qemu-img.1
+/qemu-img.pod
+/qemu-img
+/qemu-nbd
+/qemu-nbd.8
+/qemu-nbd.pod
+/qemu-options.def
+/qemu-options.texi
+/qemu-img-cmds.texi
+/qemu-img-cmds.h
+/qemu-io
+/qemu-ga
+/qemu-bridge-helper
+/qemu-monitor.texi
+/qmp-commands.txt
+/vscclient
+/test-bitops
+/test-coroutine
+/test-int128
+/test-opts-visitor
+/test-qmp-input-visitor
+/test-qmp-output-visitor
+/test-string-input-visitor
+/test-string-output-visitor
+/test-visitor-serialization
+/fsdev/virtfs-proxy-helper
+/fsdev/virtfs-proxy-helper.1
+/fsdev/virtfs-proxy-helper.pod
+/.gdbinit
 *.a
 *.aux
 *.cp
 *.dvi
 *.exe
+*.dll
+*.so
+*.mo
 *.fn
 *.ky
 *.log
@@ -77,7 +82,7 @@ fsdev/virtfs-proxy-helper.pod
 *.tp
 *.vr
 *.d
-!scripts/qemu-guest-agent/fsfreeze-hook.d
+!/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
 *.lo
 *.la
@@ -90,22 +95,22 @@ fsdev/virtfs-proxy-helper.pod
 *.gcda
 *.gcno
 patches
-pc-bios/bios-pq/status
-pc-bios/vgabios-pq/status
-pc-bios/optionrom/linuxboot.asm
-pc-bios/optionrom/linuxboot.bin
-pc-bios/optionrom/linuxboot.raw
-pc-bios/optionrom/linuxboot.img
-pc-bios/optionrom/multiboot.asm
-pc-bios/optionrom/multiboot.bin
-pc-bios/optionrom/multiboot.raw
-pc-bios/optionrom/multiboot.img
-pc-bios/optionrom/kvmvapic.asm
-pc-bios/optionrom/kvmvapic.bin
-pc-bios/optionrom/kvmvapic.raw
-pc-bios/optionrom/kvmvapic.img
-pc-bios/s390-ccw/s390-ccw.elf
-pc-bios/s390-ccw/s390-ccw.img
+/pc-bios/bios-pq/status
+/pc-bios/vgabios-pq/status
+/pc-bios/optionrom/linuxboot.asm
+/pc-bios/optionrom/linuxboot.bin
+/pc-bios/optionrom/linuxboot.raw
+/pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/multiboot.asm
+/pc-bios/optionrom/multiboot.bin
+/pc-bios/optionrom/multiboot.raw
+/pc-bios/optionrom/multiboot.img
+/pc-bios/optionrom/kvmvapic.asm
+/pc-bios/optionrom/kvmvapic.bin
+/pc-bios/optionrom/kvmvapic.raw
+/pc-bios/optionrom/kvmvapic.img
+/pc-bios/s390-ccw/s390-ccw.elf
+/pc-bios/s390-ccw/s390-ccw.img
 .stgit-*
 cscope.*
 tags
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,7 @@ env:
  matrix:
  - TARGETS=alpha-softmmu,alpha-linux-user
  - TARGETS=arm-softmmu,arm-linux-user
+  - TARGETS=aarch64-softmmu,aarch64-linux-user
  - TARGETS=cris-softmmu
  - TARGETS=i386-softmmu,x86_64-softmmu
  - TARGETS=lm32-softmmu
--- a/52
+++ b/52
@@ -219,6 +219,13 @@ F: *win32*

 ARM Machines
 ------------
+Allwinner-a10
+M: Li Guang <lig.fnst@cn.fujitsu.com>
+S: Maintained
+F: hw/*/allwinner-a10*
+F: include/hw/*/allwinner-a10*
+F: hw/arm/cubieboard.c
+
 Exynos
 M: Evgeny Voevodin <e.voevodin@samsung.com>
 M: Maksim Kozlov <m.kozlov@samsung.com>
@@ -233,6 +240,12 @@ S: Supported
 F: hw/arm/highbank.c
 F: hw/net/xgmac.c

+Canon DIGIC
+M: Antony Pavlov <antonynpavlov@gmail.com>
+S: Maintained
+F: include/hw/arm/digic.h
+F: hw/*/digic*
+
 Gumstix
 M: qemu-devel@nongnu.org
 S: Orphan
@@ -500,9 +513,23 @@ X86 Machines
 ------------
 PC
 M: Anthony Liguori <aliguori@amazon.com>
+M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
-F: hw/i386/pc.[ch]
-F: hw/i386/pc_piix.c
+F: include/hw/i386/
+F: hw/i386/
+F: hw/pci-host/piix.c
+F: hw/pci-host/q35.c
+F: hw/pci-host/pam.c
+F: include/hw/pci-host/q35.h
+F: include/hw/pci-host/pam.h
+F: hw/isa/piix4.c
+F: hw/isa/lpc_ich9.c
+F: hw/i2c/smbus_ich9.c
+F: hw/acpi/piix4.c
+F: hw/acpi/ich9.c
+F: include/hw/acpi/ich9.h
+F: include/hw/acpi/piix.h
+

 Xtensa Machines
 ---------------
@@ -583,6 +610,7 @@ F: hw/*/*vhost*

 virtio
 M: Anthony Liguori <aliguori@amazon.com>
+M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: hw/*/virtio*

@@ -687,6 +715,7 @@ F: ui/

 Cocoa graphics
 M: Andreas Färber <andreas.faerber@web.de>
+M: Peter Maydell <peter.maydell@linaro.org>
 S: Odd Fixes
 F: ui/cocoa.m

@@ -697,7 +726,7 @@ F: vl.c

 Human Monitor (HMP)
 M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Supported
+S: Maintained
 F: monitor.c
 F: hmp.c
 F: hmp-commands.hx
@@ -710,6 +739,14 @@ S: Maintained
 F: net/
 T: git git://github.com/stefanha/qemu.git net

+Netmap network backend
+M: Luigi Rizzo <rizzo@iet.unipi.it>
+M: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
+M: Vincenzo Maffione <v.maffione@gmail.com>
+W: http://info.iet.unipi.it/~luigi/netmap/
+S: Maintained
+F: net/netmap.c
+
 Network Block Device (NBD)
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Odd Fixes
@@ -721,7 +758,7 @@ T: git git://github.com/bonzini/qemu.git nbd-next
 QAPI
 M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
-S: Supported
+S: Maintained
 F: qapi/
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

@@ -735,7 +772,7 @@ T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 QMP
 M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Supported
+S: Maintained
 F: qmp.c
 F: monitor.c
 F: qmp-commands.hx
@@ -900,6 +937,11 @@ M: Peter Lieven <pl@kamp.de>
 S: Supported
 F: block/iscsi.c

+NFS
+M: Peter Lieven <pl@kamp.de>
+S: Maintained
+F: block/nfs.c
+
 SSH
 M: Richard W.M. Jones <rjones@redhat.com>
 S: Supported
--- a/39
+++ b/39
@@ -57,6 +57,11 @@ GENERATED_HEADERS += trace/generated-tracers-dtrace.h
 endif
 GENERATED_SOURCES += trace/generated-tracers.c

+ifeq ($(TRACE_BACKEND),ust)
+GENERATED_HEADERS += trace/generated-ust-provider.h
+GENERATED_SOURCES += trace/generated-ust.c
+endif
+
 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
@@ -122,13 +127,29 @@ defconfig:

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/Makefile.objs
+endif
+
+dummy := $(call unnest-vars,, \
+                stub-obj-y \
+                util-obj-y \
+                qga-obj-y \
+                block-obj-y \
+                block-obj-m \
+                common-obj-y \
+                common-obj-m)
+
+ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile
 endif
 ifeq ($(CONFIG_SMARTCARD_NSS),y)
 include $(SRC_PATH)/libcacard/Makefile
 endif

-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
+all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
+
+vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
+
+vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -138,6 +159,7 @@ qemu-options.def: $(SRC_PATH)/qemu-options.hx
 SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))

+$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
 $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak

 subdir-%:
@@ -187,6 +209,9 @@ Makefile: $(version-obj-y) $(version-lobj-y)
 libqemustub.a: $(stub-obj-y)
 libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o

+block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
+util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
+
 ######################################################################

 qemu-img.o: qemu-img-cmds.h
@@ -242,6 +267,8 @@ clean:
 	rm -f qemu-options.def
 	find . -name '*.[oda]' -type f -exec rm -f {} +
 	find . -name '*.l[oa]' -type f -exec rm -f {} +
+	find . -name '*$(DSOSUF)' -type f -exec rm -f {} +
+	find . -name '*.mo' -type f -exec rm -f {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
 	rm -rf .libs */.libs
@@ -290,10 +317,10 @@ common  de-ch  es     fo  fr-ca  hu     ja  mk  nl-be      pt  sl     tr \
 bepo    cz

 ifdef INSTALL_BLOBS
-BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
+BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
 acpi-dsdt.aml q35-acpi-dsdt.aml \
-ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \
+ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
@@ -349,6 +376,12 @@ install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
 endif
+ifneq ($(CONFIG_MODULES),)
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
+	for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \
+		$(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(qemu_moddir)/$${s//\//-}"; \
+	done
+endif
 ifneq ($(HELPERS-y),)
 	$(INSTALL_DIR) "$(DESTDIR)$(libexecdir)"
 	$(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)"
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -19,6 +19,8 @@ block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
 block-obj-y += qemu-coroutine-sleep.o
 block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o

+block-obj-m = block/
+
 ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
 # Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
 # only pull in the actual virtio-9p device if we also enabled virtio.
@@ -41,9 +43,8 @@ libcacard-y += libcacard/vcardt.o
 # single QEMU executable should support all CPUs and machines.

 ifeq ($(CONFIG_SOFTMMU),y)
-common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
+common-obj-y = blockdev.o blockdev-nbd.o block/
 common-obj-y += net/
-common-obj-y += readline.o
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
@@ -51,6 +52,8 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration.o migration-tcp.o
+common-obj-y += vmstate.o
+common-obj-y += qemu-file.o
 common-obj-$(CONFIG_RDMA) += migration-rdma.o
 common-obj-y += qemu-char.o #aio.o
 common-obj-y += block-migration.o
@@ -110,18 +113,3 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 # by libqemuutil.a.  These should be moved to a separate .json schema.
 qga-obj-y = qga/ qapi-types.o qapi-visit.o
 qga-vss-dll-obj-y = qga/
-
-vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
-
-vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
-
-QEMU_CFLAGS+=$(GLIB_CFLAGS)
-
-nested-vars += \
-	stub-obj-y \
-	util-obj-y \
-	qga-obj-y \
-	qga-vss-dll-obj-y \
-	block-obj-y \
-	common-obj-y
-dummy := $(call unnest-vars)
--- a/Makefile.target
+++ b/Makefile.target
@@ -130,8 +130,6 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-main.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
-
 GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h

 endif # CONFIG_SOFTMMU
@@ -139,13 +137,26 @@ endif # CONFIG_SOFTMMU
 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

-nested-vars += obj-y
+dummy := $(call unnest-vars,,obj-y)

-# This resolves all nested paths, so it must come last
+# we are making another call to unnest-vars with different vars, protect obj-y,
+# it can be overriden in subdir Makefile.objs
+obj-y-save := $(obj-y)
+
+block-obj-y :=
+common-obj-y :=
 include $(SRC_PATH)/Makefile.objs
+dummy := $(call unnest-vars,.., \
+               block-obj-y \
+               block-obj-m \
+               common-obj-y \
+               common-obj-m)

-all-obj-y = $(obj-y)
-all-obj-y += $(addprefix ../, $(common-obj-y))
+# Now restore obj-y
+obj-y := $(obj-y-save)
+
+all-obj-y = $(obj-y) $(common-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)

 ifndef CONFIG_HAIKU
 LIBS+=-lm
--- a/arch_init.c
+++ b/arch_init.c
@@ -48,7 +48,9 @@
 #include "qmp-commands.h"
 #include "trace.h"
 #include "exec/cpu-all.h"
+#include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
+#include "qemu/host-utils.h"

 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -120,7 +122,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */

-
 static struct defconfig_file {
    const char *filename;
    /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -131,6 +132,7 @@ static struct defconfig_file {
    { NULL }, /* end of list */
 };

+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

 int qemu_read_default_config_files(bool userconfig)
 {
@@ -162,20 +164,22 @@ static struct {
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
-    /* buffer used for XBZRLE decoding */
-    uint8_t *decoded_buf;
    /* Cache for XBZRLE */
    PageCache *cache;
 } XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
-    .decoded_buf = NULL,
    .cache = NULL,
 };
-
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;

 int64_t xbzrle_cache_resize(int64_t new_size)
 {
+    if (new_size < TARGET_PAGE_SIZE) {
+        return -1;
+    }
+
    if (XBZRLE.cache != NULL) {
        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
            TARGET_PAGE_SIZE;
@@ -269,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
    return size;
 }

+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1

 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -280,7 +312,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
-            cache_insert(XBZRLE.cache, current_addr, current_data);
+            if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
+                return -1;
+            }
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
@@ -323,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
    return bytes_sent;
 }

-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
@@ -359,11 +381,10 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
    return (next - base) << TARGET_PAGE_BITS;
 }

-static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
-                                              ram_addr_t offset)
+static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
 {
    bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    int nr = addr >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

@@ -373,12 +394,47 @@ static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
    return ret;
 }

+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+    ram_addr_t addr;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+
+    /* start address is aligned at the start of a word? */
+    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
+        int k;
+        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
+        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
+
+        for (k = page; k < page + nr; k++) {
+            if (src[k]) {
+                unsigned long new_dirty;
+                new_dirty = ~migration_bitmap[k];
+                migration_bitmap[k] |= src[k];
+                new_dirty &= src[k];
+                migration_dirty_pages += ctpopl(new_dirty);
+                src[k] = 0;
+            }
+        }
+    } else {
+        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
+            if (cpu_physical_memory_get_dirty(start + addr,
+                                              TARGET_PAGE_SIZE,
+                                              DIRTY_MEMORY_MIGRATION)) {
+                cpu_physical_memory_reset_dirty(start + addr,
+                                                TARGET_PAGE_SIZE,
+                                                DIRTY_MEMORY_MIGRATION);
+                migration_bitmap_set_dirty(start + addr);
+            }
+        }
+    }
+}
+
+
 /* Needs iothread lock! */

 static void migration_bitmap_sync(void)
 {
    RAMBlock *block;
-    ram_addr_t addr;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
@@ -399,13 +455,7 @@ static void migration_bitmap_sync(void)
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_test_and_clear_dirty(block->mr,
-                                                   addr, TARGET_PAGE_SIZE,
-                                                   DIRTY_MEMORY_MIGRATION)) {
-                migration_bitmap_set_dirty(block->mr, addr);
-            }
-        }
+        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
@@ -478,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
        } else {
            int ret;
            uint8_t *p;
+            bool send_async = true;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

@@ -488,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
            ret = ram_control_save_page(f, block->offset,
                               offset, TARGET_PAGE_SIZE, &bytes_sent);

+            current_addr = block->offset + offset;
            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
@@ -502,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                    p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }
@@ -572,6 +640,12 @@ uint64_t ram_bytes_total(void)
    return total;
 }

+void free_xbzrle_decoded_buf(void)
+{
+    g_free(xbzrle_decoded_buf);
+    xbzrle_decoded_buf = NULL;
+}
+
 static void migration_end(void)
 {
    if (migration_bitmap) {
@@ -585,8 +659,9 @@ static void migration_end(void)
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
-        g_free(XBZRLE.decoded_buf);
        XBZRLE.cache = NULL;
+        XBZRLE.encoded_buf = NULL;
+        XBZRLE.current_buf = NULL;
    }
 }

@@ -625,8 +700,22 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
            DPRINTF("Error creating cache\n");
            return -1;
        }
-        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
-        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
+
+        /* We prefer not to abort if there is no memory */
+        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+        if (!XBZRLE.encoded_buf) {
+            DPRINTF("Error allocating encoded_buf\n");
+            return -1;
+        }
+
+        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!XBZRLE.current_buf) {
+            DPRINTF("Error allocating current_buf\n");
+            g_free(XBZRLE.encoded_buf);
+            XBZRLE.encoded_buf = NULL;
+            return -1;
+        }
+
        acct_clear();
    }

@@ -777,8 +866,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
    unsigned int xh_len;
    int xh_flags;

-    if (!XBZRLE.decoded_buf) {
-        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    if (!xbzrle_decoded_buf) {
+        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
@@ -795,10 +884,10 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
        return -1;
    }
    /* load data and decode */
-    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
+    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
-    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
+    ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -25,8 +25,17 @@
 #include "audio.h"
 #include "audio_int.h"

-#define LINE_IN_SAMPLES 1024
-#define LINE_OUT_SAMPLES 1024
+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+#define LINE_OUT_SAMPLES (480 * 4)
+#else
+#define LINE_OUT_SAMPLES (256 * 4)
+#endif
+
+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+#define LINE_IN_SAMPLES (480 * 4)
+#else
+#define LINE_IN_SAMPLES (256 * 4)
+#endif

 typedef struct SpiceRateCtl {
    int64_t               start_ticks;
@@ -111,7 +120,11 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
    SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
    struct audsettings settings;

+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+    settings.freq       = spice_server_get_best_playback_rate(NULL);
+#else
    settings.freq       = SPICE_INTERFACE_PLAYBACK_FREQ;
+#endif
    settings.nchannels  = SPICE_INTERFACE_PLAYBACK_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -122,6 +135,9 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)

    out->sin.base.sif = &playback_sif.base;
    qemu_spice_add_interface (&out->sin.base);
+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+    spice_server_set_playback_rate(&out->sin, settings.freq);
+#endif
    return 0;
 }

@@ -232,7 +248,11 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
    SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
    struct audsettings settings;

+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+    settings.freq       = spice_server_get_best_record_rate(NULL);
+#else
    settings.freq       = SPICE_INTERFACE_RECORD_FREQ;
+#endif
    settings.nchannels  = SPICE_INTERFACE_RECORD_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -243,6 +263,9 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)

    in->sin.base.sif = &record_sif.base;
    qemu_spice_add_interface (&in->sin.base);
+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+    spice_server_set_record_rate(&in->sin, settings.freq);
+#endif
    return 0;
 }

--- a/backends/baum.c
+++ b/backends/baum.c
@@ -566,7 +566,7 @@ CharDriverState *chr_baum_init(void)
    BaumDriverState *baum;
    CharDriverState *chr;
    brlapi_handle_t *handle;
-#ifdef CONFIG_SDL
+#if defined(CONFIG_SDL) && SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    SDL_SysWMinfo info;
 #endif
    int tty;
@@ -595,7 +595,7 @@ CharDriverState *chr_baum_init(void)
        goto fail;
    }

-#ifdef CONFIG_SDL
+#if defined(CONFIG_SDL) && SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    memset(&info, 0, sizeof(info));
    SDL_VERSION(&info.version);
    if (SDL_GetWMInfo(&info))
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -123,15 +123,15 @@ static void rng_random_init(Object *obj)
                            NULL);

    s->filename = g_strdup("/dev/random");
+    s->fd = -1;
 }

 static void rng_random_finalize(Object *obj)
 {
    RndRandom *s = RNG_RANDOM(obj);

-    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-
    if (s->fd != -1) {
+        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
        qemu_close(s->fd);
    }

--- a/backends/rng.c
+++ b/backends/rng.c
@@ -12,6 +12,7 @@

 #include "sysemu/rng.h"
 #include "qapi/qmp/qerror.h"
+#include "qom/object_interfaces.h"

 void rng_backend_request_entropy(RngBackend *s, size_t size,
                                 EntropyReceiveFunc *receive_entropy,
@@ -40,9 +41,9 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
    return s->opened;
 }

-void rng_backend_open(RngBackend *s, Error **errp)
+static void rng_backend_complete(UserCreatable *uc, Error **errp)
 {
-    object_property_set_bool(OBJECT(s), true, "opened", errp);
+    object_property_set_bool(OBJECT(uc), true, "opened", errp);
 }

 static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
@@ -76,13 +77,25 @@ static void rng_backend_init(Object *obj)
                             NULL);
 }

+static void rng_backend_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = rng_backend_complete;
+}
+
 static const TypeInfo rng_backend_info = {
    .name = TYPE_RNG_BACKEND,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(RngBackend),
    .instance_init = rng_backend_init,
    .class_size = sizeof(RngBackendClass),
+    .class_init = rng_backend_class_init,
    .abstract = true,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
 };

 static void register_types(void)
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,6 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
+block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
@@ -10,8 +11,9 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o

 ifeq ($(CONFIG_POSIX),y)
-block-obj-y += nbd.o sheepdog.o
+block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
@@ -23,4 +25,15 @@ common-obj-y += commit.o
 common-obj-y += mirror.o
 common-obj-y += backup.o

-$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
+iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
+iscsi.o-libs       := $(LIBISCSI_LIBS)
+curl.o-cflags      := $(CURL_CFLAGS)
+curl.o-libs        := $(CURL_LIBS)
+rbd.o-cflags       := $(RBD_CFLAGS)
+rbd.o-libs         := $(RBD_LIBS)
+gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
+gluster.o-libs     := $(GLUSTERFS_LIBS)
+ssh.o-cflags       := $(LIBSSH2_CFLAGS)
+ssh.o-libs         := $(LIBSSH2_LIBS)
+qcow.o-libs        := -lz
+linux-aio.o-libs   := -laio
--- a/block/backup.c
+++ b/block/backup.c
@@ -181,8 +181,13 @@ static int coroutine_fn backup_before_write_notify(
        void *opaque)
 {
    BdrvTrackedRequest *req = opaque;
+    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

-    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -186,6 +186,14 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {

    [BLKDBG_FLUSH_TO_OS]                    = "flush_to_os",
    [BLKDBG_FLUSH_TO_DISK]                  = "flush_to_disk",
+
+    [BLKDBG_PWRITEV_RMW_HEAD]               = "pwritev_rmw.head",
+    [BLKDBG_PWRITEV_RMW_AFTER_HEAD]         = "pwritev_rmw.after_head",
+    [BLKDBG_PWRITEV_RMW_TAIL]               = "pwritev_rmw.tail",
+    [BLKDBG_PWRITEV_RMW_AFTER_TAIL]         = "pwritev_rmw.after_tail",
+    [BLKDBG_PWRITEV]                        = "pwritev",
+    [BLKDBG_PWRITEV_ZERO]                   = "pwritev_zero",
+    [BLKDBG_PWRITEV_DONE]                   = "pwritev_done",
 };

 static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -271,19 +279,33 @@ static void remove_rule(BlkdebugRule *rule)
    g_free(rule);
 }

-static int read_config(BDRVBlkdebugState *s, const char *filename)
+static int read_config(BDRVBlkdebugState *s, const char *filename,
+                       QDict *options, Error **errp)
 {
-    FILE *f;
+    FILE *f = NULL;
    int ret;
    struct add_rule_data d;
+    Error *local_err = NULL;

-    f = fopen(filename, "r");
-    if (f == NULL) {
-        return -errno;
+    if (filename) {
+        f = fopen(filename, "r");
+        if (f == NULL) {
+            error_setg_errno(errp, errno, "Could not read blkdebug config file");
+            return -errno;
+        }
+
+        ret = qemu_config_parse(f, config_groups, filename);
+        if (ret < 0) {
+            error_setg(errp, "Could not parse blkdebug config file");
+            ret = -EINVAL;
+            goto fail;
+        }
    }

-    ret = qemu_config_parse(f, config_groups, filename);
-    if (ret < 0) {
+    qemu_config_parse_qdict(options, config_groups, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto fail;
    }

@@ -298,7 +320,9 @@ static int read_config(BDRVBlkdebugState *s, const char *filename)
 fail:
    qemu_opts_reset(&inject_error_opts);
    qemu_opts_reset(&set_state_opts);
-    fclose(f);
+    if (f) {
+        fclose(f);
+    }
    return ret;
 }

@@ -310,7 +334,9 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* Parse the blkdebug: prefix */
    if (!strstart(filename, "blkdebug:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkdebug:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -346,6 +372,11 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
+        {
+            .name = "align",
+            .type = QEMU_OPT_SIZE,
+            .help = "Required alignment in bytes",
+        },
        { /* end of list */ }
    },
 };
@@ -356,46 +387,53 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename, *config;
+    const char *config;
+    uint64_t align;
    int ret;

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
-        goto fail;
+        goto out;
    }

-    /* Read rules from config file */
+    /* Read rules from config file or command line options */
    config = qemu_opt_get(opts, "config");
-    if (config) {
-        ret = read_config(s, config);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read blkdebug config file");
-            goto fail;
-        }
+    ret = read_config(s, config, options, errp);
+    if (ret) {
+        goto out;
    }

    /* Set initial state */
    s->state = 1;

    /* Open the backing file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve image file name");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err);
+    assert(bs->file == NULL);
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
+                          flags | BDRV_O_PROTOCOL, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
-        goto fail;
+        goto out;
+    }
+
+    /* Set request alignment */
+    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
+    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
+        bs->request_alignment = align;
+    } else {
+        error_setg(errp, "Invalid alignment");
+        ret = -EINVAL;
+        goto fail_unref;
    }

    ret = 0;
-fail:
+    goto out;
+
+fail_unref:
+    bdrv_unref(bs->file);
+out:
    qemu_opts_del(opts);
    return ret;
 }
@@ -594,9 +632,9 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r;
+    BlkdebugSuspendedReq *r, *next;

-    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
        if (!strcmp(r->tag, tag)) {
            qemu_coroutine_enter(r->co, NULL);
            return 0;
@@ -609,7 +647,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
                                            const char *tag)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r;
+    BlkdebugSuspendedReq *r, *r_next;
    BlkdebugRule *rule, *next;
    int i, ret = -ENOENT;

@@ -622,7 +660,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
            }
        }
    }
-    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
        if (!strcmp(r->tag, tag)) {
            qemu_coroutine_enter(r->co, NULL);
            ret = 0;
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -78,7 +78,9 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* Parse the blkverify: prefix */
    if (!strstart(filename, "blkverify:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkverify:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -122,44 +124,31 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkverifyState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename, *raw;
    int ret;

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

-    /* Parse the raw image filename */
-    raw = qemu_opt_get(opts, "x-raw");
-    if (raw == NULL) {
-        error_setg(errp, "Could not retrieve raw image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err);
+    /* Open the raw file */
+    assert(bs->file == NULL);
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
+                          "raw", flags | BDRV_O_PROTOCOL, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    /* Open the test file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve test image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->test_file = bdrv_new("");
-    ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err);
+    assert(s->test_file == NULL);
+    ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
+                          "test", flags, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
-        bdrv_unref(s->test_file);
        s->test_file = NULL;
        goto fail;
    }
@@ -184,110 +173,6 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file);
 }

-/**
- * Check that I/O vector contents are identical
- *
- * @a:          I/O vector
- * @b:          I/O vector
- * @ret:        Offset to first mismatching byte or -1 if match
- */
-static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
-{
-    int i;
-    ssize_t offset = 0;
-
-    assert(a->niov == b->niov);
-    for (i = 0; i < a->niov; i++) {
-        size_t len = 0;
-        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
-        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
-
-        assert(a->iov[i].iov_len == b->iov[i].iov_len);
-        while (len < a->iov[i].iov_len && *p++ == *q++) {
-            len++;
-        }
-
-        offset += len;
-
-        if (len != a->iov[i].iov_len) {
-            return offset;
-        }
-    }
-    return -1;
-}
-
-typedef struct {
-    int src_index;
-    struct iovec *src_iov;
-    void *dest_base;
-} IOVectorSortElem;
-
-static int sortelem_cmp_src_base(const void *a, const void *b)
-{
-    const IOVectorSortElem *elem_a = a;
-    const IOVectorSortElem *elem_b = b;
-
-    /* Don't overflow */
-    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
-        return -1;
-    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-static int sortelem_cmp_src_index(const void *a, const void *b)
-{
-    const IOVectorSortElem *elem_a = a;
-    const IOVectorSortElem *elem_b = b;
-
-    return elem_a->src_index - elem_b->src_index;
-}
-
-/**
- * Copy contents of I/O vector
- *
- * The relative relationships of overlapping iovecs are preserved.  This is
- * necessary to ensure identical semantics in the cloned I/O vector.
- */
-static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
-                                  void *buf)
-{
-    IOVectorSortElem sortelems[src->niov];
-    void *last_end;
-    int i;
-
-    /* Sort by source iovecs by base address */
-    for (i = 0; i < src->niov; i++) {
-        sortelems[i].src_index = i;
-        sortelems[i].src_iov = &src->iov[i];
-    }
-    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
-
-    /* Allocate buffer space taking into account overlapping iovecs */
-    last_end = NULL;
-    for (i = 0; i < src->niov; i++) {
-        struct iovec *cur = sortelems[i].src_iov;
-        ptrdiff_t rewind = 0;
-
-        /* Detect overlap */
-        if (last_end && last_end > cur->iov_base) {
-            rewind = last_end - cur->iov_base;
-        }
-
-        sortelems[i].dest_base = buf - rewind;
-        buf += cur->iov_len - MIN(rewind, cur->iov_len);
-        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
-    }
-
-    /* Sort by source iovec index and build destination iovec */
-    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
-    for (i = 0; i < src->niov; i++) {
-        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
-    }
-}
-
 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
@@ -351,7 +236,7 @@ static void blkverify_aio_cb(void *opaque, int ret)

 static void blkverify_verify_readv(BlkverifyAIOCB *acb)
 {
-    ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
    if (offset != -1) {
        blkverify_err(acb, "contents mismatch in sector %" PRId64,
                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -369,7 +254,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
    acb->verify = blkverify_verify_readv;
    acb->buf = qemu_blockalign(bs->file, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
@@ -417,7 +302,7 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_aio_writev        = blkverify_aio_writev,
    .bdrv_aio_flush         = blkverify_aio_flush,

-    .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden,
+    .authorizations         = { true, false },
 };

 static void bdrv_blkverify_init(void)
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -129,7 +129,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        strcmp(bochs.subtype, GROWING_TYPE) ||
 	((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
 	(le32_to_cpu(bochs.version) != HEADER_V1))) {
-        return -EMEDIUMTYPE;
+        error_setg(errp, "Image not in Bochs format");
+        return -EINVAL;
    }

    if (le32_to_cpu(bochs.version) == HEADER_V1) {
--- a/block/commit.c
+++ b/block/commit.c
@@ -198,13 +198,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
        return;
    }

-    /* Once we support top == active layer, remove this check */
-    if (top == bs) {
-        error_setg(errp,
-                   "Top image as the active layer is currently unsupported");
-        return;
-    }
-
+    assert(top != bs);
    if (top == base) {
        error_setg(errp, "Invalid files for merge: top and base are the same");
        return;
--- a/block/cow.c
+++ b/block/cow.c
@@ -74,7 +74,8 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in COW format");
+        ret = -EINVAL;
        goto fail;
    }

@@ -82,7 +83,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
        char version[64];
        snprintf(version, sizeof(version),
               "COW version %d", cow_header.version);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "cow", version);
        ret = -ENOTSUP;
        goto fail;
@@ -346,15 +347,15 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,

    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    cow_bs = NULL;
+    ret = bdrv_open(&cow_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

--- a/block/curl.c
+++ b/block/curl.c
@@ -34,6 +34,11 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif

+#if LIBCURL_VERSION_NUM >= 0x071000
+/* The multi interface timer callback was introduced in 7.16.0 */
+#define NEED_CURL_TIMER_CALLBACK
+#endif
+
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
                   CURLPROTO_FTP | CURLPROTO_FTPS | \
                   CURLPROTO_TFTP)
@@ -77,6 +82,7 @@ typedef struct CURLState

 typedef struct BDRVCURLState {
    CURLM *multi;
+    QEMUTimer timer;
    size_t len;
    CURLState states[CURL_NUM_STATES];
    char *url;
@@ -87,6 +93,23 @@ typedef struct BDRVCURLState {
 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);

+#ifdef NEED_CURL_TIMER_CALLBACK
+static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
+{
+    BDRVCURLState *s = opaque;
+
+    DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
+    if (timeout_ms == -1) {
+        timer_del(&s->timer);
+    } else {
+        int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
+        timer_mod(&s->timer,
+                  qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
+    }
+    return 0;
+}
+#endif
+
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *s, void *sp)
 {
@@ -209,20 +232,10 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
    return FIND_RET_NONE;
 }

-static void curl_multi_do(void *arg)
+static void curl_multi_read(BDRVCURLState *s)
 {
-    BDRVCURLState *s = (BDRVCURLState *)arg;
-    int running;
-    int r;
    int msgs_in_queue;

-    if (!s->multi)
-        return;
-
-    do {
-        r = curl_multi_socket_all(s->multi, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
    /* Try to find done transfers, so we can free the easy
     * handle again. */
    do {
@@ -266,6 +279,41 @@ static void curl_multi_do(void *arg)
    } while(msgs_in_queue);
 }

+static void curl_multi_do(void *arg)
+{
+    BDRVCURLState *s = (BDRVCURLState *)arg;
+    int running;
+    int r;
+
+    if (!s->multi) {
+        return;
+    }
+
+    do {
+        r = curl_multi_socket_all(s->multi, &running);
+    } while(r == CURLM_CALL_MULTI_PERFORM);
+
+    curl_multi_read(s);
+}
+
+static void curl_multi_timeout_do(void *arg)
+{
+#ifdef NEED_CURL_TIMER_CALLBACK
+    BDRVCURLState *s = (BDRVCURLState *)arg;
+    int running;
+
+    if (!s->multi) {
+        return;
+    }
+
+    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
+
+    curl_multi_read(s);
+#else
+    abort();
+#endif
+}
+
 static CURLState *curl_init_state(BDRVCURLState *s)
 {
    CURLState *state = NULL;
@@ -408,30 +456,27 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    static int inited = 0;

    if (flags & BDRV_O_RDWR) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR,
-                      "curl block device does not support writes");
+        error_setg(errp, "curl block device does not support writes");
        return -EROFS;
    }

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto out_noclean;
    }

    s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
    if ((s->readahead_size & 0x1ff) != 0) {
-        fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
-                s->readahead_size);
+        error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
+                   s->readahead_size);
        goto out_noclean;
    }

    file = qemu_opt_get(opts, "url");
    if (file == NULL) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "curl block driver requires "
-                      "an 'url' option");
+        error_setg(errp, "curl block driver requires an 'url' option");
        goto out_noclean;
    }

@@ -473,12 +518,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

+    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
    // Now we know the file exists and its size, so let's
    // initialize the multi interface!

    s->multi = curl_multi_init();
    curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
    curl_multi_do(s);

    qemu_opts_del(opts);
@@ -597,6 +650,9 @@ static void curl_close(BlockDriverState *bs)
    }
    if (s->multi)
        curl_multi_cleanup(s->multi);
+
+    timer_del(&s->timer);
+
    g_free(s->url);
 }

--- a/block/gluster.c
+++ b/block/gluster.c
@@ -21,19 +21,15 @@
 #include "qemu/uri.h"

 typedef struct GlusterAIOCB {
-    BlockDriverAIOCB common;
    int64_t size;
    int ret;
-    bool *finished;
    QEMUBH *bh;
+    Coroutine *coroutine;
 } GlusterAIOCB;

 typedef struct BDRVGlusterState {
    struct glfs *glfs;
-    int fds[2];
    struct glfs_fd *fd;
-    int event_reader_pos;
-    GlusterAIOCB *event_acb;
 } BDRVGlusterState;

 #define GLUSTER_FD_READ  0
@@ -49,11 +45,13 @@ typedef struct GlusterConf {

 static void qemu_gluster_gconf_free(GlusterConf *gconf)
 {
-    g_free(gconf->server);
-    g_free(gconf->volname);
-    g_free(gconf->image);
-    g_free(gconf->transport);
-    g_free(gconf);
+    if (gconf) {
+        g_free(gconf->server);
+        g_free(gconf->volname);
+        g_free(gconf->image);
+        g_free(gconf->transport);
+        g_free(gconf);
+    }
 }

 static int parse_volume_options(GlusterConf *gconf, char *path)
@@ -131,7 +129,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
    }

    /* transport */
-    if (!strcmp(uri->scheme, "gluster")) {
+    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
        gconf->transport = g_strdup("tcp");
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
        gconf->transport = g_strdup("tcp");
@@ -167,7 +165,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
        }
        gconf->server = g_strdup(qp->p[0].value);
    } else {
-        gconf->server = g_strdup(uri->server);
+        gconf->server = g_strdup(uri->server ? uri->server : "localhost");
        gconf->port = uri->port;
    }

@@ -179,7 +177,8 @@ out:
    return ret;
 }

-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
+                                      Error **errp)
 {
    struct glfs *glfs = NULL;
    int ret;
@@ -187,8 +186,8 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)

    ret = qemu_gluster_parseuri(gconf, filename);
    if (ret < 0) {
-        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
-            "volname/image[?socket=...]");
+        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
+                   "volname/image[?socket=...]");
        errno = -ret;
        goto out;
    }
@@ -215,9 +214,11 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)

    ret = glfs_init(glfs);
    if (ret) {
-        error_report("Gluster connection failed for server=%s port=%d "
-             "volume=%s image=%s transport=%s", gconf->server, gconf->port,
-             gconf->volname, gconf->image, gconf->transport);
+        error_setg_errno(errp, errno,
+                         "Gluster connection failed for server=%s port=%d "
+                         "volume=%s image=%s transport=%s", gconf->server,
+                         gconf->port, gconf->volname, gconf->image,
+                         gconf->transport);
        goto out;
    }
    return glfs;
@@ -231,46 +232,32 @@ out:
    return NULL;
 }

-static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+static void qemu_gluster_complete_aio(void *opaque)
 {
-    int ret;
-    bool *finished = acb->finished;
-    BlockDriverCompletionFunc *cb = acb->common.cb;
-    void *opaque = acb->common.opaque;
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;

-    if (!acb->ret || acb->ret == acb->size) {
-        ret = 0; /* Success */
-    } else if (acb->ret < 0) {
-        ret = acb->ret; /* Read/Write failed */
-    } else {
-        ret = -EIO; /* Partial read/write - fail it */
-    }
-
-    qemu_aio_release(acb);
-    cb(opaque, ret);
-    if (finished) {
-        *finished = true;
-    }
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_coroutine_enter(acb->coroutine, NULL);
 }

-static void qemu_gluster_aio_event_reader(void *opaque)
+/*
+ * AIO callback routine called from GlusterFS thread.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
 {
-    BDRVGlusterState *s = opaque;
-    ssize_t ret;
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;

-    do {
-        char *p = (char *)&s->event_acb;
+    if (!ret || ret == acb->size) {
+        acb->ret = 0; /* Success */
+    } else if (ret < 0) {
+        acb->ret = ret; /* Read/Write failed */
+    } else {
+        acb->ret = -EIO; /* Partial read/write - fail it */
+    }

-        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_acb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_acb)) {
-                s->event_reader_pos = 0;
-                qemu_gluster_complete_aio(s->event_acb, s);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    qemu_bh_schedule(acb->bh);
 }

 /* TODO Convert to fine grained options */
@@ -287,60 +274,57 @@ static QemuOptsList runtime_opts = {
    },
 };

+static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
+{
+    assert(open_flags != NULL);
+
+    *open_flags |= O_BINARY;
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        *open_flags |= O_RDWR;
+    } else {
+        *open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        *open_flags |= O_DIRECT;
+    }
+}
+
 static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
                             int bdrv_flags, Error **errp)
 {
    BDRVGlusterState *s = bs->opaque;
-    int open_flags = O_BINARY;
+    int open_flags = 0;
    int ret = 0;
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto out;
    }

    filename = qemu_opt_get(opts, "filename");

-
-    s->glfs = qemu_gluster_init(gconf, filename);
+    s->glfs = qemu_gluster_init(gconf, filename, errp);
    if (!s->glfs) {
        ret = -errno;
        goto out;
    }

-    if (bdrv_flags & BDRV_O_RDWR) {
-        open_flags |= O_RDWR;
-    } else {
-        open_flags |= O_RDONLY;
-    }
-
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        open_flags |= O_DIRECT;
-    }
+    qemu_gluster_parse_flags(bdrv_flags, &open_flags);

    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
    if (!s->fd) {
        ret = -errno;
-        goto out;
    }

-    ret = qemu_pipe(s->fds);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    }
-    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
-        qemu_gluster_aio_event_reader, NULL, s);
-
 out:
    qemu_opts_del(opts);
    qemu_gluster_gconf_free(gconf);
@@ -356,24 +340,180 @@ out:
    return ret;
 }

+typedef struct BDRVGlusterReopenState {
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+} BDRVGlusterReopenState;
+
+
+static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
+                                       BlockReopenQueue *queue, Error **errp)
+{
+    int ret = 0;
+    BDRVGlusterReopenState *reop_s;
+    GlusterConf *gconf = NULL;
+    int open_flags = 0;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    state->opaque = g_malloc0(sizeof(BDRVGlusterReopenState));
+    reop_s = state->opaque;
+
+    qemu_gluster_parse_flags(state->flags, &open_flags);
+
+    gconf = g_malloc0(sizeof(GlusterConf));
+
+    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+    if (reop_s->glfs == NULL) {
+        ret = -errno;
+        goto exit;
+    }
+
+    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+    if (reop_s->fd == NULL) {
+        /* reops->glfs will be cleaned up in _abort */
+        ret = -errno;
+        goto exit;
+    }
+
+exit:
+    /* state->opaque will be freed in either the _abort or _commit */
+    qemu_gluster_gconf_free(gconf);
+    return ret;
+}
+
+static void qemu_gluster_reopen_commit(BDRVReopenState *state)
+{
+    BDRVGlusterReopenState *reop_s = state->opaque;
+    BDRVGlusterState *s = state->bs->opaque;
+
+
+    /* close the old */
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+
+    /* use the newly opened image / connection */
+    s->fd         = reop_s->fd;
+    s->glfs       = reop_s->glfs;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+
+static void qemu_gluster_reopen_abort(BDRVReopenState *state)
+{
+    BDRVGlusterReopenState *reop_s = state->opaque;
+
+    if (reop_s == NULL) {
+        return;
+    }
+
+    if (reop_s->fd) {
+        glfs_close(reop_s->fd);
+    }
+
+    if (reop_s->glfs) {
+        glfs_fini(reop_s->glfs);
+    }
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    int ret;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+    BDRVGlusterState *s = bs->opaque;
+    off_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+
+    acb->size = size;
+    acb->ret = 0;
+    acb->coroutine = qemu_coroutine_self();
+
+    ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
+
+out:
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
+}
+
+static inline bool gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline bool gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return 0;
+}
+#endif
+
 static int qemu_gluster_create(const char *filename,
        QEMUOptionParameter *options, Error **errp)
 {
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
+    int prealloc = 0;
    int64_t total_size = 0;
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));

-    glfs = qemu_gluster_init(gconf, filename);
+    glfs = qemu_gluster_init(gconf, filename, errp);
    if (!glfs) {
-        ret = -errno;
+        ret = -EINVAL;
        goto out;
    }

    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "full") &&
+                    gluster_supports_zerofill()) {
+                prealloc = 1;
+            } else {
+                error_setg(errp, "Invalid preallocation mode: '%s'"
+                    " or GlusterFS doesn't support zerofill API",
+                           options->value.s);
+                ret = -EINVAL;
+                goto out;
+            }
        }
        options++;
    }
@@ -383,9 +523,15 @@ static int qemu_gluster_create(const char *filename,
    if (!fd) {
        ret = -errno;
    } else {
-        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0,
+                    total_size * BDRV_SECTOR_SIZE)) {
+                ret = -errno;
+            }
+        } else {
            ret = -errno;
        }
+
        if (glfs_close(fd) != 0) {
            ret = -errno;
        }
@@ -398,58 +544,18 @@ out:
    return ret;
 }

-static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
-    bool finished = false;
-
-    acb->finished = &finished;
-    while (!finished) {
-        qemu_aio_wait();
-    }
-}
-
-static const AIOCBInfo gluster_aiocb_info = {
-    .aiocb_size = sizeof(GlusterAIOCB),
-    .cancel = qemu_gluster_aio_cancel,
-};
-
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVGlusterState *s = bs->opaque;
-    int retval;
-
-    acb->ret = ret;
-    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
-    if (retval != sizeof(acb)) {
-        /*
-         * Gluster AIO callback thread failed to notify the waiting
-         * QEMU thread about IO completion.
-         */
-        error_report("Gluster AIO completion failed: %s", strerror(errno));
-        abort();
-    }
-}
-
-static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int write)
+static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;

-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = size;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    if (write) {
        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -460,13 +566,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
    }

    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }

 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -482,71 +591,68 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
    return 0;
 }

-static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }

-static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }

-static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;

-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = 0;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }

 #ifdef CONFIG_GLUSTERFS_DISCARD
-static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
-        void *opaque)
+static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;

-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = 0;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }
 #endif

@@ -581,10 +687,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
 {
    BDRVGlusterState *s = bs->opaque;

-    close(s->fds[GLUSTER_FD_READ]);
-    close(s->fds[GLUSTER_FD_WRITE]);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
-
    if (s->fd) {
        glfs_close(s->fd);
        s->fd = NULL;
@@ -604,6 +706,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = {
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, full)"
+    },
    { NULL }
 };

@@ -613,17 +720,23 @@ static BlockDriver bdrv_gluster = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -634,17 +747,23 @@ static BlockDriver bdrv_gluster_tcp = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -655,17 +774,23 @@ static BlockDriver bdrv_gluster_unix = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -676,17 +801,23 @@ static BlockDriver bdrv_gluster_rdma = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
--- a/block/iscsi.c
+++ b/block/iscsi.c
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -31,7 +31,8 @@ typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *target;
-    MirrorSyncMode mode;
+    BlockDriverState *base;
+    bool is_none_mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    bool should_complete;
@@ -95,6 +96,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
    }

+    qemu_iovec_destroy(&op->qiov);
    g_slice_free(MirrorOp, op);
    qemu_coroutine_enter(s->common.co, NULL);
 }
@@ -335,10 +337,9 @@ static void coroutine_fn mirror_run(void *opaque)
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

-    if (s->mode != MIRROR_SYNC_MODE_NONE) {
+    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
-        BlockDriverState *base;
-        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
+        BlockDriverState *base = s->base;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_is_allocated_above(bs, base,
@@ -481,8 +482,14 @@ immediate_exit:
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
+        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
+            /* drop the bs loop chain formed by the swap: break the loop then
+             * trigger the unref from the top one */
+            BlockDriverState *p = s->base->backing_hd;
+            s->base->backing_hd = NULL;
+            bdrv_unref(p);
+        }
    }
-    bdrv_close(s->target);
    bdrv_unref(s->target);
    block_job_completed(&s->common, ret);
 }
@@ -536,12 +543,24 @@ static const BlockJobDriver mirror_job_driver = {
    .complete      = mirror_complete,
 };

-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  BlockDriverCompletionFunc *cb,
-                  void *opaque, Error **errp)
+static const BlockJobDriver commit_active_job_driver = {
+    .instance_size = sizeof(MirrorBlockJob),
+    .job_type      = BLOCK_JOB_TYPE_COMMIT,
+    .set_speed     = mirror_set_speed,
+    .iostatus_reset
+                   = mirror_iostatus_reset,
+    .complete      = mirror_complete,
+};
+
+static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
+                            int64_t speed, int64_t granularity,
+                            int64_t buf_size,
+                            BlockdevOnError on_source_error,
+                            BlockdevOnError on_target_error,
+                            BlockDriverCompletionFunc *cb,
+                            void *opaque, Error **errp,
+                            const BlockJobDriver *driver,
+                            bool is_none_mode, BlockDriverState *base)
 {
    MirrorBlockJob *s;

@@ -566,7 +585,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

-    s = block_job_create(&mirror_job_driver, bs, speed, cb, opaque, errp);
+
+    s = block_job_create(driver, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -574,7 +594,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
-    s->mode = mode;
+    s->is_none_mode = is_none_mode;
+    s->base = base;
    s->granularity = granularity;
    s->buf_size = MAX(buf_size, granularity);

@@ -586,3 +607,80 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
 }
+
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    bool is_none_mode;
+    BlockDriverState *base;
+
+    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
+    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+    mirror_start_job(bs, target, speed, granularity, buf_size,
+                     on_source_error, on_target_error, cb, opaque, errp,
+                     &mirror_job_driver, is_none_mode, base);
+}
+
+void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
+                         int64_t speed,
+                         BlockdevOnError on_error,
+                         BlockDriverCompletionFunc *cb,
+                         void *opaque, Error **errp)
+{
+    int64_t length, base_length;
+    int orig_base_flags;
+    int ret;
+    Error *local_err = NULL;
+
+    orig_base_flags = bdrv_get_flags(base);
+
+    if (bdrv_reopen(base, bs->open_flags, errp)) {
+        return;
+    }
+
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_setg_errno(errp, -length,
+                         "Unable to determine length of %s", bs->filename);
+        goto error_restore_flags;
+    }
+
+    base_length = bdrv_getlength(base);
+    if (base_length < 0) {
+        error_setg_errno(errp, -base_length,
+                         "Unable to determine length of %s", base->filename);
+        goto error_restore_flags;
+    }
+
+    if (length > base_length) {
+        ret = bdrv_truncate(base, length);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                            "Top image %s is larger than base image %s, and "
+                             "resize of base image failed",
+                             bs->filename, base->filename);
+            goto error_restore_flags;
+        }
+    }
+
+    bdrv_ref(base);
+    mirror_start_job(bs, base, speed, 0, 0,
+                     on_error, on_error, cb, opaque, &local_err,
+                     &commit_active_job_driver, false, base);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        goto error_restore_flags;
+    }
+
+    return;
+
+error_restore_flags:
+    /* ignore error and errp for bdrv_reopen, because we want to propagate
+     * the original error */
+    bdrv_reopen(base, orig_base_flags, NULL);
+    return;
+}
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -0,0 +1,385 @@
+/*
+ * QEMU Block driver for  NBD
+ *
+ * Copyright (C) 2008 Bull S.A.S.
+ *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ * Some parts:
+ *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "nbd-client.h"
+#include "qemu/sockets.h"
+
+#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
+#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
+
+static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+{
+    int i;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->recv_coroutine[i]) {
+            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+        }
+    }
+}
+
+static void nbd_reply_ready(void *opaque)
+{
+    NbdClientSession *s = opaque;
+    uint64_t i;
+    int ret;
+
+    if (s->reply.handle == 0) {
+        /* No reply already in flight.  Fetch a header.  It is possible
+         * that another thread has done the same thing in parallel, so
+         * the socket is not readable anymore.
+         */
+        ret = nbd_receive_reply(s->sock, &s->reply);
+        if (ret == -EAGAIN) {
+            return;
+        }
+        if (ret < 0) {
+            s->reply.handle = 0;
+            goto fail;
+        }
+    }
+
+    /* There's no need for a mutex on the receive side, because the
+     * handler acts as a synchronization point and ensures that only
+     * one coroutine is called until the reply finishes.  */
+    i = HANDLE_TO_INDEX(s, s->reply.handle);
+    if (i >= MAX_NBD_REQUESTS) {
+        goto fail;
+    }
+
+    if (s->recv_coroutine[i]) {
+        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+        return;
+    }
+
+fail:
+    nbd_recv_coroutines_enter_all(s);
+}
+
+static void nbd_restart_write(void *opaque)
+{
+    NbdClientSession *s = opaque;
+
+    qemu_coroutine_enter(s->send_coroutine, NULL);
+}
+
+static int nbd_co_send_request(NbdClientSession *s,
+    struct nbd_request *request,
+    QEMUIOVector *qiov, int offset)
+{
+    int rc, ret;
+
+    qemu_co_mutex_lock(&s->send_mutex);
+    s->send_coroutine = qemu_coroutine_self();
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+    if (qiov) {
+        if (!s->is_unix) {
+            socket_set_cork(s->sock, 1);
+        }
+        rc = nbd_send_request(s->sock, request);
+        if (rc >= 0) {
+            ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
+                                offset, request->len);
+            if (ret != request->len) {
+                rc = -EIO;
+            }
+        }
+        if (!s->is_unix) {
+            socket_set_cork(s->sock, 0);
+        }
+    } else {
+        rc = nbd_send_request(s->sock, request);
+    }
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+    s->send_coroutine = NULL;
+    qemu_co_mutex_unlock(&s->send_mutex);
+    return rc;
+}
+
+static void nbd_co_receive_reply(NbdClientSession *s,
+    struct nbd_request *request, struct nbd_reply *reply,
+    QEMUIOVector *qiov, int offset)
+{
+    int ret;
+
+    /* Wait until we're woken up by the read handler.  TODO: perhaps
+     * peek at the next reply and avoid yielding if it's ours?  */
+    qemu_coroutine_yield();
+    *reply = s->reply;
+    if (reply->handle != request->handle) {
+        reply->error = EIO;
+    } else {
+        if (qiov && reply->error == 0) {
+            ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
+                                offset, request->len);
+            if (ret != request->len) {
+                reply->error = EIO;
+            }
+        }
+
+        /* Tell the read handler to read another header.  */
+        s->reply.handle = 0;
+    }
+}
+
+static void nbd_coroutine_start(NbdClientSession *s,
+   struct nbd_request *request)
+{
+    int i;
+
+    /* Poor man semaphore.  The free_sema is locked when no other request
+     * can be accepted, and unlocked after receiving one reply.  */
+    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
+        qemu_co_mutex_lock(&s->free_sema);
+        assert(s->in_flight < MAX_NBD_REQUESTS);
+    }
+    s->in_flight++;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->recv_coroutine[i] == NULL) {
+            s->recv_coroutine[i] = qemu_coroutine_self();
+            break;
+        }
+    }
+
+    assert(i < MAX_NBD_REQUESTS);
+    request->handle = INDEX_TO_HANDLE(s, i);
+}
+
+static void nbd_coroutine_end(NbdClientSession *s,
+    struct nbd_request *request)
+{
+    int i = HANDLE_TO_INDEX(s, request->handle);
+    s->recv_coroutine[i] = NULL;
+    if (s->in_flight-- == MAX_NBD_REQUESTS) {
+        qemu_co_mutex_unlock(&s->free_sema);
+    }
+}
+
+static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov,
+                          int offset)
+{
+    struct nbd_request request = { .type = NBD_CMD_READ };
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, qiov, offset);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+
+}
+
+static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num,
+                           int nb_sectors, QEMUIOVector *qiov,
+                           int offset)
+{
+    struct nbd_request request = { .type = NBD_CMD_WRITE };
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    if (!bdrv_enable_write_cache(client->bs) &&
+        (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+        request.type |= NBD_CMD_FLAG_FUA;
+    }
+
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(client, &request, qiov, offset);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+}
+
+/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
+ * remain aligned to 4K. */
+#define NBD_MAX_SECTORS 2040
+
+int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_readv_1(client, sector_num,
+                             NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset);
+}
+
+int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
+                                 int nb_sectors, QEMUIOVector *qiov)
+{
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_writev_1(client, sector_num,
+                              NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset);
+}
+
+int nbd_client_session_co_flush(NbdClientSession *client)
+{
+    struct nbd_request request = { .type = NBD_CMD_FLUSH };
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
+        return 0;
+    }
+
+    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+        request.type |= NBD_CMD_FLAG_FUA;
+    }
+
+    request.from = 0;
+    request.len = 0;
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+}
+
+int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
+    int nb_sectors)
+{
+    struct nbd_request request = { .type = NBD_CMD_TRIM };
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
+        return 0;
+    }
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+
+}
+
+static void nbd_teardown_connection(NbdClientSession *client)
+{
+    struct nbd_request request = {
+        .type = NBD_CMD_DISC,
+        .from = 0,
+        .len = 0
+    };
+
+    nbd_send_request(client->sock, &request);
+
+    /* finish any pending coroutines */
+    shutdown(client->sock, 2);
+    nbd_recv_coroutines_enter_all(client);
+
+    qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+    closesocket(client->sock);
+    client->sock = -1;
+}
+
+void nbd_client_session_close(NbdClientSession *client)
+{
+    if (!client->bs) {
+        return;
+    }
+
+    nbd_teardown_connection(client);
+    client->bs = NULL;
+}
+
+int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
+    int sock, const char *export)
+{
+    int ret;
+
+    /* NBD handshake */
+    logout("session init %s\n", export);
+    qemu_set_block(sock);
+    ret = nbd_receive_negotiate(sock, export,
+                                &client->nbdflags, &client->size,
+                                &client->blocksize);
+    if (ret < 0) {
+        logout("Failed to negotiate with the NBD server\n");
+        closesocket(sock);
+        return ret;
+    }
+
+    qemu_co_mutex_init(&client->send_mutex);
+    qemu_co_mutex_init(&client->free_sema);
+    client->bs = bs;
+    client->sock = sock;
+
+    /* Now that we're connected, set the socket to be non-blocking and
+     * kick the reply mechanism.  */
+    qemu_set_nonblock(sock);
+    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
+
+    logout("Established connection with NBD server\n");
+    return 0;
+}
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -0,0 +1,50 @@
+#ifndef NBD_CLIENT_H
+#define NBD_CLIENT_H
+
+#include "qemu-common.h"
+#include "block/nbd.h"
+#include "block/block_int.h"
+
+/* #define DEBUG_NBD */
+
+#if defined(DEBUG_NBD)
+#define logout(fmt, ...) \
+    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
+#else
+#define logout(fmt, ...) ((void)0)
+#endif
+
+#define MAX_NBD_REQUESTS    16
+
+typedef struct NbdClientSession {
+    int sock;
+    uint32_t nbdflags;
+    off_t size;
+    size_t blocksize;
+
+    CoMutex send_mutex;
+    CoMutex free_sema;
+    Coroutine *send_coroutine;
+    int in_flight;
+
+    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
+    struct nbd_reply reply;
+
+    bool is_unix;
+
+    BlockDriverState *bs;
+} NbdClientSession;
+
+int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
+                            int sock, const char *export_name);
+void nbd_client_session_close(NbdClientSession *client);
+
+int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
+                                  int nb_sectors);
+int nbd_client_session_co_flush(NbdClientSession *client);
+int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
+                                 int nb_sectors, QEMUIOVector *qiov);
+int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
+                                int nb_sectors, QEMUIOVector *qiov);
+
+#endif /* NBD_CLIENT_H */
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -26,8 +26,7 @@
 * THE SOFTWARE.
 */

-#include "qemu-common.h"
-#include "block/nbd.h"
+#include "block/nbd-client.h"
 #include "qemu/uri.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
@@ -40,37 +39,9 @@

 #define EN_OPTSTR ":exportname="

-/* #define DEBUG_NBD */
-
-#if defined(DEBUG_NBD)
-#define logout(fmt, ...) \
-                fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
-#else
-#define logout(fmt, ...) ((void)0)
-#endif
-
-#define MAX_NBD_REQUESTS	16
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
-
 typedef struct BDRVNBDState {
-    int sock;
-    uint32_t nbdflags;
-    off_t size;
-    size_t blocksize;
-
-    CoMutex send_mutex;
-    CoMutex free_sema;
-    Coroutine *send_coroutine;
-    int in_flight;
-
-    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-    struct nbd_reply reply;
-
-    bool is_unix;
+    NbdClientSession client;
    QemuOpts *socket_opts;
-
-    char *export_name; /* An NBD server may export several devices */
 } BDRVNBDState;

 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -217,195 +188,49 @@ out:
    g_free(file);
 }

-static int nbd_config(BDRVNBDState *s, QDict *options)
+static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
+                       Error **errp)
 {
    Error *local_err = NULL;

-    if (qdict_haskey(options, "path")) {
-        if (qdict_haskey(options, "host")) {
-            qerror_report(ERROR_CLASS_GENERIC_ERROR, "path and host may not "
-                          "be used at the same time.");
-            return -EINVAL;
+    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
+        if (qdict_haskey(options, "path")) {
+            error_setg(errp, "path and host may not be used at the same time.");
+        } else {
+            error_setg(errp, "one of path and host must be specified.");
        }
-        s->is_unix = true;
-    } else if (qdict_haskey(options, "host")) {
-        s->is_unix = false;
-    } else {
-        return -EINVAL;
+        return;
    }

-    s->socket_opts = qemu_opts_create_nofail(&socket_optslist);
+    s->client.is_unix = qdict_haskey(options, "path");
+    s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
+                                      &error_abort);

    qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
-        return -EINVAL;
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

    if (!qemu_opt_get(s->socket_opts, "port")) {
        qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
    }

-    s->export_name = g_strdup(qdict_get_try_str(options, "export"));
-    if (s->export_name) {
+    *export = g_strdup(qdict_get_try_str(options, "export"));
+    if (*export) {
        qdict_del(options, "export");
    }
-
-    return 0;
 }

-
-static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
-{
-    int i;
-
-    /* Poor man semaphore.  The free_sema is locked when no other request
-     * can be accepted, and unlocked after receiving one reply.  */
-    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
-        qemu_co_mutex_lock(&s->free_sema);
-        assert(s->in_flight < MAX_NBD_REQUESTS);
-    }
-    s->in_flight++;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i] == NULL) {
-            s->recv_coroutine[i] = qemu_coroutine_self();
-            break;
-        }
-    }
-
-    assert(i < MAX_NBD_REQUESTS);
-    request->handle = INDEX_TO_HANDLE(s, i);
-}
-
-static void nbd_reply_ready(void *opaque)
-{
-    BDRVNBDState *s = opaque;
-    uint64_t i;
-    int ret;
-
-    if (s->reply.handle == 0) {
-        /* No reply already in flight.  Fetch a header.  It is possible
-         * that another thread has done the same thing in parallel, so
-         * the socket is not readable anymore.
-         */
-        ret = nbd_receive_reply(s->sock, &s->reply);
-        if (ret == -EAGAIN) {
-            return;
-        }
-        if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
-        }
-    }
-
-    /* There's no need for a mutex on the receive side, because the
-     * handler acts as a synchronization point and ensures that only
-     * one coroutine is called until the reply finishes.  */
-    i = HANDLE_TO_INDEX(s, s->reply.handle);
-    if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
-    }
-
-    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        return;
-    }
-
-fail:
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        }
-    }
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    BDRVNBDState *s = opaque;
-    qemu_coroutine_enter(s->send_coroutine, NULL);
-}
-
-static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
-                               QEMUIOVector *qiov, int offset)
-{
-    int rc, ret;
-
-    qemu_co_mutex_lock(&s->send_mutex);
-    s->send_coroutine = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
-    if (qiov) {
-        if (!s->is_unix) {
-            socket_set_cork(s->sock, 1);
-        }
-        rc = nbd_send_request(s->sock, request);
-        if (rc >= 0) {
-            ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
-                                offset, request->len);
-            if (ret != request->len) {
-                rc = -EIO;
-            }
-        }
-        if (!s->is_unix) {
-            socket_set_cork(s->sock, 0);
-        }
-    } else {
-        rc = nbd_send_request(s->sock, request);
-    }
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
-    s->send_coroutine = NULL;
-    qemu_co_mutex_unlock(&s->send_mutex);
-    return rc;
-}
-
-static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
-                                 struct nbd_reply *reply,
-                                 QEMUIOVector *qiov, int offset)
-{
-    int ret;
-
-    /* Wait until we're woken up by the read handler.  TODO: perhaps
-     * peek at the next reply and avoid yielding if it's ours?  */
-    qemu_coroutine_yield();
-    *reply = s->reply;
-    if (reply->handle != request->handle) {
-        reply->error = EIO;
-    } else {
-        if (qiov && reply->error == 0) {
-            ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
-                                offset, request->len);
-            if (ret != request->len) {
-                reply->error = EIO;
-            }
-        }
-
-        /* Tell the read handler to read another header.  */
-        s->reply.handle = 0;
-    }
-}
-
-static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
-{
-    int i = HANDLE_TO_INDEX(s, request->handle);
-    s->recv_coroutine[i] = NULL;
-    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_unlock(&s->free_sema);
-    }
-}
-
-static int nbd_establish_connection(BlockDriverState *bs)
+static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
    int sock;
-    int ret;
-    off_t size;
-    size_t blocksize;

-    if (s->is_unix) {
-        sock = unix_socket_outgoing(qemu_opt_get(s->socket_opts, "path"));
+    if (s->client.is_unix) {
+        sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
    } else {
-        sock = tcp_socket_outgoing_opts(s->socket_opts);
+        sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
        if (sock >= 0) {
            socket_set_nodelay(sock);
        }
@@ -417,226 +242,85 @@ static int nbd_establish_connection(BlockDriverState *bs)
        return -errno;
    }

-    /* NBD handshake */
-    ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
-                                &blocksize);
-    if (ret < 0) {
-        logout("Failed to negotiate with the NBD server\n");
-        closesocket(sock);
-        return ret;
-    }
-
-    /* Now that we're connected, set the socket to be non-blocking and
-     * kick the reply mechanism.  */
-    qemu_set_nonblock(sock);
-    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, s);
-
-    s->sock = sock;
-    s->size = size;
-    s->blocksize = blocksize;
-
-    logout("Established connection with NBD server\n");
-    return 0;
-}
-
-static void nbd_teardown_connection(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-
-    request.type = NBD_CMD_DISC;
-    request.from = 0;
-    request.len = 0;
-    nbd_send_request(s->sock, &request);
-
-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
-    closesocket(s->sock);
+    return sock;
 }

 static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
-    int result;
-
-    qemu_co_mutex_init(&s->send_mutex);
-    qemu_co_mutex_init(&s->free_sema);
+    char *export = NULL;
+    int result, sock;
+    Error *local_err = NULL;

    /* Pop the config into our state object. Exit if invalid. */
-    result = nbd_config(s, options);
-    if (result != 0) {
-        return result;
+    nbd_config(s, options, &export, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
    }

    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    result = nbd_establish_connection(bs);
+    sock = nbd_establish_connection(bs, errp);
+    if (sock < 0) {
+        return sock;
+    }

+    /* NBD handshake */
+    result = nbd_client_session_init(&s->client, bs, sock, export);
+    g_free(export);
    return result;
 }

-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov,
-                          int offset)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    request.type = NBD_CMD_READ;
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(s, &request);
-    ret = nbd_co_send_request(s, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, qiov, offset);
-    }
-    nbd_coroutine_end(s, &request);
-    return -reply.error;
-
-}
-
-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
-                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    request.type = NBD_CMD_WRITE;
-    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(s, &request);
-    ret = nbd_co_send_request(s, &request, qiov, offset);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(s, &request);
-    return -reply.error;
-}
-
-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
 static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
 {
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
+    BDRVNBDState *s = bs->opaque;
+
+    return nbd_client_session_co_readv(&s->client, sector_num,
+                                       nb_sectors, qiov);
 }

 static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+    BDRVNBDState *s = bs->opaque;
+
+    return nbd_client_session_co_writev(&s->client, sector_num,
+                                        nb_sectors, qiov);
 }

 static int nbd_co_flush(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-    ssize_t ret;

-    if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
-        return 0;
-    }
-
-    request.type = NBD_CMD_FLUSH;
-    if (s->nbdflags & NBD_FLAG_SEND_FUA) {
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
-    request.from = 0;
-    request.len = 0;
-
-    nbd_coroutine_start(s, &request);
-    ret = nbd_co_send_request(s, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(s, &request);
-    return -reply.error;
+    return nbd_client_session_co_flush(&s->client);
 }

 static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors)
 {
    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-    ssize_t ret;

-    if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
-        return 0;
-    }
-    request.type = NBD_CMD_TRIM;
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(s, &request);
-    ret = nbd_co_send_request(s, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(s, &request);
-    return -reply.error;
+    return nbd_client_session_co_discard(&s->client, sector_num,
+                                         nb_sectors);
 }

 static void nbd_close(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;
-    g_free(s->export_name);
-    qemu_opts_del(s->socket_opts);

-    nbd_teardown_connection(bs);
+    qemu_opts_del(s->socket_opts);
+    nbd_client_session_close(&s->client);
 }

 static int64_t nbd_getlength(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;

-    return s->size;
+    return s->client.size;
 }

 static BlockDriver bdrv_nbd = {
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -0,0 +1,439 @@
+/*
+ * QEMU Block driver for native access to files on NFS shares
+ *
+ * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "block/block_int.h"
+#include "trace.h"
+#include "qemu/iov.h"
+#include "qemu/uri.h"
+#include "sysemu/sysemu.h"
+#include <nfsc/libnfs.h>
+
+typedef struct NFSClient {
+    struct nfs_context *context;
+    struct nfsfh *fh;
+    int events;
+    bool has_zero_init;
+} NFSClient;
+
+typedef struct NFSRPC {
+    int ret;
+    int complete;
+    QEMUIOVector *iov;
+    struct stat *st;
+    Coroutine *co;
+    QEMUBH *bh;
+} NFSRPC;
+
+static void nfs_process_read(void *arg);
+static void nfs_process_write(void *arg);
+
+static void nfs_set_events(NFSClient *client)
+{
+    int ev = nfs_which_events(client->context);
+    if (ev != client->events) {
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
+                      (ev & POLLIN) ? nfs_process_read : NULL,
+                      (ev & POLLOUT) ? nfs_process_write : NULL,
+                      client);
+
+    }
+    client->events = ev;
+}
+
+static void nfs_process_read(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLIN);
+    nfs_set_events(client);
+}
+
+static void nfs_process_write(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLOUT);
+    nfs_set_events(client);
+}
+
+static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
+{
+    *task = (NFSRPC) {
+        .co         = qemu_coroutine_self(),
+    };
+}
+
+static void nfs_co_generic_bh_cb(void *opaque)
+{
+    NFSRPC *task = opaque;
+    qemu_bh_delete(task->bh);
+    qemu_coroutine_enter(task->co, NULL);
+}
+
+static void
+nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
+                  void *private_data)
+{
+    NFSRPC *task = private_data;
+    task->complete = 1;
+    task->ret = ret;
+    if (task->ret > 0 && task->iov) {
+        if (task->ret <= task->iov->size) {
+            qemu_iovec_from_buf(task->iov, 0, data, task->ret);
+        } else {
+            task->ret = -EIO;
+        }
+    }
+    if (task->ret == 0 && task->st) {
+        memcpy(task->st, data, sizeof(struct stat));
+    }
+    if (task->co) {
+        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+        qemu_bh_schedule(task->bh);
+    }
+}
+
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+
+    nfs_co_init_task(client, &task);
+    task.iov = iov;
+
+    if (nfs_pread_async(client->context, client->fh,
+                        sector_num * BDRV_SECTOR_SIZE,
+                        nb_sectors * BDRV_SECTOR_SIZE,
+                        nfs_co_generic_cb, &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    if (task.ret < 0) {
+        return task.ret;
+    }
+
+    /* zero pad short reads */
+    if (task.ret < iov->size) {
+        qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
+    }
+
+    return 0;
+}
+
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+    char *buf = NULL;
+
+    nfs_co_init_task(client, &task);
+
+    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+
+    if (nfs_pwrite_async(client->context, client->fh,
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         buf, nfs_co_generic_cb, &task) != 0) {
+        g_free(buf);
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    g_free(buf);
+
+    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
+        return task.ret < 0 ? task.ret : -EIO;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+
+    nfs_co_init_task(client, &task);
+
+    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
+                        &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    return task.ret;
+}
+
+/* TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "nfs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the NFS file",
+        },
+        { /* end of list */ }
+    },
+};
+
+static void nfs_client_close(NFSClient *client)
+{
+    if (client->context) {
+        if (client->fh) {
+            nfs_close(client->context, client->fh);
+        }
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+        nfs_destroy_context(client->context);
+    }
+    memset(client, 0, sizeof(NFSClient));
+}
+
+static void nfs_file_close(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    nfs_client_close(client);
+}
+
+static int64_t nfs_client_open(NFSClient *client, const char *filename,
+                               int flags, Error **errp)
+{
+    int ret = -EINVAL, i;
+    struct stat st;
+    URI *uri;
+    QueryParams *qp = NULL;
+    char *file = NULL, *strp = NULL;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    strp = strrchr(uri->path, '/');
+    if (strp == NULL) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    file = g_strdup(strp);
+    *strp = 0;
+
+    client->context = nfs_init_context();
+    if (client->context == NULL) {
+        error_setg(errp, "Failed to init NFS context");
+        goto fail;
+    }
+
+    qp = query_params_parse(uri->query);
+    for (i = 0; i < qp->n; i++) {
+        if (!qp->p[i].value) {
+            error_setg(errp, "Value for NFS parameter expected: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+        if (!strncmp(qp->p[i].name, "uid", 3)) {
+            nfs_set_uid(client->context, atoi(qp->p[i].value));
+        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
+            nfs_set_gid(client->context, atoi(qp->p[i].value));
+        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
+            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+        } else {
+            error_setg(errp, "Unknown NFS parameter name: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+    }
+
+    ret = nfs_mount(client->context, uri->server, uri->path);
+    if (ret < 0) {
+        error_setg(errp, "Failed to mount nfs share: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    if (flags & O_CREAT) {
+        ret = nfs_creat(client->context, file, 0600, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to create file: %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    } else {
+        ret = nfs_open(client->context, file, flags, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to open file : %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    }
+
+    ret = nfs_fstat(client->context, client->fh, &st);
+    if (ret < 0) {
+        error_setg(errp, "Failed to fstat file: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+    client->has_zero_init = S_ISREG(st.st_mode);
+    goto out;
+fail:
+    nfs_client_close(client);
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    g_free(file);
+    return ret;
+}
+
+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp) {
+    NFSClient *client = bs->opaque;
+    int64_t ret;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
+    }
+    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
+                          errp);
+    if (ret < 0) {
+        return ret;
+    }
+    bs->total_sectors = ret;
+    return 0;
+}
+
+static int nfs_file_create(const char *url, QEMUOptionParameter *options,
+                           Error **errp)
+{
+    int ret = 0;
+    int64_t total_size = 0;
+    NFSClient *client = g_malloc0(sizeof(NFSClient));
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, "size")) {
+            total_size = options->value.n;
+        }
+        options++;
+    }
+
+    ret = nfs_client_open(client, url, O_CREAT, errp);
+    if (ret < 0) {
+        goto out;
+    }
+    ret = nfs_ftruncate(client->context, client->fh, total_size);
+    nfs_client_close(client);
+out:
+    g_free(client);
+    return ret;
+}
+
+static int nfs_has_zero_init(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    return client->has_zero_init;
+}
+
+static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task = {0};
+    struct stat st;
+
+    task.st = &st;
+    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+                        &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_aio_wait();
+    }
+
+    return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
+}
+
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
+{
+    NFSClient *client = bs->opaque;
+    return nfs_ftruncate(client->context, client->fh, offset);
+}
+
+static BlockDriver bdrv_nfs = {
+    .format_name     = "nfs",
+    .protocol_name   = "nfs",
+
+    .instance_size   = sizeof(NFSClient),
+    .bdrv_needs_filename = true,
+    .bdrv_has_zero_init = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+    .bdrv_truncate = nfs_file_truncate,
+
+    .bdrv_file_open  = nfs_file_open,
+    .bdrv_close      = nfs_file_close,
+    .bdrv_create     = nfs_file_create,
+
+    .bdrv_co_readv         = nfs_co_readv,
+    .bdrv_co_writev        = nfs_co_writev,
+    .bdrv_co_flush_to_disk = nfs_co_flush,
+};
+
+static void nfs_block_init(void)
+{
+    bdrv_register(&bdrv_nfs);
+}
+
+block_init(nfs_block_init);
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -85,7 +85,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,

    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
        (le32_to_cpu(ph.version) != HEADER_VERSION)) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in Parallels format");
+        ret = -EINVAL;
        goto fail;
    }

--- a/block/qapi.c
+++ b/block/qapi.c
@@ -29,6 +29,60 @@
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"

+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+{
+    BlockDeviceInfo *info = g_malloc0(sizeof(*info));
+
+    info->file                   = g_strdup(bs->filename);
+    info->ro                     = bs->read_only;
+    info->drv                    = g_strdup(bs->drv->format_name);
+    info->encrypted              = bs->encrypted;
+    info->encryption_key_missing = bdrv_key_required(bs);
+
+    if (bs->node_name[0]) {
+        info->has_node_name = true;
+        info->node_name = g_strdup(bs->node_name);
+    }
+
+    if (bs->backing_file[0]) {
+        info->has_backing_file = true;
+        info->backing_file = g_strdup(bs->backing_file);
+    }
+
+    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+    if (bs->io_limits_enabled) {
+        ThrottleConfig cfg;
+        throttle_get_config(&bs->throttle_state, &cfg);
+        info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
+        info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
+        info->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
+
+        info->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
+        info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
+        info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
+
+        info->has_bps_max     = cfg.buckets[THROTTLE_BPS_TOTAL].max;
+        info->bps_max         = cfg.buckets[THROTTLE_BPS_TOTAL].max;
+        info->has_bps_rd_max  = cfg.buckets[THROTTLE_BPS_READ].max;
+        info->bps_rd_max      = cfg.buckets[THROTTLE_BPS_READ].max;
+        info->has_bps_wr_max  = cfg.buckets[THROTTLE_BPS_WRITE].max;
+        info->bps_wr_max      = cfg.buckets[THROTTLE_BPS_WRITE].max;
+
+        info->has_iops_max    = cfg.buckets[THROTTLE_OPS_TOTAL].max;
+        info->iops_max        = cfg.buckets[THROTTLE_OPS_TOTAL].max;
+        info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
+        info->iops_rd_max     = cfg.buckets[THROTTLE_OPS_READ].max;
+        info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+        info->iops_wr_max     = cfg.buckets[THROTTLE_OPS_WRITE].max;
+
+        info->has_iops_size = cfg.op_size;
+        info->iops_size = cfg.op_size;
+    }
+
+    return info;
+}
+
 /*
 * Returns 0 on success, with *p_list either set to describe snapshot
 * information, or NULL because there are no snapshots.  Returns -errno on
@@ -211,66 +265,13 @@ void bdrv_query_info(BlockDriverState *bs,

    if (bs->drv) {
        info->has_inserted = true;
-        info->inserted = g_malloc0(sizeof(*info->inserted));
-        info->inserted->file = g_strdup(bs->filename);
-        info->inserted->ro = bs->read_only;
-        info->inserted->drv = g_strdup(bs->drv->format_name);
-        info->inserted->encrypted = bs->encrypted;
-        info->inserted->encryption_key_missing = bdrv_key_required(bs);
-
-        if (bs->backing_file[0]) {
-            info->inserted->has_backing_file = true;
-            info->inserted->backing_file = g_strdup(bs->backing_file);
-        }
-
-        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
-
-        if (bs->io_limits_enabled) {
-            ThrottleConfig cfg;
-            throttle_get_config(&bs->throttle_state, &cfg);
-            info->inserted->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
-            info->inserted->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
-            info->inserted->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
-
-            info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
-            info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
-            info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
-
-            info->inserted->has_bps_max     =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->bps_max         =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->has_bps_rd_max  =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->bps_rd_max      =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->has_bps_wr_max  =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-            info->inserted->bps_wr_max      =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-
-            info->inserted->has_iops_max    =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->iops_max        =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->has_iops_rd_max =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->iops_rd_max     =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->has_iops_wr_max =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-            info->inserted->iops_wr_max     =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-
-            info->inserted->has_iops_size = cfg.op_size;
-            info->inserted->iops_size = cfg.op_size;
-        }
+        info->inserted = bdrv_block_device_info(bs);

        bs0 = bs;
        p_image_info = &info->inserted->image;
        while (1) {
            bdrv_query_image_info(bs0, p_image_info, &local_err);
-            if (error_is_set(&local_err)) {
+            if (local_err) {
                error_propagate(errp, local_err);
                goto err;
            }
@@ -318,6 +319,11 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs)
        s->parent = bdrv_query_stats(bs->file);
    }

+    if (bs->backing_hd) {
+        s->has_backing = true;
+        s->backing = bdrv_query_stats(bs->backing_hd);
+    }
+
    return s;
 }

@@ -330,7 +336,7 @@ BlockInfoList *qmp_query_block(Error **errp)
     while ((bs = bdrv_next(bs))) {
        BlockInfoList *info = g_malloc0(sizeof(*info));
        bdrv_query_info(bs, &info->value, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            goto err;
        }
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -113,23 +113,26 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in qcow format");
+        ret = -EINVAL;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
        snprintf(version, sizeof(version), "QCOW version %d", header.version);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bs->device_name, "qcow", version);
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                  bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }

    if (header.size <= 1 || header.cluster_bits < 9) {
+        error_setg(errp, "invalid value in qcow header");
        ret = -EINVAL;
        goto fail;
    }
    if (header.crypt_method > QCOW_CRYPT_AES) {
+        error_setg(errp, "invalid encryption method in qcow header");
        ret = -EINVAL;
        goto fail;
    }
@@ -686,15 +689,15 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,

    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    qcow_bs = NULL;
+    ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1182,7 +1182,7 @@ fail:
 * Return 0 on success and -errno in error cases
 */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+    int *num, uint64_t *host_offset, QCowL2Meta **m)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t start, remaining;
@@ -1190,15 +1190,13 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    uint64_t cur_bytes;
    int ret;

-    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
-                                      n_start, n_end);
+    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);

-    assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
-    offset = start_of_cluster(s, offset);
+    assert((offset & ~BDRV_SECTOR_MASK) == 0);

 again:
-    start = offset + (n_start << BDRV_SECTOR_BITS);
-    remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+    start = offset;
+    remaining = *num << BDRV_SECTOR_BITS;
    cluster_offset = 0;
    *host_offset = 0;
    cur_bytes = 0;
@@ -1284,7 +1282,7 @@ again:
        }
    }

-    *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+    *num -= remaining >> BDRV_SECTOR_BITS;
    assert(*num > 0);
    assert(*host_offset != 0);

@@ -1369,13 +1367,31 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        uint64_t old_offset;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);
-        if ((old_offset & L2E_OFFSET_MASK) == 0) {
+
+        /*
+         * Make sure that a discarded area reads back as zeroes for v3 images
+         * (we cannot do it for v2 without actually writing a zero-filled
+         * buffer). We can skip the operation if the cluster is already marked
+         * as zero, or if it's unallocated and we don't have a backing file.
+         *
+         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
+         * holding s->lock, so that doesn't work today.
+         */
+        if (old_offset & QCOW_OFLAG_ZERO) {
+            continue;
+        }
+
+        if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
            continue;
        }

        /* First remove L2 entries */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-        l2_table[l2_index + i] = cpu_to_be64(0);
+        if (s->qcow_version >= 3) {
+            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+        } else {
+            l2_table[l2_index + i] = cpu_to_be64(0);
+        }

        /* Then decrease the refcount */
        qcow2_free_any_clusters(bs, old_offset, 1, type);
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -676,7 +676,13 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
    BDRVQcowState *s = bs->opaque;
    uint64_t cluster_index;
    uint64_t old_free_cluster_index;
-    int i, refcount, ret;
+    uint64_t i;
+    int refcount, ret;
+
+    assert(nb_clusters >= 0);
+    if (nb_clusters == 0) {
+        return 0;
+    }

    /* Check how many clusters there are free */
    cluster_index = offset >> s->cluster_bits;
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -606,7 +606,8 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    s->nb_snapshots--;
    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
-        error_setg(errp, "Failed to remove snapshot from snapshot list");
+        error_setg_errno(errp, -ret,
+                         "Failed to remove snapshot from snapshot list");
        return ret;
    }

@@ -624,7 +625,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
                                         sn.l1_size, -1);
    if (ret < 0) {
-        error_setg(errp, "Failed to free the cluster and L1 table");
+        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
        return ret;
    }
    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
@@ -633,7 +634,8 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    /* must update the copied flag on the current cluster offsets */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
-        error_setg(errp, "Failed to update snapshot status in disk");
+        error_setg_errno(errp, -ret,
+                         "Failed to update snapshot status in disk");
        return ret;
    }

--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -449,7 +449,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
-        ret = -EMEDIUMTYPE;
+        ret = -EINVAL;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
@@ -669,9 +669,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Enable lazy_refcounts according to image and command line options */
-    opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
+    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -718,7 +718,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }

    qemu_opts_del(opts);
-    bs->bl.write_zeroes_alignment = s->cluster_sectors;

    if (s->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
@@ -751,6 +750,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    return ret;
 }

+static int qcow2_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    bs->bl.write_zeroes_alignment = s->cluster_sectors;
+
+    return 0;
+}
+
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
 {
    BDRVQcowState *s = bs->opaque;
@@ -992,7 +1000,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 {
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
-    int n_end;
    int ret;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t cluster_offset;
@@ -1016,14 +1023,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n_end = index_in_cluster + remaining_sectors;
+        cur_nr_sectors = remaining_sectors;
        if (s->crypt_method &&
-            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
-            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+            cur_nr_sectors >
+            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
+            cur_nr_sectors =
+                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
        }

        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
+            &cur_nr_sectors, &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }
@@ -1395,34 +1404,34 @@ static int preallocate(BlockDriverState *bs)
    int ret;
    QCowL2Meta *meta;

-    nb_sectors = bdrv_getlength(bs) >> 9;
+    nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    offset = 0;

    while (nb_sectors) {
-        num = MIN(nb_sectors, INT_MAX >> 9);
-        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+        num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &num,
                                         &host_offset, &meta);
        if (ret < 0) {
            return ret;
        }

-        ret = qcow2_alloc_cluster_link_l2(bs, meta);
-        if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
-                                    QCOW2_DISCARD_NEVER);
-            return ret;
-        }
-
-        /* There are no dependent requests, but we need to remove our request
-         * from the list of in-flight requests */
        if (meta != NULL) {
+            ret = qcow2_alloc_cluster_link_l2(bs, meta);
+            if (ret < 0) {
+                qcow2_free_any_clusters(bs, meta->alloc_offset,
+                                        meta->nb_clusters, QCOW2_DISCARD_NEVER);
+                return ret;
+            }
+
+            /* There are no dependent requests, but we need to remove our
+             * request from the list of in-flight requests */
            QLIST_REMOVE(meta, next_in_flight);
        }

        /* TODO Preallocate data if requested */

        nb_sectors -= num;
-        offset += num << 9;
+        offset += num << BDRV_SECTOR_BITS;
    }

    /*
@@ -1431,9 +1440,10 @@ static int preallocate(BlockDriverState *bs)
     * EOF). Extend the image to the last allocated sector.
     */
    if (host_offset != 0) {
-        uint8_t buf[512];
-        memset(buf, 0, 512);
-        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
+        uint8_t buf[BDRV_SECTOR_SIZE];
+        memset(buf, 0, BDRV_SECTOR_SIZE);
+        ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+                         buf, 1);
        if (ret < 0) {
            return ret;
        }
@@ -1483,7 +1493,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        return ret;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return ret;
@@ -1533,7 +1545,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    bdrv_close(bs);
+    bdrv_unref(bs);
+    bs = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
@@ -1542,7 +1555,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     */
    BlockDriver* drv = bdrv_find_format("qcow2");
    assert(drv != NULL);
-    ret = bdrv_open(bs, filename, NULL,
+    ret = bdrv_open(&bs, filename, NULL, NULL,
        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
@@ -1589,20 +1602,23 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        }
    }

-    bdrv_close(bs);
+    bdrv_unref(bs);
+    bs = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
-    ret = bdrv_open(bs, filename, NULL,
+    ret = bdrv_open(&bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
                    drv, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    ret = 0;
 out:
-    bdrv_unref(bs);
+    if (bs) {
+        bdrv_unref(bs);
+    }
    return ret;
 }

@@ -1675,32 +1691,12 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,

    ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
                        cluster_size, prealloc, options, version, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
 }

-static int qcow2_make_empty(BlockDriverState *bs)
-{
-#if 0
-    /* XXX: not correct */
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
-        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    l2_cache_reset(bs);
-#endif
-    return 0;
-}
-
 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
@@ -2244,7 +2240,6 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = qcow2_co_get_block_status,
    .bdrv_set_key       = qcow2_set_key,
-    .bdrv_make_empty    = qcow2_make_empty,

    .bdrv_co_readv          = qcow2_co_readv,
    .bdrv_co_writev         = qcow2_co_writev,
@@ -2268,6 +2263,7 @@ static BlockDriver bdrv_qcow2 = {

    .bdrv_change_backing_file   = qcow2_change_backing_file,

+    .bdrv_refresh_limits        = qcow2_refresh_limits,
    .bdrv_invalidate_cache      = qcow2_invalidate_cache,

    .create_options = qcow2_create_options,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -340,11 +340,11 @@ typedef enum QCow2MetadataOverlap {
 #define QCOW2_OL_ALL \
    (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)

-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL

-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL

 static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
 {
@@ -468,7 +468,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
+    int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         uint64_t offset,
                                         int compressed_size);
--- a/block/qed.c
+++ b/block/qed.c
@@ -391,14 +391,15 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
-        return -EMEDIUMTYPE;
+        error_setg(errp, "Image not in QED format");
+        return -EINVAL;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
@@ -495,7 +496,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

-    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                            qed_need_check_timer_cb, s);

@@ -507,6 +507,15 @@ out:
    return ret;
 }

+static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
+
+    return 0;
+}
+
 /* We have nothing to do for QED reopen, stubs just return
 * success */
 static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -537,7 +546,8 @@ static void bdrv_qed_close(BlockDriverState *bs)

 static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
-                      const char *backing_file, const char *backing_fmt)
+                      const char *backing_file, const char *backing_fmt,
+                      Error **errp)
 {
    QEDHeader header = {
        .magic = QED_MAGIC,
@@ -554,20 +564,20 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    size_t l1_size = header.cluster_size * header.table_size;
    Error *local_err = NULL;
    int ret = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;

    ret = bdrv_create_file(filename, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB,
-                         &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
+                    &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

@@ -657,7 +667,7 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
    }

    return qed_create(filename, cluster_size, image_size, table_size,
-                      backing_file, backing_fmt);
+                      backing_file, backing_fmt, errp);
 }

 typedef struct {
@@ -723,11 +733,6 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
    return cb.status;
 }

-static int bdrv_qed_make_empty(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
 static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 {
    return acb->common.bs->opaque;
@@ -1609,13 +1614,13 @@ static BlockDriver bdrv_qed = {
    .bdrv_create              = bdrv_qed_create,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
-    .bdrv_make_empty          = bdrv_qed_make_empty,
    .bdrv_aio_readv           = bdrv_qed_aio_readv,
    .bdrv_aio_writev          = bdrv_qed_aio_writev,
    .bdrv_co_write_zeroes     = bdrv_qed_co_write_zeroes,
    .bdrv_truncate            = bdrv_qed_truncate,
    .bdrv_getlength           = bdrv_qed_getlength,
    .bdrv_get_info            = bdrv_qed_get_info,
+    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
    .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
    .bdrv_check               = bdrv_qed_check,
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -0,0 +1,873 @@
+/*
+ * Quorum Block filter
+ *
+ * Copyright (C) 2012-2014 Nodalink, EURL.
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
+ * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+#include "block/block_int.h"
+#include "qapi/qmp/qjson.h"
+
+#define HASH_LENGTH 32
+
+#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
+#define QUORUM_OPT_BLKVERIFY      "blkverify"
+
+/* This union holds a vote hash value */
+typedef union QuorumVoteValue {
+    char h[HASH_LENGTH];       /* SHA-256 hash */
+    int64_t l;                 /* simpler 64 bits hash */
+} QuorumVoteValue;
+
+/* A vote item */
+typedef struct QuorumVoteItem {
+    int index;
+    QLIST_ENTRY(QuorumVoteItem) next;
+} QuorumVoteItem;
+
+/* this structure is a vote version. A version is the set of votes sharing the
+ * same vote value.
+ * The set of votes will be tracked with the items field and its cardinality is
+ * vote_count.
+ */
+typedef struct QuorumVoteVersion {
+    QuorumVoteValue value;
+    int index;
+    int vote_count;
+    QLIST_HEAD(, QuorumVoteItem) items;
+    QLIST_ENTRY(QuorumVoteVersion) next;
+} QuorumVoteVersion;
+
+/* this structure holds a group of vote versions together */
+typedef struct QuorumVotes {
+    QLIST_HEAD(, QuorumVoteVersion) vote_list;
+    bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
+} QuorumVotes;
+
+/* the following structure holds the state of one quorum instance */
+typedef struct BDRVQuorumState {
+    BlockDriverState **bs; /* children BlockDriverStates */
+    int num_children;      /* children count */
+    int threshold;         /* if less than threshold children reads gave the
+                            * same result a quorum error occurs.
+                            */
+    bool is_blkverify;     /* true if the driver is in blkverify mode
+                            * Writes are mirrored on two children devices.
+                            * On reads the two children devices' contents are
+                            * compared and if a difference is spotted its
+                            * location is printed and the code aborts.
+                            * It is useful to debug other block drivers by
+                            * comparing them with a reference one.
+                            */
+} BDRVQuorumState;
+
+typedef struct QuorumAIOCB QuorumAIOCB;
+
+/* Quorum will create one instance of the following structure per operation it
+ * performs on its children.
+ * So for each read/write operation coming from the upper layer there will be
+ * $children_count QuorumChildRequest.
+ */
+typedef struct QuorumChildRequest {
+    BlockDriverAIOCB *aiocb;
+    QEMUIOVector qiov;
+    uint8_t *buf;
+    int ret;
+    QuorumAIOCB *parent;
+} QuorumChildRequest;
+
+/* Quorum will use the following structure to track progress of each read/write
+ * operation received by the upper layer.
+ * This structure hold pointers to the QuorumChildRequest structures instances
+ * used to do operations on each children and track overall progress.
+ */
+struct QuorumAIOCB {
+    BlockDriverAIOCB common;
+
+    /* Request metadata */
+    uint64_t sector_num;
+    int nb_sectors;
+
+    QEMUIOVector *qiov;         /* calling IOV */
+
+    QuorumChildRequest *qcrs;   /* individual child requests */
+    int count;                  /* number of completed AIOCB */
+    int success_count;          /* number of successfully completed AIOCB */
+
+    QuorumVotes votes;
+
+    bool is_read;
+    int vote_ret;
+};
+
+static void quorum_vote(QuorumAIOCB *acb);
+
+static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;
+
+    /* cancel all callbacks */
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_aio_cancel(acb->qcrs[i].aiocb);
+    }
+
+    g_free(acb->qcrs);
+    qemu_aio_release(acb);
+}
+
+static AIOCBInfo quorum_aiocb_info = {
+    .aiocb_size         = sizeof(QuorumAIOCB),
+    .cancel             = quorum_aio_cancel,
+};
+
+static void quorum_aio_finalize(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i, ret = 0;
+
+    if (acb->vote_ret) {
+        ret = acb->vote_ret;
+    }
+
+    acb->common.cb(acb->common.opaque, ret);
+
+    if (acb->is_read) {
+        for (i = 0; i < s->num_children; i++) {
+            qemu_vfree(acb->qcrs[i].buf);
+            qemu_iovec_destroy(&acb->qcrs[i].qiov);
+        }
+    }
+
+    g_free(acb->qcrs);
+    qemu_aio_release(acb);
+}
+
+static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return !memcmp(a->h, b->h, HASH_LENGTH);
+}
+
+static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return a->l == b->l;
+}
+
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+                                   BlockDriverState *bs,
+                                   QEMUIOVector *qiov,
+                                   uint64_t sector_num,
+                                   int nb_sectors,
+                                   BlockDriverCompletionFunc *cb,
+                                   void *opaque)
+{
+    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    int i;
+
+    acb->common.bs->opaque = s;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->qiov = qiov;
+    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
+    acb->count = 0;
+    acb->success_count = 0;
+    acb->votes.compare = quorum_sha256_compare;
+    QLIST_INIT(&acb->votes.vote_list);
+    acb->is_read = false;
+    acb->vote_ret = 0;
+
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].buf = NULL;
+        acb->qcrs[i].ret = 0;
+        acb->qcrs[i].parent = acb;
+    }
+
+    return acb;
+}
+
+static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+{
+    QObject *data;
+    assert(node_name);
+    data = qobject_from_jsonf("{ 'node-name': %s"
+                              ", 'sector-num': %" PRId64
+                              ", 'sectors-count': %d }",
+                              node_name, acb->sector_num, acb->nb_sectors);
+    if (ret < 0) {
+        QDict *dict = qobject_to_qdict(data);
+        qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
+    }
+    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
+    qobject_decref(data);
+}
+
+static void quorum_report_failure(QuorumAIOCB *acb)
+{
+    QObject *data;
+    const char *reference = acb->common.bs->device_name[0] ?
+                            acb->common.bs->device_name :
+                            acb->common.bs->node_name;
+    data = qobject_from_jsonf("{ 'reference': %s"
+                              ", 'sector-num': %" PRId64
+                              ", 'sectors-count': %d }",
+                              reference, acb->sector_num, acb->nb_sectors);
+    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
+    qobject_decref(data);
+}
+
+static int quorum_vote_error(QuorumAIOCB *acb);
+
+static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    if (acb->success_count < s->threshold) {
+        acb->vote_ret = quorum_vote_error(acb);
+        quorum_report_failure(acb);
+        return true;
+    }
+
+    return false;
+}
+
+static void quorum_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    sacb->ret = ret;
+    acb->count++;
+    if (ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
+    }
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+    if (acb->count < s->num_children) {
+        return;
+    }
+
+    /* Do the vote on read */
+    if (acb->is_read) {
+        quorum_vote(acb);
+    } else {
+        quorum_has_too_much_io_failed(acb);
+    }
+
+    quorum_aio_finalize(acb);
+}
+
+static void quorum_report_bad_versions(BDRVQuorumState *s,
+                                       QuorumAIOCB *acb,
+                                       QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+        }
+    }
+}
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+    for (i = 0; i < source->niov; i++) {
+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+    }
+}
+
+static void quorum_count_vote(QuorumVotes *votes,
+                              QuorumVoteValue *value,
+                              int index)
+{
+    QuorumVoteVersion *v = NULL, *version = NULL;
+    QuorumVoteItem *item;
+
+    /* look if we have something with this hash */
+    QLIST_FOREACH(v, &votes->vote_list, next) {
+        if (votes->compare(&v->value, value)) {
+            version = v;
+            break;
+        }
+    }
+
+    /* It's a version not yet in the list add it */
+    if (!version) {
+        version = g_new0(QuorumVoteVersion, 1);
+        QLIST_INIT(&version->items);
+        memcpy(&version->value, value, sizeof(version->value));
+        version->index = index;
+        version->vote_count = 0;
+        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
+    }
+
+    version->vote_count++;
+
+    item = g_new0(QuorumVoteItem, 1);
+    item->index = index;
+    QLIST_INSERT_HEAD(&version->items, item, next);
+}
+
+static void quorum_free_vote_list(QuorumVotes *votes)
+{
+    QuorumVoteVersion *version, *next_version;
+    QuorumVoteItem *item, *next_item;
+
+    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
+        QLIST_REMOVE(version, next);
+        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
+            QLIST_REMOVE(item, next);
+            g_free(item);
+        }
+        g_free(version);
+    }
+}
+
+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
+{
+    int j, ret;
+    gnutls_hash_hd_t dig;
+    QEMUIOVector *qiov = &acb->qcrs[i].qiov;
+
+    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    for (j = 0; j < qiov->niov; j++) {
+        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
+        if (ret < 0) {
+            break;
+        }
+    }
+
+    gnutls_hash_deinit(dig, (void *) hash);
+    return ret;
+}
+
+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
+{
+    int max = 0;
+    QuorumVoteVersion *candidate, *winner = NULL;
+
+    QLIST_FOREACH(candidate, &votes->vote_list, next) {
+        if (candidate->vote_count > max) {
+            max = candidate->vote_count;
+            winner = candidate;
+        }
+    }
+
+    return winner;
+}
+
+/* qemu_iovec_compare is handy for blkverify mode because it returns the first
+ * differing byte location. Yet it is handcoded to compare vectors one byte
+ * after another so it does not benefit from the libc SIMD optimizations.
+ * quorum_iovec_compare is written for speed and should be used in the non
+ * blkverify mode of quorum.
+ */
+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int i;
+    int result;
+
+    assert(a->niov == b->niov);
+    for (i = 0; i < a->niov; i++) {
+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
+        result = memcmp(a->iov[i].iov_base,
+                        b->iov[i].iov_base,
+                        a->iov[i].iov_len);
+        if (result) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
+                                          const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+static bool quorum_compare(QuorumAIOCB *acb,
+                           QEMUIOVector *a,
+                           QEMUIOVector *b)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    ssize_t offset;
+
+    /* This driver will replace blkverify in this particular case */
+    if (s->is_blkverify) {
+        offset = qemu_iovec_compare(a, b);
+        if (offset != -1) {
+            quorum_err(acb, "contents mismatch in sector %" PRId64,
+                       acb->sector_num +
+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
+        }
+        return true;
+    }
+
+    return quorum_iovec_compare(a, b);
+}
+
+/* Do a vote to get the error code */
+static int quorum_vote_error(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumVoteVersion *winner = NULL;
+    QuorumVotes error_votes;
+    QuorumVoteValue result_value;
+    int i, ret = 0;
+    bool error = false;
+
+    QLIST_INIT(&error_votes.vote_list);
+    error_votes.compare = quorum_64bits_compare;
+
+    for (i = 0; i < s->num_children; i++) {
+        ret = acb->qcrs[i].ret;
+        if (ret) {
+            error = true;
+            result_value.l = ret;
+            quorum_count_vote(&error_votes, &result_value, i);
+        }
+    }
+
+    if (error) {
+        winner = quorum_get_vote_winner(&error_votes);
+        ret = winner->value.l;
+    }
+
+    quorum_free_vote_list(&error_votes);
+
+    return ret;
+}
+
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    bool quorum = true;
+    int i, j, ret;
+    QuorumVoteValue hash;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumVoteVersion *winner;
+
+    if (quorum_has_too_much_io_failed(acb)) {
+        return;
+    }
+
+    /* get the index of the first successful read */
+    for (i = 0; i < s->num_children; i++) {
+        if (!acb->qcrs[i].ret) {
+            break;
+        }
+    }
+
+    assert(i < s->num_children);
+
+    /* compare this read with all other successful reads stopping at quorum
+     * failure
+     */
+    for (j = i + 1; j < s->num_children; j++) {
+        if (acb->qcrs[j].ret) {
+            continue;
+        }
+        quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov);
+        if (!quorum) {
+            break;
+       }
+    }
+
+    /* Every successful read agrees */
+    if (quorum) {
+        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
+        return;
+    }
+
+    /* compute hashes for each successful read, also store indexes */
+    for (i = 0; i < s->num_children; i++) {
+        if (acb->qcrs[i].ret) {
+            continue;
+        }
+        ret = quorum_compute_hash(acb, i, &hash);
+        /* if ever the hash computation failed */
+        if (ret < 0) {
+            acb->vote_ret = ret;
+            goto free_exit;
+        }
+        quorum_count_vote(&acb->votes, &hash, i);
+    }
+
+    /* vote to select the most represented version */
+    winner = quorum_get_vote_winner(&acb->votes);
+
+    /* if the winner count is smaller than threshold the read fails */
+    if (winner->vote_count < s->threshold) {
+        quorum_report_failure(acb);
+        acb->vote_ret = -EIO;
+        goto free_exit;
+    }
+
+    /* we have a winner: copy it */
+    quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov);
+
+    /* some versions are bad print them */
+    quorum_report_bad_versions(s, acb, &winner->value);
+
+free_exit:
+    /* free lists */
+    quorum_free_vote_list(&acb->votes);
+}
+
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
+    int i;
+
+    acb->is_read = true;
+
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
+        qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
+        qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
+    }
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
+                       quorum_aio_cb, &acb->qcrs[i]);
+    }
+
+    return &acb->common;
+}
+
+static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+                                      cb, opaque);
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
+                                             nb_sectors, &quorum_aio_cb,
+                                             &acb->qcrs[i]);
+    }
+
+    return &acb->common;
+}
+
+static int64_t quorum_getlength(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int64_t result;
+    int i;
+
+    /* check that all file have the same length */
+    result = bdrv_getlength(s->bs[0]);
+    if (result < 0) {
+        return result;
+    }
+    for (i = 1; i < s->num_children; i++) {
+        int64_t value = bdrv_getlength(s->bs[i]);
+        if (value < 0) {
+            return value;
+        }
+        if (value != result) {
+            return -EIO;
+        }
+    }
+
+    return result;
+}
+
+static void quorum_invalidate_cache(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_invalidate_cache(s->bs[i]);
+    }
+}
+
+static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumVoteVersion *winner = NULL;
+    QuorumVotes error_votes;
+    QuorumVoteValue result_value;
+    int i;
+    int result = 0;
+
+    QLIST_INIT(&error_votes.vote_list);
+    error_votes.compare = quorum_64bits_compare;
+
+    for (i = 0; i < s->num_children; i++) {
+        result = bdrv_co_flush(s->bs[i]);
+        result_value.l = result;
+        quorum_count_vote(&error_votes, &result_value, i);
+    }
+
+    winner = quorum_get_vote_winner(&error_votes);
+    result = winner->value.l;
+
+    quorum_free_vote_list(&error_votes);
+
+    return result;
+}
+
+static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
+                                               BlockDriverState *candidate)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+                                                     candidate);
+        if (perm) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
+{
+
+    if (threshold < 1) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "vote-threshold", "value >= 1");
+        return -ERANGE;
+    }
+
+    if (threshold > num_children) {
+        error_setg(errp, "threshold may not exceed children count");
+        return -ERANGE;
+    }
+
+    return 0;
+}
+
+static QemuOptsList quorum_runtime_opts = {
+    .name = "quorum",
+    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
+    .desc = {
+        {
+            .name = QUORUM_OPT_VOTE_THRESHOLD,
+            .type = QEMU_OPT_NUMBER,
+            .help = "The number of vote needed for reaching quorum",
+        },
+        {
+            .name = QUORUM_OPT_BLKVERIFY,
+            .type = QEMU_OPT_BOOL,
+            .help = "Trigger block verify mode if set",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
+                       Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    Error *local_err = NULL;
+    QemuOpts *opts;
+    bool *opened;
+    QDict *sub = NULL;
+    QList *list = NULL;
+    const QListEntry *lentry;
+    int i;
+    int ret = 0;
+
+    qdict_flatten(options);
+    qdict_extract_subqdict(options, &sub, "children.");
+    qdict_array_split(sub, &list);
+
+    if (qdict_size(sub)) {
+        error_setg(&local_err, "Invalid option children.%s",
+                   qdict_first(sub)->key);
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* count how many different children are present */
+    s->num_children = qlist_size(list);
+    if (s->num_children < 2) {
+        error_setg(&local_err,
+                   "Number of provided children must be greater than 1");
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+
+    /* and validate it against s->num_children */
+    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* is the driver in blkverify mode */
+    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+        s->num_children == 2 && s->threshold == 2) {
+        s->is_blkverify = true;
+    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                "and using two files with vote_threshold=2\n");
+    }
+
+    /* allocate the children BlockDriverState array */
+    s->bs = g_new0(BlockDriverState *, s->num_children);
+    opened = g_new0(bool, s->num_children);
+
+    for (i = 0, lentry = qlist_first(list); lentry;
+         lentry = qlist_next(lentry), i++) {
+        QDict *d;
+        QString *string;
+
+        switch (qobject_type(lentry->value))
+        {
+            /* List of options */
+            case QTYPE_QDICT:
+                d = qobject_to_qdict(lentry->value);
+                QINCREF(d);
+                ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL,
+                                &local_err);
+                break;
+
+            /* QMP reference */
+            case QTYPE_QSTRING:
+                string = qobject_to_qstring(lentry->value);
+                ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
+                                flags, NULL, &local_err);
+                break;
+
+            default:
+                error_setg(&local_err, "Specification of child block device %i "
+                           "is invalid", i);
+                ret = -EINVAL;
+        }
+
+        if (ret < 0) {
+            goto close_exit;
+        }
+        opened[i] = true;
+    }
+
+    g_free(opened);
+    goto exit;
+
+close_exit:
+    /* cleanup on error */
+    for (i = 0; i < s->num_children; i++) {
+        if (!opened[i]) {
+            continue;
+        }
+        bdrv_unref(s->bs[i]);
+    }
+    g_free(s->bs);
+    g_free(opened);
+exit:
+    /* propagate error */
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+    }
+    QDECREF(list);
+    QDECREF(sub);
+    return ret;
+}
+
+static void quorum_close(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_unref(s->bs[i]);
+    }
+
+    g_free(s->bs);
+}
+
+static BlockDriver bdrv_quorum = {
+    .format_name        = "quorum",
+    .protocol_name      = "quorum",
+
+    .instance_size      = sizeof(BDRVQuorumState),
+
+    .bdrv_file_open     = quorum_open,
+    .bdrv_close         = quorum_close,
+
+    .authorizations     = { true, true },
+
+    .bdrv_co_flush_to_disk = quorum_co_flush,
+
+    .bdrv_getlength     = quorum_getlength,
+
+    .bdrv_aio_readv     = quorum_aio_readv,
+    .bdrv_aio_writev    = quorum_aio_writev,
+    .bdrv_invalidate_cache = quorum_invalidate_cache,
+
+    .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+};
+
+static void bdrv_quorum_init(void)
+{
+    bdrv_register(&bdrv_quorum);
+}
+
+block_init(bdrv_quorum_init);
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
    int fd;
    int type;
    int open_flags;
+    size_t buf_align;
+
 #if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif

+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    char *buf;
+    unsigned int sector_size;
+
+    /* For /dev/sg devices the alignment is not really used.
+       With buffered I/O, we don't have any restrictions. */
+    if (bs->sg || !(s->open_flags & O_DIRECT)) {
+        bs->request_alignment = 1;
+        s->buf_align = 1;
+        return;
+    }
+
+    /* Try a few ioctls to get the right size */
+    bs->request_alignment = 0;
+    s->buf_align = 0;
+
+#ifdef BLKSSZGET
+    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DIOCGSECTORSIZE
+    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef CONFIG_XFS
+    if (s->is_xfs) {
+        struct dioattr da;
+        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+            bs->request_alignment = da.d_miniosz;
+            /* The kernel returns wrong information for d_mem */
+            /* s->buf_align = da.d_mem; */
+        }
+    }
+#endif
+
+    /* If we could not get the sizes so far, we can only guess them */
+    if (!s->buf_align) {
+        size_t align;
+        buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+            if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+                s->buf_align = align;
+                break;
+            }
+        }
+        qemu_vfree(buf);
+    }
+
+    if (!bs->request_alignment) {
+        size_t align;
+        buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+            if (pread(s->fd, buf, align, 0) >= 0) {
+                bs->request_alignment = align;
+                break;
+            }
+        }
+        qemu_vfree(buf);
+    }
+}
+
 static void raw_parse_flags(int bdrv_flags, int *open_flags)
 {
    assert(open_flags != NULL);
@@ -287,9 +359,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    int fd, ret;
    struct stat st;

-    opts = qemu_opts_create_nofail(&raw_runtime_opts);
+    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -376,7 +448,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    s->type = FTYPE_FILE;
    ret = raw_open_common(bs, options, flags, 0, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    return ret;
 }

-
 static void raw_reopen_commit(BDRVReopenState *state)
 {
    BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

+static int raw_refresh_limits(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;

-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
-        {
-            unsigned int sectorsize = 512;
-            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
-                sectorsize > bufsize)
-                bufsize = sectorsize;
-        }
-#endif
-#ifdef CONFIG_COCOA
-        uint32_t blockSize = 512;
-        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
-            bufsize = blockSize;
-        }
-#endif
-*/
+    raw_probe_alignment(bs);
+    bs->bl.opt_mem_alignment = s->buf_align;
+
+    return 0;
+}

 static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
 {
@@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
    .bdrv_aio_writev = raw_aio_writev,
    .bdrv_aio_flush = raw_aio_flush,
    .bdrv_aio_discard = raw_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate = raw_truncate,
    .bdrv_getlength = raw_getlength,
@@ -1533,7 +1597,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,

    ret = raw_open_common(bs, options, flags, 0, &local_err);
    if (ret < 0) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
    .bdrv_aio_writev	= raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_aio_discard   = hdev_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength	= raw_getlength,
@@ -1767,7 +1832,7 @@ static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
    if (ret) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -1895,7 +1961,7 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,

    /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -2011,7 +2078,7 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,

    ret = raw_open_common(bs, options, flags, 0, &local_err);
    if (ret) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }

+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+    DISK_GEOMETRY_EX dg;
+    BOOL status;
+
+    if (s->type == FTYPE_CD) {
+        bs->request_alignment = 2048;
+        return;
+    }
+    if (s->type == FTYPE_HARDDISK) {
+        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
+        if (status != 0) {
+            bs->request_alignment = dg.Geometry.BytesPerSector;
+            return;
+        }
+        /* try GetDiskFreeSpace too */
+    }
+
+    if (s->drive_path[0]) {
+        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
+                         &dg.Geometry.BytesPerSector,
+                         &freeClusters, &totalClusters);
+        bs->request_alignment = dg.Geometry.BytesPerSector;
+    }
+}
+
 static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
 {
    assert(access_flags != NULL);
@@ -248,9 +277,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    s->type = FTYPE_FILE;

-    opts = qemu_opts_create_nofail(&raw_runtime_opts);
+    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

+    if (filename[0] && filename[1] == ':') {
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+    } else if (filename[0] == '\\' && filename[1] == '\\') {
+        s->drive_path[0] = 0;
+    } else {
+        /* Relative path.  */
+        char buf[MAX_PATH];
+        GetCurrentDirectory(MAX_PATH, buf);
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+    }
+
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        s->aio = aio;
    }

+    raw_probe_alignment(bs);
    ret = 0;
 fail:
    qemu_opts_del(opts);
@@ -550,9 +591,10 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    const char *filename;

-    QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts);
+    QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
+                                      &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto done;
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -90,6 +90,12 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    return bdrv_get_info(bs->file, bdi);
 }

+static int raw_refresh_limits(BlockDriverState *bs)
+{
+    bs->bl = bs->file->bl;
+    return 0;
+}
+
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    return bdrv_truncate(bs->file, offset);
@@ -140,7 +146,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
    int ret;

    ret = bdrv_create_file(filename, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -150,7 +156,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    bs->sg = bs->file->sg;
-    bs->bl = bs->file->bl;
    return 0;
 }

@@ -182,6 +187,7 @@ static BlockDriver bdrv_raw = {
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
    .bdrv_get_info        = &raw_get_info,
+    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_is_inserted     = &raw_is_inserted,
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -95,18 +95,13 @@ typedef struct RADOSCB {
 #define RBD_FD_WRITE 1

 typedef struct BDRVRBDState {
-    int fds[2];
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char *snap;
-    int event_reader_pos;
-    RADOSCB *event_rcb;
 } BDRVRBDState;

-static void rbd_aio_bh_cb(void *opaque);
-
 static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
@@ -369,9 +364,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
 }

 /*
- * This aio completion is being called from qemu_rbd_aio_event_reader()
- * and runs in qemu context. It schedules a bh, but just in case the aio
- * was not cancelled before.
+ * This aio completion is being called from rbd_finish_bh() and runs in qemu
+ * BH context.
 */
 static void qemu_rbd_complete_aio(RADOSCB *rcb)
 {
@@ -401,36 +395,19 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
            acb->ret = r;
        }
    }
-    /* Note that acb->bh can be NULL in case where the aio was cancelled */
-    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
+
    g_free(rcb);
-}

-/*
- * aio fd read handler. It runs in the qemu context and calls the
- * completion handling of completed rados aio operations.
- */
-static void qemu_rbd_aio_event_reader(void *opaque)
-{
-    BDRVRBDState *s = opaque;
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+    acb->status = 0;

-    ssize_t ret;
-
-    do {
-        char *p = (char *)&s->event_rcb;
-
-        /* now read the rcb pointer that was sent from a non qemu thread */
-        ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_rcb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_rcb)) {
-                s->event_reader_pos = 0;
-                qemu_rbd_complete_aio(s->event_rcb);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }

 /* TODO Convert to fine grained options */
@@ -461,9 +438,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    const char *filename;
    int r;

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        qemu_opts_del(opts);
@@ -538,23 +515,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,

    bs->read_only = (s->snap != NULL);

-    s->event_reader_pos = 0;
-    r = qemu_pipe(s->fds);
-    if (r < 0) {
-        error_report("error opening eventfd");
-        goto failed;
-    }
-    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
-    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
-                            NULL, s);
-
-
    qemu_opts_del(opts);
    return 0;

-failed:
-    rbd_close(s->image);
 failed_open:
    rados_ioctx_destroy(s->io_ctx);
 failed_shutdown:
@@ -569,10 +532,6 @@ static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;

-    close(s->fds[0]);
-    close(s->fds[1]);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
-
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
@@ -600,34 +559,11 @@ static const AIOCBInfo rbd_aiocb_info = {
    .cancel = qemu_rbd_aio_cancel,
 };

-static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
+static void rbd_finish_bh(void *opaque)
 {
-    int ret = 0;
-    while (1) {
-        fd_set wfd;
-        int fd = s->fds[RBD_FD_WRITE];
-
-        /* send the op pointer to the qemu thread that is responsible
-           for the aio/op completion. Must do it in a qemu thread context */
-        ret = write(fd, (void *)&rcb, sizeof(rcb));
-        if (ret >= 0) {
-            break;
-        }
-        if (errno == EINTR) {
-            continue;
-        }
-        if (errno != EAGAIN) {
-            break;
-        }
-
-        FD_ZERO(&wfd);
-        FD_SET(fd, &wfd);
-        do {
-            ret = select(fd + 1, NULL, &wfd, NULL, NULL);
-        } while (ret < 0 && errno == EINTR);
-    }
-
-    return ret;
+    RADOSCB *rcb = opaque;
+    qemu_bh_delete(rcb->acb->bh);
+    qemu_rbd_complete_aio(rcb);
 }

 /*
@@ -635,40 +571,18 @@ static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
 *
 * Note: this function is being called from a non qemu thread so
 * we need to be careful about what we do here. Generally we only
- * write to the block notification pipe, and do the rest of the
- * io completion handling from qemu_rbd_aio_event_reader() which
- * runs in a qemu context.
+ * schedule a BH, and do the rest of the io completion handling
+ * from rbd_finish_bh() which runs in a qemu context.
 */
 static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
 {
-    int ret;
+    RBDAIOCB *acb = rcb->acb;
+
    rcb->ret = rbd_aio_get_return_value(c);
    rbd_aio_release(c);
-    ret = qemu_rbd_send_pipe(rcb->s, rcb);
-    if (ret < 0) {
-        error_report("failed writing to acb->s->fds");
-        g_free(rcb);
-    }
-}

-/* Callback when all queued rbd_aio requests are complete */
-
-static void rbd_aio_bh_cb(void *opaque)
-{
-    RBDAIOCB *acb = opaque;
-
-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    acb->status = 0;
-
-    if (!acb->cancelled) {
-        qemu_aio_release(acb);
-    }
+    acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+    qemu_bh_schedule(acb->bh);
 }

 static int rbd_aio_discard_wrapper(rbd_image_t image,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -161,7 +161,7 @@ typedef struct SheepdogVdiReq {
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;
-    uint32_t vdi_id;
+    uint32_t base_vdi_id;
    uint8_t copies;
    uint8_t copy_policy;
    uint8_t reserved[2];
@@ -1383,9 +1383,9 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,

    s->bs = bs;

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        ret = -EINVAL;
@@ -1493,7 +1493,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)

    memset(&hdr, 0, sizeof(hdr));
    hdr.opcode = SD_OP_NEW_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;

    wlen = SD_MAX_VDI_LEN;

@@ -1534,7 +1534,8 @@ static int sd_prealloc(const char *filename)
    Error *local_err = NULL;
    int ret;

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
@@ -1666,9 +1667,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
                goto out;
            }
        } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) {
-            ret = parse_redundancy(s, options->value.s);
-            if (ret < 0) {
-                goto out;
+            if (options->value.s) {
+                ret = parse_redundancy(s, options->value.s);
+                if (ret < 0) {
+                    goto out;
+                }
            }
        }
        options++;
@@ -1682,7 +1685,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,

    if (backing_file) {
        BlockDriverState *bs;
-        BDRVSheepdogState *s;
+        BDRVSheepdogState *base;
        BlockDriver *drv;

        /* Currently, only Sheepdog backing image is supported. */
@@ -1693,22 +1696,24 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
            goto out;
        }

-        ret = bdrv_file_open(&bs, backing_file, NULL, 0, &local_err);
+        bs = NULL;
+        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
+                        &local_err);
        if (ret < 0) {
            qerror_report_err(local_err);
            error_free(local_err);
            goto out;
        }

-        s = bs->opaque;
+        base = bs->opaque;

-        if (!is_snapshot(&s->inode)) {
+        if (!is_snapshot(&base->inode)) {
            error_report("cannot clone from a non snapshot vdi");
            bdrv_unref(bs);
            ret = -EINVAL;
            goto out;
        }
-
+        s->inode.vdi_id = base->inode.vdi_id;
        bdrv_unref(bs);
    }

@@ -1741,7 +1746,7 @@ static void sd_close(BlockDriverState *bs)
    memset(&hdr, 0, sizeof(hdr));

    hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;
    wlen = strlen(s->name) + 1;
    hdr.data_length = wlen;
    hdr.flags = SD_FLAG_CMD_WRITE;
@@ -1844,7 +1849,7 @@ static bool sd_delete(BDRVSheepdogState *s)
    unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
    SheepdogVdiReq hdr = {
        .opcode = SD_OP_DEL_VDI,
-        .vdi_id = s->inode.vdi_id,
+        .base_vdi_id = s->inode.vdi_id,
        .data_length = wlen,
        .flags = SD_FLAG_CMD_WRITE,
    };
@@ -2046,13 +2051,14 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
 {
    SheepdogAIOCB *acb;
    int ret;
+    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
+    BDRVSheepdogState *s = bs->opaque;

-    if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
-        ret = sd_truncate(bs, (sector_num + nb_sectors) * BDRV_SECTOR_SIZE);
+    if (bs->growable && offset > s->inode.vdi_size) {
+        ret = sd_truncate(bs, offset);
        if (ret < 0) {
            return ret;
        }
-        bs->total_sectors = sector_num + nb_sectors;
    }

    acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
@@ -2439,11 +2445,12 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 {
    BDRVSheepdogState *s = bs->opaque;
    SheepdogInode *inode = &s->inode;
-    unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE,
+    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+    unsigned long start = offset / SD_DATA_OBJ_SIZE,
                  end = DIV_ROUND_UP((sector_num + nb_sectors) *
                                     BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
    unsigned long idx;
-    int64_t ret = BDRV_BLOCK_DATA;
+    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;

    for (idx = start; idx < end; idx++) {
        if (inode->data_vdi_id[idx] == 0) {
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -345,7 +345,7 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
        ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }

--- a/block/stream.c
+++ b/block/stream.c
@@ -75,6 +75,8 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
        unused->backing_hd = NULL;
        bdrv_unref(unused);
    }
+
+    bdrv_refresh_limits(top);
 }

 static void coroutine_fn stream_run(void *opaque)
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -395,43 +395,50 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (header.signature != VDI_SIGNATURE) {
-        logout("bad vdi signature %08x\n", header.signature);
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature);
+        ret = -EINVAL;
        goto fail;
    } else if (header.version != VDI_VERSION_1_1) {
-        logout("unsupported version %u.%u\n",
-               header.version >> 16, header.version & 0xffff);
+        error_setg(errp, "unsupported VDI image (version %u.%u)",
+                   header.version >> 16, header.version & 0xffff);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.offset_bmap % SECTOR_SIZE != 0) {
        /* We only support block maps which start on a sector boundary. */
-        logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
+        error_setg(errp, "unsupported VDI image (unaligned block map offset "
+                   "0x%x)", header.offset_bmap);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.offset_data % SECTOR_SIZE != 0) {
        /* We only support data blocks which start on a sector boundary. */
-        logout("unsupported data offset 0x%x B\n", header.offset_data);
+        error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)",
+                   header.offset_data);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.sector_size != SECTOR_SIZE) {
-        logout("unsupported sector size %u B\n", header.sector_size);
+        error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
+                   header.sector_size, SECTOR_SIZE);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.block_size != 1 * MiB) {
-        logout("unsupported block size %u B\n", header.block_size);
+        error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
+                   header.block_size, 1 * MiB);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.disk_size >
               (uint64_t)header.blocks_in_image * header.block_size) {
-        logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
+        error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", "
+                   "image bitmap has room for %" PRIu64 ")",
+                   header.disk_size,
+                   (uint64_t)header.blocks_in_image * header.block_size);
        ret = -ENOTSUP;
        goto fail;
    } else if (!uuid_is_null(header.uuid_link)) {
-        logout("link uuid != 0, unsupported\n");
+        error_setg(errp, "unsupported VDI image (non-NULL link UUID)");
        ret = -ENOTSUP;
        goto fail;
    } else if (!uuid_is_null(header.uuid_parent)) {
-        logout("parent uuid != 0, unsupported\n");
+        error_setg(errp, "unsupported VDI image (non-NULL parent UUID)");
        ret = -ENOTSUP;
        goto fail;
    }
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -706,7 +706,8 @@ exit:
 *
 * If read-only, we must replay the log in RAM (or refuse to open
 * a dirty VHDX file read-only) */
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+                   Error **errp)
 {
    int ret = 0;
    VHDXHeader *hdr;
@@ -761,6 +762,16 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
    }

    if (logs.valid) {
+        if (bs->read_only) {
+            ret = -EPERM;
+            error_setg_errno(errp, EPERM,
+                             "VHDX image file '%s' opened read-only, but "
+                             "contains a log that needs to be replayed.  To "
+                             "replay the log, execute:\n qemu-img check -r "
+                             "all '%s'",
+                             bs->filename, bs->filename);
+            goto exit;
+        }
        /* now flush the log */
        ret = vhdx_log_flush(bs, s, &logs);
        if (ret < 0) {
@@ -954,8 +965,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
    cpu_to_le32s((uint32_t *)(buffer + 4));

    /* now write to the log */
-    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
-                           desc_sectors + sectors);
+    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                                 desc_sectors + sectors);
    if (ret < 0) {
        goto exit;
    }
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -374,7 +374,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
        inactive_header->log_guid = *log_guid;
    }

-    vhdx_write_header(bs->file, inactive_header, header_offset, true);
+    ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
    if (ret < 0) {
        goto exit;
    }
@@ -402,9 +402,10 @@ int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
 }

 /* opens the specified header block from the VHDX file header section */
-static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
+static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
+                              Error **errp)
 {
-    int ret = 0;
+    int ret;
    VHDXHeader *header1;
    VHDXHeader *header2;
    bool h1_valid = false;
@@ -462,7 +463,6 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
    } else if (!h1_valid && h2_valid) {
        s->curr_header = 1;
    } else if (!h1_valid && !h2_valid) {
-        ret = -EINVAL;
        goto fail;
    } else {
        /* If both headers are valid, then we choose the active one by the
@@ -473,27 +473,22 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
        } else if (h2_seq > h1_seq) {
            s->curr_header = 1;
        } else {
-            ret = -EINVAL;
            goto fail;
        }
    }

    vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
                            s->headers[s->curr_header]->log_length);
-
-    ret = 0;
-
    goto exit;

 fail:
-    qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
+    error_setg_errno(errp, -ret, "No valid VHDX header found");
    qemu_vfree(header1);
    qemu_vfree(header2);
    s->headers[0] = NULL;
    s->headers[1] = NULL;
 exit:
    qemu_vfree(buffer);
-    return ret;
 }


@@ -878,8 +873,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    int ret = 0;
    uint32_t i;
    uint64_t signature;
-    bool log_flushed = false;
-
+    Error *local_err = NULL;

    s->bat = NULL;
    s->first_visible_write = true;
@@ -902,12 +896,14 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     * header update */
    vhdx_guid_generate(&s->session_guid);

-    ret = vhdx_parse_header(bs, s);
-    if (ret < 0) {
+    vhdx_parse_header(bs, s, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto fail;
    }

-    ret = vhdx_parse_log(bs, s, &log_flushed);
+    ret = vhdx_parse_log(bs, s, &s->log_replayed_on_open, errp);
    if (ret < 0) {
        goto fail;
    }
@@ -1798,7 +1794,9 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
        goto exit;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto exit;
@@ -1811,13 +1809,13 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                              &creator_items, NULL);
    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
    if (ret < 0) {
        goto delete_and_exit;
    }
    if (creator) {
-        bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator,
-                    creator_items * sizeof(gunichar2));
+        ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
+                          creator, creator_items * sizeof(gunichar2));
        if (ret < 0) {
            goto delete_and_exit;
        }
@@ -1854,6 +1852,24 @@ exit:
    return ret;
 }

+/* If opened r/w, the VHDX driver will automatically replay the log,
+ * if one is present, inside the vhdx_open() call.
+ *
+ * If qemu-img check -r all is called, the image is automatically opened
+ * r/w and any log has already been replayed, so there is nothing (currently)
+ * for us to do here
+ */
+static int vhdx_check(BlockDriverState *bs, BdrvCheckResult *result,
+                       BdrvCheckMode fix)
+{
+    BDRVVHDXState *s = bs->opaque;
+
+    if (s->log_replayed_on_open) {
+        result->corruptions_fixed++;
+    }
+    return 0;
+}
+
 static QEMUOptionParameter vhdx_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
@@ -1898,6 +1914,7 @@ static BlockDriver bdrv_vhdx = {
    .bdrv_co_writev         = vhdx_co_writev,
    .bdrv_create            = vhdx_create,
    .bdrv_get_info          = vhdx_get_info,
+    .bdrv_check             = vhdx_check,

    .create_options         = vhdx_create_options,
 };
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -394,6 +394,8 @@ typedef struct BDRVVHDXState {

    Error *migration_blocker;

+    bool log_replayed_on_open;
+
    QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
 } BDRVVHDXState;

@@ -408,7 +410,8 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,

 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);

-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed);
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+                   Error **errp);

 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
                             void *data, uint32_t length, uint64_t offset);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -428,10 +428,6 @@ static int vmdk_add_extent(BlockDriverState *bs,
    extent->l2_size = l2_size;
    extent->cluster_sectors = flat ? sectors : cluster_sectors;

-    if (!flat) {
-        bs->bl.write_zeroes_alignment =
-            MAX(bs->bl.write_zeroes_alignment, cluster_sectors);
-    }
    if (s->num_extents > 1) {
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
    } else {
@@ -530,8 +526,34 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    return ret;
 }

-static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset, Error **errp);
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
+                               Error **errp);
+
+static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
+                            Error **errp)
+{
+    int64_t size;
+    char *buf;
+    int ret;
+
+    size = bdrv_getlength(file);
+    if (size < 0) {
+        error_setg_errno(errp, -size, "Could not access file");
+        return NULL;
+    }
+
+    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
+    buf = g_malloc0(size + 1);
+
+    ret = bdrv_pread(file, desc_offset, buf, size);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not read from file");
+        g_free(buf);
+        return NULL;
+    }
+
+    return buf;
+}

 static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
@@ -550,11 +572,18 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
                         file->filename);
+        return -EINVAL;
    }
    if (header.capacity == 0) {
        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
-            return vmdk_open_desc_file(bs, flags, desc_offset << 9, errp);
+            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+            if (!buf) {
+                return -EINVAL;
+            }
+            ret = vmdk_open_desc_file(bs, flags, buf, errp);
+            g_free(buf);
+            return ret;
        }
    }

@@ -613,8 +642,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        char buf[64];
        snprintf(buf, sizeof(buf), "VMDK version %d",
                 le32_to_cpu(header.version));
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                bs->device_name, "vmdk", buf);
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                  bs->device_name, "vmdk", buf);
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
        /* VMware KB 2064959 explains that version 3 added support for
@@ -626,7 +655,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    }

    if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
-        error_report("L2 table size too big");
+        error_setg(errp, "L2 table size too big");
        return -EINVAL;
    }

@@ -640,6 +669,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
+    if (bdrv_getlength(file) <
+            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+        error_setg(errp, "File truncated, expecting at least %lld bytes",
+                   le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE);
+        return -EINVAL;
+    }
+
    ret = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
@@ -654,6 +690,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    }
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
+    if (extent->compressed) {
+        g_free(s->create_type);
+        s->create_type = g_strdup("streamOptimized");
+    }
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
    extent->version = le32_to_cpu(header.version);
    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
@@ -694,16 +734,12 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,

 /* Open an extent file and append to bs array */
 static int vmdk_open_sparse(BlockDriverState *bs,
-                            BlockDriverState *file,
-                            int flags, Error **errp)
+                            BlockDriverState *file, int flags,
+                            char *buf, Error **errp)
 {
    uint32_t magic;

-    if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
-        return -EIO;
-    }
-
-    magic = be32_to_cpu(magic);
+    magic = ldl_be_p(buf);
    switch (magic) {
        case VMDK3_MAGIC:
            return vmdk_open_vmfs_sparse(bs, file, flags, errp);
@@ -712,7 +748,8 @@ static int vmdk_open_sparse(BlockDriverState *bs,
            return vmdk_open_vmdk4(bs, file, flags, errp);
            break;
        default:
-            return -EMEDIUMTYPE;
+            error_setg(errp, "Image not in VMDK format");
+            return -EINVAL;
            break;
    }
 }
@@ -749,9 +786,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                return -EINVAL;
            }
        } else if (!strcmp(type, "VMFS")) {
-            flat_offset = 0;
+            if (ret == 4) {
+                flat_offset = 0;
+            } else {
+                error_setg(errp, "Invalid extent lines:\n%s", p);
+                return -EINVAL;
+            }
        } else if (ret != 4) {
-            error_setg(errp, "Invalid extent lines: \n%s", p);
+            error_setg(errp, "Invalid extent lines:\n%s", p);
            return -EINVAL;
        }

@@ -764,8 +806,9 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,

        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
-        ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags,
-                             errp);
+        extent_file = NULL;
+        ret = bdrv_open(&extent_file, extent_path, NULL, NULL,
+                        bs->open_flags | BDRV_O_PROTOCOL, NULL, errp);
        if (ret) {
            return ret;
        }
@@ -782,8 +825,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, errp);
+            char *buf = vmdk_read_desc(extent_file, 0, errp);
+            if (!buf) {
+                ret = -EINVAL;
+            } else {
+                ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
+            }
            if (ret) {
+                g_free(buf);
                bdrv_unref(extent_file);
                return ret;
            }
@@ -806,29 +855,16 @@ next_line:
    return 0;
 }

-static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset, Error **errp)
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
+                               Error **errp)
 {
    int ret;
-    char *buf = NULL;
    char ct[128];
    BDRVVmdkState *s = bs->opaque;
-    int64_t size;

-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        return -EINVAL;
-    }
-
-    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
-    buf = g_malloc0(size + 1);
-
-    ret = bdrv_pread(bs->file, desc_offset, buf, size);
-    if (ret < 0) {
-        goto exit;
-    }
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "invalid VMDK image descriptor");
+        ret = -EINVAL;
        goto exit;
    }
    if (strcmp(ct, "monolithicFlat") &&
@@ -844,24 +880,37 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
    s->desc_offset = 0;
    ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
 exit:
-    g_free(buf);
    return ret;
 }

 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
+    char *buf = NULL;
    int ret;
    BDRVVmdkState *s = bs->opaque;
+    uint32_t magic;

-    if (vmdk_open_sparse(bs, bs->file, flags, errp) == 0) {
-        s->desc_offset = 0x200;
-    } else {
-        ret = vmdk_open_desc_file(bs, flags, 0, errp);
-        if (ret) {
-            goto fail;
-        }
+    buf = vmdk_read_desc(bs->file, 0, errp);
+    if (!buf) {
+        return -EINVAL;
    }
+
+    magic = ldl_be_p(buf);
+    switch (magic) {
+        case VMDK3_MAGIC:
+        case VMDK4_MAGIC:
+            ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp);
+            s->desc_offset = 0x200;
+            break;
+        default:
+            ret = vmdk_open_desc_file(bs, flags, buf, errp);
+            break;
+    }
+    if (ret) {
+        goto fail;
+    }
+
    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
@@ -876,16 +925,34 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "vmdk", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);
-
+    g_free(buf);
    return 0;

 fail:
+    g_free(buf);
    g_free(s->create_type);
    s->create_type = NULL;
    vmdk_free_extents(bs);
    return ret;
 }

+
+static int vmdk_refresh_limits(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        if (!s->extents[i].flat) {
+            bs->bl.write_zeroes_alignment =
+                MAX(bs->bl.write_zeroes_alignment,
+                    s->extents[i].cluster_sectors);
+        }
+    }
+
+    return 0;
+}
+
 static int get_whole_cluster(BlockDriverState *bs,
                VmdkExtent *extent,
                uint64_t cluster_offset,
@@ -1117,7 +1184,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
        break;
    case VMDK_OK:
        ret = BDRV_BLOCK_DATA;
-        if (extent->file == bs->file) {
+        if (extent->file == bs->file && !extent->compressed) {
            ret |= BDRV_BLOCK_OFFSET_VALID | offset;
        }

@@ -1320,8 +1387,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
 {
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
-    int n, ret;
-    int64_t index_in_cluster;
+    int ret;
+    int64_t index_in_cluster, n;
    uint64_t extent_begin_sector, extent_relative_sector_num;
    uint64_t cluster_offset;
    VmdkMetaData m_data;
@@ -1447,23 +1514,35 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
 }

 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress, bool zeroed_grain)
+                              bool flat, bool compress, bool zeroed_grain,
+                              Error **errp)
 {
    int ret, i;
-    int fd = 0;
+    BlockDriverState *bs = NULL;
    VMDK4Header header;
-    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
+    Error *local_err;
+    uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
+    uint32_t *gd_buf = NULL;
+    int gd_buf_size;

-    fd = qemu_open(filename,
-                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                   0644);
-    if (fd < 0) {
-        return -errno;
+    ret = bdrv_create_file(filename, NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
    }
+
+    assert(bs == NULL);
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
    if (flat) {
-        ret = ftruncate(fd, filesize);
+        ret = bdrv_truncate(bs, filesize);
        if (ret < 0) {
-            ret = -errno;
+            error_setg_errno(errp, -ret, "Could not truncate file");
        }
        goto exit;
    }
@@ -1474,24 +1553,23 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-    header.capacity = filesize / 512;
+    header.capacity = filesize / BDRV_SECTOR_SIZE;
    header.granularity = 128;
-    header.num_gtes_per_gt = 512;
+    header.num_gtes_per_gt = BDRV_SECTOR_SIZE;

-    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
-    gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
-    gt_count =
-        (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
-    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
+    grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
+    gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
+                           BDRV_SECTOR_SIZE);
+    gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
+    gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);

    header.desc_offset = 1;
    header.desc_size = 20;
    header.rgd_offset = header.desc_offset + header.desc_size;
-    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
+    header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
    header.grain_offset =
-       ((header.gd_offset + gd_size + (gt_size * gt_count) +
-         header.granularity - 1) / header.granularity) *
-        header.granularity;
+        ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
+                 header.granularity);
    /* swap endianness for all header fields */
    header.version = cpu_to_le32(header.version);
    header.flags = cpu_to_le32(header.flags);
@@ -1511,48 +1589,55 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    header.check_bytes[3] = 0xa;

    /* write all the data */
-    ret = qemu_write_full(fd, &magic, sizeof(magic));
-    if (ret != sizeof(magic)) {
-        ret = -errno;
+    ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
        goto exit;
    }
-    ret = qemu_write_full(fd, &header, sizeof(header));
-    if (ret != sizeof(header)) {
-        ret = -errno;
+    ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
        goto exit;
    }

-    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
+    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
-        ret = -errno;
+        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
    }

    /* write grain directory */
-    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
+    gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
+    gd_buf = g_malloc0(gd_buf_size);
+    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
         i < gt_count; i++, tmp += gt_size) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
-            goto exit;
-        }
+        gd_buf[i] = cpu_to_le32(tmp);
+    }
+    ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+                      gd_buf, gd_buf_size);
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
+        goto exit;
    }

    /* write backup grain directory */
-    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
+    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
         i < gt_count; i++, tmp += gt_size) {
-        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
-        if (ret != sizeof(tmp)) {
-            ret = -errno;
-            goto exit;
-        }
+        gd_buf[i] = cpu_to_le32(tmp);
+    }
+    ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+                      gd_buf, gd_buf_size);
+    if (ret < 0) {
+        error_set(errp, QERR_IO_ERROR);
+        goto exit;
    }

    ret = 0;
- exit:
-    qemu_close(fd);
+exit:
+    if (bs) {
+        bdrv_unref(bs);
+    }
+    g_free(gd_buf);
    return ret;
 }

@@ -1599,7 +1684,9 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                       Error **errp)
 {
-    int fd, idx = 0;
+    int idx = 0;
+    BlockDriverState *new_bs = NULL;
+    Error *local_err;
    char *desc = NULL;
    int64_t total_size = 0, filesize;
    const char *adapter_type = NULL;
@@ -1616,6 +1703,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
    uint32_t parent_cid = 0xffffffff;
    uint32_t number_heads = 16;
    bool zeroed_grain = false;
+    uint32_t desc_offset = 0, desc_len;
    const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
@@ -1707,10 +1795,10 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
        goto exit;
    }
    if (backing_file) {
-        BlockDriverState *bs = bdrv_new("");
-        ret = bdrv_open(bs, backing_file, NULL, BDRV_O_NO_BACKING, NULL, errp);
+        BlockDriverState *bs = NULL;
+        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+                        errp);
        if (ret != 0) {
-            bdrv_unref(bs);
            goto exit;
        }
        if (strcmp(bs->drv->format_name, "vmdk")) {
@@ -1749,7 +1837,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                path, desc_filename);

        if (vmdk_create_extent(ext_filename, size,
-                               flat, compress, zeroed_grain)) {
+                               flat, compress, zeroed_grain, errp)) {
            ret = -EINVAL;
            goto exit;
        }
@@ -1757,7 +1845,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,

        /* Format description line */
        snprintf(desc_line, sizeof(desc_line),
-                    desc_extent_line, size / 512, desc_filename);
+                    desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
        g_string_append(ext_desc_lines, desc_line);
    }
    /* generate descriptor file */
@@ -1768,36 +1856,45 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
                           parent_desc_line,
                           ext_desc_lines->str,
                           (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-                           total_size / (int64_t)(63 * number_heads * 512),
+                           total_size /
+                               (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
                           number_heads,
                           adapter_type);
-    if (split || flat) {
-        fd = qemu_open(filename,
-                       O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                       0644);
+    desc_len = strlen(desc);
+    /* the descriptor offset = 0x200 */
+    if (!split && !flat) {
+        desc_offset = 0x200;
    } else {
-        fd = qemu_open(filename,
-                       O_WRONLY | O_BINARY | O_LARGEFILE,
-                       0644);
+        ret = bdrv_create_file(filename, options, &local_err);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not create image file");
+            goto exit;
+        }
    }
-    if (fd < 0) {
-        ret = -errno;
+    assert(new_bs == NULL);
+    ret = bdrv_open(&new_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not write description");
        goto exit;
    }
-    /* the descriptor offset = 0x200 */
-    if (!split && !flat && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
-        ret = -errno;
-        goto close_exit;
+    ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not write description");
+        goto exit;
    }
-    ret = qemu_write_full(fd, desc, strlen(desc));
-    if (ret != strlen(desc)) {
-        ret = -errno;
-        goto close_exit;
+    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
+     * for description file */
+    if (desc_offset == 0) {
+        ret = bdrv_truncate(new_bs, desc_len);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not truncate file");
+        }
    }
-    ret = 0;
-close_exit:
-    qemu_close(fd);
 exit:
+    if (new_bs) {
+        bdrv_unref(new_bs);
+    }
    g_free(desc);
    g_string_free(ext_desc_lines, true);
    return ret;
@@ -1887,6 +1984,53 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
    return info;
 }

+static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
+                      BdrvCheckMode fix)
+{
+    BDRVVmdkState *s = bs->opaque;
+    VmdkExtent *extent = NULL;
+    int64_t sector_num = 0;
+    int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+    int ret;
+    uint64_t cluster_offset;
+
+    if (fix) {
+        return -ENOTSUP;
+    }
+
+    for (;;) {
+        if (sector_num >= total_sectors) {
+            return 0;
+        }
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            fprintf(stderr,
+                    "ERROR: could not find extent for sector %" PRId64 "\n",
+                    sector_num);
+            break;
+        }
+        ret = get_cluster_offset(bs, extent, NULL,
+                                 sector_num << BDRV_SECTOR_BITS,
+                                 0, &cluster_offset);
+        if (ret == VMDK_ERROR) {
+            fprintf(stderr,
+                    "ERROR: could not get cluster_offset for sector %"
+                    PRId64 "\n", sector_num);
+            break;
+        }
+        if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+            fprintf(stderr,
+                    "ERROR: cluster offset for sector %"
+                    PRId64 " points after EOF\n", sector_num);
+            break;
+        }
+        sector_num += extent->cluster_sectors;
+    }
+
+    result->corruptions++;
+    return 0;
+}
+
 static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
 {
    int i;
@@ -1960,6 +2104,7 @@ static BlockDriver bdrv_vmdk = {
    .instance_size                = sizeof(BDRVVmdkState),
    .bdrv_probe                   = vmdk_probe,
    .bdrv_open                    = vmdk_open,
+    .bdrv_check                   = vmdk_check,
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
    .bdrv_read                    = vmdk_co_read,
    .bdrv_write                   = vmdk_co_write,
@@ -1971,6 +2116,7 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
    .bdrv_has_zero_init           = vmdk_has_zero_init,
    .bdrv_get_specific_info       = vmdk_get_specific_info,
+    .bdrv_refresh_limits          = vmdk_refresh_limits,

    .create_options               = vmdk_create_options,
 };
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -190,7 +190,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }
        if (strncmp(footer->creator, "conectix", 8)) {
-            ret = -EMEDIUMTYPE;
+            error_setg(errp, "invalid VPC image");
+            ret = -EINVAL;
            goto fail;
        }
        disk_type = VHD_FIXED;
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -266,8 +266,7 @@ typedef struct mbr_t {
 } QEMU_PACKED mbr_t;

 typedef struct direntry_t {
-    uint8_t name[8];
-    uint8_t extension[3];
+    uint8_t name[8 + 3];
    uint8_t attributes;
    uint8_t reserved[2];
    uint16_t ctime;
@@ -518,11 +517,9 @@ static inline uint8_t fat_chksum(const direntry_t* entry)
    uint8_t chksum=0;
    int i;

-    for(i=0;i<11;i++) {
-        unsigned char c;
-
-        c = (i < 8) ? entry->name[i] : entry->extension[i-8];
-        chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
+    for (i = 0; i < ARRAY_SIZE(entry->name); i++) {
+        chksum = (((chksum & 0xfe) >> 1) |
+                  ((chksum & 0x01) ? 0x80 : 0)) + entry->name[i];
    }

    return chksum;
@@ -617,7 +614,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,

    if(is_dot) {
 	entry=array_get_next(&(s->directory));
-	memset(entry->name,0x20,11);
+        memset(entry->name, 0x20, sizeof(entry->name));
 	memcpy(entry->name,filename,strlen(filename));
 	return entry;
    }
@@ -632,12 +629,14 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
 	i = 8;

    entry=array_get_next(&(s->directory));
-    memset(entry->name,0x20,11);
+    memset(entry->name, 0x20, sizeof(entry->name));
    memcpy(entry->name, filename, i);

-    if(j > 0)
-	for (i = 0; i < 3 && filename[j+1+i]; i++)
-	    entry->extension[i] = filename[j+1+i];
+    if (j > 0) {
+        for (i = 0; i < 3 && filename[j + 1 + i]; i++) {
+            entry->name[8 + i] = filename[j + 1 + i];
+        }
+    }

    /* upcase & remove unwanted characters */
    for(i=10;i>=0;i--) {
@@ -861,8 +860,7 @@ static int init_directories(BDRVVVFATState* s,
    {
 	direntry_t* entry=array_get_next(&(s->directory));
 	entry->attributes=0x28; /* archive | volume label */
-	memcpy(entry->name,"QEMU VVF",8);
-	memcpy(entry->extension,"AT ",3);
+        memcpy(entry->name, "QEMU VVFAT ", sizeof(entry->name));
    }

    /* Now build FAT, and write back information into directory */
@@ -1085,19 +1083,17 @@ DLOG(if (stderr == NULL) {
    setbuf(stderr, NULL);
 })

-    opts = qemu_opts_create_nofail(&runtime_opts);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    dirname = qemu_opt_get(opts, "dir");
    if (!dirname) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "vvfat block driver requires "
-                      "a 'dir' option");
+        error_setg(errp, "vvfat block driver requires a 'dir' option");
        ret = -EINVAL;
        goto fail;
    }
@@ -1137,8 +1133,7 @@ DLOG(if (stderr == NULL) {
    case 12:
        break;
    default:
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Valid FAT types are only "
-                      "12, 16 and 32");
+        error_setg(errp, "Valid FAT types are only 12, 16 and 32");
        ret = -EINVAL;
        goto fail;
    }
@@ -1591,17 +1586,20 @@ static int parse_short_name(BDRVVVFATState* s,
 	    lfn->name[i] = direntry->name[i];
    }

-    for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--);
+    for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) {
+    }
    if (j >= 0) {
 	lfn->name[i++] = '.';
 	lfn->name[i + j + 1] = '\0';
 	for (;j >= 0; j--) {
-	    if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f)
-		return -2;
-	    else if (s->downcase_short_names)
-		lfn->name[i + j] = qemu_tolower(direntry->extension[j]);
-	    else
-		lfn->name[i + j] = direntry->extension[j];
+            uint8_t c = direntry->name[8 + j];
+            if (c <= ' ' || c > 0x7f) {
+                return -2;
+            } else if (s->downcase_short_names) {
+                lfn->name[i + j] = qemu_tolower(c);
+            } else {
+                lfn->name[i + j] = c;
+            }
 	}
    } else
 	lfn->name[i + j + 1] = '\0';
@@ -2935,15 +2933,13 @@ static int enable_write_target(BDRVVVFATState *s)
        goto err;
    }

-    s->qcow = bdrv_new("");
-
-    ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
+    s->qcow = NULL;
+    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
            BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow,
            &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
-        bdrv_unref(s->qcow);
        goto err;
    }

--- a/blockdev.c
+++ b/blockdev.c
@@ -307,12 +307,10 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;

 /* Takes the ownership of bs_opts */
-static DriveInfo *blockdev_init(QDict *bs_opts,
-                                BlockInterfaceType type,
+static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
                                Error **errp)
 {
    const char *buf;
-    const char *file = NULL;
    const char *serial;
    int ro = 0;
    int bdrv_flags = 0;
@@ -332,13 +330,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
     * stay in bs_opts for processing by bdrv_open(). */
    id = qdict_get_try_str(bs_opts, "id");
    opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, &error);
-    if (error_is_set(&error)) {
+    if (error) {
        error_propagate(errp, error);
        return NULL;
    }

    qemu_opts_absorb_qdict(opts, bs_opts, &error);
-    if (error_is_set(&error)) {
+    if (error) {
        error_propagate(errp, error);
        goto early_err;
    }
@@ -354,7 +352,6 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
    ro = qemu_opt_get_bool(opts, "read-only", 0);
    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);

-    file = qemu_opt_get(opts, "file");
    serial = qemu_opt_get(opts, "serial");

    if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
@@ -439,13 +436,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts,

    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
-        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
-            error_setg(errp, "werror is not supported by this bus type");
-            goto early_err;
-        }
-
        on_write_error = parse_block_error_action(buf, 0, &error);
-        if (error_is_set(&error)) {
+        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
@@ -453,25 +445,25 @@ static DriveInfo *blockdev_init(QDict *bs_opts,

    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
-        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
-            error_report("rerror is not supported by this bus type");
-            goto early_err;
-        }
-
        on_read_error = parse_block_error_action(buf, 1, &error);
-        if (error_is_set(&error)) {
+        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
    }

+    if (bdrv_find_node(qemu_opts_id(opts))) {
+        error_setg(errp, "device id=%s is conflicting with a node-name",
+                   qemu_opts_id(opts));
+        goto early_err;
+    }
+
    /* init */
    dinfo = g_malloc0(sizeof(*dinfo));
    dinfo->id = g_strdup(qemu_opts_id(opts));
    dinfo->bdrv = bdrv_new(dinfo->id);
    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
    dinfo->bdrv->read_only = ro;
-    dinfo->type = type;
    dinfo->refcount = 1;
    if (serial != NULL) {
        dinfo->serial = g_strdup(serial);
@@ -512,7 +504,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
    bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

    QINCREF(bs_opts);
-    ret = bdrv_open(dinfo->bdrv, file, bs_opts, bdrv_flags, drv, &error);
+    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);

    if (ret < 0) {
        error_setg(errp, "could not open disk image %s: %s",
@@ -599,6 +591,10 @@ QemuOptsList qemu_legacy_drive_opts = {
            .name = "addr",
            .type = QEMU_OPT_STRING,
            .help = "pci address (virtio only)",
+        },{
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+            .help = "file name",
        },

        /* Options that are passed on, but have special semantics with -drive */
@@ -606,6 +602,14 @@ QemuOptsList qemu_legacy_drive_opts = {
            .name = "read-only",
            .type = QEMU_OPT_BOOL,
            .help = "open drive file as read-only",
+        },{
+            .name = "rerror",
+            .type = QEMU_OPT_STRING,
+            .help = "read error action",
+        },{
+            .name = "werror",
+            .type = QEMU_OPT_STRING,
+            .help = "write error action",
        },{
            .name = "copy-on-read",
            .type = QEMU_OPT_BOOL,
@@ -627,8 +631,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    int cyls, heads, secs, translation;
    int max_devs, bus_id, unit_id, index;
    const char *devaddr;
+    const char *werror, *rerror;
    bool read_only = false;
    bool copy_on_read;
+    const char *filename;
    Error *local_err = NULL;

    /* Change legacy command line options into QMP ones */
@@ -682,9 +688,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    bs_opts = qdict_new();
    qemu_opts_to_qdict(all_opts, bs_opts);

-    legacy_opts = qemu_opts_create_nofail(&qemu_legacy_drive_opts);
+    legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
+                                   &error_abort);
    qemu_opts_absorb_qdict(legacy_opts, bs_opts, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        goto fail;
@@ -772,6 +779,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            translation = BIOS_ATA_TRANSLATION_NONE;
        } else if (!strcmp(value, "lba")) {
            translation = BIOS_ATA_TRANSLATION_LBA;
+        } else if (!strcmp(value, "large")) {
+            translation = BIOS_ATA_TRANSLATION_LARGE;
+        } else if (!strcmp(value, "rechs")) {
+            translation = BIOS_ATA_TRANSLATION_RECHS;
        } else if (!strcmp(value, "auto")) {
            translation = BIOS_ATA_TRANSLATION_AUTO;
        } else {
@@ -853,7 +864,8 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)

    if (type == IF_VIRTIO) {
        QemuOpts *devopts;
-        devopts = qemu_opts_create_nofail(qemu_find_opts("device"));
+        devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
+                                   &error_abort);
        if (arch_type == QEMU_ARCH_S390X) {
            qemu_opt_set(devopts, "driver", "virtio-blk-s390");
        } else {
@@ -865,16 +877,39 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        }
    }

+    filename = qemu_opt_get(legacy_opts, "file");
+
+    /* Check werror/rerror compatibility with if=... */
+    werror = qemu_opt_get(legacy_opts, "werror");
+    if (werror != NULL) {
+        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
+            type != IF_NONE) {
+            error_report("werror is not supported by this bus type");
+            goto fail;
+        }
+        qdict_put(bs_opts, "werror", qstring_from_str(werror));
+    }
+
+    rerror = qemu_opt_get(legacy_opts, "rerror");
+    if (rerror != NULL) {
+        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
+            type != IF_NONE) {
+            error_report("rerror is not supported by this bus type");
+            goto fail;
+        }
+        qdict_put(bs_opts, "rerror", qstring_from_str(rerror));
+    }
+
    /* Actual block device init: Functionality shared with blockdev-add */
-    dinfo = blockdev_init(bs_opts, type, &local_err);
+    dinfo = blockdev_init(filename, bs_opts, &local_err);
    if (dinfo == NULL) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            qerror_report_err(local_err);
            error_free(local_err);
        }
        goto fail;
    } else {
-        assert(!error_is_set(&local_err));
+        assert(!local_err);
    }

    /* Set legacy DriveInfo fields */
@@ -886,6 +921,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    dinfo->secs = secs;
    dinfo->trans = translation;

+    dinfo->type = type;
    dinfo->bus = bus_id;
    dinfo->unit = unit_id;
    dinfo->devaddr = devaddr;
@@ -940,14 +976,22 @@ static void blockdev_do_action(int kind, void *data, Error **errp)
    qmp_transaction(&list, errp);
 }

-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
+                                bool has_node_name, const char *node_name,
+                                const char *snapshot_file,
+                                bool has_snapshot_node_name,
+                                const char *snapshot_node_name,
                                bool has_format, const char *format,
-                                bool has_mode, enum NewImageMode mode,
-                                Error **errp)
+                                bool has_mode, NewImageMode mode, Error **errp)
 {
    BlockdevSnapshot snapshot = {
+        .has_device = has_device,
        .device = (char *) device,
+        .has_node_name = has_node_name,
+        .node_name = (char *) node_name,
        .snapshot_file = (char *) snapshot_file,
+        .has_snapshot_node_name = has_snapshot_node_name,
+        .snapshot_node_name = (char *) snapshot_node_name,
        .has_format = has_format,
        .format = (char *) format,
        .has_mode = has_mode,
@@ -1002,7 +1046,7 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
    }

    ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -1015,7 +1059,7 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
    }

    bdrv_snapshot_delete(bs, id, name, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -1185,8 +1229,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
 {
    BlockDriver *drv;
    int flags, ret;
+    QDict *options = NULL;
    Error *local_err = NULL;
+    bool has_device = false;
    const char *device;
+    bool has_node_name = false;
+    const char *node_name;
+    bool has_snapshot_node_name = false;
+    const char *snapshot_node_name;
    const char *new_image_file;
    const char *format = "qcow2";
    enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1197,7 +1247,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
    /* get parameters */
    g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);

+    has_device = action->blockdev_snapshot_sync->has_device;
    device = action->blockdev_snapshot_sync->device;
+    has_node_name = action->blockdev_snapshot_sync->has_node_name;
+    node_name = action->blockdev_snapshot_sync->node_name;
+    has_snapshot_node_name =
+        action->blockdev_snapshot_sync->has_snapshot_node_name;
+    snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name;
+
    new_image_file = action->blockdev_snapshot_sync->snapshot_file;
    if (action->blockdev_snapshot_sync->has_format) {
        format = action->blockdev_snapshot_sync->format;
@@ -1213,9 +1270,21 @@ static void external_snapshot_prepare(BlkTransactionState *common,
        return;
    }

-    state->old_bs = bdrv_find(device);
-    if (!state->old_bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
+                                   has_node_name ? node_name : NULL,
+                                   &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (has_node_name && !has_snapshot_node_name) {
+        error_setg(errp, "New snapshot node name missing");
+        return;
+    }
+
+    if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) {
+        error_setg(errp, "New snapshot node name already existing");
        return;
    }

@@ -1236,7 +1305,7 @@ static void external_snapshot_prepare(BlkTransactionState *common,
        }
    }

-    if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) {
+    if (!bdrv_is_first_non_filter(state->old_bs)) {
        error_set(errp, QERR_FEATURE_DISABLED, "snapshot");
        return;
    }
@@ -1249,18 +1318,24 @@ static void external_snapshot_prepare(BlkTransactionState *common,
                        state->old_bs->filename,
                        state->old_bs->drv->format_name,
                        NULL, -1, flags, &local_err, false);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

-    /* We will manually add the backing_hd field to the bs later */
-    state->new_bs = bdrv_new("");
+    if (has_snapshot_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name",
+                  qstring_from_str(snapshot_node_name));
+    }
+
    /* TODO Inherit bs->options or only take explicit options with an
     * extended QMP command? */
-    ret = bdrv_open(state->new_bs, new_image_file, NULL,
+    assert(state->new_bs == NULL);
+    ret = bdrv_open(&state->new_bs, new_image_file, NULL, options,
                    flags | BDRV_O_NO_BACKING, drv, &local_err);
+    /* We will manually add the backing_hd field to the bs later */
    if (ret != 0) {
        error_propagate(errp, local_err);
    }
@@ -1312,7 +1387,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                     backup->has_on_source_error, backup->on_source_error,
                     backup->has_on_target_error, backup->on_target_error,
                     &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        state->bs = NULL;
        state->job = NULL;
@@ -1404,7 +1479,7 @@ void qmp_transaction(TransactionActionList *dev_list, Error **errp)
        QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry);

        state->ops->prepare(state, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            goto delete_and_fail;
        }
@@ -1474,14 +1549,19 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
    eject_device(bs, force, errp);
 }

-void qmp_block_passwd(const char *device, const char *password, Error **errp)
+void qmp_block_passwd(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      const char *password, Error **errp)
 {
+    Error *local_err = NULL;
    BlockDriverState *bs;
    int err;

-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        return;
    }

@@ -1502,7 +1582,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
    Error *local_err = NULL;
    int ret;

-    ret = bdrv_open(bs, filename, NULL, bdrv_flags, drv, &local_err);
+    ret = bdrv_open(&bs, filename, NULL, NULL, bdrv_flags, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return;
@@ -1523,7 +1603,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
 }

 void qmp_change_blockdev(const char *device, const char *filename,
-                         bool has_format, const char *format, Error **errp)
+                         const char *format, Error **errp)
 {
    BlockDriverState *bs;
    BlockDriver *drv = NULL;
@@ -1545,7 +1625,7 @@ void qmp_change_blockdev(const char *device, const char *filename,
    }

    eject_device(bs, 0, &err);
-    if (error_is_set(&err)) {
+    if (err) {
        error_propagate(errp, err);
        return;
    }
@@ -1671,14 +1751,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
    return 0;
 }

-void qmp_block_resize(const char *device, int64_t size, Error **errp)
+void qmp_block_resize(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      int64_t size, Error **errp)
 {
+    Error *local_err = NULL;
    BlockDriverState *bs;
    int ret;

-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (!bdrv_is_first_non_filter(bs)) {
+        error_set(errp, QERR_FEATURE_DISABLED, "resize");
        return;
    }

@@ -1765,7 +1855,7 @@ void qmp_block_stream(const char *device, bool has_base,

    stream_start(bs, base_bs, base, has_speed ? speed : 0,
                 on_error, block_job_cb, bs, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
@@ -1820,8 +1910,13 @@ void qmp_block_commit(const char *device,
        return;
    }

-    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
-                &local_err);
+    if (top_bs == bs) {
+        commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
+                            bs, &local_err);
+    } else {
+        commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                    &local_err);
+    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        return;
@@ -1918,15 +2013,14 @@ void qmp_drive_backup(const char *device, const char *target,
        }
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

-    target_bs = bdrv_new("");
-    ret = bdrv_open(target_bs, target, NULL, flags, drv, &local_err);
+    target_bs = NULL;
+    ret = bdrv_open(&target_bs, target, NULL, NULL, flags, drv, &local_err);
    if (ret < 0) {
-        bdrv_unref(target_bs);
        error_propagate(errp, local_err);
        return;
    }
@@ -1940,6 +2034,11 @@ void qmp_drive_backup(const char *device, const char *target,
    }
 }

+BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
+{
+    return bdrv_named_nodes_list();
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)

 void qmp_drive_mirror(const char *device, const char *target,
@@ -2054,7 +2153,7 @@ void qmp_drive_mirror(const char *device, const char *target,
        }
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
@@ -2062,11 +2161,10 @@ void qmp_drive_mirror(const char *device, const char *target,
    /* Mirroring takes care of copy-on-write using the source's backing
     * file.
     */
-    target_bs = bdrv_new("");
-    ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv,
-                    &local_err);
+    target_bs = NULL;
+    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
+                    drv, &local_err);
    if (ret < 0) {
-        bdrv_unref(target_bs);
        error_propagate(errp, local_err);
        return;
    }
@@ -2193,7 +2291,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    visit_type_BlockdevOptions(qmp_output_get_visitor(ov),
                               &options, NULL, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        goto fail;
    }
@@ -2203,8 +2301,8 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    qdict_flatten(qdict);

-    blockdev_init(qdict, IF_NONE, &local_err);
-    if (error_is_set(&local_err)) {
+    blockdev_init(NULL, qdict, &local_err);
+    if (local_err) {
        error_propagate(errp, local_err);
        goto fail;
    }
@@ -2243,10 +2341,6 @@ QemuOptsList qemu_common_drive_opts = {
            .name = "snapshot",
            .type = QEMU_OPT_BOOL,
            .help = "enable/disable snapshot mode",
-        },{
-            .name = "file",
-            .type = QEMU_OPT_STRING,
-            .help = "disk image",
        },{
            .name = "discard",
            .type = QEMU_OPT_STRING,
--- a/blockjob.c
+++ b/blockjob.c
@@ -61,7 +61,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
@@ -92,7 +92,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
        return;
    }
    job->driver->set_speed(job, speed, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
--- a/889
+++ b/889
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -53,7 +53,25 @@ void cpu_resume_from_signal(CPUArchState *env, void *puc)
 static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
 {
    CPUArchState *env = cpu->env_ptr;
-    uintptr_t next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+    uintptr_t next_tb;
+
+#if defined(DEBUG_DISAS)
+    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
+#if defined(TARGET_I386)
+        log_cpu_state(cpu, CPU_DUMP_CCOP);
+#elif defined(TARGET_M68K)
+        /* ??? Should not modify env state for dumping.  */
+        cpu_m68k_flush_flags(env, env->cc_op);
+        env->cc_op = CC_OP_FLAGS;
+        env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4);
+        log_cpu_state(cpu, 0);
+#else
+        log_cpu_state(cpu, 0);
+#endif
+    }
+#endif /* DEBUG_DISAS */
+
+    next_tb = tcg_qemu_tb_exec(env, tb_ptr);
    if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
@@ -205,6 +223,9 @@ int cpu_exec(CPUArchState *env)
 #if !(defined(CONFIG_USER_ONLY) && \
      (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
    CPUClass *cc = CPU_GET_CLASS(cpu);
+#endif
+#ifdef TARGET_I386
+    X86CPU *x86_cpu = X86_CPU(cpu);
 #endif
    int ret, interrupt_request;
    TranslationBlock *tb;
@@ -320,24 +341,24 @@ int cpu_exec(CPUArchState *env)
 #if !defined(CONFIG_USER_ONLY)
                    if (interrupt_request & CPU_INTERRUPT_POLL) {
                        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
-                        apic_poll_irq(env->apic_state);
+                        apic_poll_irq(x86_cpu->apic_state);
                    }
 #endif
                    if (interrupt_request & CPU_INTERRUPT_INIT) {
                            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
                                                          0);
-                            do_cpu_init(x86_env_get_cpu(env));
+                            do_cpu_init(x86_cpu);
                            env->exception_index = EXCP_HALTED;
                            cpu_loop_exit(env);
                    } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
-                            do_cpu_sipi(x86_env_get_cpu(env));
+                            do_cpu_sipi(x86_cpu);
                    } else if (env->hflags2 & HF2_GIF_MASK) {
                        if ((interrupt_request & CPU_INTERRUPT_SMI) &&
                            !(env->hflags & HF_SMM_MASK)) {
                            cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
                                                          0);
                            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
-                            do_smm_enter(x86_env_get_cpu(env));
+                            do_smm_enter(x86_cpu);
                            next_tb = 0;
                        } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
                                   !(env->hflags2 & HF2_NMI_MASK)) {
@@ -374,7 +395,10 @@ int cpu_exec(CPUArchState *env)
                            /* FIXME: this should respect TPR */
                            cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR,
                                                          0);
-                            intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                            intno = ldl_phys(cpu->as,
+                                             env->vm_vmcb
+                                             + offsetof(struct vmcb,
+                                                        control.int_vector));
                            qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
                            do_interrupt_x86_hardirq(env, intno, 1);
                            cpu->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
@@ -453,7 +477,7 @@ int cpu_exec(CPUArchState *env)
                    }
 #elif defined(TARGET_ARM)
                    if (interrupt_request & CPU_INTERRUPT_FIQ
-                        && !(env->uncached_cpsr & CPSR_F)) {
+                        && !(env->daif & PSTATE_F)) {
                        env->exception_index = EXCP_FIQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
@@ -469,7 +493,7 @@ int cpu_exec(CPUArchState *env)
                       pc contains a magic address.  */
                    if (interrupt_request & CPU_INTERRUPT_HARD
                        && ((IS_M(env) && env->regs[15] < 0xfffffff0)
-                            || !(env->uncached_cpsr & CPSR_I))) {
+                            || !(env->daif & PSTATE_I))) {
                        env->exception_index = EXCP_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
@@ -579,22 +603,6 @@ int cpu_exec(CPUArchState *env)
                    env->exception_index = EXCP_INTERRUPT;
                    cpu_loop_exit(env);
                }
-#if defined(DEBUG_DISAS)
-                if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
-                    /* restore flags in standard format */
-#if defined(TARGET_I386)
-                    log_cpu_state(cpu, CPU_DUMP_CCOP);
-#elif defined(TARGET_M68K)
-                    cpu_m68k_flush_flags(env, env->cc_op);
-                    env->cc_op = CC_OP_FLAGS;
-                    env->sr = (env->sr & 0xffe0)
-                              | env->cc_dest | (env->cc_x << 4);
-                    log_cpu_state(cpu, 0);
-#else
-                    log_cpu_state(cpu, 0);
-#endif
-                }
-#endif /* DEBUG_DISAS */
                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                tb = tb_find_fast(env);
                /* Note: we do it here to avoid a gcc bug on Mac OS X when
@@ -684,6 +692,9 @@ int cpu_exec(CPUArchState *env)
 #if !(defined(CONFIG_USER_ONLY) && \
      (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
            cc = CPU_GET_CLASS(cpu);
+#endif
+#ifdef TARGET_I386
+            x86_cpu = X86_CPU(cpu);
 #endif
        }
    } /* for(;;) */
--- a/cpus.c
+++ b/cpus.c
@@ -1119,6 +1119,8 @@ void resume_all_vcpus(void)

 static void qemu_tcg_init_vcpu(CPUState *cpu)
 {
+    tcg_cpu_address_space_init(cpu, cpu->as);
+
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
@@ -1458,12 +1460,11 @@ void qmp_inject_nmi(Error **errp)

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);
-        CPUX86State *env = &cpu->env;

-        if (!env->apic_state) {
+        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
-            apic_deliver_nmi(env->apic_state);
+            apic_deliver_nmi(cpu->apic_state);
        }
    }
 #elif defined(TARGET_S390X)
--- a/cputlb.c
+++ b/cputlb.c
@@ -26,6 +26,7 @@
 #include "exec/cputlb.h"

 #include "exec/memory-internal.h"
+#include "exec/ram_addr.h"

 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
@@ -33,13 +34,6 @@
 /* statistics */
 int tlb_flush_count;

-static const CPUTLBEntry s_cputlb_empty_entry = {
-    .addr_read  = -1,
-    .addr_write = -1,
-    .addr_code  = -1,
-    .addend     = -1,
-};
-
 /* NOTE:
 * If flush_global is true (the usual case), flush all tlb entries.
 * If flush_global is false, flush (at least) all tlb entries not
@@ -55,7 +49,6 @@ static const CPUTLBEntry s_cputlb_empty_entry = {
 void tlb_flush(CPUArchState *env, int flush_global)
 {
    CPUState *cpu = ENV_GET_CPU(env);
-    int i;

 #if defined(DEBUG_TLB)
    printf("tlb_flush:\n");
@@ -64,15 +57,8 @@ void tlb_flush(CPUArchState *env, int flush_global)
       links while we are modifying them */
    cpu->current_tb = NULL;

-    for (i = 0; i < CPU_TLB_SIZE; i++) {
-        int mmu_idx;
-
-        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-            env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
-        }
-    }
-
-    memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+    memset(env->tlb_table, -1, sizeof(env->tlb_table));
+    memset(env->tb_jmp_cache, 0, sizeof(env->tb_jmp_cache));

    env->tlb_flush_addr = -1;
    env->tlb_flush_mask = 0;
@@ -87,7 +73,7 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
        addr == (tlb_entry->addr_code &
                 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        *tlb_entry = s_cputlb_empty_entry;
+        memset(tlb_entry, -1, sizeof(*tlb_entry));
    }
 }

@@ -127,9 +113,8 @@ void tlb_flush_page(CPUArchState *env, target_ulong addr)
   can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_reset_dirty(ram_addr,
-                                    ram_addr + TARGET_PAGE_SIZE,
-                                    CODE_DIRTY_FLAG);
+    cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
+                                    DIRTY_MEMORY_CODE);
 }

 /* update the TLB so that writes in physical page 'phys_addr' are no longer
@@ -137,7 +122,7 @@ void tlb_protect_code(ram_addr_t ram_addr)
 void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                             target_ulong vaddr)
 {
-    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 }

 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
@@ -247,6 +232,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    uintptr_t addend;
    CPUTLBEntry *te;
    hwaddr iotlb, xlat, sz;
+    CPUState *cpu = ENV_GET_CPU(env);

    assert(size >= TARGET_PAGE_SIZE);
    if (size != TARGET_PAGE_SIZE) {
@@ -254,7 +240,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    }

    sz = size;
-    section = address_space_translate_for_iotlb(&address_space_memory, paddr,
+    section = address_space_translate_for_iotlb(cpu->as, paddr,
                                                &xlat, &sz);
    assert(sz >= TARGET_PAGE_SIZE);

@@ -299,7 +285,8 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
            /* Write access calls the I/O callback.  */
            te->addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
-                   && !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
+                   && cpu_physical_memory_is_clean(section->mr->ram_addr
+                                                   + xlat)) {
            te->addr_write = address | TLB_NOTDIRTY;
        } else {
            te->addr_write = address;
@@ -319,6 +306,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    int mmu_idx, page_index, pd;
    void *p;
    MemoryRegion *mr;
+    CPUState *cpu = ENV_GET_CPU(env1);

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
@@ -327,9 +315,8 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
        cpu_ldub_code(env1, addr);
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(pd);
+    mr = iotlb_to_region(cpu->as, pd);
    if (memory_region_is_unassigned(mr)) {
-        CPUState *cpu = ENV_GET_CPU(env1);
        CPUClass *cc = CPU_GET_CLASS(cpu);

        if (cc->do_unassigned_access) {
--- a/default-configs/aarch64-linux-user.mak
+++ b/default-configs/aarch64-linux-user.mak
@@ -0,0 +1,3 @@
+# Default configuration for aarch64-linux-user
+
+CONFIG_GDBSTUB_XML=y
--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -0,0 +1,6 @@
+# Default configuration for aarch64-softmmu
+
+# We support all the 32 bit boards so need all their config
+include arm-softmmu.mak
+
+# Currently no 64-bit specific config requirements
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -27,6 +27,7 @@ CONFIG_SSI_SD=y
 CONFIG_SSI_M25P80=y
 CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
+CONFIG_ALLWINNER_EMAC=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
@@ -41,6 +42,7 @@ CONFIG_ARM_GIC=y
 CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
 CONFIG_ARM_TIMER=y
 CONFIG_ARM_MPTIMER=y
+CONFIG_A9_GTIMER=y
 CONFIG_PL011=y
 CONFIG_PL022=y
 CONFIG_PL031=y
@@ -63,6 +65,7 @@ CONFIG_XILINX_SPIPS=y

 CONFIG_ARM11SCU=y
 CONFIG_A9SCU=y
+CONFIG_DIGIC=y
 CONFIG_MARVELL_88W8618=y
 CONFIG_OMAP=y
 CONFIG_TSC210X=y
@@ -81,3 +84,7 @@ CONFIG_VERSATILE_I2C=y

 CONFIG_SDHCI=y
 CONFIG_INTEGRATOR_DEBUG=y
+
+CONFIG_ALLWINNER_A10_PIT=y
+CONFIG_ALLWINNER_A10_PIC=y
+CONFIG_ALLWINNER_A10=y
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -41,6 +41,8 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
+CONFIG_PREP=y
+CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 # For PReP
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -42,6 +42,8 @@ CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
 CONFIG_PSERIES=y
+CONFIG_PREP=y
+CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 # For pSeries
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -3,32 +3,12 @@
 include pci.mak
 include sound.mak
 include usb.mak
-CONFIG_ISA_MMIO=y
-CONFIG_ESCC=y
 CONFIG_M48T59=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
-CONFIG_I8254=y
-CONFIG_PCKBD=y
-CONFIG_FDC=y
 CONFIG_I8257=y
 CONFIG_OPENPIC=y
-CONFIG_PREP_PCI=y
-CONFIG_MACIO=y
-CONFIG_CUDA=y
-CONFIG_ADB=y
-CONFIG_MAC_NVRAM=y
-CONFIG_MAC_DBDMA=y
-CONFIG_HEATHROW_PIC=y
-CONFIG_GRACKLE_PCI=y
-CONFIG_UNIN_PCI=y
-CONFIG_DEC_PCI=y
-CONFIG_PPCE500_PCI=y
-CONFIG_IDE_ISA=y
-CONFIG_IDE_CMD646=y
-CONFIG_IDE_MACIO=y
-CONFIG_NE2000_ISA=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
 CONFIG_PTIMER=y
@@ -36,8 +16,3 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
-CONFIG_E500=y
-CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
-# For PReP
-CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,2 +1,3 @@
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
+CONFIG_S390_FLIC=$(CONFIG_KVM)
--- a/default-configs/sparc-softmmu.mak
+++ b/default-configs/sparc-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_EMPTY_SLOT=y
 CONFIG_PCNET_COMMON=y
 CONFIG_LANCE=y
 CONFIG_TCX=y
+CONFIG_CG3=y
 CONFIG_SLAVIO=y
 CONFIG_CS4231=y
 CONFIG_GRLIB=y
--- a/device_tree.c
+++ b/device_tree.c
@@ -41,6 +41,10 @@ void *create_device_tree(int *sizep)
    if (ret < 0) {
        goto fail;
    }
+    ret = fdt_finish_reservemap(fdt);
+    if (ret < 0) {
+        goto fail;
+    }
    ret = fdt_begin_node(fdt, "");
    if (ret < 0) {
        goto fail;
@@ -127,12 +131,12 @@ static int findnode_nofail(void *fdt, const char *node_path)
    return offset;
 }

-int qemu_devtree_setprop(void *fdt, const char *node_path,
-                         const char *property, const void *val_array, int size)
+int qemu_fdt_setprop(void *fdt, const char *node_path,
+                     const char *property, const void *val, int size)
 {
    int r;

-    r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val_array, size);
+    r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size);
    if (r < 0) {
        fprintf(stderr, "%s: Couldn't set %s/%s: %s\n", __func__, node_path,
                property, fdt_strerror(r));
@@ -142,8 +146,8 @@ int qemu_devtree_setprop(void *fdt, const char *node_path,
    return r;
 }

-int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
-                              const char *property, uint32_t val)
+int qemu_fdt_setprop_cell(void *fdt, const char *node_path,
+                          const char *property, uint32_t val)
 {
    int r;

@@ -157,15 +161,15 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
    return r;
 }

-int qemu_devtree_setprop_u64(void *fdt, const char *node_path,
-                             const char *property, uint64_t val)
+int qemu_fdt_setprop_u64(void *fdt, const char *node_path,
+                         const char *property, uint64_t val)
 {
    val = cpu_to_be64(val);
-    return qemu_devtree_setprop(fdt, node_path, property, &val, sizeof(val));
+    return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val));
 }

-int qemu_devtree_setprop_string(void *fdt, const char *node_path,
-                                const char *property, const char *string)
+int qemu_fdt_setprop_string(void *fdt, const char *node_path,
+                            const char *property, const char *string)
 {
    int r;

@@ -179,8 +183,8 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path,
    return r;
 }

-const void *qemu_devtree_getprop(void *fdt, const char *node_path,
-                                 const char *property, int *lenp)
+const void *qemu_fdt_getprop(void *fdt, const char *node_path,
+                             const char *property, int *lenp)
 {
    int len;
    const void *r;
@@ -196,11 +200,11 @@ const void *qemu_devtree_getprop(void *fdt, const char *node_path,
    return r;
 }

-uint32_t qemu_devtree_getprop_cell(void *fdt, const char *node_path,
-                                   const char *property)
+uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path,
+                               const char *property)
 {
    int len;
-    const uint32_t *p = qemu_devtree_getprop(fdt, node_path, property, &len);
+    const uint32_t *p = qemu_fdt_getprop(fdt, node_path, property, &len);
    if (len != 4) {
        fprintf(stderr, "%s: %s/%s not 4 bytes long (not a cell?)\n",
                __func__, node_path, property);
@@ -209,7 +213,7 @@ uint32_t qemu_devtree_getprop_cell(void *fdt, const char *node_path,
    return be32_to_cpu(*p);
 }

-uint32_t qemu_devtree_get_phandle(void *fdt, const char *path)
+uint32_t qemu_fdt_get_phandle(void *fdt, const char *path)
 {
    uint32_t r;

@@ -223,15 +227,15 @@ uint32_t qemu_devtree_get_phandle(void *fdt, const char *path)
    return r;
 }

-int qemu_devtree_setprop_phandle(void *fdt, const char *node_path,
-                                 const char *property,
-                                 const char *target_node_path)
+int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
+                             const char *property,
+                             const char *target_node_path)
 {
-    uint32_t phandle = qemu_devtree_get_phandle(fdt, target_node_path);
-    return qemu_devtree_setprop_cell(fdt, node_path, property, phandle);
+    uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path);
+    return qemu_fdt_setprop_cell(fdt, node_path, property, phandle);
 }

-uint32_t qemu_devtree_alloc_phandle(void *fdt)
+uint32_t qemu_fdt_alloc_phandle(void *fdt)
 {
    static int phandle = 0x0;

@@ -255,7 +259,7 @@ uint32_t qemu_devtree_alloc_phandle(void *fdt)
    return phandle++;
 }

-int qemu_devtree_nop_node(void *fdt, const char *node_path)
+int qemu_fdt_nop_node(void *fdt, const char *node_path)
 {
    int r;

@@ -269,7 +273,7 @@ int qemu_devtree_nop_node(void *fdt, const char *node_path)
    return r;
 }

-int qemu_devtree_add_subnode(void *fdt, const char *name)
+int qemu_fdt_add_subnode(void *fdt, const char *name)
 {
    char *dupname = g_strdup(name);
    char *basename = strrchr(dupname, '/');
@@ -299,7 +303,7 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
    return retval;
 }

-void qemu_devtree_dumpdtb(void *fdt, int size)
+void qemu_fdt_dumpdtb(void *fdt, int size)
 {
    const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb");

@@ -309,11 +313,11 @@ void qemu_devtree_dumpdtb(void *fdt, int size)
    }
 }

-int qemu_devtree_setprop_sized_cells_from_array(void *fdt,
-                                                const char *node_path,
-                                                const char *property,
-                                                int numvalues,
-                                                uint64_t *values)
+int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
+                                            const char *node_path,
+                                            const char *property,
+                                            int numvalues,
+                                            uint64_t *values)
 {
    uint32_t *propcells;
    uint64_t value;
@@ -338,6 +342,6 @@ int qemu_devtree_setprop_sized_cells_from_array(void *fdt,
        propcells[cellnum++] = cpu_to_be32(value);
    }

-    return qemu_devtree_setprop(fdt, node_path, property, propcells,
-                                cellnum * sizeof(uint32_t));
+    return qemu_fdt_setprop(fdt, node_path, property, propcells,
+                            cellnum * sizeof(uint32_t));
 }
--- a/disas.c
+++ b/disas.c
@@ -190,7 +190,7 @@ static int print_insn_od_target(bfd_vma pc, disassemble_info *info)
 /* Disassemble this for me please... (debugging). 'flags' has the following
   values:
    i386 - 1 means 16 bit code, 2 means 64 bit code
-    arm  - bit 0 = thumb, bit 1 = reverse endian
+    arm  - bit 0 = thumb, bit 1 = reverse endian, bit 2 = A64
    ppc  - nonzero means little endian
    other targets - unused
 */
@@ -225,7 +225,15 @@ void target_disas(FILE *out, CPUArchState *env, target_ulong code,
    }
    print_insn = print_insn_i386;
 #elif defined(TARGET_ARM)
-    if (flags & 1) {
+    if (flags & 4) {
+        /* We might not be compiled with the A64 disassembler
+         * because it needs a C++ compiler; in that case we will
+         * fall through to the default print_insn_od case.
+         */
+#if defined(CONFIG_ARM_A64_DIS)
+        print_insn = print_insn_arm_a64;
+#endif
+    } else if (flags & 1) {
        print_insn = print_insn_thumb1;
    } else {
        print_insn = print_insn_arm;
@@ -356,6 +364,8 @@ void disas(FILE *out, void *code, unsigned long size)
 #elif defined(_ARCH_PPC)
    s.info.disassembler_options = (char *)"any";
    print_insn = print_insn_ppc;
+#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
+    print_insn = print_insn_arm_a64;
 #elif defined(__alpha__)
    print_insn = print_insn_alpha;
 #elif defined(__sparc__)
--- a/disas/Makefile.objs
+++ b/disas/Makefile.objs
@@ -1,5 +1,10 @@
+
 common-obj-$(CONFIG_ALPHA_DIS) += alpha.o
 common-obj-$(CONFIG_ARM_DIS) += arm.o
+common-obj-$(CONFIG_ARM_A64_DIS) += arm-a64.o
+common-obj-$(CONFIG_ARM_A64_DIS) += libvixl/
+libvixldir = $(SRC_PATH)/disas/libvixl
+$(obj)/arm-a64.o: QEMU_CFLAGS += -I$(libvixldir)
 common-obj-$(CONFIG_CRIS_DIS) += cris.o
 common-obj-$(CONFIG_HPPA_DIS) += hppa.o
 common-obj-$(CONFIG_I386_DIS) += i386.o
--- a/disas/arm-a64.cc
+++ b/disas/arm-a64.cc
@@ -0,0 +1,87 @@
+/*
+ * ARM A64 disassembly output wrapper to libvixl
+ * Copyright (c) 2013 Linaro Limited
+ * Written by Claudio Fontana
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "a64/disasm-a64.h"
+
+extern "C" {
+#include "disas/bfd.h"
+}
+
+using namespace vixl;
+
+static Decoder *vixl_decoder = NULL;
+static Disassembler *vixl_disasm = NULL;
+
+/* We don't use libvixl's PrintDisassembler because its output
+ * is a little unhelpful (trailing newlines, for example).
+ * Instead we use our own very similar variant so we have
+ * control over the format.
+ */
+class QEMUDisassembler : public Disassembler {
+public:
+    explicit QEMUDisassembler(FILE *stream) : stream_(stream) { }
+    ~QEMUDisassembler() { }
+
+protected:
+    void ProcessOutput(Instruction *instr) {
+        fprintf(stream_, "%08" PRIx32 "      %s",
+                instr->InstructionBits(), GetOutput());
+    }
+
+private:
+    FILE *stream_;
+};
+
+static int vixl_is_initialized(void)
+{
+    return vixl_decoder != NULL;
+}
+
+static void vixl_init(FILE *f) {
+    vixl_decoder = new Decoder();
+    vixl_disasm = new QEMUDisassembler(f);
+    vixl_decoder->AppendVisitor(vixl_disasm);
+}
+
+#define INSN_SIZE 4
+
+/* Disassemble ARM A64 instruction. This is our only entry
+ * point from QEMU's C code.
+ */
+int print_insn_arm_a64(uint64_t addr, disassemble_info *info)
+{
+    uint8_t bytes[INSN_SIZE];
+    uint32_t instr;
+    int status;
+
+    status = info->read_memory_func(addr, bytes, INSN_SIZE, info);
+    if (status != 0) {
+        info->memory_error_func(status, addr, info);
+        return -1;
+    }
+
+    if (!vixl_is_initialized()) {
+        vixl_init(info->stream);
+    }
+
+    instr = bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24;
+    vixl_decoder->Decode(reinterpret_cast<Instruction*>(&instr));
+
+    return INSN_SIZE;
+}
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -171,6 +171,7 @@ static void print_operand_value (char *buf, size_t bufsize, int hex, bfd_vma dis
 static void print_displacement (char *, bfd_vma);
 static void OP_E (int, int);
 static void OP_G (int, int);
+static void OP_vvvv (int, int);
 static bfd_vma get64 (void);
 static bfd_signed_vma get32 (void);
 static bfd_signed_vma get32s (void);
@@ -264,6 +265,9 @@ static int rex_used;
   current instruction.  */
 static int used_prefixes;

+/* The VEX.vvvv register, unencoded.  */
+static int vex_reg;
+
 /* Flags stored in PREFIXES.  */
 #define PREFIX_REPZ 1
 #define PREFIX_REPNZ 2
@@ -278,6 +282,10 @@ static int used_prefixes;
 #define PREFIX_ADDR 0x400
 #define PREFIX_FWAIT 0x800

+#define PREFIX_VEX_0F    0x1000
+#define PREFIX_VEX_0F38  0x2000
+#define PREFIX_VEX_0F3A  0x4000
+
 /* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
   to ADDR (exclusive) are valid.  Returns 1 for success, longjmps
   on error.  */
@@ -323,6 +331,7 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)

 #define XX { NULL, 0 }

+#define Bv { OP_vvvv, v_mode }
 #define Eb { OP_E, b_mode }
 #define Ev { OP_E, v_mode }
 #define Ed { OP_E, d_mode }
@@ -671,7 +680,8 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
 #define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
 #define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
 #define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
-
+#define PREGRP105 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 105 } }
+#define PREGRP106 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 106 } }

 #define X86_64_0  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
 #define X86_64_1  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 1 } }
@@ -1449,7 +1459,7 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -1473,7 +1483,7 @@ static const unsigned char threebyte_0x38_uses_REPNZ_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -1497,7 +1507,7 @@ static const unsigned char threebyte_0x38_uses_REPZ_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -2632,17 +2642,17 @@ static const struct dis386 prefix_user_table[][4] = {

  /* PREGRP87 */
  {
+    { "movbe",	{ Gv, Ev } },
    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Gv, Ev } },
    { "crc32",	{ Gdq, { CRC32_Fixup, b_mode } } },
  },

  /* PREGRP88 */
  {
+    { "movbe",	{ Ev, Gv } },
    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Ev, Gv } },
    { "crc32",	{ Gdq, { CRC32_Fixup, v_mode } } },
  },

@@ -2774,6 +2784,22 @@ static const struct dis386 prefix_user_table[][4] = {
    { "(bad)",	{ XX } },
  },

+  /* PREGRP105 */
+  {
+    { "andnS",	{ Gv, Bv, Ev } },
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP106 */
+  {
+    { "bextrS",	{ Gv, Ev, Bv } },
+    { "sarxS",	{ Gv, Ev, Bv } },
+    { "shlxS",	{ Gv, Ev, Bv } },
+    { "shrxS",	{ Gv, Ev, Bv } },
+  },
+
 };

 static const struct dis386 x86_64_table[][2] = {
@@ -3071,12 +3097,12 @@ static const struct dis386 three_byte_table[][256] = {
    /* f0 */
    { PREGRP87 },
    { PREGRP88 },
+    { PREGRP105 },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP106 },
    /* f8 */
    { "(bad)", { XX } },
    { "(bad)", { XX } },
@@ -3477,6 +3503,74 @@ ckprefix (void)
    }
 }

+static void
+ckvexprefix (void)
+{
+    int op, vex2, vex3, newrex = 0, newpfx = prefixes;
+
+    if (address_mode == mode_16bit) {
+        return;
+    }
+
+    fetch_data(the_info, codep + 1);
+    op = *codep;
+
+    if (op != 0xc4 && op != 0xc5) {
+        return;
+    }
+
+    fetch_data(the_info, codep + 2);
+    vex2 = codep[1];
+
+    if (address_mode == mode_32bit && (vex2 & 0xc0) != 0xc0) {
+        return;
+    }
+
+    if (op == 0xc4) {
+        /* Three byte VEX prefix.  */
+        fetch_data(the_info, codep + 3);
+        vex3 = codep[2];
+
+        newrex |= (vex2 & 0x80 ? 0 : REX_R);
+        newrex |= (vex2 & 0x40 ? 0 : REX_X);
+        newrex |= (vex2 & 0x20 ? 0 : REX_B);
+        newrex |= (vex3 & 0x80 ? REX_W : 0);
+        switch (vex2 & 0x1f) {      /* VEX.m-mmmm */
+        case 1:
+            newpfx |= PREFIX_VEX_0F;
+            break;
+        case 2:
+            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F38;
+            break;
+        case 3:
+            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F3A;
+            break;
+        }
+        vex2 = vex3;
+        codep += 3;
+    } else {
+        /* Two byte VEX prefix.  */
+        newrex |= (vex2 & 0x80 ? 0 : REX_R);
+        codep += 2;
+    }
+
+    vex_reg = (~vex2 >> 3) & 15;     /* VEX.vvvv */
+    switch (vex2 & 3) {              /* VEX.pp */
+    case 1:
+        newpfx |= PREFIX_DATA;     /* 0x66 */
+        break;
+    case 2:
+        newpfx |= PREFIX_REPZ;     /* 0xf3 */
+        break;
+    case 3:
+        newpfx |= PREFIX_REPNZ;    /* 0xf2 */
+        break;
+    }
+
+    rex = newrex;
+    prefixes = newpfx;
+}
+
 /* Return the name of the prefix byte PREF, or NULL if PREF is not a
   prefix byte.  */

@@ -3598,6 +3692,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
  const char *p;
  struct dis_private priv;
  unsigned char op;
+  unsigned char threebyte;

  if (info->mach == bfd_mach_x86_64_intel_syntax
      || info->mach == bfd_mach_x86_64)
@@ -3752,6 +3847,7 @@ print_insn (bfd_vma pc, disassemble_info *info)

  obufp = obuf;
  ckprefix ();
+  ckvexprefix ();

  insn_codep = codep;
  sizeflag = priv.orig_sizeflag;
@@ -3775,18 +3871,29 @@ print_insn (bfd_vma pc, disassemble_info *info)
    }

  op = 0;
+  if (prefixes & PREFIX_VEX_0F)
+    {
+      used_prefixes |= PREFIX_VEX_0F | PREFIX_VEX_0F38 | PREFIX_VEX_0F3A;
+      if (prefixes & PREFIX_VEX_0F38)
+        threebyte = 0x38;
+      else if (prefixes & PREFIX_VEX_0F3A)
+        threebyte = 0x3a;
+      else
+        threebyte = *codep++;
+      goto vex_opcode;
+    }
  if (*codep == 0x0f)
    {
-      unsigned char threebyte;
      fetch_data(info, codep + 2);
-      threebyte = *++codep;
+      threebyte = codep[1];
+      codep += 2;
+    vex_opcode:
      dp = &dis386_twobyte[threebyte];
-      need_modrm = twobyte_has_modrm[*codep];
-      uses_DATA_prefix = twobyte_uses_DATA_prefix[*codep];
-      uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[*codep];
-      uses_REPZ_prefix = twobyte_uses_REPZ_prefix[*codep];
-      uses_LOCK_prefix = (*codep & ~0x02) == 0x20;
-      codep++;
+      need_modrm = twobyte_has_modrm[threebyte];
+      uses_DATA_prefix = twobyte_uses_DATA_prefix[threebyte];
+      uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[threebyte];
+      uses_REPZ_prefix = twobyte_uses_REPZ_prefix[threebyte];
+      uses_LOCK_prefix = (threebyte & ~0x02) == 0x20;
      if (dp->name == NULL && dp->op[0].bytemode == IS_3BYTE_OPCODE)
 	{
          fetch_data(info, codep + 2);
@@ -5291,6 +5398,17 @@ OP_G (int bytemode, int sizeflag)
    }
 }

+static void
+OP_vvvv (int bytemode, int sizeflags)
+{
+    USED_REX (REX_W);
+    if (rex & REX_W) {
+        oappend(names64[vex_reg]);
+    } else {
+        oappend(names32[vex_reg]);
+    }
+}
+
 static bfd_vma
 get64 (void)
 {
--- a/disas/libvixl/LICENCE
+++ b/disas/libvixl/LICENCE
@@ -0,0 +1,30 @@
+LICENCE
+=======
+
+The software in this repository is covered by the following licence.
+
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/disas/libvixl/Makefile.objs
+++ b/disas/libvixl/Makefile.objs
@@ -0,0 +1,8 @@
+libvixl_OBJS = utils.o \
+               a64/instructions-a64.o \
+               a64/decoder-a64.o \
+               a64/disasm-a64.o
+
+$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS += -I$(SRC_PATH)/disas/libvixl
+
+common-obj-$(CONFIG_ARM_A64_DIS) += $(libvixl_OBJS)
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -0,0 +1,12 @@
+
+The code in this directory is a subset of libvixl:
+ https://github.com/armvixl/vixl
+(specifically, it is the set of files needed for disassembly only,
+taken from libvixl 1.1).
+Bugfixes should preferably be sent upstream initially.
+
+The disassembler does not currently support the entire A64 instruction
+set. Notably:
+ * No Advanced SIMD support.
+ * Limited support for system instructions.
+ * A few miscellaneous integer and floating point instructions are missing.
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
--- a/disas/libvixl/a64/constants-a64.h
+++ b/disas/libvixl/a64/constants-a64.h
--- a/disas/libvixl/a64/cpu-a64.h
+++ b/disas/libvixl/a64/cpu-a64.h
@@ -0,0 +1,56 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_A64_H
+#define VIXL_CPU_A64_H
+
+#include "globals.h"
+
+namespace vixl {
+
+class CPU {
+ public:
+  // Initialise CPU support.
+  static void SetUp();
+
+  // Ensures the data at a given address and with a given size is the same for
+  // the I and D caches. I and D caches are not automatically coherent on ARM
+  // so this operation is required before any dynamically generated code can
+  // safely run.
+  static void EnsureIAndDCacheCoherency(void *address, size_t length);
+
+ private:
+  // Return the content of the cache type register.
+  static uint32_t GetCacheType();
+
+  // I and D cache line size in bytes.
+  static unsigned icache_line_size_;
+  static unsigned dcache_line_size_;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_CPU_A64_H
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -0,0 +1,712 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "globals.h"
+#include "utils.h"
+#include "a64/decoder-a64.h"
+
+namespace vixl {
+// Top-level instruction decode function.
+void Decoder::Decode(Instruction *instr) {
+  if (instr->Bits(28, 27) == 0) {
+    VisitUnallocated(instr);
+  } else {
+    switch (instr->Bits(27, 24)) {
+      // 0:   PC relative addressing.
+      case 0x0: DecodePCRelAddressing(instr); break;
+
+      // 1:   Add/sub immediate.
+      case 0x1: DecodeAddSubImmediate(instr); break;
+
+      // A:   Logical shifted register.
+      //      Add/sub with carry.
+      //      Conditional compare register.
+      //      Conditional compare immediate.
+      //      Conditional select.
+      //      Data processing 1 source.
+      //      Data processing 2 source.
+      // B:   Add/sub shifted register.
+      //      Add/sub extended register.
+      //      Data processing 3 source.
+      case 0xA:
+      case 0xB: DecodeDataProcessing(instr); break;
+
+      // 2:   Logical immediate.
+      //      Move wide immediate.
+      case 0x2: DecodeLogical(instr); break;
+
+      // 3:   Bitfield.
+      //      Extract.
+      case 0x3: DecodeBitfieldExtract(instr); break;
+
+      // 4:   Unconditional branch immediate.
+      //      Exception generation.
+      //      Compare and branch immediate.
+      // 5:   Compare and branch immediate.
+      //      Conditional branch.
+      //      System.
+      // 6,7: Unconditional branch.
+      //      Test and branch immediate.
+      case 0x4:
+      case 0x5:
+      case 0x6:
+      case 0x7: DecodeBranchSystemException(instr); break;
+
+      // 8,9: Load/store register pair post-index.
+      //      Load register literal.
+      //      Load/store register unscaled immediate.
+      //      Load/store register immediate post-index.
+      //      Load/store register immediate pre-index.
+      //      Load/store register offset.
+      //      Load/store exclusive.
+      // C,D: Load/store register pair offset.
+      //      Load/store register pair pre-index.
+      //      Load/store register unsigned immediate.
+      //      Advanced SIMD.
+      case 0x8:
+      case 0x9:
+      case 0xC:
+      case 0xD: DecodeLoadStore(instr); break;
+
+      // E:   FP fixed point conversion.
+      //      FP integer conversion.
+      //      FP data processing 1 source.
+      //      FP compare.
+      //      FP immediate.
+      //      FP data processing 2 source.
+      //      FP conditional compare.
+      //      FP conditional select.
+      //      Advanced SIMD.
+      // F:   FP data processing 3 source.
+      //      Advanced SIMD.
+      case 0xE:
+      case 0xF: DecodeFP(instr); break;
+    }
+  }
+}
+
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+  visitors_.remove(new_visitor);
+  visitors_.push_front(new_visitor);
+}
+
+
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+  visitors_.remove(new_visitor);
+  visitors_.push_back(new_visitor);
+}
+
+
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+                                  DecoderVisitor* registered_visitor) {
+  visitors_.remove(new_visitor);
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      visitors_.insert(it, new_visitor);
+      return;
+    }
+  }
+  // We reached the end of the list. The last element must be
+  // registered_visitor.
+  ASSERT(*it == registered_visitor);
+  visitors_.insert(it, new_visitor);
+}
+
+
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+                                 DecoderVisitor* registered_visitor) {
+  visitors_.remove(new_visitor);
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      it++;
+      visitors_.insert(it, new_visitor);
+      return;
+    }
+  }
+  // We reached the end of the list. The last element must be
+  // registered_visitor.
+  ASSERT(*it == registered_visitor);
+  visitors_.push_back(new_visitor);
+}
+
+
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+  visitors_.remove(visitor);
+}
+
+
+void Decoder::DecodePCRelAddressing(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x0);
+  // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
+  // decode.
+  ASSERT(instr->Bit(28) == 0x1);
+  VisitPCRelAddressing(instr);
+}
+
+
+void Decoder::DecodeBranchSystemException(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0x4) ||
+         (instr->Bits(27, 24) == 0x5) ||
+         (instr->Bits(27, 24) == 0x6) ||
+         (instr->Bits(27, 24) == 0x7) );
+
+  switch (instr->Bits(31, 29)) {
+    case 0:
+    case 4: {
+      VisitUnconditionalBranch(instr);
+      break;
+    }
+    case 1:
+    case 5: {
+      if (instr->Bit(25) == 0) {
+        VisitCompareBranch(instr);
+      } else {
+        VisitTestBranch(instr);
+      }
+      break;
+    }
+    case 2: {
+      if (instr->Bit(25) == 0) {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Mask(0x01000010) == 0x00000010)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitConditionalBranch(instr);
+        }
+      } else {
+        VisitUnallocated(instr);
+      }
+      break;
+    }
+    case 6: {
+      if (instr->Bit(25) == 0) {
+        if (instr->Bit(24) == 0) {
+          if ((instr->Bits(4, 2) != 0) ||
+              (instr->Mask(0x00E0001D) == 0x00200001) ||
+              (instr->Mask(0x00E0001D) == 0x00400001) ||
+              (instr->Mask(0x00E0001E) == 0x00200002) ||
+              (instr->Mask(0x00E0001E) == 0x00400002) ||
+              (instr->Mask(0x00E0001C) == 0x00600000) ||
+              (instr->Mask(0x00E0001C) == 0x00800000) ||
+              (instr->Mask(0x00E0001F) == 0x00A00000) ||
+              (instr->Mask(0x00C0001C) == 0x00C00000)) {
+            VisitUnallocated(instr);
+          } else {
+            VisitException(instr);
+          }
+        } else {
+          if (instr->Bits(23, 22) == 0) {
+            const Instr masked_003FF0E0 = instr->Mask(0x003FF0E0);
+            if ((instr->Bits(21, 19) == 0x4) ||
+                (masked_003FF0E0 == 0x00033000) ||
+                (masked_003FF0E0 == 0x003FF020) ||
+                (masked_003FF0E0 == 0x003FF060) ||
+                (masked_003FF0E0 == 0x003FF0E0) ||
+                (instr->Mask(0x00388000) == 0x00008000) ||
+                (instr->Mask(0x0038E000) == 0x00000000) ||
+                (instr->Mask(0x0039E000) == 0x00002000) ||
+                (instr->Mask(0x003AE000) == 0x00002000) ||
+                (instr->Mask(0x003CE000) == 0x00042000) ||
+                (instr->Mask(0x003FFFC0) == 0x000320C0) ||
+                (instr->Mask(0x003FF100) == 0x00032100) ||
+                (instr->Mask(0x003FF200) == 0x00032200) ||
+                (instr->Mask(0x003FF400) == 0x00032400) ||
+                (instr->Mask(0x003FF800) == 0x00032800) ||
+                (instr->Mask(0x0038F000) == 0x00005000) ||
+                (instr->Mask(0x0038E000) == 0x00006000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitSystem(instr);
+            }
+          } else {
+            VisitUnallocated(instr);
+          }
+        }
+      } else {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Bits(20, 16) != 0x1F) ||
+            (instr->Bits(15, 10) != 0) ||
+            (instr->Bits(4, 0) != 0) ||
+            (instr->Bits(24, 21) == 0x3) ||
+            (instr->Bits(24, 22) == 0x3)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitUnconditionalBranchToRegister(instr);
+        }
+      }
+      break;
+    }
+    case 3:
+    case 7: {
+      VisitUnallocated(instr);
+      break;
+    }
+  }
+}
+
+
+void Decoder::DecodeLoadStore(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0x8) ||
+         (instr->Bits(27, 24) == 0x9) ||
+         (instr->Bits(27, 24) == 0xC) ||
+         (instr->Bits(27, 24) == 0xD) );
+
+  if (instr->Bit(24) == 0) {
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        if (instr->Bit(26) == 0) {
+          // TODO: VisitLoadStoreExclusive.
+          VisitUnimplemented(instr);
+        } else {
+          DecodeAdvSIMDLoadStore(instr);
+        }
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            if (instr->Mask(0xC4400000) == 0xC0400000) {
+              VisitUnallocated(instr);
+            } else {
+              VisitLoadStorePairNonTemporal(instr);
+            }
+          } else {
+            VisitLoadStorePairPostIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        if (instr->Mask(0xC4000000) == 0xC4000000) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadLiteral(instr);
+        }
+      } else {
+        if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+            (instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(21) == 0) {
+            switch (instr->Bits(11, 10)) {
+              case 0: {
+                VisitLoadStoreUnscaledOffset(instr);
+                break;
+              }
+              case 1: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePostIndex(instr);
+                }
+                break;
+              }
+              case 2: {
+                // TODO: VisitLoadStoreRegisterOffsetUnpriv.
+                VisitUnimplemented(instr);
+                break;
+              }
+              case 3: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePreIndex(instr);
+                }
+                break;
+              }
+            }
+          } else {
+            if (instr->Bits(11, 10) == 0x2) {
+              if (instr->Bit(14) == 0) {
+                VisitUnallocated(instr);
+              } else {
+                VisitLoadStoreRegisterOffset(instr);
+              }
+            } else {
+              VisitUnallocated(instr);
+            }
+          }
+        }
+      }
+    }
+  } else {
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            VisitLoadStorePairOffset(instr);
+          } else {
+            VisitLoadStorePairPreIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+            (instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadStoreUnsignedOffset(instr);
+        }
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeLogical(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x2);
+
+  if (instr->Mask(0x80400000) == 0x00400000) {
+    VisitUnallocated(instr);
+  } else {
+    if (instr->Bit(23) == 0) {
+      VisitLogicalImmediate(instr);
+    } else {
+      if (instr->Bits(30, 29) == 0x1) {
+        VisitUnallocated(instr);
+      } else {
+        VisitMoveWideImmediate(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeBitfieldExtract(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x3);
+
+  if ((instr->Mask(0x80400000) == 0x80000000) ||
+      (instr->Mask(0x80400000) == 0x00400000) ||
+      (instr->Mask(0x80008000) == 0x00008000)) {
+    VisitUnallocated(instr);
+  } else if (instr->Bit(23) == 0) {
+    if ((instr->Mask(0x80200000) == 0x00200000) ||
+        (instr->Mask(0x60000000) == 0x60000000)) {
+      VisitUnallocated(instr);
+    } else {
+      VisitBitfield(instr);
+    }
+  } else {
+    if ((instr->Mask(0x60200000) == 0x00200000) ||
+        (instr->Mask(0x60000000) != 0x00000000)) {
+      VisitUnallocated(instr);
+    } else {
+      VisitExtract(instr);
+    }
+  }
+}
+
+
+void Decoder::DecodeAddSubImmediate(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x1);
+  if (instr->Bit(23) == 1) {
+    VisitUnallocated(instr);
+  } else {
+    VisitAddSubImmediate(instr);
+  }
+}
+
+
+void Decoder::DecodeDataProcessing(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0xA) ||
+         (instr->Bits(27, 24) == 0xB) );
+
+  if (instr->Bit(24) == 0) {
+    if (instr->Bit(28) == 0) {
+      if (instr->Mask(0x80008000) == 0x00008000) {
+        VisitUnallocated(instr);
+      } else {
+        VisitLogicalShifted(instr);
+      }
+    } else {
+      switch (instr->Bits(23, 21)) {
+        case 0: {
+          if (instr->Mask(0x0000FC00) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitAddSubWithCarry(instr);
+          }
+          break;
+        }
+        case 2: {
+          if ((instr->Bit(29) == 0) ||
+              (instr->Mask(0x00000410) != 0)) {
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(11) == 0) {
+              VisitConditionalCompareRegister(instr);
+            } else {
+              VisitConditionalCompareImmediate(instr);
+            }
+          }
+          break;
+        }
+        case 4: {
+          if (instr->Mask(0x20000800) != 0x00000000) {
+            VisitUnallocated(instr);
+          } else {
+            VisitConditionalSelect(instr);
+          }
+          break;
+        }
+        case 6: {
+          if (instr->Bit(29) == 0x1) {
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(30) == 0) {
+              if ((instr->Bit(15) == 0x1) ||
+                  (instr->Bits(15, 11) == 0) ||
+                  (instr->Bits(15, 12) == 0x1) ||
+                  (instr->Bits(15, 12) == 0x3) ||
+                  (instr->Bits(15, 13) == 0x3) ||
+                  (instr->Mask(0x8000EC00) == 0x00004C00) ||
+                  (instr->Mask(0x8000E800) == 0x80004000) ||
+                  (instr->Mask(0x8000E400) == 0x80004000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing2Source(instr);
+              }
+            } else {
+              if ((instr->Bit(13) == 1) ||
+                  (instr->Bits(20, 16) != 0) ||
+                  (instr->Bits(15, 14) != 0) ||
+                  (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+                  (instr->Mask(0x201FF800) == 0x00001800)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing1Source(instr);
+              }
+            }
+            break;
+          }
+        }
+        case 1:
+        case 3:
+        case 5:
+        case 7: VisitUnallocated(instr); break;
+      }
+    }
+  } else {
+    if (instr->Bit(28) == 0) {
+     if (instr->Bit(21) == 0) {
+        if ((instr->Bits(23, 22) == 0x3) ||
+            (instr->Mask(0x80008000) == 0x00008000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubShifted(instr);
+        }
+      } else {
+        if ((instr->Mask(0x00C00000) != 0x00000000) ||
+            (instr->Mask(0x00001400) == 0x00001400) ||
+            (instr->Mask(0x00001800) == 0x00001800)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubExtended(instr);
+        }
+      }
+    } else {
+      if ((instr->Bit(30) == 0x1) ||
+          (instr->Bits(30, 29) == 0x1) ||
+          (instr->Mask(0xE0600000) == 0x00200000) ||
+          (instr->Mask(0xE0608000) == 0x00400000) ||
+          (instr->Mask(0x60608000) == 0x00408000) ||
+          (instr->Mask(0x60E00000) == 0x00E00000) ||
+          (instr->Mask(0x60E00000) == 0x00800000) ||
+          (instr->Mask(0x60E00000) == 0x00600000)) {
+        VisitUnallocated(instr);
+      } else {
+        VisitDataProcessing3Source(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeFP(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0xE) ||
+         (instr->Bits(27, 24) == 0xF) );
+
+  if (instr->Bit(28) == 0) {
+    DecodeAdvSIMDDataProcessing(instr);
+  } else {
+    if (instr->Bit(29) == 1) {
+      VisitUnallocated(instr);
+    } else {
+      if (instr->Bits(31, 30) == 0x3) {
+        VisitUnallocated(instr);
+      } else if (instr->Bits(31, 30) == 0x1) {
+        DecodeAdvSIMDDataProcessing(instr);
+      } else {
+        if (instr->Bit(24) == 0) {
+          if (instr->Bit(21) == 0) {
+            if ((instr->Bit(23) == 1) ||
+                (instr->Bit(18) == 1) ||
+                (instr->Mask(0x80008000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x000A0000) ||
+                (instr->Mask(0x00160000) == 0x00000000) ||
+                (instr->Mask(0x00160000) == 0x00120000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitFPFixedPointConvert(instr);
+            }
+          } else {
+            if (instr->Bits(15, 10) == 32) {
+              VisitUnallocated(instr);
+            } else if (instr->Bits(15, 10) == 0) {
+              if ((instr->Bits(23, 22) == 0x3) ||
+                  (instr->Mask(0x000E0000) == 0x000A0000) ||
+                  (instr->Mask(0x000E0000) == 0x000C0000) ||
+                  (instr->Mask(0x00160000) == 0x00120000) ||
+                  (instr->Mask(0x00160000) == 0x00140000) ||
+                  (instr->Mask(0x20C40000) == 0x00800000) ||
+                  (instr->Mask(0x20C60000) == 0x00840000) ||
+                  (instr->Mask(0xA0C60000) == 0x80060000) ||
+                  (instr->Mask(0xA0C60000) == 0x00860000) ||
+                  (instr->Mask(0xA0C60000) == 0x00460000) ||
+                  (instr->Mask(0xA0CE0000) == 0x80860000) ||
+                  (instr->Mask(0xA0CE0000) == 0x804E0000) ||
+                  (instr->Mask(0xA0CE0000) == 0x000E0000) ||
+                  (instr->Mask(0xA0D60000) == 0x00160000) ||
+                  (instr->Mask(0xA0D60000) == 0x80560000) ||
+                  (instr->Mask(0xA0D60000) == 0x80960000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPIntegerConvert(instr);
+              }
+            } else if (instr->Bits(14, 10) == 16) {
+              const Instr masked_A0DF8000 = instr->Mask(0xA0DF8000);
+              if ((instr->Mask(0x80180000) != 0) ||
+                  (masked_A0DF8000 == 0x00020000) ||
+                  (masked_A0DF8000 == 0x00030000) ||
+                  (masked_A0DF8000 == 0x00068000) ||
+                  (masked_A0DF8000 == 0x00428000) ||
+                  (masked_A0DF8000 == 0x00430000) ||
+                  (masked_A0DF8000 == 0x00468000) ||
+                  (instr->Mask(0xA0D80000) == 0x00800000) ||
+                  (instr->Mask(0xA0DE0000) == 0x00C00000) ||
+                  (instr->Mask(0xA0DF0000) == 0x00C30000) ||
+                  (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPDataProcessing1Source(instr);
+              }
+            } else if (instr->Bits(13, 10) == 8) {
+              if ((instr->Bits(15, 14) != 0) ||
+                  (instr->Bits(2, 0) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPCompare(instr);
+              }
+            } else if (instr->Bits(12, 10) == 4) {
+              if ((instr->Bits(9, 5) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPImmediate(instr);
+              }
+            } else {
+              if (instr->Mask(0x80800000) != 0x00000000) {
+                VisitUnallocated(instr);
+              } else {
+                switch (instr->Bits(11, 10)) {
+                  case 1: {
+                    VisitFPConditionalCompare(instr);
+                    break;
+                  }
+                  case 2: {
+                    if ((instr->Bits(15, 14) == 0x3) ||
+                        (instr->Mask(0x00009000) == 0x00009000) ||
+                        (instr->Mask(0x0000A000) == 0x0000A000)) {
+                      VisitUnallocated(instr);
+                    } else {
+                      VisitFPDataProcessing2Source(instr);
+                    }
+                    break;
+                  }
+                  case 3: {
+                    VisitFPConditionalSelect(instr);
+                    break;
+                  }
+                  default: UNREACHABLE();
+                }
+              }
+            }
+          }
+        } else {
+          // Bit 30 == 1 has been handled earlier.
+          ASSERT(instr->Bit(30) == 0);
+          if (instr->Mask(0xA0800000) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitFPDataProcessing3Source(instr);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
+  // TODO: Implement Advanced SIMD load/store instruction decode.
+  ASSERT(instr->Bits(29, 25) == 0x6);
+  VisitUnimplemented(instr);
+}
+
+
+void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
+  // TODO: Implement Advanced SIMD data processing instruction decode.
+  ASSERT(instr->Bits(27, 25) == 0x7);
+  VisitUnimplemented(instr);
+}
+
+
+#define DEFINE_VISITOR_CALLERS(A)                                              \
+  void Decoder::Visit##A(Instruction *instr) {                                 \
+    ASSERT(instr->Mask(A##FMask) == A##Fixed);                                 \
+    std::list<DecoderVisitor*>::iterator it;                                   \
+    for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
+      (*it)->Visit##A(instr);                                                  \
+    }                                                                          \
+  }
+VISITOR_LIST(DEFINE_VISITOR_CALLERS)
+#undef DEFINE_VISITOR_CALLERS
+}  // namespace vixl
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -0,0 +1,198 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DECODER_A64_H_
+#define VIXL_A64_DECODER_A64_H_
+
+#include <list>
+
+#include "globals.h"
+#include "a64/instructions-a64.h"
+
+
+// List macro containing all visitors needed by the decoder class.
+
+#define VISITOR_LIST(V)             \
+  V(PCRelAddressing)                \
+  V(AddSubImmediate)                \
+  V(LogicalImmediate)               \
+  V(MoveWideImmediate)              \
+  V(Bitfield)                       \
+  V(Extract)                        \
+  V(UnconditionalBranch)            \
+  V(UnconditionalBranchToRegister)  \
+  V(CompareBranch)                  \
+  V(TestBranch)                     \
+  V(ConditionalBranch)              \
+  V(System)                         \
+  V(Exception)                      \
+  V(LoadStorePairPostIndex)         \
+  V(LoadStorePairOffset)            \
+  V(LoadStorePairPreIndex)          \
+  V(LoadStorePairNonTemporal)       \
+  V(LoadLiteral)                    \
+  V(LoadStoreUnscaledOffset)        \
+  V(LoadStorePostIndex)             \
+  V(LoadStorePreIndex)              \
+  V(LoadStoreRegisterOffset)        \
+  V(LoadStoreUnsignedOffset)        \
+  V(LogicalShifted)                 \
+  V(AddSubShifted)                  \
+  V(AddSubExtended)                 \
+  V(AddSubWithCarry)                \
+  V(ConditionalCompareRegister)     \
+  V(ConditionalCompareImmediate)    \
+  V(ConditionalSelect)              \
+  V(DataProcessing1Source)          \
+  V(DataProcessing2Source)          \
+  V(DataProcessing3Source)          \
+  V(FPCompare)                      \
+  V(FPConditionalCompare)           \
+  V(FPConditionalSelect)            \
+  V(FPImmediate)                    \
+  V(FPDataProcessing1Source)        \
+  V(FPDataProcessing2Source)        \
+  V(FPDataProcessing3Source)        \
+  V(FPIntegerConvert)               \
+  V(FPFixedPointConvert)            \
+  V(Unallocated)                    \
+  V(Unimplemented)
+
+namespace vixl {
+
+// The Visitor interface. Disassembler and simulator (and other tools)
+// must provide implementations for all of these functions.
+class DecoderVisitor {
+ public:
+  #define DECLARE(A) virtual void Visit##A(Instruction* instr) = 0;
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+  virtual ~DecoderVisitor() {}
+
+ private:
+  // Visitors are registered in a list.
+  std::list<DecoderVisitor*> visitors_;
+
+  friend class Decoder;
+};
+
+
+class Decoder: public DecoderVisitor {
+ public:
+  Decoder() {}
+
+  // Top-level instruction decoder function. Decodes an instruction and calls
+  // the visitor functions registered with the Decoder class.
+  void Decode(Instruction *instr);
+
+  // Register a new visitor class with the decoder.
+  // Decode() will call the corresponding visitor method from all registered
+  // visitor classes when decoding reaches the leaf node of the instruction
+  // decode tree.
+  // Visitors are called in the order.
+  // A visitor can only be registered once.
+  // Registering an already registered visitor will update its position.
+  //
+  //   d.AppendVisitor(V1);
+  //   d.AppendVisitor(V2);
+  //   d.PrependVisitor(V2);            // Move V2 at the start of the list.
+  //   d.InsertVisitorBefore(V3, V2);
+  //   d.AppendVisitor(V4);
+  //   d.AppendVisitor(V4);             // No effect.
+  //
+  //   d.Decode(i);
+  //
+  // will call in order visitor methods in V3, V2, V1, V4.
+  void AppendVisitor(DecoderVisitor* visitor);
+  void PrependVisitor(DecoderVisitor* visitor);
+  void InsertVisitorBefore(DecoderVisitor* new_visitor,
+                           DecoderVisitor* registered_visitor);
+  void InsertVisitorAfter(DecoderVisitor* new_visitor,
+                          DecoderVisitor* registered_visitor);
+
+  // Remove a previously registered visitor class from the list of visitors
+  // stored by the decoder.
+  void RemoveVisitor(DecoderVisitor* visitor);
+
+  #define DECLARE(A) void Visit##A(Instruction* instr);
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+ private:
+  // Decode the PC relative addressing instruction, and call the corresponding
+  // visitors.
+  // On entry, instruction bits 27:24 = 0x0.
+  void DecodePCRelAddressing(Instruction* instr);
+
+  // Decode the add/subtract immediate instruction, and call the correspoding
+  // visitors.
+  // On entry, instruction bits 27:24 = 0x1.
+  void DecodeAddSubImmediate(Instruction* instr);
+
+  // Decode the branch, system command, and exception generation parts of
+  // the instruction tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
+  void DecodeBranchSystemException(Instruction* instr);
+
+  // Decode the load and store parts of the instruction tree, and call
+  // the corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
+  void DecodeLoadStore(Instruction* instr);
+
+  // Decode the logical immediate and move wide immediate parts of the
+  // instruction tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = 0x2.
+  void DecodeLogical(Instruction* instr);
+
+  // Decode the bitfield and extraction parts of the instruction tree,
+  // and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = 0x3.
+  void DecodeBitfieldExtract(Instruction* instr);
+
+  // Decode the data processing parts of the instruction tree, and call the
+  // corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
+  void DecodeDataProcessing(Instruction* instr);
+
+  // Decode the floating point parts of the instruction tree, and call the
+  // corresponding visitors.
+  // On entry, instruction bits 27:24 = {0xE, 0xF}.
+  void DecodeFP(Instruction* instr);
+
+  // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
+  // and call the corresponding visitors.
+  // On entry, instruction bits 29:25 = 0x6.
+  void DecodeAdvSIMDLoadStore(Instruction* instr);
+
+  // Decode the Advanced SIMD (NEON) data processing part of the instruction
+  // tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:25 = 0x7.
+  void DecodeAdvSIMDDataProcessing(Instruction* instr);
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_DECODER_A64_H_
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -0,0 +1,109 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DISASM_A64_H
+#define VIXL_A64_DISASM_A64_H
+
+#include "globals.h"
+#include "utils.h"
+#include "instructions-a64.h"
+#include "decoder-a64.h"
+
+namespace vixl {
+
+class Disassembler: public DecoderVisitor {
+ public:
+  Disassembler();
+  Disassembler(char* text_buffer, int buffer_size);
+  virtual ~Disassembler();
+  char* GetOutput();
+
+  // Declare all Visitor functions.
+  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+ protected:
+  virtual void ProcessOutput(Instruction* instr);
+
+ private:
+  void Format(Instruction* instr, const char* mnemonic, const char* format);
+  void Substitute(Instruction* instr, const char* string);
+  int SubstituteField(Instruction* instr, const char* format);
+  int SubstituteRegisterField(Instruction* instr, const char* format);
+  int SubstituteImmediateField(Instruction* instr, const char* format);
+  int SubstituteLiteralField(Instruction* instr, const char* format);
+  int SubstituteBitfieldImmediateField(Instruction* instr, const char* format);
+  int SubstituteShiftField(Instruction* instr, const char* format);
+  int SubstituteExtendField(Instruction* instr, const char* format);
+  int SubstituteConditionField(Instruction* instr, const char* format);
+  int SubstitutePCRelAddressField(Instruction* instr, const char* format);
+  int SubstituteBranchTargetField(Instruction* instr, const char* format);
+  int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
+  int SubstitutePrefetchField(Instruction* instr, const char* format);
+
+  inline bool RdIsZROrSP(Instruction* instr) const {
+    return (instr->Rd() == kZeroRegCode);
+  }
+
+  inline bool RnIsZROrSP(Instruction* instr) const {
+    return (instr->Rn() == kZeroRegCode);
+  }
+
+  inline bool RmIsZROrSP(Instruction* instr) const {
+    return (instr->Rm() == kZeroRegCode);
+  }
+
+  inline bool RaIsZROrSP(Instruction* instr) const {
+    return (instr->Ra() == kZeroRegCode);
+  }
+
+  bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
+
+  void ResetOutput();
+  void AppendToOutput(const char* string, ...);
+
+  char* buffer_;
+  uint32_t buffer_pos_;
+  uint32_t buffer_size_;
+  bool own_buffer_;
+};
+
+
+class PrintDisassembler: public Disassembler {
+ public:
+  explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
+  ~PrintDisassembler() { }
+
+ protected:
+  virtual void ProcessOutput(Instruction* instr);
+
+ private:
+  FILE *stream_;
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_DISASM_A64_H
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -0,0 +1,238 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "a64/instructions-a64.h"
+#include "a64/assembler-a64.h"
+
+namespace vixl {
+
+
+static uint64_t RotateRight(uint64_t value,
+                            unsigned int rotate,
+                            unsigned int width) {
+  ASSERT(width <= 64);
+  rotate &= 63;
+  return ((value & ((1UL << rotate) - 1UL)) << (width - rotate)) |
+         (value >> rotate);
+}
+
+
+static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
+                                    uint64_t value,
+                                    unsigned width) {
+  ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
+         (width == 32));
+  ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+  uint64_t result = value & ((1UL << width) - 1UL);
+  for (unsigned i = width; i < reg_size; i *= 2) {
+    result |= (result << i);
+  }
+  return result;
+}
+
+
+// Logical immediates can't encode zero, so a return value of zero is used to
+// indicate a failure case. Specifically, where the constraints on imm_s are
+// not met.
+uint64_t Instruction::ImmLogical() {
+  unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
+  int64_t n = BitN();
+  int64_t imm_s = ImmSetBits();
+  int64_t imm_r = ImmRotate();
+
+  // An integer is constructed from the n, imm_s and imm_r bits according to
+  // the following table:
+  //
+  //  N   imms    immr    size        S             R
+  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
+  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
+  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
+  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
+  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
+  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+  //
+
+  if (n == 1) {
+    if (imm_s == 0x3F) {
+      return 0;
+    }
+    uint64_t bits = (1UL << (imm_s + 1)) - 1;
+    return RotateRight(bits, imm_r, 64);
+  } else {
+    if ((imm_s >> 1) == 0x1F) {
+      return 0;
+    }
+    for (int width = 0x20; width >= 0x2; width >>= 1) {
+      if ((imm_s & width) == 0) {
+        int mask = width - 1;
+        if ((imm_s & mask) == mask) {
+          return 0;
+        }
+        uint64_t bits = (1UL << ((imm_s & mask) + 1)) - 1;
+        return RepeatBitsAcrossReg(reg_size,
+                                   RotateRight(bits, imm_r & mask, width),
+                                   width);
+      }
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
+float Instruction::ImmFP32() {
+  //  ImmFP: abcdefgh (8 bits)
+  // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+  // where B is b ^ 1
+  uint32_t bits = ImmFP();
+  uint32_t bit7 = (bits >> 7) & 0x1;
+  uint32_t bit6 = (bits >> 6) & 0x1;
+  uint32_t bit5_to_0 = bits & 0x3f;
+  uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
+
+  return rawbits_to_float(result);
+}
+
+
+double Instruction::ImmFP64() {
+  //  ImmFP: abcdefgh (8 bits)
+  // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+  //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+  // where B is b ^ 1
+  uint32_t bits = ImmFP();
+  uint64_t bit7 = (bits >> 7) & 0x1;
+  uint64_t bit6 = (bits >> 6) & 0x1;
+  uint64_t bit5_to_0 = bits & 0x3f;
+  uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
+
+  return rawbits_to_double(result);
+}
+
+
+LSDataSize CalcLSPairDataSize(LoadStorePairOp op) {
+  switch (op) {
+    case STP_x:
+    case LDP_x:
+    case STP_d:
+    case LDP_d: return LSDoubleWord;
+    default: return LSWord;
+  }
+}
+
+
+Instruction* Instruction::ImmPCOffsetTarget() {
+  ptrdiff_t offset;
+  if (IsPCRelAddressing()) {
+    // PC-relative addressing. Only ADR is supported.
+    offset = ImmPCRel();
+  } else {
+    // All PC-relative branches.
+    ASSERT(BranchType() != UnknownBranchType);
+    // Relative branch offsets are instruction-size-aligned.
+    offset = ImmBranch() << kInstructionSizeLog2;
+  }
+  return this + offset;
+}
+
+
+inline int Instruction::ImmBranch() const {
+  switch (BranchType()) {
+    case CondBranchType: return ImmCondBranch();
+    case UncondBranchType: return ImmUncondBranch();
+    case CompareBranchType: return ImmCmpBranch();
+    case TestBranchType: return ImmTestBranch();
+    default: UNREACHABLE();
+  }
+  return 0;
+}
+
+
+void Instruction::SetImmPCOffsetTarget(Instruction* target) {
+  if (IsPCRelAddressing()) {
+    SetPCRelImmTarget(target);
+  } else {
+    SetBranchImmTarget(target);
+  }
+}
+
+
+void Instruction::SetPCRelImmTarget(Instruction* target) {
+  // ADRP is not supported, so 'this' must point to an ADR instruction.
+  ASSERT(Mask(PCRelAddressingMask) == ADR);
+
+  Instr imm = Assembler::ImmPCRelAddress(target - this);
+
+  SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
+}
+
+
+void Instruction::SetBranchImmTarget(Instruction* target) {
+  ASSERT(((target - this) & 3) == 0);
+  Instr branch_imm = 0;
+  uint32_t imm_mask = 0;
+  int offset = (target - this) >> kInstructionSizeLog2;
+  switch (BranchType()) {
+    case CondBranchType: {
+      branch_imm = Assembler::ImmCondBranch(offset);
+      imm_mask = ImmCondBranch_mask;
+      break;
+    }
+    case UncondBranchType: {
+      branch_imm = Assembler::ImmUncondBranch(offset);
+      imm_mask = ImmUncondBranch_mask;
+      break;
+    }
+    case CompareBranchType: {
+      branch_imm = Assembler::ImmCmpBranch(offset);
+      imm_mask = ImmCmpBranch_mask;
+      break;
+    }
+    case TestBranchType: {
+      branch_imm = Assembler::ImmTestBranch(offset);
+      imm_mask = ImmTestBranch_mask;
+      break;
+    }
+    default: UNREACHABLE();
+  }
+  SetInstructionBits(Mask(~imm_mask) | branch_imm);
+}
+
+
+void Instruction::SetImmLLiteral(Instruction* source) {
+  ASSERT(((source - this) & 3) == 0);
+  int offset = (source - this) >> kLiteralEntrySizeLog2;
+  Instr imm = Assembler::ImmLLiteral(offset);
+  Instr mask = ImmLLiteral_mask;
+
+  SetInstructionBits(Mask(~mask) | imm);
+}
+}  // namespace vixl
+
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -0,0 +1,344 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUCTIONS_A64_H_
+#define VIXL_A64_INSTRUCTIONS_A64_H_
+
+#include "globals.h"
+#include "utils.h"
+#include "a64/constants-a64.h"
+
+namespace vixl {
+// ISA constants. --------------------------------------------------------------
+
+typedef uint32_t Instr;
+const unsigned kInstructionSize = 4;
+const unsigned kInstructionSizeLog2 = 2;
+const unsigned kLiteralEntrySize = 4;
+const unsigned kLiteralEntrySizeLog2 = 2;
+const unsigned kMaxLoadLiteralRange = 1 * MBytes;
+
+const unsigned kWRegSize = 32;
+const unsigned kWRegSizeLog2 = 5;
+const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kXRegSize = 64;
+const unsigned kXRegSizeLog2 = 6;
+const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kSRegSize = 32;
+const unsigned kSRegSizeLog2 = 5;
+const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kDRegSize = 64;
+const unsigned kDRegSizeLog2 = 6;
+const unsigned kDRegSizeInBytes = kDRegSize / 8;
+const int64_t kWRegMask = 0x00000000ffffffffLL;
+const int64_t kXRegMask = 0xffffffffffffffffLL;
+const int64_t kSRegMask = 0x00000000ffffffffLL;
+const int64_t kDRegMask = 0xffffffffffffffffLL;
+const int64_t kXSignMask = 0x1LL << 63;
+const int64_t kWSignMask = 0x1LL << 31;
+const int64_t kByteMask = 0xffL;
+const int64_t kHalfWordMask = 0xffffL;
+const int64_t kWordMask = 0xffffffffLL;
+const uint64_t kXMaxUInt = 0xffffffffffffffffULL;
+const uint64_t kWMaxUInt = 0xffffffffULL;
+const int64_t kXMaxInt = 0x7fffffffffffffffLL;
+const int64_t kXMinInt = 0x8000000000000000LL;
+const int32_t kWMaxInt = 0x7fffffff;
+const int32_t kWMinInt = 0x80000000;
+const unsigned kLinkRegCode = 30;
+const unsigned kZeroRegCode = 31;
+const unsigned kSPRegInternalCode = 63;
+const unsigned kRegCodeMask = 0x1f;
+
+// AArch64 floating-point specifics. These match IEEE-754.
+const unsigned kDoubleMantissaBits = 52;
+const unsigned kDoubleExponentBits = 11;
+const unsigned kFloatMantissaBits = 23;
+const unsigned kFloatExponentBits = 8;
+
+const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
+const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
+const double kFP64PositiveInfinity = rawbits_to_double(0x7ff0000000000000ULL);
+const double kFP64NegativeInfinity = rawbits_to_double(0xfff0000000000000ULL);
+
+// This value is a signalling NaN as both a double and as a float (taking the
+// least-significant word).
+static const double kFP64SignallingNaN = rawbits_to_double(0x7ff000007f800001ULL);
+static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
+
+// A similar value, but as a quiet NaN.
+static const double kFP64QuietNaN = rawbits_to_double(0x7ff800007fc00001ULL);
+static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
+
+enum LSDataSize {
+  LSByte        = 0,
+  LSHalfword    = 1,
+  LSWord        = 2,
+  LSDoubleWord  = 3
+};
+
+LSDataSize CalcLSPairDataSize(LoadStorePairOp op);
+
+enum ImmBranchType {
+  UnknownBranchType = 0,
+  CondBranchType    = 1,
+  UncondBranchType  = 2,
+  CompareBranchType = 3,
+  TestBranchType    = 4
+};
+
+enum AddrMode {
+  Offset,
+  PreIndex,
+  PostIndex
+};
+
+enum FPRounding {
+  // The first four values are encodable directly by FPCR<RMode>.
+  FPTieEven = 0x0,
+  FPPositiveInfinity = 0x1,
+  FPNegativeInfinity = 0x2,
+  FPZero = 0x3,
+
+  // The final rounding mode is only available when explicitly specified by the
+  // instruction (such as with fcvta). It cannot be set in FPCR.
+  FPTieAway
+};
+
+enum Reg31Mode {
+  Reg31IsStackPointer,
+  Reg31IsZeroRegister
+};
+
+// Instructions. ---------------------------------------------------------------
+
+class Instruction {
+ public:
+  inline Instr InstructionBits() const {
+    return *(reinterpret_cast<const Instr*>(this));
+  }
+
+  inline void SetInstructionBits(Instr new_instr) {
+    *(reinterpret_cast<Instr*>(this)) = new_instr;
+  }
+
+  inline int Bit(int pos) const {
+    return (InstructionBits() >> pos) & 1;
+  }
+
+  inline uint32_t Bits(int msb, int lsb) const {
+    return unsigned_bitextract_32(msb, lsb, InstructionBits());
+  }
+
+  inline int32_t SignedBits(int msb, int lsb) const {
+    int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+    return signed_bitextract_32(msb, lsb, bits);
+  }
+
+  inline Instr Mask(uint32_t mask) const {
+    return InstructionBits() & mask;
+  }
+
+  #define DEFINE_GETTER(Name, HighBit, LowBit, Func)             \
+  inline int64_t Name() const { return Func(HighBit, LowBit); }
+  INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+  #undef DEFINE_GETTER
+
+  // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+  // formed from ImmPCRelLo and ImmPCRelHi.
+  int ImmPCRel() const {
+    int const offset = ((ImmPCRelHi() << ImmPCRelLo_width) | ImmPCRelLo());
+    int const width = ImmPCRelLo_width + ImmPCRelHi_width;
+    return signed_bitextract_32(width-1, 0, offset);
+  }
+
+  uint64_t ImmLogical();
+  float ImmFP32();
+  double ImmFP64();
+
+  inline LSDataSize SizeLSPair() const {
+    return CalcLSPairDataSize(
+             static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
+  }
+
+  // Helpers.
+  inline bool IsCondBranchImm() const {
+    return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
+  }
+
+  inline bool IsUncondBranchImm() const {
+    return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
+  }
+
+  inline bool IsCompareBranch() const {
+    return Mask(CompareBranchFMask) == CompareBranchFixed;
+  }
+
+  inline bool IsTestBranch() const {
+    return Mask(TestBranchFMask) == TestBranchFixed;
+  }
+
+  inline bool IsPCRelAddressing() const {
+    return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
+  }
+
+  inline bool IsLogicalImmediate() const {
+    return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
+  }
+
+  inline bool IsAddSubImmediate() const {
+    return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
+  }
+
+  inline bool IsAddSubExtended() const {
+    return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
+  }
+
+  inline bool IsLoadOrStore() const {
+    return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
+  }
+
+  inline bool IsMovn() const {
+    return (Mask(MoveWideImmediateMask) == MOVN_x) ||
+           (Mask(MoveWideImmediateMask) == MOVN_w);
+  }
+
+  // Indicate whether Rd can be the stack pointer or the zero register. This
+  // does not check that the instruction actually has an Rd field.
+  inline Reg31Mode RdMode() const {
+    // The following instructions use sp or wsp as Rd:
+    //  Add/sub (immediate) when not setting the flags.
+    //  Add/sub (extended) when not setting the flags.
+    //  Logical (immediate) when not setting the flags.
+    // Otherwise, r31 is the zero register.
+    if (IsAddSubImmediate() || IsAddSubExtended()) {
+      if (Mask(AddSubSetFlagsBit)) {
+        return Reg31IsZeroRegister;
+      } else {
+        return Reg31IsStackPointer;
+      }
+    }
+    if (IsLogicalImmediate()) {
+      // Of the logical (immediate) instructions, only ANDS (and its aliases)
+      // can set the flags. The others can all write into sp.
+      // Note that some logical operations are not available to
+      // immediate-operand instructions, so we have to combine two masks here.
+      if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) {
+        return Reg31IsZeroRegister;
+      } else {
+        return Reg31IsStackPointer;
+      }
+    }
+    return Reg31IsZeroRegister;
+  }
+
+  // Indicate whether Rn can be the stack pointer or the zero register. This
+  // does not check that the instruction actually has an Rn field.
+  inline Reg31Mode RnMode() const {
+    // The following instructions use sp or wsp as Rn:
+    //  All loads and stores.
+    //  Add/sub (immediate).
+    //  Add/sub (extended).
+    // Otherwise, r31 is the zero register.
+    if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) {
+      return Reg31IsStackPointer;
+    }
+    return Reg31IsZeroRegister;
+  }
+
+  inline ImmBranchType BranchType() const {
+    if (IsCondBranchImm()) {
+      return CondBranchType;
+    } else if (IsUncondBranchImm()) {
+      return UncondBranchType;
+    } else if (IsCompareBranch()) {
+      return CompareBranchType;
+    } else if (IsTestBranch()) {
+      return TestBranchType;
+    } else {
+      return UnknownBranchType;
+    }
+  }
+
+  // Find the target of this instruction. 'this' may be a branch or a
+  // PC-relative addressing instruction.
+  Instruction* ImmPCOffsetTarget();
+
+  // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
+  // a PC-relative addressing instruction.
+  void SetImmPCOffsetTarget(Instruction* target);
+  // Patch a literal load instruction to load from 'source'.
+  void SetImmLLiteral(Instruction* source);
+
+  inline uint8_t* LiteralAddress() {
+    int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
+    return reinterpret_cast<uint8_t*>(this) + offset;
+  }
+
+  inline uint32_t Literal32() {
+    uint32_t literal;
+    memcpy(&literal, LiteralAddress(), sizeof(literal));
+
+    return literal;
+  }
+
+  inline uint64_t Literal64() {
+    uint64_t literal;
+    memcpy(&literal, LiteralAddress(), sizeof(literal));
+
+    return literal;
+  }
+
+  inline float LiteralFP32() {
+    return rawbits_to_float(Literal32());
+  }
+
+  inline double LiteralFP64() {
+    return rawbits_to_double(Literal64());
+  }
+
+  inline Instruction* NextInstruction() {
+    return this + kInstructionSize;
+  }
+
+  inline Instruction* InstructionAtOffset(int64_t offset) {
+    ASSERT(IsWordAligned(this + offset));
+    return this + offset;
+  }
+
+  template<typename T> static inline Instruction* Cast(T src) {
+    return reinterpret_cast<Instruction*>(src);
+  }
+
+ private:
+  inline int ImmBranch() const;
+
+  void SetPCRelImmTarget(Instruction* target);
+  void SetBranchImmTarget(Instruction* target);
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_INSTRUCTIONS_A64_H_
--- a/disas/libvixl/globals.h
+++ b/disas/libvixl/globals.h
@@ -0,0 +1,65 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_GLOBALS_H
+#define VIXL_GLOBALS_H
+
+// Get the standard printf format macros for C99 stdint types.
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include "platform.h"
+
+
+typedef uint8_t byte;
+
+const int KBytes = 1024;
+const int MBytes = 1024 * KBytes;
+
+  #define ABORT() printf("in %s, line %i", __FILE__, __LINE__); abort()
+#ifdef DEBUG
+  #define ASSERT(condition) assert(condition)
+  #define CHECK(condition) ASSERT(condition)
+  #define UNIMPLEMENTED() printf("UNIMPLEMENTED\t"); ABORT()
+  #define UNREACHABLE() printf("UNREACHABLE\t"); ABORT()
+#else
+  #define ASSERT(condition) ((void) 0)
+  #define CHECK(condition) assert(condition)
+  #define UNIMPLEMENTED() ((void) 0)
+  #define UNREACHABLE() ((void) 0)
+#endif
+
+template <typename T> inline void USE(T) {}
+
+#define ALIGNMENT_EXCEPTION() printf("ALIGNMENT EXCEPTION\t"); ABORT()
+
+#endif  // VIXL_GLOBALS_H
--- a/disas/libvixl/platform.h
+++ b/disas/libvixl/platform.h
@@ -0,0 +1,43 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PLATFORM_H
+#define PLATFORM_H
+
+// Define platform specific functionalities.
+
+namespace vixl {
+#ifdef USE_SIMULATOR
+// Currently we assume running the simulator implies running on x86 hardware.
+inline void HostBreakpoint() { asm("int3"); }
+#else
+inline void HostBreakpoint() {
+  // TODO: Implement HostBreakpoint on a64.
+}
+#endif
+}  // namespace vixl
+
+#endif
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -0,0 +1,126 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "utils.h"
+#include <stdio.h>
+
+namespace vixl {
+
+uint32_t float_to_rawbits(float value) {
+  uint32_t bits = 0;
+  memcpy(&bits, &value, 4);
+  return bits;
+}
+
+
+uint64_t double_to_rawbits(double value) {
+  uint64_t bits = 0;
+  memcpy(&bits, &value, 8);
+  return bits;
+}
+
+
+float rawbits_to_float(uint32_t bits) {
+  float value = 0.0;
+  memcpy(&value, &bits, 4);
+  return value;
+}
+
+
+double rawbits_to_double(uint64_t bits) {
+  double value = 0.0;
+  memcpy(&value, &bits, 8);
+  return value;
+}
+
+
+int CountLeadingZeros(uint64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  int count = 0;
+  uint64_t bit_test = 1UL << (width - 1);
+  while ((count < width) && ((bit_test & value) == 0)) {
+    count++;
+    bit_test >>= 1;
+  }
+  return count;
+}
+
+
+int CountLeadingSignBits(int64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  if (value >= 0) {
+    return CountLeadingZeros(value, width) - 1;
+  } else {
+    return CountLeadingZeros(~value, width) - 1;
+  }
+}
+
+
+int CountTrailingZeros(uint64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  int count = 0;
+  while ((count < width) && (((value >> count) & 1) == 0)) {
+    count++;
+  }
+  return count;
+}
+
+
+int CountSetBits(uint64_t value, int width) {
+  // TODO: Other widths could be added here, as the implementation already
+  // supports them.
+  ASSERT((width == 32) || (width == 64));
+
+  // Mask out unused bits to ensure that they are not counted.
+  value &= (0xffffffffffffffffULL >> (64-width));
+
+  // Add up the set bits.
+  // The algorithm works by adding pairs of bit fields together iteratively,
+  // where the size of each bit field doubles each time.
+  // An example for an 8-bit value:
+  // Bits:  h  g  f  e  d  c  b  a
+  //         \ |   \ |   \ |   \ |
+  // value = h+g   f+e   d+c   b+a
+  //            \    |      \    |
+  // value =   h+g+f+e     d+c+b+a
+  //                  \          |
+  // value =       h+g+f+e+d+c+b+a
+  value = ((value >> 1) & 0x5555555555555555ULL) +
+           (value & 0x5555555555555555ULL);
+  value = ((value >> 2) & 0x3333333333333333ULL) +
+           (value & 0x3333333333333333ULL);
+  value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) +
+           (value & 0x0f0f0f0f0f0f0f0fULL);
+  value = ((value >> 8) & 0x00ff00ff00ff00ffULL) +
+           (value & 0x00ff00ff00ff00ffULL);
+  value = ((value >> 16) & 0x0000ffff0000ffffULL) +
+           (value & 0x0000ffff0000ffffULL);
+  value = ((value >> 32) & 0x00000000ffffffffULL) +
+           (value & 0x00000000ffffffffULL);
+
+  return value;
+}
+}  // namespace vixl
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -0,0 +1,126 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_UTILS_H
+#define VIXL_UTILS_H
+
+
+#include <string.h>
+#include "globals.h"
+
+namespace vixl {
+
+// Check number width.
+inline bool is_intn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  int64_t limit = 1ULL << (n - 1);
+  return (-limit <= x) && (x < limit);
+}
+
+inline bool is_uintn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  return !(x >> n);
+}
+
+inline unsigned truncate_to_intn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  return (x & ((1ULL << n) - 1));
+}
+
+#define INT_1_TO_63_LIST(V)                                                    \
+V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)                                 \
+V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16)                                \
+V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24)                                \
+V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)                                \
+V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40)                                \
+V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48)                                \
+V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56)                                \
+V(57) V(58) V(59) V(60) V(61) V(62) V(63)
+
+#define DECLARE_IS_INT_N(N)                                                    \
+inline bool is_int##N(int64_t x) { return is_intn(N, x); }
+#define DECLARE_IS_UINT_N(N)                                                   \
+inline bool is_uint##N(int64_t x) { return is_uintn(N, x); }
+#define DECLARE_TRUNCATE_TO_INT_N(N)                                           \
+inline int truncate_to_int##N(int x) { return truncate_to_intn(N, x); }
+INT_1_TO_63_LIST(DECLARE_IS_INT_N)
+INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
+INT_1_TO_63_LIST(DECLARE_TRUNCATE_TO_INT_N)
+#undef DECLARE_IS_INT_N
+#undef DECLARE_IS_UINT_N
+#undef DECLARE_TRUNCATE_TO_INT_N
+
+// Bit field extraction.
+inline uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x) {
+  return (x >> lsb) & ((1 << (1 + msb - lsb)) - 1);
+}
+
+inline uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x) {
+  return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
+}
+
+inline int32_t signed_bitextract_32(int msb, int lsb, int32_t x) {
+  return (x << (31 - msb)) >> (lsb + 31 - msb);
+}
+
+inline int64_t signed_bitextract_64(int msb, int lsb, int64_t x) {
+  return (x << (63 - msb)) >> (lsb + 63 - msb);
+}
+
+// floating point representation
+uint32_t float_to_rawbits(float value);
+uint64_t double_to_rawbits(double value);
+float rawbits_to_float(uint32_t bits);
+double rawbits_to_double(uint64_t bits);
+
+// Bits counting.
+int CountLeadingZeros(uint64_t value, int width);
+int CountLeadingSignBits(int64_t value, int width);
+int CountTrailingZeros(uint64_t value, int width);
+int CountSetBits(uint64_t value, int width);
+
+// Pointer alignment
+// TODO: rename/refactor to make it specific to instructions.
+template<typename T>
+bool IsWordAligned(T pointer) {
+  ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
+  return (reinterpret_cast<intptr_t>(pointer) & 3) == 0;
+}
+
+// Increment a pointer until it has the specified alignment.
+template<class T>
+T AlignUp(T pointer, size_t alignment) {
+  ASSERT(sizeof(pointer) == sizeof(uintptr_t));
+  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  size_t align_step = (alignment - pointer_raw) % alignment;
+  ASSERT((pointer_raw + align_step) % alignment == 0);
+  return reinterpret_cast<T>(pointer_raw + align_step);
+}
+
+
+}  // namespace vixl
+
+#endif  // VIXL_UTILS_H
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -225,6 +225,45 @@ Data:
  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
 }

+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference":    device name if defined else node name.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+     "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error":        Error message (json-string, optional)
+                  Only present on failure.  This field contains a human-readable
+                  error message.  There are no semantics other than that the
+                  block layer reported an error and clients should not try to
+                  interpret the error string.
+- "node-name":    The graph node name of the block driver state.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
 RESET
 -----

@@ -479,7 +518,7 @@ Data: None.

 Example:

-{ "event": "WATCHDOG",
+{ "event": "WAKEUP",
     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }

 WATCHDOG
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
 high-performance RDMA hardware using the following command:

 QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default

 Performing this action will cause all 8GB to be pinned, so if that's
 not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device

 Next, on the destination machine, add the following to the QEMU command line:

-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port

 Finally, perform the actual migration on the source machine:

 QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port

 PERFORMANCE
 ===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
 active use and the VM itself is completely idle using the same 40 gbps
 infiniband link:

-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps

 These numbers would of course scale up to whatever size virtual machine
 you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.

 TODO:
 =====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
   are not compatible with infinband memory pinning and will result in
   an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
   the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
   help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
   requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
   hints about application behavior.
--- a/docs/specs/acpi_cpu_hotplug.txt
+++ b/docs/specs/acpi_cpu_hotplug.txt
@@ -10,7 +10,9 @@ ACPI GPE block (IO ports 0xafe0-0xafe3, byte access):
 Generic ACPI GPE block. Bit 2 (GPE.2) used to notify CPU
 hot-add/remove event to ACPI BIOS, via SCI interrupt.

-CPU present bitmap (IO port 0xaf00-0xaf1f, 1-byte access):
+CPU present bitmap for:
+  ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access)
+  PIIX-PM  (IO port 0xaf00-0xaf1f, 1-byte access)
 ---------------------------------------------------------------
 One bit per CPU. Bit position reflects corresponding CPU APIC ID.
 Read-only.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -214,6 +214,42 @@ The "ust" backend uses the LTTng Userspace Tracer library.  There are no
 monitor commands built into QEMU, instead UST utilities should be used to list,
 enable/disable, and dump traces.

+Package lttng-tools is required for userspace tracing. You must ensure that the
+current user belongs to the "tracing" group, or manually launch the
+lttng-sessiond daemon for the current user prior to running any instance of
+QEMU.
+
+While running an instrumented QEMU, LTTng should be able to list all available
+events:
+
+    lttng list -u
+
+Create tracing session:
+
+    lttng create mysession
+
+Enable events:
+
+    lttng enable-event qemu:g_malloc -u
+
+Where the events can either be a comma-separated list of events, or "-a" to
+enable all tracepoint events. Start and stop tracing as needed:
+
+    lttng start
+    lttng stop
+
+View the trace:
+
+    lttng view
+
+Destroy tracing session:
+
+    lttng destroy
+
+Babeltrace can be used at any later time to view the trace:
+
+    babeltrace $HOME/lttng-traces/mysession-<date>-<time>
+
 === SystemTap ===

 The "dtrace" backend uses DTrace sdt probes but has only been tested with
--- a/dump.c
+++ b/dump.c
--- a/exec.c
+++ b/exec.c
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
--- a/gdb-xml/aarch64-fpu.xml
+++ b/gdb-xml/aarch64-fpu.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2012 Free Software Foundation, Inc.
+     Contributed by ARM Ltd.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.fpu">
+  <vector id="v2d" type="ieee_double" count="2"/>
+  <vector id="v2u" type="uint64" count="2"/>
+  <vector id="v2i" type="int64" count="2"/>
+  <vector id="v4f" type="ieee_single" count="4"/>
+  <vector id="v4u" type="uint32" count="4"/>
+  <vector id="v4i" type="int32" count="4"/>
+  <vector id="v8u" type="uint16" count="8"/>
+  <vector id="v8i" type="int16" count="8"/>
+  <vector id="v16u" type="uint8" count="16"/>
+  <vector id="v16i" type="int8" count="16"/>
+  <vector id="v1u" type="uint128" count="1"/>
+  <vector id="v1i" type="int128" count="1"/>
+  <union id="vnd">
+    <field name="f" type="v2d"/>
+    <field name="u" type="v2u"/>
+    <field name="s" type="v2i"/>
+  </union>
+  <union id="vns">
+    <field name="f" type="v4f"/>
+    <field name="u" type="v4u"/>
+    <field name="s" type="v4i"/>
+  </union>
+  <union id="vnh">
+    <field name="u" type="v8u"/>
+    <field name="s" type="v8i"/>
+  </union>
+  <union id="vnb">
+    <field name="u" type="v16u"/>
+    <field name="s" type="v16i"/>
+  </union>
+  <union id="vnq">
+    <field name="u" type="v1u"/>
+    <field name="s" type="v1i"/>
+  </union>
+  <union id="aarch64v">
+    <field name="d" type="vnd"/>
+    <field name="s" type="vns"/>
+    <field name="h" type="vnh"/>
+    <field name="b" type="vnb"/>
+    <field name="q" type="vnq"/>
+  </union>
+  <reg name="v0" bitsize="128" type="aarch64v" regnum="34"/>
+  <reg name="v1" bitsize="128" type="aarch64v" />
+  <reg name="v2" bitsize="128" type="aarch64v" />
+  <reg name="v3" bitsize="128" type="aarch64v" />
+  <reg name="v4" bitsize="128" type="aarch64v" />
+  <reg name="v5" bitsize="128" type="aarch64v" />
+  <reg name="v6" bitsize="128" type="aarch64v" />
+  <reg name="v7" bitsize="128" type="aarch64v" />
+  <reg name="v8" bitsize="128" type="aarch64v" />
+  <reg name="v9" bitsize="128" type="aarch64v" />
+  <reg name="v10" bitsize="128" type="aarch64v"/>
+  <reg name="v11" bitsize="128" type="aarch64v"/>
+  <reg name="v12" bitsize="128" type="aarch64v"/>
+  <reg name="v13" bitsize="128" type="aarch64v"/>
+  <reg name="v14" bitsize="128" type="aarch64v"/>
+  <reg name="v15" bitsize="128" type="aarch64v"/>
+  <reg name="v16" bitsize="128" type="aarch64v"/>
+  <reg name="v17" bitsize="128" type="aarch64v"/>
+  <reg name="v18" bitsize="128" type="aarch64v"/>
+  <reg name="v19" bitsize="128" type="aarch64v"/>
+  <reg name="v20" bitsize="128" type="aarch64v"/>
+  <reg name="v21" bitsize="128" type="aarch64v"/>
+  <reg name="v22" bitsize="128" type="aarch64v"/>
+  <reg name="v23" bitsize="128" type="aarch64v"/>
+  <reg name="v24" bitsize="128" type="aarch64v"/>
+  <reg name="v25" bitsize="128" type="aarch64v"/>
+  <reg name="v26" bitsize="128" type="aarch64v"/>
+  <reg name="v27" bitsize="128" type="aarch64v"/>
+  <reg name="v28" bitsize="128" type="aarch64v"/>
+  <reg name="v29" bitsize="128" type="aarch64v"/>
+  <reg name="v30" bitsize="128" type="aarch64v"/>
+  <reg name="v31" bitsize="128" type="aarch64v"/>
+  <reg name="fpsr" bitsize="32"/>
+  <reg name="fpcr" bitsize="32"/>
+</feature>
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -35,6 +35,11 @@ STEXI
@item commit
@findex commit
 Commit changes to the disk images (if -snapshot is used) or backing files.
+If the backing file is smaller than the snapshot, then the backing file will be
+resized to be the same size as the snapshot.  If the snapshot is smaller than
+the backing file, the backing file will not be truncated.  If you want the
+backing file to match the size of the smaller snapshot, you can safely truncate
+it yourself once the commit operation successfully completes.
 ETEXI

    {
@@ -1190,7 +1195,7 @@ ETEXI
    {
        .name       = "host_net_add",
        .args_type  = "device:s,opts:s?",
-        .params     = "tap|user|socket|vde|dump [options]",
+        .params     = "tap|user|socket|vde|netmap|dump [options]",
        .help       = "add host VLAN client",
        .mhandler.cmd = net_host_device_add,
    },
@@ -1218,7 +1223,7 @@ ETEXI
    {
        .name       = "netdev_add",
        .args_type  = "netdev:O",
-        .params     = "[user|tap|socket|hubport],id=str[,prop=value][,...]",
+        .params     = "[user|tap|socket|hubport|netmap],id=str[,prop=value][,...]",
        .help       = "add host network device",
        .mhandler.cmd = hmp_netdev_add,
    },
@@ -1241,6 +1246,34 @@ STEXI
@item netdev_del
@findex netdev_del
 Remove host network device.
+ETEXI
+
+    {
+        .name       = "object_add",
+        .args_type  = "object:O",
+        .params     = "[qom-type=]type,id=str[,prop=value][,...]",
+        .help       = "create QOM object",
+        .mhandler.cmd = hmp_object_add,
+    },
+
+STEXI
+@item object_add
+@findex object_add
+Create QOM object.
+ETEXI
+
+    {
+        .name       = "object_del",
+        .args_type  = "id:s",
+        .params     = "id",
+        .help       = "destroy QOM object",
+        .mhandler.cmd = hmp_object_del,
+    },
+
+STEXI
+@item object_del
+@findex object_del
+Destroy QOM object.
 ETEXI

 #ifdef CONFIG_SLIRP
@@ -1617,6 +1650,19 @@ STEXI

 Executes a qemu-io command on the given block device.

+ETEXI
+
+    {
+        .name       = "cpu-add",
+        .args_type  = "id:i",
+        .params     = "id",
+        .help       = "add cpu",
+        .mhandler.cmd  = hmp_cpu_add,
+    },
+
+STEXI
+@item cpu-add @var{id}
+Add CPU with id @var{id}
 ETEXI

    {
--- a/hmp.c
+++ b/hmp.c
@@ -21,6 +21,7 @@
 #include "qmp-commands.h"
 #include "qemu/sockets.h"
 #include "monitor/monitor.h"
+#include "qapi/opts-visitor.h"
 #include "ui/console.h"
 #include "block/qapi.h"
 #include "qemu-io.h"
@@ -870,7 +871,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict)
    const char *password = qdict_get_str(qdict, "password");
    Error *errp = NULL;

-    qmp_block_passwd(device, password, &errp);
+    qmp_block_passwd(true, device, false, NULL, password, &errp);
    hmp_handle_error(mon, &errp);
 }

@@ -880,7 +881,7 @@ void hmp_balloon(Monitor *mon, const QDict *qdict)
    Error *errp = NULL;

    qmp_balloon(value, &errp);
-    if (error_is_set(&errp)) {
+    if (errp) {
        monitor_printf(mon, "balloon: %s\n", error_get_pretty(errp));
        error_free(errp);
    }
@@ -892,7 +893,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
    int64_t size = qdict_get_int(qdict, "size");
    Error *errp = NULL;

-    qmp_block_resize(device, size, &errp);
+    qmp_block_resize(true, device, false, NULL, size, &errp);
    hmp_handle_error(mon, &errp);
 }

@@ -971,7 +972,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
    }

    mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    qmp_blockdev_snapshot_sync(device, filename, !!format, format,
+    qmp_blockdev_snapshot_sync(true, device, false, NULL,
+                               filename, false, NULL,
+                               !!format, format,
                               true, mode, &errp);
    hmp_handle_error(mon, &errp);
 }
@@ -1091,11 +1094,11 @@ void hmp_eject(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &err);
 }

-static void hmp_change_read_arg(Monitor *mon, const char *password,
-                                void *opaque)
+static void hmp_change_read_arg(void *opaque, const char *password,
+                                void *readline_opaque)
 {
    qmp_change_vnc_password(password, NULL);
-    monitor_read_command(mon, 1);
+    monitor_read_command(opaque, 1);
 }

 void hmp_change(Monitor *mon, const QDict *qdict)
@@ -1115,7 +1118,7 @@ void hmp_change(Monitor *mon, const QDict *qdict)
    }

    qmp_change(device, target, !!arg, arg, &err);
-    if (error_is_set(&err) &&
+    if (err &&
        error_get_class(err) == ERROR_CLASS_DEVICE_ENCRYPTED) {
        error_free(err);
        monitor_read_block_device_key(mon, device, NULL, NULL);
@@ -1231,7 +1234,8 @@ static void hmp_migrate_status_cb(void *opaque)
    MigrationInfo *info;

    info = qmp_query_migrate(NULL);
-    if (!info->has_status || strcmp(info->status, "active") == 0) {
+    if (!info->has_status || strcmp(info->status, "active") == 0 ||
+        strcmp(info->status, "setup") == 0) {
        if (info->has_disk) {
            int progress;

@@ -1307,8 +1311,11 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
    const char *file = qdict_get_str(qdict, "filename");
    bool has_begin = qdict_haskey(qdict, "begin");
    bool has_length = qdict_haskey(qdict, "length");
+    /* kdump-compressed format is not supported for HMP */
+    bool has_format = false;
    int64_t begin = 0;
    int64_t length = 0;
+    enum DumpGuestMemoryFormat dump_format = DUMP_GUEST_MEMORY_FORMAT_ELF;
    char *prot;

    if (has_begin) {
@@ -1321,7 +1328,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
    prot = g_strconcat("file:", file, NULL);

    qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
-                          &errp);
+                          has_format, dump_format, &errp);
    hmp_handle_error(mon, &errp);
    g_free(prot);
 }
@@ -1332,12 +1339,12 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict)
    QemuOpts *opts;

    opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict, &err);
-    if (error_is_set(&err)) {
+    if (err) {
        goto out;
    }

    netdev_add(opts, &err);
-    if (error_is_set(&err)) {
+    if (err) {
        qemu_opts_del(opts);
    }

@@ -1354,6 +1361,63 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &err);
 }

+void hmp_object_add(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+    char *type = NULL;
+    char *id = NULL;
+    void *dummy = NULL;
+    OptsVisitor *ov;
+    QDict *pdict;
+
+    opts = qemu_opts_from_qdict(qemu_find_opts("object"), qdict, &err);
+    if (err) {
+        goto out;
+    }
+
+    ov = opts_visitor_new(opts);
+    pdict = qdict_clone_shallow(qdict);
+
+    visit_start_struct(opts_get_visitor(ov), &dummy, NULL, NULL, 0, &err);
+    if (err) {
+        goto out_clean;
+    }
+
+    qdict_del(pdict, "qom-type");
+    visit_type_str(opts_get_visitor(ov), &type, "qom-type", &err);
+    if (err) {
+        goto out_clean;
+    }
+
+    qdict_del(pdict, "id");
+    visit_type_str(opts_get_visitor(ov), &id, "id", &err);
+    if (err) {
+        goto out_clean;
+    }
+
+    object_add(type, id, pdict, opts_get_visitor(ov), &err);
+    if (err) {
+        goto out_clean;
+    }
+    visit_end_struct(opts_get_visitor(ov), &err);
+    if (err) {
+        qmp_object_del(id, NULL);
+    }
+
+out_clean:
+    opts_visitor_cleanup(ov);
+
+    QDECREF(pdict);
+    qemu_opts_del(opts);
+    g_free(id);
+    g_free(type);
+    g_free(dummy);
+
+out:
+    hmp_handle_error(mon, &err);
+}
+
 void hmp_getfd(Monitor *mon, const QDict *qdict)
 {
    const char *fdname = qdict_get_str(qdict, "fdname");
@@ -1525,6 +1589,16 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &errp);
 }

+void hmp_cpu_add(Monitor *mon, const QDict *qdict)
+{
+    int cpuid;
+    Error *err = NULL;
+
+    cpuid = qdict_get_int(qdict, "id");
+    qmp_cpu_add(cpuid, &err);
+    hmp_handle_error(mon, &err);
+}
+
 void hmp_chardev_add(Monitor *mon, const QDict *qdict)
 {
    const char *args = qdict_get_str(qdict, "args");
@@ -1564,3 +1638,12 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)

    hmp_handle_error(mon, &err);
 }
+
+void hmp_object_del(Monitor *mon, const QDict *qdict)
+{
+    const char *id = qdict_get_str(qdict, "id");
+    Error *err = NULL;
+
+    qmp_object_del(id, &err);
+    hmp_handle_error(mon, &err);
+}
--- a/hmp.h
+++ b/hmp.h
@@ -89,5 +89,8 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict);
 void hmp_qemu_io(Monitor *mon, const QDict *qdict);
+void hmp_cpu_add(Monitor *mon, const QDict *qdict);
+void hmp_object_add(Monitor *mon, const QDict *qdict);
+void hmp_object_del(Monitor *mon, const QDict *qdict);

 #endif
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -38,10 +38,6 @@ int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
            });
        v9fs_path_unlock(s);
    }
-    /* The ioctl may not be supported depending on the path */
-    if (err == -ENOTTY) {
-        err = 0;
-    }
    return err;
 }

--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -41,15 +41,16 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)
    g_free(cfg);
 }

-static int virtio_9p_device_init(VirtIODevice *vdev)
+static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
 {
-    V9fsState *s = VIRTIO_9P(vdev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    V9fsState *s = VIRTIO_9P(dev);
    int i, len;
    struct stat stat;
    FsDriverEntry *fse;
    V9fsPath path;

-    virtio_init(VIRTIO_DEVICE(s), "virtio-9p", VIRTIO_ID_9P,
+    virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P,
                sizeof(struct virtio_9p_config) + MAX_TAG_LEN);

    /* initialize pdu allocator */
@@ -67,16 +68,16 @@ static int virtio_9p_device_init(VirtIODevice *vdev)

    if (!fse) {
        /* We don't have a fsdev identified by fsdev_id */
-        fprintf(stderr, "Virtio-9p device couldn't find fsdev with the "
-                "id = %s\n",
-                s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
+        error_setg(errp, "Virtio-9p device couldn't find fsdev with the "
+                   "id = %s",
+                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
        goto out;
    }

    if (!s->fsconf.tag) {
        /* we haven't specified a mount_tag */
-        fprintf(stderr, "fsdev with id %s needs mount_tag arguments\n",
-                s->fsconf.fsdev_id);
+        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
+                   s->fsconf.fsdev_id);
        goto out;
    }

@@ -85,8 +86,8 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
    s->ctx.exops.get_st_gen = NULL;
    len = strlen(s->fsconf.tag);
    if (len > MAX_TAG_LEN - 1) {
-        fprintf(stderr, "mount tag '%s' (%d bytes) is longer than "
-                "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
+        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
+                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
        goto out;
    }

@@ -99,12 +100,12 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
    qemu_co_rwlock_init(&s->rename_lock);

    if (s->ops->init(&s->ctx) < 0) {
-        fprintf(stderr, "Virtio-9p Failed to initialize fs-driver with id:%s"
-                " and export path:%s\n", s->fsconf.fsdev_id, s->ctx.fs_root);
+        error_setg(errp, "Virtio-9p Failed to initialize fs-driver with id:%s"
+                   " and export path:%s", s->fsconf.fsdev_id, s->ctx.fs_root);
        goto out;
    }
    if (v9fs_init_worker_threads() < 0) {
-        fprintf(stderr, "worker thread initialization failed\n");
+        error_setg(errp, "worker thread initialization failed");
        goto out;
    }

@@ -114,28 +115,25 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
     * use co-routines here.
     */
    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
-        fprintf(stderr,
-                "error in converting name to path %s", strerror(errno));
+        error_setg(errp,
+                   "error in converting name to path %s", strerror(errno));
        goto out;
    }
    if (s->ops->lstat(&s->ctx, &path, &stat)) {
-        fprintf(stderr, "share path %s does not exist\n", fse->path);
+        error_setg(errp, "share path %s does not exist", fse->path);
        goto out;
    } else if (!S_ISDIR(stat.st_mode)) {
-        fprintf(stderr, "share path %s is not a directory\n", fse->path);
+        error_setg(errp, "share path %s is not a directory", fse->path);
        goto out;
    }
    v9fs_path_free(&path);

-    return 0;
+    return;
 out:
    g_free(s->ctx.fs_root);
    g_free(s->tag);
    virtio_cleanup(vdev);
    v9fs_path_free(&path);
-
-    return -1;
-
 }

 /* virtio-9p device */
@@ -149,9 +147,10 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
 {
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
    dc->props = virtio_9p_properties;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    vdc->init = virtio_9p_device_init;
+    vdc->realize = virtio_9p_device_realize;
    vdc->get_features = virtio_9p_get_features;
    vdc->get_config = virtio_9p_get_config;
 }
--- a/hw/9pfs/virtio-9p-handle.c
+++ b/hw/9pfs/virtio-9p-handle.c
@@ -582,6 +582,7 @@ static int handle_unlinkat(FsContext *ctx, V9fsPath *dir,
 static int handle_ioc_getversion(FsContext *ctx, V9fsPath *path,
                                 mode_t st_mode, uint64_t *st_gen)
 {
+#ifdef FS_IOC_GETVERSION
    int err;
    V9fsFidOpenState fid_open;

@@ -590,7 +591,8 @@ static int handle_ioc_getversion(FsContext *ctx, V9fsPath *path,
     * We can get fd for regular files and directories only
     */
    if (!S_ISREG(st_mode) && !S_ISDIR(st_mode)) {
-            return 0;
+        errno = ENOTTY;
+        return -1;
    }
    err = handle_open(ctx, path, O_RDONLY, &fid_open);
    if (err < 0) {
@@ -599,6 +601,10 @@ static int handle_ioc_getversion(FsContext *ctx, V9fsPath *path,
    err = ioctl(fid_open.fd, FS_IOC_GETVERSION, st_gen);
    handle_close(ctx, &fid_open);
    return err;
+#else
+    errno = ENOTTY;
+    return -1;
+#endif
 }

 static int handle_init(FsContext *ctx)
--- a/Show More
+++ b/Show More