Update VERSION for 2.0.2 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
audio: fmopl: drop INLINE macro
2014-08-18 11:00:53 -05:00 · 2014-08-17 11:43:40 -05:00 · 2014-08-17 11:42:00 -05:00 · 2014-08-15 15:51:19 -05:00 · 2014-08-07 17:59:15 -05:00 · 2014-08-07 17:42:39 -05:00
1124 changed files with 60671 additions and 16547 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,69 +1,75 @@
-config-devices.*
-config-all-devices.*
-config-all-disas.*
-config-host.*
-config-target.*
-config.status
-trace/generated-tracers.h
-trace/generated-tracers.c
-trace/generated-tracers-dtrace.h
-trace/generated-tracers.dtrace
-trace/generated-events.h
-trace/generated-events.c
-libcacard/trace/generated-tracers.c
+/config-devices.*
+/config-all-devices.*
+/config-all-disas.*
+/config-host.*
+/config-target.*
+/config.status
+/trace/generated-tracers.h
+/trace/generated-tracers.c
+/trace/generated-tracers-dtrace.h
+/trace/generated-tracers.dtrace
+/trace/generated-events.h
+/trace/generated-events.c
+/trace/generated-ust-provider.h
+/trace/generated-ust.c
+/libcacard/trace/generated-tracers.c
 *-timestamp
-*-softmmu
-*-darwin-user
-*-linux-user
-*-bsd-user
+/*-softmmu
+/*-darwin-user
+/*-linux-user
+/*-bsd-user
 libdis*
 libuser
-linux-headers/asm
-qapi-generated
-qapi-types.[ch]
-qapi-visit.[ch]
-qmp-commands.h
-qmp-marshal.c
-qemu-doc.html
-qemu-tech.html
-qemu-doc.info
-qemu-tech.info
-qemu.1
-qemu.pod
-qemu-img.1
-qemu-img.pod
-qemu-img
-qemu-nbd
-qemu-nbd.8
-qemu-nbd.pod
-qemu-options.def
-qemu-options.texi
-qemu-img-cmds.texi
-qemu-img-cmds.h
-qemu-io
-qemu-ga
-qemu-bridge-helper
-qemu-monitor.texi
-vscclient
-qmp-commands.txt
-test-bitops
-test-coroutine
-test-int128
-test-opts-visitor
-test-qmp-input-visitor
-test-qmp-output-visitor
-test-string-input-visitor
-test-string-output-visitor
-test-visitor-serialization
-fsdev/virtfs-proxy-helper
-fsdev/virtfs-proxy-helper.1
-fsdev/virtfs-proxy-helper.pod
-.gdbinit
+/linux-headers/asm
+/qga/qapi-generated
+/qapi-generated
+/qapi-types.[ch]
+/qapi-visit.[ch]
+/qmp-commands.h
+/qmp-marshal.c
+/qemu-doc.html
+/qemu-tech.html
+/qemu-doc.info
+/qemu-tech.info
+/qemu.1
+/qemu.pod
+/qemu-img.1
+/qemu-img.pod
+/qemu-img
+/qemu-nbd
+/qemu-nbd.8
+/qemu-nbd.pod
+/qemu-options.def
+/qemu-options.texi
+/qemu-img-cmds.texi
+/qemu-img-cmds.h
+/qemu-io
+/qemu-ga
+/qemu-bridge-helper
+/qemu-monitor.texi
+/qmp-commands.txt
+/vscclient
+/test-bitops
+/test-coroutine
+/test-int128
+/test-opts-visitor
+/test-qmp-input-visitor
+/test-qmp-output-visitor
+/test-string-input-visitor
+/test-string-output-visitor
+/test-visitor-serialization
+/fsdev/virtfs-proxy-helper
+/fsdev/virtfs-proxy-helper.1
+/fsdev/virtfs-proxy-helper.pod
+/.gdbinit
 *.a
 *.aux
 *.cp
 *.dvi
 *.exe
+*.dll
+*.so
+*.mo
 *.fn
 *.ky
 *.log
@@ -77,7 +83,7 @@ fsdev/virtfs-proxy-helper.pod
 *.tp
 *.vr
 *.d
-!scripts/qemu-guest-agent/fsfreeze-hook.d
+!/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
 *.lo
 *.la
@@ -90,22 +96,22 @@ fsdev/virtfs-proxy-helper.pod
 *.gcda
 *.gcno
 patches
-pc-bios/bios-pq/status
-pc-bios/vgabios-pq/status
-pc-bios/optionrom/linuxboot.asm
-pc-bios/optionrom/linuxboot.bin
-pc-bios/optionrom/linuxboot.raw
-pc-bios/optionrom/linuxboot.img
-pc-bios/optionrom/multiboot.asm
-pc-bios/optionrom/multiboot.bin
-pc-bios/optionrom/multiboot.raw
-pc-bios/optionrom/multiboot.img
-pc-bios/optionrom/kvmvapic.asm
-pc-bios/optionrom/kvmvapic.bin
-pc-bios/optionrom/kvmvapic.raw
-pc-bios/optionrom/kvmvapic.img
-pc-bios/s390-ccw/s390-ccw.elf
-pc-bios/s390-ccw/s390-ccw.img
+/pc-bios/bios-pq/status
+/pc-bios/vgabios-pq/status
+/pc-bios/optionrom/linuxboot.asm
+/pc-bios/optionrom/linuxboot.bin
+/pc-bios/optionrom/linuxboot.raw
+/pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/multiboot.asm
+/pc-bios/optionrom/multiboot.bin
+/pc-bios/optionrom/multiboot.raw
+/pc-bios/optionrom/multiboot.img
+/pc-bios/optionrom/kvmvapic.asm
+/pc-bios/optionrom/kvmvapic.bin
+/pc-bios/optionrom/kvmvapic.raw
+/pc-bios/optionrom/kvmvapic.img
+/pc-bios/s390-ccw/s390-ccw.elf
+/pc-bios/s390-ccw/s390-ccw.img
 .stgit-*
 cscope.*
 tags
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,6 +13,9 @@
 [submodule "roms/openbios"]
 	path = roms/openbios
 	url = git://git.qemu-project.org/openbios.git
+[submodule "roms/openhackware"]
+	path = roms/openhackware
+	url = git://git.qemu-project.org/openhackware.git
 [submodule "roms/qemu-palcode"]
 	path = roms/qemu-palcode
 	url = git://github.com/rth7680/qemu-palcode.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,12 @@ python:
 compiler:
  - gcc
  - clang
+notifications:
+  irc:
+    channels:
+      - "irc.oftc.net#qemu"
+    on_success: change
+    on_failure: always
 env:
  global:
    - TEST_CMD="make check"
@@ -14,23 +20,23 @@ env:
    - GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
    - EXTRA_PKGS=""
  matrix:
-  - TARGETS=alpha-softmmu,alpha-linux-user
-  - TARGETS=arm-softmmu,arm-linux-user
-  - TARGETS=aarch64-softmmu,aarch64-linux-user
-  - TARGETS=cris-softmmu
-  - TARGETS=i386-softmmu,x86_64-softmmu
-  - TARGETS=lm32-softmmu
-  - TARGETS=m68k-softmmu
-  - TARGETS=microblaze-softmmu,microblazeel-softmmu
-  - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
-  - TARGETS=moxie-softmmu
-  - TARGETS=or32-softmmu,
-  - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
-  - TARGETS=s390x-softmmu
-  - TARGETS=sh4-softmmu,sh4eb-softmmu
-  - TARGETS=sparc-softmmu,sparc64-softmmu
-  - TARGETS=unicore32-softmmu
-  - TARGETS=xtensa-softmmu,xtensaeb-softmmu
+    - TARGETS=alpha-softmmu,alpha-linux-user
+    - TARGETS=arm-softmmu,arm-linux-user
+    - TARGETS=aarch64-softmmu,aarch64-linux-user
+    - TARGETS=cris-softmmu
+    - TARGETS=i386-softmmu,x86_64-softmmu
+    - TARGETS=lm32-softmmu
+    - TARGETS=m68k-softmmu
+    - TARGETS=microblaze-softmmu,microblazeel-softmmu
+    - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
+    - TARGETS=moxie-softmmu
+    - TARGETS=or32-softmmu,
+    - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
+    - TARGETS=s390x-softmmu
+    - TARGETS=sh4-softmmu,sh4eb-softmmu
+    - TARGETS=sparc-softmmu,sparc64-softmmu
+    - TARGETS=unicore32-softmmu
+    - TARGETS=xtensa-softmmu,xtensaeb-softmmu
 before_install:
  - git submodule update --init --recursive
  - sudo apt-get update -qq
@@ -46,6 +52,10 @@ matrix:
    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
      compiler: gcc
+    # All the extra -dev packages
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+           EXTRA_PKGS="libaio-dev libcap-ng-dev libattr1-dev libbrlapi-dev uuid-dev libusb-1.0.0-dev"
+      compiler: gcc
    # Currently configure doesn't force --disable-pie
    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
@@ -65,8 +75,7 @@ matrix:
           EXTRA_CONFIG="--enable-trace-backend=ftrace"
           TEST_CMD=""
      compiler: gcc
-    # This disabled make check for the ftrace backend which needs more setting up
-    # Currently broken on 12.04 due to mis-packaged liburcu and changed API, will be pulled.
-    #- env: TARGETS=i386-softmmu,x86_64-softmmu
-    #       EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
-    #       EXTRA_CONFIG="--enable-trace-backend=ust"
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+          EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
+          EXTRA_CONFIG="--enable-trace-backend=ust"
+      compiler: gcc
--- a/7
+++ b/7
@@ -84,3 +84,10 @@ and clarity it comes on a line by itself:
 Rationale: a consistent (except for functions...) bracing style reduces
 ambiguity and avoids needless churn when lines are added or removed.
 Furthermore, it is the QEMU coding style.
+
+5. Declarations
+
+Mixed declarations (interleaving statements and declarations within blocks)
+are not allowed; declarations should be at the beginning of blocks.  In other
+words, the code should not generate warnings if using GCC's
+-Wdeclaration-after-statement option.
--- a/36
+++ b/36
@@ -158,7 +158,6 @@ Guest CPU Cores (KVM):
 ----------------------

 Overall
-M: Gleb Natapov <gleb@redhat.com>
 M: Paolo Bonzini <pbonzini@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
@@ -176,12 +175,14 @@ S: Maintained
 F: target-ppc/kvm.c

 S390
+M: Christian Borntraeger <borntraeger@de.ibm.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target-s390x/kvm.c
+F: hw/intc/s390_flic.[hc]

 X86
-M: Gleb Natapov <gleb@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
@@ -303,7 +304,7 @@ S: Maintained
 F: hw/*/versatile*

 Xilinx Zynq
-M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
 S: Maintained
 F: hw/arm/xilinx_zynq.c
 F: hw/misc/zynq_slcr.c
@@ -352,7 +353,7 @@ S: Maintained
 F: hw/microblaze/petalogix_s3adsp1800_mmu.c

 petalogix_ml605
-M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
 S: Maintained
 F: hw/microblaze/petalogix_ml605_mmu.c

@@ -495,10 +496,13 @@ F: hw/s390x/s390-*.c

 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Supported
 F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
+F: hw/s390x/sclp*.[hc]
+F: hw/s390x/ipl*.[hc]
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr

 UniCore32 Machines
@@ -588,7 +592,7 @@ S: Orphan
 F: hw/scsi/lsi53c895a.c

 SSI
-M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
 S: Maintained
 F: hw/ssi/*
 F: hw/block/m25p80.c
@@ -610,6 +614,7 @@ F: hw/*/*vhost*

 virtio
 M: Anthony Liguori <aliguori@amazon.com>
+M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: hw/*/virtio*

@@ -618,6 +623,7 @@ M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 S: Supported
 F: hw/9pfs/
 F: fsdev/
+F: tests/virtio-9p-test.c
 T: git git://github.com/kvaneesh/QEMU.git

 virtio-blk
@@ -628,6 +634,7 @@ F: hw/block/virtio-blk.c

 virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Christian Borntraeger <borntraeger@de.ibm.com>
 S: Supported
 F: hw/s390x/virtio-ccw.[hc]
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr
@@ -642,9 +649,10 @@ nvme
 M: Keith Busch <keith.busch@intel.com>
 S: Supported
 F: hw/block/nvme*
+F: tests/nvme-test.c

 Xilinx EDK
-M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
 F: hw/*/xilinx_*
@@ -688,7 +696,7 @@ F: include/hw/cpu/icc_bus.h
 F: hw/cpu/icc_bus.c

 Device Tree
-M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: device_tree.[ch]
@@ -709,7 +717,8 @@ F: hw/display/qxl*

 Graphics
 M: Anthony Liguori <aliguori@amazon.com>
-S: Maintained
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
 F: ui/

 Cocoa graphics
@@ -725,7 +734,7 @@ F: vl.c

 Human Monitor (HMP)
 M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Supported
+S: Maintained
 F: monitor.c
 F: hmp.c
 F: hmp-commands.hx
@@ -757,7 +766,7 @@ T: git git://github.com/bonzini/qemu.git nbd-next
 QAPI
 M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
-S: Supported
+S: Maintained
 F: qapi/
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

@@ -771,7 +780,7 @@ T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 QMP
 M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Supported
+S: Maintained
 F: qmp.c
 F: monitor.c
 F: qmp-commands.hx
@@ -936,6 +945,11 @@ M: Peter Lieven <pl@kamp.de>
 S: Supported
 F: block/iscsi.c

+NFS
+M: Peter Lieven <pl@kamp.de>
+S: Maintained
+F: block/nfs.c
+
 SSH
 M: Richard W.M. Jones <rjones@redhat.com>
 S: Supported
--- a/43
+++ b/43
@@ -57,6 +57,11 @@ GENERATED_HEADERS += trace/generated-tracers-dtrace.h
 endif
 GENERATED_SOURCES += trace/generated-tracers.c

+ifeq ($(TRACE_BACKEND),ust)
+GENERATED_HEADERS += trace/generated-ust-provider.h
+GENERATED_SOURCES += trace/generated-ust.c
+endif
+
 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
@@ -122,13 +127,30 @@ defconfig:

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/Makefile.objs
+endif
+
+dummy := $(call unnest-vars,, \
+                stub-obj-y \
+                util-obj-y \
+                qga-obj-y \
+                qga-vss-dll-obj-y \
+                block-obj-y \
+                block-obj-m \
+                common-obj-y \
+                common-obj-m)
+
+ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile
 endif
 ifeq ($(CONFIG_SMARTCARD_NSS),y)
 include $(SRC_PATH)/libcacard/Makefile
 endif

-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
+all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
+
+vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
+
+vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -138,6 +160,7 @@ qemu-options.def: $(SRC_PATH)/qemu-options.hx
 SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))

+$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
 $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak

 subdir-%:
@@ -187,6 +210,9 @@ Makefile: $(version-obj-y) $(version-lobj-y)
 libqemustub.a: $(stub-obj-y)
 libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o

+block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
+util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
+
 ######################################################################

 qemu-img.o: qemu-img-cmds.h
@@ -240,8 +266,7 @@ clean:
 # avoid old build problems by removing potentially incorrect old files
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
-	find . -name '*.[oda]' -type f -exec rm -f {} +
-	find . -name '*.l[oa]' -type f -exec rm -f {} +
+	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
 	rm -rf .libs */.libs
@@ -290,10 +315,10 @@ common  de-ch  es     fo  fr-ca  hu     ja  mk  nl-be      pt  sl     tr \
 bepo    cz

 ifdef INSTALL_BLOBS
-BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
+BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
 acpi-dsdt.aml q35-acpi-dsdt.aml \
-ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \
+ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
@@ -349,6 +374,12 @@ install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
 endif
+ifneq ($(CONFIG_MODULES),)
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
+	for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \
+		$(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(qemu_moddir)/$$(echo $$s | tr / -)"; \
+	done
+endif
 ifneq ($(HELPERS-y),)
 	$(INSTALL_DIR) "$(DESTDIR)$(libexecdir)"
 	$(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)"
@@ -366,7 +397,7 @@ endif
 		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
 	done
 	for d in $(TARGET_DIRS); do \
-	$(MAKE) -C $$d $@ || exit 1 ; \
+	$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
        done

 # various test targets
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -19,11 +19,8 @@ block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
 block-obj-y += qemu-coroutine-sleep.o
 block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o

-ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
-# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
-# only pull in the actual virtio-9p device if we also enabled virtio.
-CONFIG_REALLY_VIRTFS=y
-endif
+block-obj-m = block/
+

 ######################################################################
 # smartcard
@@ -41,9 +38,9 @@ libcacard-y += libcacard/vcardt.o
 # single QEMU executable should support all CPUs and machines.

 ifeq ($(CONFIG_SOFTMMU),y)
-common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
+common-obj-y = blockdev.o blockdev-nbd.o block/
+common-obj-y += iothread.o
 common-obj-y += net/
-common-obj-y += readline.o
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
@@ -112,18 +109,3 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 # by libqemuutil.a.  These should be moved to a separate .json schema.
 qga-obj-y = qga/ qapi-types.o qapi-visit.o
 qga-vss-dll-obj-y = qga/
-
-vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
-
-vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
-
-QEMU_CFLAGS+=$(GLIB_CFLAGS)
-
-nested-vars += \
-	stub-obj-y \
-	util-obj-y \
-	qga-obj-y \
-	qga-vss-dll-obj-y \
-	block-obj-y \
-	common-obj-y
-dummy := $(call unnest-vars)
--- a/Makefile.target
+++ b/Makefile.target
@@ -130,8 +130,6 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-main.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
-
 GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h

 endif # CONFIG_SOFTMMU
@@ -139,13 +137,26 @@ endif # CONFIG_SOFTMMU
 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

-nested-vars += obj-y
+dummy := $(call unnest-vars,,obj-y)

-# This resolves all nested paths, so it must come last
+# we are making another call to unnest-vars with different vars, protect obj-y,
+# it can be overridden in subdir Makefile.objs
+obj-y-save := $(obj-y)
+
+block-obj-y :=
+common-obj-y :=
 include $(SRC_PATH)/Makefile.objs
+dummy := $(call unnest-vars,.., \
+               block-obj-y \
+               block-obj-m \
+               common-obj-y \
+               common-obj-m)

-all-obj-y = $(obj-y)
-all-obj-y += $(addprefix ../, $(common-obj-y))
+# Now restore obj-y
+obj-y := $(obj-y-save)
+
+all-obj-y = $(obj-y) $(common-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)

 ifndef CONFIG_HAIKU
 LIBS+=-lm
--- a/2
+++ b/2
@@ -1 +1 @@
-1.7.50
+2.0.2
--- a/arch_init.c
+++ b/arch_init.c
@@ -122,7 +122,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */

-
 static struct defconfig_file {
    const char *filename;
    /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@ static struct defconfig_file {
    { NULL }, /* end of list */
 };

+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

 int qemu_read_default_config_files(bool userconfig)
 {
@@ -164,24 +164,63 @@ static struct {
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
-    /* buffer used for XBZRLE decoding */
-    uint8_t *decoded_buf;
-    /* Cache for XBZRLE */
+    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
+    QemuMutex lock;
 } XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
-    .decoded_buf = NULL,
    .cache = NULL,
 };
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;

+static void XBZRLE_cache_lock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_unlock(&XBZRLE.lock);
+}

 int64_t xbzrle_cache_resize(int64_t new_size)
 {
-    if (XBZRLE.cache != NULL) {
-        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
-            TARGET_PAGE_SIZE;
+    PageCache *new_cache, *cache_to_free;
+
+    if (new_size < TARGET_PAGE_SIZE) {
+        return -1;
    }
+
+    /* no need to lock, the current thread holds qemu big lock */
+    if (XBZRLE.cache != NULL) {
+        /* check XBZRLE.cache again later */
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            return pow2floor(new_size);
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                                        TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            DPRINTF("Error creating cache\n");
+            return -1;
+        }
+
+        XBZRLE_cache_lock();
+        /* the XBZRLE.cache may have been destroyed, check it again */
+        if (XBZRLE.cache != NULL) {
+            cache_to_free = XBZRLE.cache;
+            XBZRLE.cache = new_cache;
+        } else {
+            cache_to_free = new_cache;
+        }
+        XBZRLE_cache_unlock();
+
+        cache_fini(cache_to_free);
+    }
+
    return pow2floor(new_size);
 }

@@ -271,6 +310,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
    return size;
 }

+/* This is the last block that we have visited searching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1

 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -282,7 +349,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
-            cache_insert(XBZRLE.cache, current_addr, current_data);
+            if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
+                return -1;
+            }
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
@@ -325,18 +394,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
    return bytes_sent;
 }

-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
@@ -508,6 +565,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
        } else {
            int ret;
            uint8_t *p;
+            bool send_async = true;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

@@ -518,6 +576,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
            ret = ram_control_save_page(f, block->offset,
                               offset, TARGET_PAGE_SIZE, &bytes_sent);

+            XBZRLE_cache_lock();
+
+            current_addr = block->offset + offset;
            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
@@ -532,23 +593,40 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                    p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }

+            XBZRLE_cache_unlock();
            /* if page is unmodified, continue to the next */
            if (bytes_sent > 0) {
                last_sent_block = block;
@@ -602,6 +680,12 @@ uint64_t ram_bytes_total(void)
    return total;
 }

+void free_xbzrle_decoded_buf(void)
+{
+    g_free(xbzrle_decoded_buf);
+    xbzrle_decoded_buf = NULL;
+}
+
 static void migration_end(void)
 {
    if (migration_bitmap) {
@@ -610,14 +694,17 @@ static void migration_end(void)
        migration_bitmap = NULL;
    }

+    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
-        g_free(XBZRLE.decoded_buf);
        XBZRLE.cache = NULL;
+        XBZRLE.encoded_buf = NULL;
+        XBZRLE.current_buf = NULL;
    }
+    XBZRLE_cache_unlock();
 }

 static void ram_migration_cancel(void *opaque)
@@ -648,15 +735,33 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
+        qemu_mutex_lock_iothread();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
+            qemu_mutex_unlock_iothread();
            DPRINTF("Error creating cache\n");
            return -1;
        }
-        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
-        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
+        qemu_mutex_init(&XBZRLE.lock);
+        qemu_mutex_unlock_iothread();
+
+        /* We prefer not to abort if there is no memory */
+        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+        if (!XBZRLE.encoded_buf) {
+            DPRINTF("Error allocating encoded_buf\n");
+            return -1;
+        }
+
+        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!XBZRLE.current_buf) {
+            DPRINTF("Error allocating current_buf\n");
+            g_free(XBZRLE.encoded_buf);
+            XBZRLE.encoded_buf = NULL;
+            return -1;
+        }
+
        acct_clear();
    }

@@ -807,8 +912,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
    unsigned int xh_len;
    int xh_flags;

-    if (!XBZRLE.decoded_buf) {
-        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    if (!xbzrle_decoded_buf) {
+        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
@@ -825,10 +930,10 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
        return -1;
    }
    /* load data and decode */
-    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
+    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
-    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
+    ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
@@ -887,70 +992,68 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
    ram_addr_t addr;
    int flags, ret = 0;
-    int error;
    static uint64_t seq_iter;

    seq_iter++;

-    if (version_id < 4 || version_id > 4) {
-        return -EINVAL;
+    if (version_id != 4) {
+        ret = -EINVAL;
    }

-    do {
+    while (!ret) {
        addr = qemu_get_be64(f);

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (version_id == 4) {
-                /* Synchronize RAM block list */
-                char id[256];
-                ram_addr_t length;
-                ram_addr_t total_ram_bytes = addr;
+            /* Synchronize RAM block list */
+            char id[256];
+            ram_addr_t length;
+            ram_addr_t total_ram_bytes = addr;

-                while (total_ram_bytes) {
-                    RAMBlock *block;
-                    uint8_t len;
+            while (total_ram_bytes) {
+                RAMBlock *block;
+                uint8_t len;

-                    len = qemu_get_byte(f);
-                    qemu_get_buffer(f, (uint8_t *)id, len);
-                    id[len] = 0;
-                    length = qemu_get_be64(f);
+                len = qemu_get_byte(f);
+                qemu_get_buffer(f, (uint8_t *)id, len);
+                id[len] = 0;
+                length = qemu_get_be64(f);

-                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-                        if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length) {
-                                fprintf(stderr,
-                                        "Length mismatch: %s: " RAM_ADDR_FMT
-                                        " in != " RAM_ADDR_FMT "\n", id, length,
-                                        block->length);
-                                ret =  -EINVAL;
-                                goto done;
-                            }
-                            break;
+                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+                    if (!strncmp(id, block->idstr, sizeof(id))) {
+                        if (block->length != length) {
+                            fprintf(stderr,
+                                    "Length mismatch: %s: " RAM_ADDR_FMT
+                                    " in != " RAM_ADDR_FMT "\n", id, length,
+                                    block->length);
+                            ret =  -EINVAL;
                        }
+                        break;
                    }
-
-                    if (!block) {
-                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
-                                "accept migration\n", id);
-                        ret = -EINVAL;
-                        goto done;
-                    }
-
-                    total_ram_bytes -= length;
                }
-            }
-        }

-        if (flags & RAM_SAVE_FLAG_COMPRESS) {
+                if (!block) {
+                    fprintf(stderr, "Unknown ramblock \"%s\", cannot "
+                            "accept migration\n", id);
+                    ret = -EINVAL;
+                }
+                if (ret) {
+                    break;
+                }
+
+                total_ram_bytes -= length;
+            }
+        } else if (flags & RAM_SAVE_FLAG_COMPRESS) {
            void *host;
            uint8_t ch;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
-                return -EINVAL;
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
            }

            ch = qemu_get_byte(f);
@@ -960,31 +1063,39 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
-                return -EINVAL;
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
            void *host = host_from_stream_offset(f, addr, flags);
            if (!host) {
-                return -EINVAL;
+                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
+                ret = -EINVAL;
+                break;
            }

            if (load_xbzrle(f, addr, host) < 0) {
+                error_report("Failed to decompress XBZRLE page at "
+                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
-                goto done;
+                break;
            }
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
            ram_control_load_hook(f, flags);
+        } else if (flags & RAM_SAVE_FLAG_EOS) {
+            /* normal exit */
+            break;
+        } else {
+            error_report("Unknown migration flags: %#x", flags);
+            ret = -EINVAL;
+            break;
        }
-        error = qemu_file_get_error(f);
-        if (error) {
-            ret = error;
-            goto done;
-        }
-    } while (!(flags & RAM_SAVE_FLAG_EOS));
+        ret = qemu_file_get_error(f);
+    }

-done:
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
--- a/async.c
+++ b/async.c
@@ -117,15 +117,21 @@ void qemu_bh_schedule_idle(QEMUBH *bh)

 void qemu_bh_schedule(QEMUBH *bh)
 {
+    AioContext *ctx;
+
    if (bh->scheduled)
        return;
+    ctx = bh->ctx;
    bh->idle = 0;
-    /* Make sure that idle & any writes needed by the callback are done
-     * before the locations are read in the aio_bh_poll.
+    /* Make sure that:
+     * 1. idle & any writes needed by the callback are done before the
+     *    locations are read in the aio_bh_poll.
+     * 2. ctx is loaded before scheduled is set and the callback has a chance
+     *    to execute.
     */
-    smp_wmb();
+    smp_mb();
    bh->scheduled = 1;
-    aio_notify(bh->ctx);
+    aio_notify(ctx);
 }


@@ -214,6 +220,7 @@ aio_ctx_finalize(GSource     *source)
    thread_pool_free(ctx->thread_pool);
    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
    event_notifier_cleanup(&ctx->notifier);
+    rfifolock_destroy(&ctx->lock);
    qemu_mutex_destroy(&ctx->bh_lock);
    g_array_free(ctx->pollfds, TRUE);
    timerlistgroup_deinit(&ctx->tlg);
@@ -250,6 +257,12 @@ static void aio_timerlist_notify(void *opaque)
    aio_notify(opaque);
 }

+static void aio_rfifolock_cb(void *opaque)
+{
+    /* Kick owner thread in case they are blocked in aio_poll() */
+    aio_notify(opaque);
+}
+
 AioContext *aio_context_new(void)
 {
    AioContext *ctx;
@@ -257,6 +270,7 @@ AioContext *aio_context_new(void)
    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
    ctx->thread_pool = NULL;
    qemu_mutex_init(&ctx->bh_lock);
+    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
    event_notifier_init(&ctx->notifier, false);
    aio_set_event_notifier(ctx, &ctx->notifier, 
                           (EventNotifierHandler *)
@@ -275,3 +289,13 @@ void aio_context_unref(AioContext *ctx)
 {
    g_source_unref(&ctx->source);
 }
+
+void aio_context_acquire(AioContext *ctx)
+{
+    rfifolock_lock(&ctx->lock);
+}
+
+void aio_context_release(AioContext *ctx)
+{
+    rfifolock_unlock(&ctx->lock);
+}
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -25,8 +25,17 @@
 #include "audio.h"
 #include "audio_int.h"

-#define LINE_IN_SAMPLES 1024
-#define LINE_OUT_SAMPLES 1024
+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+#define LINE_OUT_SAMPLES (480 * 4)
+#else
+#define LINE_OUT_SAMPLES (256 * 4)
+#endif
+
+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+#define LINE_IN_SAMPLES (480 * 4)
+#else
+#define LINE_IN_SAMPLES (256 * 4)
+#endif

 typedef struct SpiceRateCtl {
    int64_t               start_ticks;
@@ -111,7 +120,11 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
    SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
    struct audsettings settings;

+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+    settings.freq       = spice_server_get_best_playback_rate(NULL);
+#else
    settings.freq       = SPICE_INTERFACE_PLAYBACK_FREQ;
+#endif
    settings.nchannels  = SPICE_INTERFACE_PLAYBACK_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -122,6 +135,9 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)

    out->sin.base.sif = &playback_sif.base;
    qemu_spice_add_interface (&out->sin.base);
+#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
+    spice_server_set_playback_rate(&out->sin, settings.freq);
+#endif
    return 0;
 }

@@ -232,7 +248,11 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
    SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
    struct audsettings settings;

+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+    settings.freq       = spice_server_get_best_record_rate(NULL);
+#else
    settings.freq       = SPICE_INTERFACE_RECORD_FREQ;
+#endif
    settings.nchannels  = SPICE_INTERFACE_RECORD_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -243,6 +263,9 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)

    in->sin.base.sif = &record_sif.base;
    qemu_spice_add_interface (&in->sin.base);
+#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
+    spice_server_set_record_rate(&in->sin, settings.freq);
+#endif
    return 0;
 }

--- a/backends/baum.c
+++ b/backends/baum.c
@@ -566,8 +566,10 @@ CharDriverState *chr_baum_init(void)
    BaumDriverState *baum;
    CharDriverState *chr;
    brlapi_handle_t *handle;
-#ifdef CONFIG_SDL
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    SDL_SysWMinfo info;
+#endif
 #endif
    int tty;

@@ -595,12 +597,14 @@ CharDriverState *chr_baum_init(void)
        goto fail;
    }

-#ifdef CONFIG_SDL
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    memset(&info, 0, sizeof(info));
    SDL_VERSION(&info.version);
    if (SDL_GetWMInfo(&info))
        tty = info.info.x11.wmwindow;
    else
+#endif
 #endif
        tty = BRLAPI_TTY_DEFAULT;

--- a/backends/rng.c
+++ b/backends/rng.c
@@ -12,6 +12,7 @@

 #include "sysemu/rng.h"
 #include "qapi/qmp/qerror.h"
+#include "qom/object_interfaces.h"

 void rng_backend_request_entropy(RngBackend *s, size_t size,
                                 EntropyReceiveFunc *receive_entropy,
@@ -40,9 +41,9 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
    return s->opened;
 }

-void rng_backend_open(RngBackend *s, Error **errp)
+static void rng_backend_complete(UserCreatable *uc, Error **errp)
 {
-    object_property_set_bool(OBJECT(s), true, "opened", errp);
+    object_property_set_bool(OBJECT(uc), true, "opened", errp);
 }

 static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
@@ -76,13 +77,25 @@ static void rng_backend_init(Object *obj)
                             NULL);
 }

+static void rng_backend_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = rng_backend_complete;
+}
+
 static const TypeInfo rng_backend_info = {
    .name = TYPE_RNG_BACKEND,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(RngBackend),
    .instance_init = rng_backend_init,
    .class_size = sizeof(RngBackendClass),
+    .class_init = rng_backend_class_init,
    .abstract = true,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
 };

 static void register_types(void)
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,6 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
+block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
@@ -12,6 +13,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
@@ -23,4 +25,15 @@ common-obj-y += commit.o
 common-obj-y += mirror.o
 common-obj-y += backup.o

-$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
+iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
+iscsi.o-libs       := $(LIBISCSI_LIBS)
+curl.o-cflags      := $(CURL_CFLAGS)
+curl.o-libs        := $(CURL_LIBS)
+rbd.o-cflags       := $(RBD_CFLAGS)
+rbd.o-libs         := $(RBD_LIBS)
+gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
+gluster.o-libs     := $(GLUSTERFS_LIBS)
+ssh.o-cflags       := $(LIBSSH2_CFLAGS)
+ssh.o-libs         := $(LIBSSH2_LIBS)
+qcow.o-libs        := -lz
+linux-aio.o-libs   := -laio
--- a/block/backup.c
+++ b/block/backup.c
@@ -181,8 +181,13 @@ static int coroutine_fn backup_before_write_notify(
        void *opaque)
 {
    BdrvTrackedRequest *req = opaque;
+    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

-    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -186,6 +186,14 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {

    [BLKDBG_FLUSH_TO_OS]                    = "flush_to_os",
    [BLKDBG_FLUSH_TO_DISK]                  = "flush_to_disk",
+
+    [BLKDBG_PWRITEV_RMW_HEAD]               = "pwritev_rmw.head",
+    [BLKDBG_PWRITEV_RMW_AFTER_HEAD]         = "pwritev_rmw.after_head",
+    [BLKDBG_PWRITEV_RMW_TAIL]               = "pwritev_rmw.tail",
+    [BLKDBG_PWRITEV_RMW_AFTER_TAIL]         = "pwritev_rmw.after_tail",
+    [BLKDBG_PWRITEV]                        = "pwritev",
+    [BLKDBG_PWRITEV_ZERO]                   = "pwritev_zero",
+    [BLKDBG_PWRITEV_DONE]                   = "pwritev_done",
 };

 static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -271,19 +279,33 @@ static void remove_rule(BlkdebugRule *rule)
    g_free(rule);
 }

-static int read_config(BDRVBlkdebugState *s, const char *filename)
+static int read_config(BDRVBlkdebugState *s, const char *filename,
+                       QDict *options, Error **errp)
 {
-    FILE *f;
+    FILE *f = NULL;
    int ret;
    struct add_rule_data d;
+    Error *local_err = NULL;

-    f = fopen(filename, "r");
-    if (f == NULL) {
-        return -errno;
+    if (filename) {
+        f = fopen(filename, "r");
+        if (f == NULL) {
+            error_setg_errno(errp, errno, "Could not read blkdebug config file");
+            return -errno;
+        }
+
+        ret = qemu_config_parse(f, config_groups, filename);
+        if (ret < 0) {
+            error_setg(errp, "Could not parse blkdebug config file");
+            ret = -EINVAL;
+            goto fail;
+        }
    }

-    ret = qemu_config_parse(f, config_groups, filename);
-    if (ret < 0) {
+    qemu_config_parse_qdict(options, config_groups, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto fail;
    }

@@ -298,7 +320,9 @@ static int read_config(BDRVBlkdebugState *s, const char *filename)
 fail:
    qemu_opts_reset(&inject_error_opts);
    qemu_opts_reset(&set_state_opts);
-    fclose(f);
+    if (f) {
+        fclose(f);
+    }
    return ret;
 }

@@ -310,7 +334,9 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* Parse the blkdebug: prefix */
    if (!strstart(filename, "blkdebug:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkdebug:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -346,6 +372,11 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
+        {
+            .name = "align",
+            .type = QEMU_OPT_SIZE,
+            .help = "Required alignment in bytes",
+        },
        { /* end of list */ }
    },
 };
@@ -356,46 +387,53 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename, *config;
+    const char *config;
+    uint64_t align;
    int ret;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
-        goto fail;
+        goto out;
    }

-    /* Read rules from config file */
+    /* Read rules from config file or command line options */
    config = qemu_opt_get(opts, "config");
-    if (config) {
-        ret = read_config(s, config);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read blkdebug config file");
-            goto fail;
-        }
+    ret = read_config(s, config, options, errp);
+    if (ret) {
+        goto out;
    }

    /* Set initial state */
    s->state = 1;

    /* Open the backing file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve image file name");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err);
+    assert(bs->file == NULL);
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
+                          flags | BDRV_O_PROTOCOL, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
-        goto fail;
+        goto out;
+    }
+
+    /* Set request alignment */
+    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
+    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
+        bs->request_alignment = align;
+    } else {
+        error_setg(errp, "Invalid alignment");
+        ret = -EINVAL;
+        goto fail_unref;
    }

    ret = 0;
-fail:
+    goto out;
+
+fail_unref:
+    bdrv_unref(bs->file);
+out:
    qemu_opts_del(opts);
    return ret;
 }
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -78,7 +78,9 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* Parse the blkverify: prefix */
    if (!strstart(filename, "blkverify:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkverify:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -122,44 +124,31 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkverifyState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename, *raw;
    int ret;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

-    /* Parse the raw image filename */
-    raw = qemu_opt_get(opts, "x-raw");
-    if (raw == NULL) {
-        error_setg(errp, "Could not retrieve raw image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err);
+    /* Open the raw file */
+    assert(bs->file == NULL);
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
+                          "raw", flags | BDRV_O_PROTOCOL, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    /* Open the test file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve test image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->test_file = bdrv_new("");
-    ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err);
+    assert(s->test_file == NULL);
+    ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
+                          "test", flags, false, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
-        bdrv_unref(s->test_file);
        s->test_file = NULL;
        goto fail;
    }
@@ -184,110 +173,6 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file);
 }

-/**
- * Check that I/O vector contents are identical
- *
- * @a:          I/O vector
- * @b:          I/O vector
- * @ret:        Offset to first mismatching byte or -1 if match
- */
-static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
-{
-    int i;
-    ssize_t offset = 0;
-
-    assert(a->niov == b->niov);
-    for (i = 0; i < a->niov; i++) {
-        size_t len = 0;
-        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
-        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
-
-        assert(a->iov[i].iov_len == b->iov[i].iov_len);
-        while (len < a->iov[i].iov_len && *p++ == *q++) {
-            len++;
-        }
-
-        offset += len;
-
-        if (len != a->iov[i].iov_len) {
-            return offset;
-        }
-    }
-    return -1;
-}
-
-typedef struct {
-    int src_index;
-    struct iovec *src_iov;
-    void *dest_base;
-} IOVectorSortElem;
-
-static int sortelem_cmp_src_base(const void *a, const void *b)
-{
-    const IOVectorSortElem *elem_a = a;
-    const IOVectorSortElem *elem_b = b;
-
-    /* Don't overflow */
-    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
-        return -1;
-    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-static int sortelem_cmp_src_index(const void *a, const void *b)
-{
-    const IOVectorSortElem *elem_a = a;
-    const IOVectorSortElem *elem_b = b;
-
-    return elem_a->src_index - elem_b->src_index;
-}
-
-/**
- * Copy contents of I/O vector
- *
- * The relative relationships of overlapping iovecs are preserved.  This is
- * necessary to ensure identical semantics in the cloned I/O vector.
- */
-static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
-                                  void *buf)
-{
-    IOVectorSortElem sortelems[src->niov];
-    void *last_end;
-    int i;
-
-    /* Sort by source iovecs by base address */
-    for (i = 0; i < src->niov; i++) {
-        sortelems[i].src_index = i;
-        sortelems[i].src_iov = &src->iov[i];
-    }
-    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
-
-    /* Allocate buffer space taking into account overlapping iovecs */
-    last_end = NULL;
-    for (i = 0; i < src->niov; i++) {
-        struct iovec *cur = sortelems[i].src_iov;
-        ptrdiff_t rewind = 0;
-
-        /* Detect overlap */
-        if (last_end && last_end > cur->iov_base) {
-            rewind = last_end - cur->iov_base;
-        }
-
-        sortelems[i].dest_base = buf - rewind;
-        buf += cur->iov_len - MIN(rewind, cur->iov_len);
-        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
-    }
-
-    /* Sort by source iovec index and build destination iovec */
-    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
-    for (i = 0; i < src->niov; i++) {
-        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
-    }
-}
-
 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
@@ -351,7 +236,7 @@ static void blkverify_aio_cb(void *opaque, int ret)

 static void blkverify_verify_readv(BlkverifyAIOCB *acb)
 {
-    ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
    if (offset != -1) {
        blkverify_err(acb, "contents mismatch in sector %" PRId64,
                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -369,7 +254,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
    acb->verify = blkverify_verify_readv;
    acb->buf = qemu_blockalign(bs->file, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
@@ -403,6 +288,20 @@ static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
    return bdrv_aio_flush(s->test_file, cb, opaque);
 }

+static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
+                                                  BlockDriverState *candidate)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+
+    bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+
+    if (perm) {
+        return true;
+    }
+
+    return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+}
+
 static BlockDriver bdrv_blkverify = {
    .format_name            = "blkverify",
    .protocol_name          = "blkverify",
@@ -417,7 +316,8 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_aio_writev        = blkverify_aio_writev,
    .bdrv_aio_flush         = blkverify_aio_flush,

-    .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden,
+    .is_filter              = true,
+    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 };

 static void bdrv_blkverify_init(void)
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -38,57 +38,42 @@

 // not allocated: 0xffffffff

-// always little-endian
-struct bochs_header_v1 {
-    char magic[32]; // "Bochs Virtual HD Image"
-    char type[16]; // "Redolog"
-    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
-    uint32_t version;
-    uint32_t header; // size of header
-
-    union {
-	struct {
-	    uint32_t catalog; // num of entries
-	    uint32_t bitmap; // bitmap size
-	    uint32_t extent; // extent size
-	    uint64_t disk; // disk size
-	    char padding[HEADER_SIZE - 64 - 8 - 20];
-	} redolog;
-	char padding[HEADER_SIZE - 64 - 8];
-    } extra;
-};
-
 // always little-endian
 struct bochs_header {
-    char magic[32]; // "Bochs Virtual HD Image"
-    char type[16]; // "Redolog"
-    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
+    char magic[32];     /* "Bochs Virtual HD Image" */
+    char type[16];      /* "Redolog" */
+    char subtype[16];   /* "Undoable" / "Volatile" / "Growing" */
    uint32_t version;
-    uint32_t header; // size of header
+    uint32_t header;    /* size of header */
+
+    uint32_t catalog;   /* num of entries */
+    uint32_t bitmap;    /* bitmap size */
+    uint32_t extent;    /* extent size */

    union {
-	struct {
-	    uint32_t catalog; // num of entries
-	    uint32_t bitmap; // bitmap size
-	    uint32_t extent; // extent size
-	    uint32_t reserved; // for ???
-	    uint64_t disk; // disk size
-	    char padding[HEADER_SIZE - 64 - 8 - 24];
-	} redolog;
-	char padding[HEADER_SIZE - 64 - 8];
+        struct {
+            uint32_t reserved;  /* for ??? */
+            uint64_t disk;      /* disk size */
+            char padding[HEADER_SIZE - 64 - 20 - 12];
+        } QEMU_PACKED redolog;
+        struct {
+            uint64_t disk;      /* disk size */
+            char padding[HEADER_SIZE - 64 - 20 - 8];
+        } QEMU_PACKED redolog_v1;
+        char padding[HEADER_SIZE - 64 - 20];
    } extra;
-};
+} QEMU_PACKED;

 typedef struct BDRVBochsState {
    CoMutex lock;
    uint32_t *catalog_bitmap;
-    int catalog_size;
+    uint32_t catalog_size;

-    int data_offset;
+    uint32_t data_offset;

-    int bitmap_blocks;
-    int extent_blocks;
-    int extent_size;
+    uint32_t bitmap_blocks;
+    uint32_t extent_blocks;
+    uint32_t extent_size;
 } BDRVBochsState;

 static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -112,9 +97,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
    BDRVBochsState *s = bs->opaque;
-    int i;
+    uint32_t i;
    struct bochs_header bochs;
-    struct bochs_header_v1 header_v1;
    int ret;

    bs->read_only = 1; // no write support yet
@@ -129,17 +113,24 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        strcmp(bochs.subtype, GROWING_TYPE) ||
 	((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
 	(le32_to_cpu(bochs.version) != HEADER_V1))) {
-        return -EMEDIUMTYPE;
+        error_setg(errp, "Image not in Bochs format");
+        return -EINVAL;
    }

    if (le32_to_cpu(bochs.version) == HEADER_V1) {
-      memcpy(&header_v1, &bochs, sizeof(bochs));
-      bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
+        bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512;
    } else {
-      bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
+        bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
+    }
+
+    /* Limit to 1M entries to avoid unbounded allocation. This is what is
+     * needed for the largest image that bximage can create (~8 TB). */
+    s->catalog_size = le32_to_cpu(bochs.catalog);
+    if (s->catalog_size > 0x100000) {
+        error_setg(errp, "Catalog size is too large");
+        return -EFBIG;
    }

-    s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
@@ -153,10 +144,34 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,

    s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);

-    s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
-    s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512;
+    s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512;
+    s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;

-    s->extent_size = le32_to_cpu(bochs.extra.redolog.extent);
+    s->extent_size = le32_to_cpu(bochs.extent);
+    if (s->extent_size < BDRV_SECTOR_SIZE) {
+        /* bximage actually never creates extents smaller than 4k */
+        error_setg(errp, "Extent size must be at least 512");
+        ret = -EINVAL;
+        goto fail;
+    } else if (!is_power_of_2(s->extent_size)) {
+        error_setg(errp, "Extent size %" PRIu32 " is not a power of two",
+                   s->extent_size);
+        ret = -EINVAL;
+        goto fail;
+    } else if (s->extent_size > 0x800000) {
+        error_setg(errp, "Extent size %" PRIu32 " is too large",
+                   s->extent_size);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    if (s->catalog_size < DIV_ROUND_UP(bs->total_sectors,
+                                       s->extent_size / BDRV_SECTOR_SIZE))
+    {
+        error_setg(errp, "Catalog size is too small for this disk size");
+        ret = -EINVAL;
+        goto fail;
+    }

    qemu_co_mutex_init(&s->lock);
    return 0;
@@ -169,8 +184,8 @@ fail:
 static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 {
    BDRVBochsState *s = bs->opaque;
-    int64_t offset = sector_num * 512;
-    int64_t extent_index, extent_offset, bitmap_offset;
+    uint64_t offset = sector_num * 512;
+    uint64_t extent_index, extent_offset, bitmap_offset;
    char bitmap_entry;

    // seek to sector
@@ -181,8 +196,9 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 	return -1; /* not allocated */
    }

-    bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
-	(s->extent_blocks + s->bitmap_blocks));
+    bitmap_offset = s->data_offset +
+        (512 * (uint64_t) s->catalog_bitmap[extent_index] *
+        (s->extent_blocks + s->bitmap_blocks));

    /* read in bitmap for current extent */
    if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -26,6 +26,9 @@
 #include "qemu/module.h"
 #include <zlib.h>

+/* Maximum compressed block size */
+#define MAX_BLOCK_SIZE (64 * 1024 * 1024)
+
 typedef struct BDRVCloopState {
    CoMutex lock;
    uint32_t block_size;
@@ -68,6 +71,26 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return ret;
    }
    s->block_size = be32_to_cpu(s->block_size);
+    if (s->block_size % 512) {
+        error_setg(errp, "block_size %u must be a multiple of 512",
+                   s->block_size);
+        return -EINVAL;
+    }
+    if (s->block_size == 0) {
+        error_setg(errp, "block_size cannot be zero");
+        return -EINVAL;
+    }
+
+    /* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
+     * we can accept more.  Prevent ridiculous values like 4 GB - 1 since we
+     * need a buffer this big.
+     */
+    if (s->block_size > MAX_BLOCK_SIZE) {
+        error_setg(errp, "block_size %u must be %u MB or less",
+                   s->block_size,
+                   MAX_BLOCK_SIZE / (1024 * 1024));
+        return -EINVAL;
+    }

    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    if (ret < 0) {
@@ -76,7 +99,23 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    s->n_blocks = be32_to_cpu(s->n_blocks);

    /* read offsets */
-    offsets_size = s->n_blocks * sizeof(uint64_t);
+    if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
+        /* Prevent integer overflow */
+        error_setg(errp, "n_blocks %u must be %zu or less",
+                   s->n_blocks,
+                   (UINT32_MAX - 1) / sizeof(uint64_t));
+        return -EINVAL;
+    }
+    offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
+    if (offsets_size > 512 * 1024 * 1024) {
+        /* Prevent ridiculous offsets_size which causes memory allocation to
+         * fail or overflows bdrv_pread() size.  In practice the 512 MB
+         * offsets[] limit supports 16 TB images at 256 KB block size.
+         */
+        error_setg(errp, "image requires too many offsets, "
+                   "try increasing block size");
+        return -EINVAL;
+    }
    s->offsets = g_malloc(offsets_size);

    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
@@ -84,13 +123,37 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    for(i=0;i<s->n_blocks;i++) {
+    for (i = 0; i < s->n_blocks + 1; i++) {
+        uint64_t size;
+
        s->offsets[i] = be64_to_cpu(s->offsets[i]);
-        if (i > 0) {
-            uint32_t size = s->offsets[i] - s->offsets[i - 1];
-            if (size > max_compressed_block_size) {
-                max_compressed_block_size = size;
-            }
+        if (i == 0) {
+            continue;
+        }
+
+        if (s->offsets[i] < s->offsets[i - 1]) {
+            error_setg(errp, "offsets not monotonically increasing at "
+                       "index %u, image file is corrupt", i);
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        size = s->offsets[i] - s->offsets[i - 1];
+
+        /* Compressed blocks should be smaller than the uncompressed block size
+         * but maybe compression performed poorly so the compressed block is
+         * actually bigger.  Clamp down on unrealistic values to prevent
+         * ridiculous s->compressed_block allocation.
+         */
+        if (size > 2 * MAX_BLOCK_SIZE) {
+            error_setg(errp, "invalid compressed block size at index %u, "
+                       "image file is corrupt", i);
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        if (size > max_compressed_block_size) {
+            max_compressed_block_size = size;
        }
    }

@@ -180,9 +243,7 @@ static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
 static void cloop_close(BlockDriverState *bs)
 {
    BDRVCloopState *s = bs->opaque;
-    if (s->n_blocks > 0) {
-        g_free(s->offsets);
-    }
+    g_free(s->offsets);
    g_free(s->compressed_block);
    g_free(s->uncompressed_block);
    inflateEnd(&s->zstream);
--- a/block/cow.c
+++ b/block/cow.c
@@ -74,7 +74,8 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in COW format");
+        ret = -EINVAL;
        goto fail;
    }

@@ -82,7 +83,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
        char version[64];
        snprintf(version, sizeof(version),
               "COW version %d", cow_header.version);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "cow", version);
        ret = -ENOTSUP;
        goto fail;
@@ -346,15 +347,15 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,

    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    cow_bs = NULL;
+    ret = bdrv_open(&cow_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

--- a/block/curl.c
+++ b/block/curl.c
@@ -34,6 +34,11 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif

+#if LIBCURL_VERSION_NUM >= 0x071000
+/* The multi interface timer callback was introduced in 7.16.0 */
+#define NEED_CURL_TIMER_CALLBACK
+#endif
+
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
                   CURLPROTO_FTP | CURLPROTO_FTPS | \
                   CURLPROTO_TFTP)
@@ -77,6 +82,7 @@ typedef struct CURLState

 typedef struct BDRVCURLState {
    CURLM *multi;
+    QEMUTimer timer;
    size_t len;
    CURLState states[CURL_NUM_STATES];
    char *url;
@@ -87,6 +93,23 @@ typedef struct BDRVCURLState {
 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);

+#ifdef NEED_CURL_TIMER_CALLBACK
+/*
+ * Timer callback handed to libcurl's multi interface.
+ *
+ * A timeout_ms of -1 removes the pending timer; any other value re-arms
+ * s->timer to fire timeout_ms milliseconds from now on QEMU_CLOCK_REALTIME.
+ * Always returns 0.
+ */
+static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
+{
+    BDRVCURLState *state = opaque;
+    int64_t deadline_ns;
+
+    DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
+
+    if (timeout_ms == -1) {
+        timer_del(&state->timer);
+        return 0;
+    }
+
+    /* the timer works in nanoseconds, libcurl reports milliseconds */
+    deadline_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    deadline_ns += (int64_t)timeout_ms * 1000 * 1000;
+    timer_mod(&state->timer, deadline_ns);
+    return 0;
+}
+#endif
+
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *s, void *sp)
 {
@@ -134,6 +157,11 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    if (!s || !s->orig_buf)
        goto read_end;

+    if (s->buf_off >= s->buf_len) {
+        /* buffer full, read nothing */
+        return 0;
+    }
+    realsize = MIN(realsize, s->buf_len - s->buf_off);
    memcpy(s->orig_buf + s->buf_off, ptr, realsize);
    s->buf_off += realsize;

@@ -209,20 +237,10 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
    return FIND_RET_NONE;
 }

-static void curl_multi_do(void *arg)
+static void curl_multi_read(BDRVCURLState *s)
 {
-    BDRVCURLState *s = (BDRVCURLState *)arg;
-    int running;
-    int r;
    int msgs_in_queue;

-    if (!s->multi)
-        return;
-
-    do {
-        r = curl_multi_socket_all(s->multi, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
    /* Try to find done transfers, so we can free the easy
     * handle again. */
    do {
@@ -266,6 +284,41 @@ static void curl_multi_do(void *arg)
    } while(msgs_in_queue);
 }

+/*
+ * Drive all transfers on the multi handle, then process finished ones.
+ * Does nothing when the multi handle has not been created.
+ */
+static void curl_multi_do(void *arg)
+{
+    BDRVCURLState *state = arg;
+    int still_running;
+
+    if (state->multi == NULL) {
+        return;
+    }
+
+    /* call again for as long as libcurl answers CURLM_CALL_MULTI_PERFORM */
+    while (curl_multi_socket_all(state->multi, &still_running) ==
+           CURLM_CALL_MULTI_PERFORM) {
+        /* nothing */
+    }
+
+    curl_multi_read(state);
+}
+
+/*
+ * Timer handler: report the timeout to libcurl and process finished
+ * transfers.  Aborts when built without NEED_CURL_TIMER_CALLBACK.
+ */
+static void curl_multi_timeout_do(void *arg)
+{
+#ifdef NEED_CURL_TIMER_CALLBACK
+    BDRVCURLState *state = arg;
+    int still_running;
+
+    if (state->multi != NULL) {
+        curl_multi_socket_action(state->multi, CURL_SOCKET_TIMEOUT, 0,
+                                 &still_running);
+        curl_multi_read(state);
+    }
+#else
+    abort();
+#endif
+}
+
 static CURLState *curl_init_state(BDRVCURLState *s)
 {
    CURLState *state = NULL;
@@ -408,30 +461,27 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    static int inited = 0;

    if (flags & BDRV_O_RDWR) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR,
-                      "curl block device does not support writes");
+        error_setg(errp, "curl block device does not support writes");
        return -EROFS;
    }

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        goto out_noclean;
    }

    s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
    if ((s->readahead_size & 0x1ff) != 0) {
-        fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
-                s->readahead_size);
+        error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
+                   s->readahead_size);
        goto out_noclean;
    }

    file = qemu_opt_get(opts, "url");
    if (file == NULL) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "curl block driver requires "
-                      "an 'url' option");
+        error_setg(errp, "curl block driver requires an 'url' option");
        goto out_noclean;
    }

@@ -473,12 +523,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

+    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
    // Now we know the file exists and its size, so let's
    // initialize the multi interface!

    s->multi = curl_multi_init();
    curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
    curl_multi_do(s);

    qemu_opts_del(opts);
@@ -597,6 +655,9 @@ static void curl_close(BlockDriverState *bs)
    }
    if (s->multi)
        curl_multi_cleanup(s->multi);
+
+    timer_del(&s->timer);
+
    g_free(s->url);
 }

--- a/block/dmg.c
+++ b/block/dmg.c
@@ -27,6 +27,14 @@
 #include "qemu/module.h"
 #include <zlib.h>

+enum {
+    /* Limit chunk sizes to prevent unreasonable amounts of memory being used
+     * or truncating when converting to 32-bit types
+     */
+    DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */
+    DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
+};
+
 typedef struct BDRVDMGState {
    CoMutex lock;
    /* each chunk contains a certain number of sectors,
@@ -92,13 +100,44 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    return 0;
 }

+/* Grow the two running maxima, if needed, so that they also cover the chunk
+ * at index `chunk`.  The maxima determine the buffer sizes needed for
+ * compressed/uncompressed chunk I/O.  Chunk types other than zlib, copy and
+ * zero leave both maxima untouched.
+ */
+static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
+                                  uint32_t *max_compressed_size,
+                                  uint32_t *max_sectors_per_chunk)
+{
+    uint32_t type = s->types[chunk];
+    uint32_t in_bytes = 0;
+    uint32_t out_sectors = 0;
+
+    if (type == 0x80000005) { /* zlib compressed */
+        in_bytes = s->lengths[chunk];
+        out_sectors = s->sectorcounts[chunk];
+    } else if (type == 1) { /* copy */
+        /* round the byte length up to whole 512-byte sectors */
+        out_sectors = (s->lengths[chunk] + 511) / 512;
+    } else if (type == 2) { /* zero */
+        out_sectors = s->sectorcounts[chunk];
+    }
+
+    if (*max_compressed_size < in_bytes) {
+        *max_compressed_size = in_bytes;
+    }
+    if (*max_sectors_per_chunk < out_sectors) {
+        *max_sectors_per_chunk = out_sectors;
+    }
+}
+
 static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVDMGState *s = bs->opaque;
-    uint64_t info_begin,info_end,last_in_offset,last_out_offset;
+    uint64_t info_begin, info_end, last_in_offset, last_out_offset;
    uint32_t count, tmp;
-    uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
+    uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
    int64_t offset;
    int ret;

@@ -160,37 +199,40 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }

-	if (type == 0x6d697368 && count >= 244) {
-	    int new_size, chunk_count;
+        if (type == 0x6d697368 && count >= 244) {
+            size_t new_size;
+            uint32_t chunk_count;

            offset += 4;
            offset += 200;

-	    chunk_count = (count-204)/40;
-	    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
-	    s->types = g_realloc(s->types, new_size/2);
-	    s->offsets = g_realloc(s->offsets, new_size);
-	    s->lengths = g_realloc(s->lengths, new_size);
-	    s->sectors = g_realloc(s->sectors, new_size);
-	    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
+            chunk_count = (count - 204) / 40;
+            new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+            s->types = g_realloc(s->types, new_size / 2);
+            s->offsets = g_realloc(s->offsets, new_size);
+            s->lengths = g_realloc(s->lengths, new_size);
+            s->sectors = g_realloc(s->sectors, new_size);
+            s->sectorcounts = g_realloc(s->sectorcounts, new_size);

            for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
                ret = read_uint32(bs, offset, &s->types[i]);
                if (ret < 0) {
                    goto fail;
                }
-		offset += 4;
-		if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
-		    if(s->types[i]==0xffffffff) {
-			last_in_offset = s->offsets[i-1]+s->lengths[i-1];
-			last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
-		    }
-		    chunk_count--;
-		    i--;
-		    offset += 36;
-		    continue;
-		}
-		offset += 4;
+                offset += 4;
+                if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
+                    s->types[i] != 2) {
+                    if (s->types[i] == 0xffffffff && i > 0) {
+                        last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
+                        last_out_offset = s->sectors[i - 1] +
+                                          s->sectorcounts[i - 1];
+                    }
+                    chunk_count--;
+                    i--;
+                    offset += 36;
+                    continue;
+                }
+                offset += 4;

                ret = read_uint64(bs, offset, &s->sectors[i]);
                if (ret < 0) {
@@ -205,6 +247,14 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                }
                offset += 8;

+                if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
+                    error_report("sector count %" PRIu64 " for chunk %u is "
+                                 "larger than max (%u)",
+                                 s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
                ret = read_uint64(bs, offset, &s->offsets[i]);
                if (ret < 0) {
                    goto fail;
@@ -218,19 +268,25 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                }
                offset += 8;

-		if(s->lengths[i]>max_compressed_size)
-		    max_compressed_size = s->lengths[i];
-		if(s->sectorcounts[i]>max_sectors_per_chunk)
-		    max_sectors_per_chunk = s->sectorcounts[i];
-	    }
-	    s->n_chunks+=chunk_count;
-	}
+                if (s->lengths[i] > DMG_LENGTHS_MAX) {
+                    error_report("length %" PRIu64 " for chunk %u is larger "
+                                 "than max (%u)",
+                                 s->lengths[i], i, DMG_LENGTHS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
+                update_max_chunk_size(s, i, &max_compressed_size,
+                                      &max_sectors_per_chunk);
+            }
+            s->n_chunks += chunk_count;
+        }
    }

    /* initialize zlib engine */
-    s->compressed_chunk = g_malloc(max_compressed_size+1);
-    s->uncompressed_chunk = g_malloc(512*max_sectors_per_chunk);
-    if(inflateInit(&s->zstream) != Z_OK) {
+    s->compressed_chunk = g_malloc(max_compressed_size + 1);
+    s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
+    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
    }
@@ -252,83 +308,82 @@ fail:
 }

 static inline int is_sector_in_chunk(BDRVDMGState* s,
-		uint32_t chunk_num,int sector_num)
+                uint32_t chunk_num, uint64_t sector_num)
 {
-    if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num ||
-	    s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num)
-	return 0;
-    else
-	return -1;
+    /* Returns nonzero (-1) when chunk_num is a valid chunk index and
+     * sector_num lies inside that chunk's sector range; returns 0 when the
+     * index is out of range or the sector is outside the chunk.
+     */
+    if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num ||
+            s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
+        return 0;
+    } else {
+        return -1;
+    }
 }

-static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
+/*
+ * Find the chunk whose sector range
+ * [sectors[c], sectors[c] + sectorcounts[c]) contains sector_num.
+ *
+ * Returns the chunk index, or s->n_chunks when no chunk covers the sector.
+ */
+static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
 {
    /* binary search */
-    uint32_t chunk1=0,chunk2=s->n_chunks,chunk3;
-    while(chunk1!=chunk2) {
-	chunk3 = (chunk1+chunk2)/2;
-	if(s->sectors[chunk3]>sector_num)
-	    chunk2 = chunk3;
-	else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num)
-	    return chunk3;
-	else
-	    chunk1 = chunk3;
+    /* candidates are the half-open index range [chunk1, chunk2) */
+    uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
+    while (chunk1 != chunk2) {
+        /* midpoint computed without overflowing uint32_t */
+        chunk3 = chunk1 + (chunk2 - chunk1) / 2;
+        if (s->sectors[chunk3] > sector_num) {
+            chunk2 = chunk3;
+        } else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
+            return chunk3;
+        } else {
+            /* chunk3 ends at or before sector_num, so drop it from the
+             * range.  Keeping chunk1 == chunk3 would loop forever once
+             * chunk2 == chunk1 + 1 and the sector is in no chunk.
+             */
+            chunk1 = chunk3 + 1;
+        }
    }
    return s->n_chunks; /* error */
 }

-static inline int dmg_read_chunk(BlockDriverState *bs, int sector_num)
+/*
+ * Make sure the chunk containing sector_num is loaded into
+ * s->uncompressed_chunk.  Nothing is read when the sector already belongs to
+ * s->current_chunk.
+ *
+ * Returns 0 on success, -1 when no chunk covers the sector or when reading or
+ * inflating the chunk fails.
+ */
+static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
 {
    BDRVDMGState *s = bs->opaque;

-    if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
-	int ret;
-	uint32_t chunk = search_chunk(s,sector_num);
+    if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
+        int ret;
+        uint32_t chunk = search_chunk(s, sector_num);

-	if(chunk>=s->n_chunks)
-	    return -1;
+        if (chunk >= s->n_chunks) {
+            return -1;
+        }

-	s->current_chunk = s->n_chunks;
-	switch(s->types[chunk]) {
-	case 0x80000005: { /* zlib compressed */
-	    int i;
+        /* n_chunks is not a valid index, so the cached chunk stays marked
+         * as invalid if any of the early returns below is taken.
+         */
+        s->current_chunk = s->n_chunks;
+        switch (s->types[chunk]) {
+        case 0x80000005: { /* zlib compressed */
+            /* we need to buffer, because only the chunk as whole can be
+             * inflated. */
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
+                             s->compressed_chunk, s->lengths[chunk]);
+            if (ret != s->lengths[chunk]) {
+                return -1;
+            }

-	    /* we need to buffer, because only the chunk as whole can be
-	     * inflated. */
-	    i=0;
-	    do {
-                ret = bdrv_pread(bs->file, s->offsets[chunk] + i,
-                                 s->compressed_chunk+i, s->lengths[chunk]-i);
-		if(ret<0 && errno==EINTR)
-		    ret=0;
-		i+=ret;
-	    } while(ret>=0 && ret+i<s->lengths[chunk]);
-
-	    if (ret != s->lengths[chunk])
-		return -1;
-
-	    s->zstream.next_in = s->compressed_chunk;
-	    s->zstream.avail_in = s->lengths[chunk];
-	    s->zstream.next_out = s->uncompressed_chunk;
-	    s->zstream.avail_out = 512*s->sectorcounts[chunk];
-	    ret = inflateReset(&s->zstream);
-	    if(ret != Z_OK)
-		return -1;
-	    ret = inflate(&s->zstream, Z_FINISH);
-	    if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk])
-		return -1;
-	    break; }
-	case 1: /* copy */
-	    ret = bdrv_pread(bs->file, s->offsets[chunk],
+            s->zstream.next_in = s->compressed_chunk;
+            s->zstream.avail_in = s->lengths[chunk];
+            s->zstream.next_out = s->uncompressed_chunk;
+            s->zstream.avail_out = 512 * s->sectorcounts[chunk];
+            ret = inflateReset(&s->zstream);
+            if (ret != Z_OK) {
+                return -1;
+            }
+            ret = inflate(&s->zstream, Z_FINISH);
+            /* fail unless the stream ended and produced exactly the number
+             * of bytes the chunk's sector count calls for
+             */
+            if (ret != Z_STREAM_END ||
+                s->zstream.total_out != 512 * s->sectorcounts[chunk]) {
+                return -1;
+            }
+            break; }
+        case 1: /* copy */
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->uncompressed_chunk, s->lengths[chunk]);
-	    if (ret != s->lengths[chunk])
-		return -1;
-	    break;
-	case 2: /* zero */
-	    memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
-	    break;
-	}
-	s->current_chunk = chunk;
+            if (ret != s->lengths[chunk]) {
+                return -1;
+            }
+            break;
+        case 2: /* zero */
+            memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
+            break;
+        }
+        s->current_chunk = chunk;
    }
    return 0;
 }
@@ -339,12 +394,14 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
    BDRVDMGState *s = bs->opaque;
    int i;

-    for(i=0;i<nb_sectors;i++) {
-	uint32_t sector_offset_in_chunk;
-	if(dmg_read_chunk(bs, sector_num+i) != 0)
-	    return -1;
-	sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
-	memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
+    for (i = 0; i < nb_sectors; i++) {
+        uint32_t sector_offset_in_chunk;
+        if (dmg_read_chunk(bs, sector_num + i) != 0) {
+            return -1;
+        }
+        sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
+        memcpy(buf + i * 512,
+               s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
    }
    return 0;
 }
@@ -376,12 +433,12 @@ static void dmg_close(BlockDriverState *bs)
 }

 static BlockDriver bdrv_dmg = {
-    .format_name	= "dmg",
-    .instance_size	= sizeof(BDRVDMGState),
-    .bdrv_probe		= dmg_probe,
-    .bdrv_open		= dmg_open,
-    .bdrv_read          = dmg_co_read,
-    .bdrv_close		= dmg_close,
+    .format_name    = "dmg",
+    .instance_size  = sizeof(BDRVDMGState),
+    .bdrv_probe     = dmg_probe,
+    .bdrv_open      = dmg_open,
+    .bdrv_read      = dmg_co_read,
+    .bdrv_close     = dmg_close,
 };

 static void bdrv_dmg_init(void)
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -3,42 +3,26 @@
 *
 * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
 *
- * Pipe handling mechanism in AIO implementation is derived from
- * block/rbd.c. Hence,
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
 *
- * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
- *                         Josh Durgin <josh.durgin@dreamhost.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
 */
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
-#include "qemu/sockets.h"
 #include "qemu/uri.h"

 typedef struct GlusterAIOCB {
-    BlockDriverAIOCB common;
    int64_t size;
    int ret;
-    bool *finished;
    QEMUBH *bh;
+    Coroutine *coroutine;
 } GlusterAIOCB;

 typedef struct BDRVGlusterState {
    struct glfs *glfs;
-    int fds[2];
    struct glfs_fd *fd;
-    int event_reader_pos;
-    GlusterAIOCB *event_acb;
 } BDRVGlusterState;

-#define GLUSTER_FD_READ  0
-#define GLUSTER_FD_WRITE 1
-
 typedef struct GlusterConf {
    char *server;
    int port;
@@ -49,11 +33,13 @@ typedef struct GlusterConf {

 static void qemu_gluster_gconf_free(GlusterConf *gconf)
 {
-    g_free(gconf->server);
-    g_free(gconf->volname);
-    g_free(gconf->image);
-    g_free(gconf->transport);
-    g_free(gconf);
+    /* Free the strings owned by gconf and then gconf itself; a NULL gconf
+     * is accepted and ignored.
+     */
+    if (gconf) {
+        g_free(gconf->server);
+        g_free(gconf->volname);
+        g_free(gconf->image);
+        g_free(gconf->transport);
+        g_free(gconf);
+    }
 }

 static int parse_volume_options(GlusterConf *gconf, char *path)
@@ -94,7 +80,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
 * 'server' specifies the server where the volume file specification for
 * the given volume resides. This can be either hostname, ipv4 address
 * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specifed.
+ * If transport type is 'unix', then 'server' field should not be specified.
 * The 'socket' field needs to be populated with the path to unix domain
 * socket.
 *
@@ -131,7 +117,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
    }

    /* transport */
-    if (!strcmp(uri->scheme, "gluster")) {
+    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
        gconf->transport = g_strdup("tcp");
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
        gconf->transport = g_strdup("tcp");
@@ -167,7 +153,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
        }
        gconf->server = g_strdup(qp->p[0].value);
    } else {
-        gconf->server = g_strdup(uri->server);
+        gconf->server = g_strdup(uri->server ? uri->server : "localhost");
        gconf->port = uri->port;
    }

@@ -179,7 +165,8 @@ out:
    return ret;
 }

-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
+                                      Error **errp)
 {
    struct glfs *glfs = NULL;
    int ret;
@@ -187,8 +174,8 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)

    ret = qemu_gluster_parseuri(gconf, filename);
    if (ret < 0) {
-        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
-            "volname/image[?socket=...]");
+        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
+                   "volname/image[?socket=...]");
        errno = -ret;
        goto out;
    }
@@ -215,9 +202,11 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)

    ret = glfs_init(glfs);
    if (ret) {
-        error_report("Gluster connection failed for server=%s port=%d "
-             "volume=%s image=%s transport=%s", gconf->server, gconf->port,
-             gconf->volname, gconf->image, gconf->transport);
+        error_setg_errno(errp, errno,
+                         "Gluster connection failed for server=%s port=%d "
+                         "volume=%s image=%s transport=%s", gconf->server,
+                         gconf->port, gconf->volname, gconf->image,
+                         gconf->transport);
        goto out;
    }
    return glfs;
@@ -231,46 +220,32 @@ out:
    return NULL;
 }

-static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+static void qemu_gluster_complete_aio(void *opaque)
 {
-    int ret;
-    bool *finished = acb->finished;
-    BlockDriverCompletionFunc *cb = acb->common.cb;
-    void *opaque = acb->common.opaque;
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;

-    if (!acb->ret || acb->ret == acb->size) {
-        ret = 0; /* Success */
-    } else if (acb->ret < 0) {
-        ret = acb->ret; /* Read/Write failed */
-    } else {
-        ret = -EIO; /* Partial read/write - fail it */
-    }
-
-    qemu_aio_release(acb);
-    cb(opaque, ret);
-    if (finished) {
-        *finished = true;
-    }
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_coroutine_enter(acb->coroutine, NULL);
 }

-static void qemu_gluster_aio_event_reader(void *opaque)
+/*
+ * AIO callback routine called from GlusterFS thread.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
 {
-    BDRVGlusterState *s = opaque;
-    ssize_t ret;
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;

-    do {
-        char *p = (char *)&s->event_acb;
+    if (!ret || ret == acb->size) {
+        acb->ret = 0; /* Success */
+    } else if (ret < 0) {
+        acb->ret = ret; /* Read/Write failed */
+    } else {
+        acb->ret = -EIO; /* Partial read/write - fail it */
+    }

-        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_acb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_acb)) {
-                s->event_reader_pos = 0;
-                qemu_gluster_complete_aio(s->event_acb, s);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    qemu_bh_schedule(acb->bh);
 }

 /* TODO Convert to fine grained options */
@@ -287,11 +262,28 @@ static QemuOptsList runtime_opts = {
    },
 };

+/*
+ * Translate block-layer open flags into open(2)-style flags and OR them into
+ * *open_flags: O_BINARY always, O_RDWR or O_RDONLY depending on
+ * BDRV_O_RDWR, and O_DIRECT when BDRV_O_NOCACHE is set.
+ */
+static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
+{
+    int extra = O_BINARY;
+
+    assert(open_flags != NULL);
+
+    extra |= (bdrv_flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
+    if (bdrv_flags & BDRV_O_NOCACHE) {
+        extra |= O_DIRECT;
+    }
+
+    *open_flags |= extra;
+}
+
 static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
                             int bdrv_flags, Error **errp)
 {
    BDRVGlusterState *s = bs->opaque;
-    int open_flags = O_BINARY;
+    int open_flags = 0;
    int ret = 0;
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
    QemuOpts *opts;
@@ -300,47 +292,27 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto out;
    }

    filename = qemu_opt_get(opts, "filename");

-
-    s->glfs = qemu_gluster_init(gconf, filename);
+    s->glfs = qemu_gluster_init(gconf, filename, errp);
    if (!s->glfs) {
        ret = -errno;
        goto out;
    }

-    if (bdrv_flags & BDRV_O_RDWR) {
-        open_flags |= O_RDWR;
-    } else {
-        open_flags |= O_RDONLY;
-    }
-
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        open_flags |= O_DIRECT;
-    }
+    qemu_gluster_parse_flags(bdrv_flags, &open_flags);

    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
    if (!s->fd) {
        ret = -errno;
-        goto out;
    }

-    ret = qemu_pipe(s->fds);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    }
-    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
-        qemu_gluster_aio_event_reader, NULL, s);
-
 out:
    qemu_opts_del(opts);
    qemu_gluster_gconf_free(gconf);
@@ -356,24 +328,180 @@ out:
    return ret;
 }

+typedef struct BDRVGlusterReopenState {
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+} BDRVGlusterReopenState;
+
+
+static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
+                                       BlockReopenQueue *queue, Error **errp)
+{
+    int ret = 0;
+    BDRVGlusterReopenState *reop_s;
+    GlusterConf *gconf = NULL;
+    int open_flags = 0;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    state->opaque = g_malloc0(sizeof(BDRVGlusterReopenState));
+    reop_s = state->opaque;
+
+    qemu_gluster_parse_flags(state->flags, &open_flags);
+
+    gconf = g_malloc0(sizeof(GlusterConf));
+
+    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+    if (reop_s->glfs == NULL) {
+        ret = -errno;
+        goto exit;
+    }
+
+    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+    if (reop_s->fd == NULL) {
+        /* reop_s->glfs will be cleaned up in _abort */
+        ret = -errno;
+        goto exit;
+    }
+
+exit:
+    /* state->opaque will be freed in either the _abort or _commit */
+    qemu_gluster_gconf_free(gconf);
+    return ret;
+}
+
+static void qemu_gluster_reopen_commit(BDRVReopenState *state)
+{
+    BDRVGlusterReopenState *reop_s = state->opaque;
+    BDRVGlusterState *s = state->bs->opaque;
+
+
+    /* close the old */
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+
+    /* use the newly opened image / connection */
+    s->fd         = reop_s->fd;
+    s->glfs       = reop_s->glfs;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+
+static void qemu_gluster_reopen_abort(BDRVReopenState *state)
+{
+    BDRVGlusterReopenState *reop_s = state->opaque;
+
+    if (reop_s == NULL) {
+        return;
+    }
+
+    if (reop_s->fd) {
+        glfs_close(reop_s->fd);
+    }
+
+    if (reop_s->glfs) {
+        glfs_fini(reop_s->glfs);
+    }
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    int ret;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+    BDRVGlusterState *s = bs->opaque;
+    off_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+
+    acb->size = size;
+    acb->ret = 0;
+    acb->coroutine = qemu_coroutine_self();
+
+    ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
+
+out:
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
+}
+
+static inline bool gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline bool gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return 0;
+}
+#endif
+
 static int qemu_gluster_create(const char *filename,
        QEMUOptionParameter *options, Error **errp)
 {
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
+    int prealloc = 0;
    int64_t total_size = 0;
    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));

-    glfs = qemu_gluster_init(gconf, filename);
+    glfs = qemu_gluster_init(gconf, filename, errp);
    if (!glfs) {
-        ret = -errno;
+        ret = -EINVAL;
        goto out;
    }

    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "full") &&
+                    gluster_supports_zerofill()) {
+                prealloc = 1;
+            } else {
+                error_setg(errp, "Invalid preallocation mode: '%s'"
+                    " or GlusterFS doesn't support zerofill API",
+                           options->value.s);
+                ret = -EINVAL;
+                goto out;
+            }
        }
        options++;
    }
@@ -383,9 +511,15 @@ static int qemu_gluster_create(const char *filename,
    if (!fd) {
        ret = -errno;
    } else {
-        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0,
+                    total_size * BDRV_SECTOR_SIZE)) {
+                ret = -errno;
+            }
+        } else {
            ret = -errno;
        }
+
        if (glfs_close(fd) != 0) {
            ret = -errno;
        }
@@ -398,58 +532,18 @@ out:
    return ret;
 }

-static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
-    bool finished = false;
-
-    acb->finished = &finished;
-    while (!finished) {
-        qemu_aio_wait();
-    }
-}
-
-static const AIOCBInfo gluster_aiocb_info = {
-    .aiocb_size = sizeof(GlusterAIOCB),
-    .cancel = qemu_gluster_aio_cancel,
-};
-
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVGlusterState *s = bs->opaque;
-    int retval;
-
-    acb->ret = ret;
-    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
-    if (retval != sizeof(acb)) {
-        /*
-         * Gluster AIO callback thread failed to notify the waiting
-         * QEMU thread about IO completion.
-         */
-        error_report("Gluster AIO completion failed: %s", strerror(errno));
-        abort();
-    }
-}
-
-static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int write)
+static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;

-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = size;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    if (write) {
        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -460,13 +554,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
    }

    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }

 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -482,71 +579,68 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
    return 0;
 }

-static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }

-static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }

-static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;

-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = 0;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }

 #ifdef CONFIG_GLUSTERFS_DISCARD
-static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
-        void *opaque)
+static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
 {
    int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
    BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;

-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = 0;
    acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();

    ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
    if (ret < 0) {
+        ret = -errno;
        goto out;
    }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;

 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }
 #endif

@@ -581,10 +675,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
 {
    BDRVGlusterState *s = bs->opaque;

-    close(s->fds[GLUSTER_FD_READ]);
-    close(s->fds[GLUSTER_FD_WRITE]);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
-
    if (s->fd) {
        glfs_close(s->fd);
        s->fd = NULL;
@@ -604,6 +694,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = {
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, full)"
+    },
    { NULL }
 };

@@ -613,17 +708,23 @@ static BlockDriver bdrv_gluster = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -634,17 +735,23 @@ static BlockDriver bdrv_gluster_tcp = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -655,17 +762,23 @@ static BlockDriver bdrv_gluster_unix = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
@@ -676,17 +789,23 @@ static BlockDriver bdrv_gluster_rdma = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
+    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
+    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
    .create_options               = qemu_gluster_create_options,
 };
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -145,12 +145,13 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,

    if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
        && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+        error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
        iTask->do_retry = 1;
        goto out;
    }

    if (status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: Failure. %s", iscsi_get_error(iscsi));
+        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    }

 out:
@@ -308,7 +309,7 @@ retry:
                                    iscsi_co_generic_cb, &iTask);
    if (iTask.task == NULL) {
        g_free(buf);
-        return -EIO;
+        return -ENOMEM;
    }
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
@@ -325,6 +326,7 @@ retry:
    }

    if (iTask.do_retry) {
+        iTask.complete = 0;
        goto retry;
    }

@@ -376,7 +378,7 @@ retry:
        break;
    }
    if (iTask.task == NULL) {
-        return -EIO;
+        return -ENOMEM;
    }
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
@@ -399,6 +401,7 @@ retry:
    }

    if (iTask.do_retry) {
+        iTask.complete = 0;
        goto retry;
    }

@@ -414,12 +417,16 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;

+    if (bs->sg) {
+        return 0;
+    }
+
    iscsi_co_init_iscsitask(iscsilun, &iTask);

 retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
    }

    while (!iTask.complete) {
@@ -433,6 +440,7 @@ retry:
    }

    if (iTask.do_retry) {
+        iTask.complete = 0;
        goto retry;
    }

@@ -669,7 +677,7 @@ retry:
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
-        ret = -EIO;
+        ret = -ENOMEM;
        goto out;
    }

@@ -683,6 +691,7 @@ retry:
            scsi_free_scsi_task(iTask.task);
            iTask.task = NULL;
        }
+        iTask.complete = 0;
        goto retry;
    }

@@ -753,7 +762,7 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
 retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                     iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
    }

    while (!iTask.complete) {
@@ -767,6 +776,7 @@ retry:
    }

    if (iTask.do_retry) {
+        iTask.complete = 0;
        goto retry;
    }

@@ -822,7 +832,7 @@ retry:
                               iscsilun->zeroblock, iscsilun->block_size,
                               nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                               0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
    }

    while (!iTask.complete) {
@@ -830,24 +840,27 @@ retry:
        qemu_coroutine_yield();
    }

+    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
+        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
+        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
+         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
+        /* WRITE SAME is not supported by the target */
+        iscsilun->has_write_same = false;
+        scsi_free_scsi_task(iTask.task);
+        return -ENOTSUP;
+    }
+
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
+        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
-        if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
-            iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
-            iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE) {
-            /* WRITE SAME is not supported by the target */
-            iscsilun->has_write_same = false;
-            return -ENOTSUP;
-        }
-
        return -EIO;
    }

@@ -856,7 +869,8 @@ retry:

 #endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */

-static int parse_chap(struct iscsi_context *iscsi, const char *target)
+static void parse_chap(struct iscsi_context *iscsi, const char *target,
+                       Error **errp)
 {
    QemuOptsList *list;
    QemuOpts *opts;
@@ -865,37 +879,35 @@ static int parse_chap(struct iscsi_context *iscsi, const char *target)

    list = qemu_find_opts("iscsi");
    if (!list) {
-        return 0;
+        return;
    }

    opts = qemu_opts_find(list, target);
    if (opts == NULL) {
        opts = QTAILQ_FIRST(&list->head);
        if (!opts) {
-            return 0;
+            return;
        }
    }

    user = qemu_opt_get(opts, "user");
    if (!user) {
-        return 0;
+        return;
    }

    password = qemu_opt_get(opts, "password");
    if (!password) {
-        error_report("CHAP username specified but no password was given");
-        return -1;
+        error_setg(errp, "CHAP username specified but no password was given");
+        return;
    }

    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
-        error_report("Failed to set initiator username and password");
-        return -1;
+        error_setg(errp, "Failed to set initiator username and password");
    }
-
-    return 0;
 }

-static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
+static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
+                                Error **errp)
 {
    QemuOptsList *list;
    QemuOpts *opts;
@@ -928,7 +940,7 @@ static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
    } else if (!strcmp(digest, "NONE-CRC32C")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
    } else {
-        error_report("Invalid header-digest setting : %s", digest);
+        error_setg(errp, "Invalid header-digest setting : %s", digest);
    }
 }

@@ -986,12 +998,11 @@ static void iscsi_nop_timed_event(void *opaque)
 }
 #endif

-static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
+static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
 {
    struct scsi_task *task = NULL;
    struct scsi_readcapacity10 *rc10 = NULL;
    struct scsi_readcapacity16 *rc16 = NULL;
-    int ret = 0;
    int retries = ISCSI_CMD_RETRIES; 

    do {
@@ -1006,8 +1017,7 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
                rc16 = scsi_datain_unmarshall(task);
                if (rc16 == NULL) {
-                    error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
-                    ret = -EINVAL;
+                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
                } else {
                    iscsilun->block_size = rc16->block_length;
                    iscsilun->num_blocks = rc16->returned_lba + 1;
@@ -1021,8 +1031,7 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
                rc10 = scsi_datain_unmarshall(task);
                if (rc10 == NULL) {
-                    error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
-                    ret = -EINVAL;
+                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
                } else {
                    iscsilun->block_size = rc10->block_size;
                    if (rc10->lba == 0) {
@@ -1035,20 +1044,18 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
            }
            break;
        default:
-            return 0;
+            return;
        }
    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && retries-- > 0);

    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: failed to send readcapacity10 command.");
-        ret = -EINVAL;
+        error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
    }
    if (task) {
        scsi_free_scsi_task(task);
    }
-    return ret;
 }

 /* TODO Convert to fine grained options */
@@ -1065,40 +1072,52 @@ static QemuOptsList runtime_opts = {
    },
 };

-static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi,
-                                          int lun, int evpd, int pc) {
-        int full_size;
-        struct scsi_task *task = NULL;
-        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
+static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
+                                          int evpd, int pc, void **inq, Error **errp)
+{
+    int full_size;
+    struct scsi_task *task = NULL;
+    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
+    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+        goto fail;
+    }
+    full_size = scsi_datain_getfullsize(task);
+    if (full_size > task->datain.size) {
+        scsi_free_scsi_task(task);
+
+        /* we need more data for the full list */
+        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
            goto fail;
        }
-        full_size = scsi_datain_getfullsize(task);
-        if (full_size > task->datain.size) {
-            scsi_free_scsi_task(task);
+    }

-            /* we need more data for the full list */
-            task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
-            if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-                goto fail;
-            }
-        }
+    *inq = scsi_datain_unmarshall(task);
+    if (*inq == NULL) {
+        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
+        goto fail;
+    }

-        return task;
+    return task;

 fail:
-        error_report("iSCSI: Inquiry command failed : %s",
-                     iscsi_get_error(iscsi));
-        if (task) {
-            scsi_free_scsi_task(task);
-            return NULL;
-        }
-        return NULL;
+    if (!error_is_set(errp)) {
+        error_setg(errp, "iSCSI: Inquiry command failed : %s",
+                   iscsi_get_error(iscsi));
+    }
+    if (task != NULL) {
+        scsi_free_scsi_task(task);
+    }
+    return NULL;
 }

 /*
 * We support iscsi url's on the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
+ *
+ * Note: flags are currently not used by iscsi_open.  If this function
+ * is changed such that flags are used, please examine iscsi_reopen_prepare()
+ * to see if it needs to be changed as well.
 */
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
@@ -1108,34 +1127,33 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    struct iscsi_url *iscsi_url = NULL;
    struct scsi_task *task = NULL;
    struct scsi_inquiry_standard *inq = NULL;
+    struct scsi_inquiry_supported_pages *inq_vpd;
    char *initiator_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
-    int ret;
+    int i, ret;

    if ((BDRV_SECTOR_SIZE % 512) != 0) {
-        error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
-                     "BDRV_SECTOR_SIZE(%lld) is not a multiple "
-                     "of 512", BDRV_SECTOR_SIZE);
+        error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
+                   "BDRV_SECTOR_SIZE(%lld) is not a multiple "
+                   "of 512", BDRV_SECTOR_SIZE);
        return -EINVAL;
    }

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto out;
    }

    filename = qemu_opt_get(opts, "filename");

-
    iscsi_url = iscsi_parse_full_url(iscsi, filename);
    if (iscsi_url == NULL) {
-        error_report("Failed to parse URL : %s", filename);
+        error_setg(errp, "Failed to parse URL : %s", filename);
        ret = -EINVAL;
        goto out;
    }
@@ -1146,13 +1164,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,

    iscsi = iscsi_create_context(initiator_name);
    if (iscsi == NULL) {
-        error_report("iSCSI: Failed to create iSCSI context.");
+        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
        ret = -ENOMEM;
        goto out;
    }

    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
-        error_report("iSCSI: Failed to set target name.");
+        error_setg(errp, "iSCSI: Failed to set target name.");
        ret = -EINVAL;
        goto out;
    }
@@ -1161,21 +1179,22 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
                                              iscsi_url->passwd);
        if (ret != 0) {
-            error_report("Failed to set initiator username and password");
+            error_setg(errp, "Failed to set initiator username and password");
            ret = -EINVAL;
            goto out;
        }
    }

    /* check if we got CHAP username/password via the options */
-    if (parse_chap(iscsi, iscsi_url->target) != 0) {
-        error_report("iSCSI: Failed to set CHAP user/password");
+    parse_chap(iscsi, iscsi_url->target, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto out;
    }

    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
-        error_report("iSCSI: Failed to set session type to normal.");
+        error_setg(errp, "iSCSI: Failed to set session type to normal.");
        ret = -EINVAL;
        goto out;
    }
@@ -1183,10 +1202,15 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);

    /* check if we got HEADER_DIGEST via the options */
-    parse_header_digest(iscsi, iscsi_url->target);
+    parse_header_digest(iscsi, iscsi_url->target, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto out;
+    }

    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
-        error_report("iSCSI: Failed to connect to LUN : %s",
+        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
            iscsi_get_error(iscsi));
        ret = -EINVAL;
        goto out;
@@ -1194,95 +1218,77 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,

    iscsilun->iscsi = iscsi;
    iscsilun->lun   = iscsi_url->lun;
-
-    task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
-
-    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: failed to send inquiry command.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    inq = scsi_datain_unmarshall(task);
-    if (inq == NULL) {
-        error_report("iSCSI: Failed to unmarshall inquiry data.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    iscsilun->type = inq->periperal_device_type;
    iscsilun->has_write_same = true;

-    if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
+    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
+                            (void **) &inq, errp);
+    if (task == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+    iscsilun->type = inq->periperal_device_type;
+    scsi_free_scsi_task(task);
+    task = NULL;
+
+    iscsi_readcapacity_sync(iscsilun, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto out;
    }
    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
+    bs->request_alignment = iscsilun->block_size;

-    /* Medium changer or tape. We dont have any emulation for this so this must
-     * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
-     * to read from the device to guess the image format.
+    /* We don't have any emulation for devices other than disks and CD-ROMs, so
+     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
+     * will try to read from the device to guess the image format.
     */
-    if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
-        iscsilun->type == TYPE_TAPE) {
+    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
        bs->sg = 1;
    }

-    if (iscsilun->lbpme) {
+    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
+                            (void **) &inq_vpd, errp);
+    if (task == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+    for (i = 0; i < inq_vpd->num_pages; i++) {
+        struct scsi_task *inq_task;
        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
-        task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING);
-        if (task == NULL) {
-            ret = -EINVAL;
-            goto out;
-        }
-        inq_lbp = scsi_datain_unmarshall(task);
-        if (inq_lbp == NULL) {
-            error_report("iSCSI: failed to unmarshall inquiry datain blob");
-            ret = -EINVAL;
-            goto out;
-        }
-        memcpy(&iscsilun->lbp, inq_lbp,
-               sizeof(struct scsi_inquiry_logical_block_provisioning));
-        scsi_free_scsi_task(task);
-        task = NULL;
-    }
-
-    if (iscsilun->lbp.lbpu || iscsilun->lbp.lbpws) {
        struct scsi_inquiry_block_limits *inq_bl;
-        task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS);
-        if (task == NULL) {
-            ret = -EINVAL;
-            goto out;
+        switch (inq_vpd->pages[i]) {
+        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
+            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
+                                        (void **) &inq_lbp, errp);
+            if (inq_task == NULL) {
+                ret = -EINVAL;
+                goto out;
+            }
+            memcpy(&iscsilun->lbp, inq_lbp,
+                   sizeof(struct scsi_inquiry_logical_block_provisioning));
+            scsi_free_scsi_task(inq_task);
+            break;
+        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
+            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
+                                    (void **) &inq_bl, errp);
+            if (inq_task == NULL) {
+                ret = -EINVAL;
+                goto out;
+            }
+            memcpy(&iscsilun->bl, inq_bl,
+                   sizeof(struct scsi_inquiry_block_limits));
+            scsi_free_scsi_task(inq_task);
+            break;
+        default:
+            break;
        }
-        inq_bl = scsi_datain_unmarshall(task);
-        if (inq_bl == NULL) {
-            error_report("iSCSI: failed to unmarshall inquiry datain blob");
-            ret = -EINVAL;
-            goto out;
-        }
-        memcpy(&iscsilun->bl, inq_bl,
-               sizeof(struct scsi_inquiry_block_limits));
-        scsi_free_scsi_task(task);
-        task = NULL;
-
-        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
-                                                 iscsilun);
-        }
-        bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                   iscsilun);
-
-        if (iscsilun->bl.max_ws_len < 0xffffffff) {
-            bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
-                                                      iscsilun);
-        }
-        bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                        iscsilun);
-
-        bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
-                                                     iscsilun);
    }
+    scsi_free_scsi_task(task);
+    task = NULL;

 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
    /* Set up a timer for sending out iSCSI NOPs */
@@ -1326,17 +1332,57 @@ static void iscsi_close(BlockDriverState *bs)
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

+/* Copy the limits cached in the IscsiLun into bs->bl, converting them from
+ * LUN blocks to qemu sectors with sector_lun2qemu().  Nothing is re-read
+ * from the target: the values were queried once in iscsi_open() and iscsi
+ * targets don't change their limits.  Always returns 0. */
+static int iscsi_refresh_limits(BlockDriverState *bs)
+{
+    IscsiLun *lun = bs->opaque;
+
+    /* discard limits are only published when lbp.lbpu is set */
+    if (lun->lbp.lbpu) {
+        if (lun->bl.max_unmap < 0xffffffff) {
+            bs->bl.max_discard =
+                sector_lun2qemu(lun->bl.max_unmap, lun);
+        }
+        bs->bl.discard_alignment =
+            sector_lun2qemu(lun->bl.opt_unmap_gran, lun);
+    }
+
+    /* the write-zeroes maximum does not depend on lbp.lbpws ... */
+    if (lun->bl.max_ws_len < 0xffffffff) {
+        bs->bl.max_write_zeroes =
+            sector_lun2qemu(lun->bl.max_ws_len, lun);
+    }
+    /* ... but its alignment does */
+    if (lun->lbp.lbpws) {
+        bs->bl.write_zeroes_alignment =
+            sector_lun2qemu(lun->bl.opt_unmap_gran, lun);
+    }
+
+    bs->bl.opt_transfer_length =
+        sector_lun2qemu(lun->bl.opt_xfer_len, lun);
+    return 0;
+}
+
+/* Reopen hook for the iSCSI driver.
+ *
+ * Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
+ * prepare.  Note that this will not re-establish a connection with an iSCSI
+ * target - it is effectively a NOP.
+ *
+ * None of the arguments are used; the function always returns 0 and never
+ * sets @errp. */
+static int iscsi_reopen_prepare(BDRVReopenState *state,
+                                BlockReopenQueue *queue, Error **errp)
+{
+    /* NOP */
+    return 0;
+}
+
 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
 {
    IscsiLun *iscsilun = bs->opaque;
-    int ret = 0;
+    Error *local_err = NULL;

    if (iscsilun->type != TYPE_DISK) {
        return -ENOTSUP;
    }

-    if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
-        return ret;
+    iscsi_readcapacity_sync(iscsilun, &local_err);
+    if (local_err != NULL) {
+        error_free(local_err);
+        return -EIO;
    }

    if (offset > iscsi_getlength(bs)) {
@@ -1434,10 +1480,12 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_close      = iscsi_close,
    .bdrv_create     = iscsi_create,
    .create_options  = iscsi_create_options,
+    .bdrv_reopen_prepare  = iscsi_reopen_prepare,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
    .bdrv_truncate   = iscsi_truncate,
+    .bdrv_refresh_limits = iscsi_refresh_limits,

 #if defined(LIBISCSI_FEATURE_IOVECTOR)
    .bdrv_co_get_block_status = iscsi_co_get_block_status,
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -96,8 +96,16 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
    }

+    qemu_iovec_destroy(&op->qiov);
    g_slice_free(MirrorOp, op);
-    qemu_coroutine_enter(s->common.co, NULL);
+
+    /* Enter coroutine when it is not sleeping.  The coroutine sleeps to
+     * rate-limit itself.  The coroutine will eventually resume since there is
+     * a sleep timeout so don't wake it early.
+     */
+    if (s->common.busy) {
+        qemu_coroutine_enter(s->common.co, NULL);
+    }
 }

 static void mirror_write_complete(void *opaque, int ret)
@@ -138,11 +146,12 @@ static void mirror_read_complete(void *opaque, int ret)
                    mirror_write_complete, op);
 }

-static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
+    uint64_t delay_ns;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -230,7 +239,12 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
-    } while (next_sector < end);
+        if (!s->synced && s->common.speed) {
+            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
+        } else {
+            delay_ns = 0;
+        }
+    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_slice_new(MirrorOp);
@@ -245,9 +259,11 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
-        qemu_iovec_add(&op->qiov, buf, s->granularity);
+        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
@@ -267,6 +283,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
+    return delay_ns;
 }

 static void mirror_free_init(MirrorBlockJob *s)
@@ -309,9 +326,18 @@ static void coroutine_fn mirror_run(void *opaque)
    }

    s->common.len = bdrv_getlength(bs);
-    if (s->common.len <= 0) {
-        block_job_completed(&s->common, s->common.len);
-        return;
+    if (s->common.len < 0) {
+        ret = s->common.len;
+        goto immediate_exit;
+    } else if (s->common.len == 0) {
+        /* Report BLOCK_JOB_READY and wait for complete. */
+        block_job_ready(&s->common);
+        s->synced = true;
+        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
+            block_job_yield(&s->common);
+        }
+        s->common.cancelled = false;
+        goto immediate_exit;
    }

    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
@@ -361,7 +387,7 @@ static void coroutine_fn mirror_run(void *opaque)
    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
-        uint64_t delay_ns;
+        uint64_t delay_ns = 0;
        int64_t cnt;
        bool should_complete;

@@ -385,8 +411,10 @@ static void coroutine_fn mirror_run(void *opaque)
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
-                mirror_iteration(s);
-                continue;
+                delay_ns = mirror_iteration(s);
+                if (delay_ns == 0) {
+                    continue;
+                }
            }
        }

@@ -431,17 +459,10 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        ret = 0;
-        trace_mirror_before_sleep(s, cnt, s->synced);
+        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
-
-            if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
-            } else {
-                delay_ns = 0;
-            }
-
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
@@ -519,9 +540,6 @@ static void mirror_complete(BlockJob *job, Error **errp)

    ret = bdrv_open_backing_file(s->target, NULL, &local_err);
    if (ret < 0) {
-        char backing_filename[PATH_MAX];
-        bdrv_get_full_backing_filename(s->target, backing_filename,
-                                       sizeof(backing_filename));
        error_propagate(errp, local_err);
        return;
    }
@@ -630,11 +648,56 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                         BlockDriverCompletionFunc *cb,
                         void *opaque, Error **errp)
 {
+    int64_t length, base_length;
+    int orig_base_flags;
+    int ret;
+    Error *local_err = NULL;
+
+    orig_base_flags = bdrv_get_flags(base);
+
    if (bdrv_reopen(base, bs->open_flags, errp)) {
        return;
    }
+
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_setg_errno(errp, -length,
+                         "Unable to determine length of %s", bs->filename);
+        goto error_restore_flags;
+    }
+
+    base_length = bdrv_getlength(base);
+    if (base_length < 0) {
+        error_setg_errno(errp, -base_length,
+                         "Unable to determine length of %s", base->filename);
+        goto error_restore_flags;
+    }
+
+    if (length > base_length) {
+        ret = bdrv_truncate(base, length);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                            "Top image %s is larger than base image %s, and "
+                             "resize of base image failed",
+                             bs->filename, base->filename);
+            goto error_restore_flags;
+        }
+    }
+
    bdrv_ref(base);
    mirror_start_job(bs, base, speed, 0, 0,
-                     on_error, on_error, cb, opaque, errp,
+                     on_error, on_error, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        goto error_restore_flags;
+    }
+
+    return;
+
+error_restore_flags:
+    /* ignore error and errp for bdrv_reopen, because we want to propagate
+     * the original error */
+    bdrv_reopen(base, orig_base_flags, NULL);
+    return;
 }
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -43,6 +43,17 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
    }
 }

+/* Tear down the socket of @client without sending anything to the server.
+ *
+ * The socket is shut down first, the receive coroutines are entered, and
+ * only then is the fd handler removed and the socket closed.
+ * client->sock is left at -1 afterwards. */
+static void nbd_teardown_connection(NbdClientSession *client)
+{
+    /* finish any pending coroutines */
+    shutdown(client->sock, 2); /* how == 2: both directions */
+    nbd_recv_coroutines_enter_all(client);
+
+    /* stop watching the fd before it is closed */
+    qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+    closesocket(client->sock);
+    client->sock = -1;
+}
+
 static void nbd_reply_ready(void *opaque)
 {
    NbdClientSession *s = opaque;
@@ -78,7 +89,7 @@ static void nbd_reply_ready(void *opaque)
    }

 fail:
-    nbd_recv_coroutines_enter_all(s);
+    nbd_teardown_connection(s);
 }

 static void nbd_restart_write(void *opaque)
@@ -324,7 +335,7 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,

 }

-static void nbd_teardown_connection(NbdClientSession *client)
+void nbd_client_session_close(NbdClientSession *client)
 {
    struct nbd_request request = {
        .type = NBD_CMD_DISC,
@@ -332,22 +343,14 @@ static void nbd_teardown_connection(NbdClientSession *client)
        .len = 0
    };

-    nbd_send_request(client->sock, &request);
-
-    /* finish any pending coroutines */
-    shutdown(client->sock, 2);
-    nbd_recv_coroutines_enter_all(client);
-
-    qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
-    closesocket(client->sock);
-    client->sock = -1;
-}
-
-void nbd_client_session_close(NbdClientSession *client)
-{
    if (!client->bs) {
        return;
    }
+    if (client->sock == -1) {
+        return;
+    }
+
+    nbd_send_request(client->sock, &request);

    nbd_teardown_connection(client);
    client->bs = NULL;
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -188,31 +188,28 @@ out:
    g_free(file);
 }

-static int nbd_config(BDRVNBDState *s, QDict *options, char **export)
+static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
+                       Error **errp)
 {
    Error *local_err = NULL;

-    if (qdict_haskey(options, "path")) {
-        if (qdict_haskey(options, "host")) {
-            qerror_report(ERROR_CLASS_GENERIC_ERROR, "path and host may not "
-                          "be used at the same time.");
-            return -EINVAL;
+    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
+        if (qdict_haskey(options, "path")) {
+            error_setg(errp, "path and host may not be used at the same time.");
+        } else {
+            error_setg(errp, "one of path and host must be specified.");
        }
-        s->client.is_unix = true;
-    } else if (qdict_haskey(options, "host")) {
-        s->client.is_unix = false;
-    } else {
-        return -EINVAL;
+        return;
    }

+    s->client.is_unix = qdict_haskey(options, "path");
    s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
                                      &error_abort);

    qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
-        return -EINVAL;
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
    }

    if (!qemu_opt_get(s->socket_opts, "port")) {
@@ -223,19 +220,17 @@ static int nbd_config(BDRVNBDState *s, QDict *options, char **export)
    if (*export) {
        qdict_del(options, "export");
    }
-
-    return 0;
 }

-static int nbd_establish_connection(BlockDriverState *bs)
+static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
    int sock;

    if (s->client.is_unix) {
-        sock = unix_socket_outgoing(qemu_opt_get(s->socket_opts, "path"));
+        sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
    } else {
-        sock = tcp_socket_outgoing_opts(s->socket_opts);
+        sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
        if (sock >= 0) {
            socket_set_nodelay(sock);
        }
@@ -256,17 +251,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVNBDState *s = bs->opaque;
    char *export = NULL;
    int result, sock;
+    Error *local_err = NULL;

    /* Pop the config into our state object. Exit if invalid. */
-    result = nbd_config(s, options, &export);
-    if (result != 0) {
-        return result;
+    nbd_config(s, options, &export, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
    }

    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    sock = nbd_establish_connection(bs);
+    sock = nbd_establish_connection(bs, errp);
    if (sock < 0) {
        return sock;
    }
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -0,0 +1,442 @@
+/*
+ * QEMU Block driver for native access to files on NFS shares
+ *
+ * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "block/block_int.h"
+#include "trace.h"
+#include "qemu/iov.h"
+#include "qemu/uri.h"
+#include "sysemu/sysemu.h"
+#include <nfsc/libnfs.h>
+
+/* Per-BlockDriverState state of the NFS driver (see bdrv_nfs.instance_size) */
+typedef struct NFSClient {
+    struct nfs_context *context; /* libnfs context from nfs_init_context() */
+    struct nfsfh *fh;            /* handle set by nfs_open()/nfs_creat() */
+    int events;                  /* poll mask last seen by nfs_set_events() */
+    bool has_zero_init;          /* S_ISREG() of the file at open time */
+} NFSClient;
+
+/* State of one asynchronous libnfs request, filled by nfs_co_generic_cb() */
+typedef struct NFSRPC {
+    int ret;           /* result passed to the callback (may become -EIO) */
+    int complete;      /* set to 1 once the callback has run */
+    QEMUIOVector *iov; /* if set, read data is copied into this vector */
+    struct stat *st;   /* if set, the stat result is copied here */
+    Coroutine *co;     /* if set, coroutine to re-enter on completion */
+    QEMUBH *bh;        /* bottom half used to re-enter @co */
+} NFSRPC;
+
+static void nfs_process_read(void *arg);
+static void nfs_process_write(void *arg);
+
+/* Re-register the fd handlers of @client if the set of events libnfs wants
+ * to wait for differs from the one recorded in client->events. */
+static void nfs_set_events(NFSClient *client)
+{
+    int wanted = nfs_which_events(client->context);
+
+    if (wanted == client->events) {
+        /* handlers already match what libnfs asks for */
+        return;
+    }
+
+    qemu_aio_set_fd_handler(nfs_get_fd(client->context),
+                            (wanted & POLLIN) ? nfs_process_read : NULL,
+                            (wanted & POLLOUT) ? nfs_process_write : NULL,
+                            client);
+    client->events = wanted;
+}
+
+/* fd read handler installed by nfs_set_events(); @arg is the NFSClient.
+ * Calls nfs_service() with POLLIN and then refreshes the fd handlers. */
+static void nfs_process_read(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLIN);
+    nfs_set_events(client);
+}
+
+/* fd write handler installed by nfs_set_events(); @arg is the NFSClient.
+ * Calls nfs_service() with POLLOUT and then refreshes the fd handlers. */
+static void nfs_process_write(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLOUT);
+    nfs_set_events(client);
+}
+
+/* Reset @task and bind it to the calling coroutine.  All other fields are
+ * zeroed by the compound literal; @client is currently unused. */
+static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
+{
+    *task = (NFSRPC) {
+        .co         = qemu_coroutine_self(),
+    };
+}
+
+/* Bottom half scheduled by nfs_co_generic_cb(); @opaque is the NFSRPC.
+ * Deletes the bottom half and re-enters the coroutine stored in the task. */
+static void nfs_co_generic_bh_cb(void *opaque)
+{
+    NFSRPC *task = opaque;
+    qemu_bh_delete(task->bh);
+    qemu_coroutine_enter(task->co, NULL);
+}
+
+/* Completion callback shared by all asynchronous libnfs requests issued in
+ * this file.  @private_data is the NFSRPC describing the request.
+ *
+ * Records @ret in the task, copies @data into the task's iov (positive @ret)
+ * or stat buffer (@ret == 0) when one was supplied, reports errors, and
+ * schedules a bottom half to re-enter the waiting coroutine, if any. */
+static void
+nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
+                  void *private_data)
+{
+    NFSRPC *rpc = private_data;
+
+    rpc->complete = 1;
+    rpc->ret = ret;
+
+    if (rpc->iov && rpc->ret > 0) {
+        if (rpc->ret > rpc->iov->size) {
+            /* more data than the caller's vector can hold */
+            rpc->ret = -EIO;
+        } else {
+            qemu_iovec_from_buf(rpc->iov, 0, data, rpc->ret);
+        }
+    } else if (rpc->st && rpc->ret == 0) {
+        memcpy(rpc->st, data, sizeof(struct stat));
+    }
+
+    if (rpc->ret < 0) {
+        error_report("NFS Error: %s", nfs_get_error(nfs));
+    }
+
+    if (rpc->co) {
+        rpc->bh = qemu_bh_new(nfs_co_generic_bh_cb, rpc);
+        qemu_bh_schedule(rpc->bh);
+    }
+}
+
+/* Read @nb_sectors sectors starting at @sector_num into @iov.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be issued, or the
+ * negative result recorded by nfs_co_generic_cb().  A short read is padded
+ * with zeroes up to iov->size. */
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC rpc;
+    int rc;
+
+    nfs_co_init_task(client, &rpc);
+    rpc.iov = iov;
+
+    rc = nfs_pread_async(client->context, client->fh,
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         nfs_co_generic_cb, &rpc);
+    if (rc != 0) {
+        return -ENOMEM;
+    }
+
+    /* sleep until the completion callback has filled in rpc */
+    while (!rpc.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    if (rpc.ret < 0) {
+        return rpc.ret;
+    }
+
+    /* zero pad short reads */
+    if (rpc.ret < iov->size) {
+        qemu_iovec_memset(iov, rpc.ret, 0, iov->size - rpc.ret);
+    }
+    return 0;
+}
+
+/* Write @nb_sectors sectors from @iov starting at @sector_num.
+ *
+ * The vector is first linearised into a temporary buffer which is handed to
+ * nfs_pwrite_async().  Returns 0 if exactly the requested number of bytes
+ * was written, -ENOMEM if the request could not be issued, the negative
+ * result of the request, or -EIO for a short write. */
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC rpc;
+    char *bounce;
+
+    nfs_co_init_task(client, &rpc);
+
+    bounce = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    qemu_iovec_to_buf(iov, 0, bounce, nb_sectors * BDRV_SECTOR_SIZE);
+
+    if (nfs_pwrite_async(client->context, client->fh,
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         bounce, nfs_co_generic_cb, &rpc) != 0) {
+        g_free(bounce);
+        return -ENOMEM;
+    }
+
+    /* sleep until the completion callback has filled in rpc */
+    while (!rpc.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    g_free(bounce);
+
+    if (rpc.ret == nb_sectors * BDRV_SECTOR_SIZE) {
+        return 0;
+    }
+    /* error from the server, or fewer bytes written than requested */
+    return rpc.ret < 0 ? rpc.ret : -EIO;
+}
+
+/* Issue an fsync for the open file and wait for it to complete.
+ * Returns the result of the request, or -ENOMEM if it could not be issued. */
+static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC rpc;
+    int rc;
+
+    nfs_co_init_task(client, &rpc);
+
+    rc = nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
+                         &rpc);
+    if (rc != 0) {
+        return -ENOMEM;
+    }
+
+    /* sleep until the completion callback has filled in rpc */
+    while (!rpc.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    return rpc.ret;
+}
+
+/* Options accepted by nfs_file_open().  Only the complete URL is taken,
+ * as a single "filename" string.
+ *
+ * TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "nfs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the NFS file",
+        },
+        { /* end of list */ }
+    },
+};
+
+/* Release everything @client holds: close the file handle if one is open,
+ * remove the fd handlers and destroy the libnfs context.  The structure is
+ * zeroed afterwards, so the function may be called on an already closed or
+ * never opened client. */
+static void nfs_client_close(NFSClient *client)
+{
+    struct nfs_context *ctx = client->context;
+
+    if (ctx != NULL) {
+        if (client->fh != NULL) {
+            nfs_close(ctx, client->fh);
+        }
+        qemu_aio_set_fd_handler(nfs_get_fd(ctx), NULL, NULL, NULL);
+        nfs_destroy_context(ctx);
+    }
+
+    memset(client, 0, sizeof(*client));
+}
+
+/* .bdrv_close: release the NFSClient stored in bs->opaque */
+static void nfs_file_close(BlockDriverState *bs)
+{
+    nfs_client_close(bs->opaque);
+}
+
+/*
+ * Parse @filename as an NFS URL, mount the export and open the file (or
+ * create it when @flags contains O_CREAT).
+ *
+ * The last component of the URL path is the file name; everything in front
+ * of it is the export that gets mounted.  The query parameters "uid", "gid"
+ * and "tcp-syncnt" are passed on to libnfs; a parameter without a value or
+ * with any other name is an error.
+ *
+ * Returns the size of the file in sectors (rounded up) on success.  On
+ * failure a negative value is returned, @errp is set and @client has been
+ * reset by nfs_client_close().
+ */
+static int64_t nfs_client_open(NFSClient *client, const char *filename,
+                               int flags, Error **errp)
+{
+    /* 64 bit: the sector count of a file >= 1 TiB does not fit an int */
+    int64_t ret = -EINVAL;
+    int i;
+    struct stat st;
+    URI *uri;
+    QueryParams *qp = NULL;
+    char *file = NULL, *strp = NULL;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    /* both are dereferenced below (strrchr(), nfs_mount()) */
+    if (!uri->server || !uri->path) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    strp = strrchr(uri->path, '/');
+    if (strp == NULL) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    /* split "<export>/<file>": keep "/<file>", cut uri->path to the export */
+    file = g_strdup(strp);
+    *strp = 0;
+
+    client->context = nfs_init_context();
+    if (client->context == NULL) {
+        error_setg(errp, "Failed to init NFS context");
+        goto fail;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (!qp) {
+        error_setg(errp, "Could not parse NFS URL query parameters");
+        goto fail;
+    }
+    for (i = 0; i < qp->n; i++) {
+        if (!qp->p[i].value) {
+            error_setg(errp, "Value for NFS parameter expected: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+        /* exact match: "uidfoo" must not be silently treated as "uid" */
+        if (!strcmp(qp->p[i].name, "uid")) {
+            nfs_set_uid(client->context, atoi(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "gid")) {
+            nfs_set_gid(client->context, atoi(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+        } else {
+            error_setg(errp, "Unknown NFS parameter name: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+    }
+
+    ret = nfs_mount(client->context, uri->server, uri->path);
+    if (ret < 0) {
+        error_setg(errp, "Failed to mount nfs share: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    if (flags & O_CREAT) {
+        ret = nfs_creat(client->context, file, 0600, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to create file: %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    } else {
+        ret = nfs_open(client->context, file, flags, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to open file : %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    }
+
+    ret = nfs_fstat(client->context, client->fh, &st);
+    if (ret < 0) {
+        error_setg(errp, "Failed to fstat file: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+    client->has_zero_init = S_ISREG(st.st_mode);
+    goto out;
+fail:
+    nfs_client_close(client);
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    g_free(file);
+    return ret;
+}
+
+/*
+ * .bdrv_file_open: open the NFS URL given in the "filename" option.
+ *
+ * @flags are block layer flags; only BDRV_O_RDWR is looked at to choose
+ * between O_RDWR and O_RDONLY.  On success bs->total_sectors is set and 0 is
+ * returned; on failure a negative value is returned and @errp is set.
+ */
+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp)
+{
+    NFSClient *client = bs->opaque;
+    int64_t ret;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto out;
+    }
+    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
+                          errp);
+    if (ret < 0) {
+        goto out;
+    }
+    bs->total_sectors = ret;
+    ret = 0;
+out:
+    /* opts is only needed while the URL is parsed; don't leak it */
+    qemu_opts_del(opts);
+    return ret;
+}
+
+/* .bdrv_create: create the file named by @url and truncate it to the value
+ * of the "size" option (0 if the option is absent).  Returns the result of
+ * nfs_ftruncate(), or the negative value from nfs_client_open() if the file
+ * could not be created. */
+static int nfs_file_create(const char *url, QEMUOptionParameter *options,
+                           Error **errp)
+{
+    NFSClient *client = g_malloc0(sizeof(NFSClient));
+    QEMUOptionParameter *opt;
+    int64_t total_size = 0;
+    int ret;
+
+    /* Read out options */
+    for (opt = options; opt && opt->name; opt++) {
+        if (strcmp(opt->name, "size") == 0) {
+            total_size = opt->value.n;
+        }
+    }
+
+    ret = nfs_client_open(client, url, O_CREAT, errp);
+    if (ret >= 0) {
+        ret = nfs_ftruncate(client->context, client->fh, total_size);
+        nfs_client_close(client);
+    }
+
+    g_free(client);
+    return ret;
+}
+
+/* .bdrv_has_zero_init: report the flag computed in nfs_client_open() */
+static int nfs_has_zero_init(BlockDriverState *bs)
+{
+    return ((NFSClient *)bs->opaque)->has_zero_init;
+}
+
+/*
+ * .bdrv_get_allocated_file_size: stat the open file and return the number
+ * of bytes allocated for it.
+ *
+ * This runs outside coroutine context: the task has no coroutine attached
+ * and completion is awaited with qemu_aio_wait().  Returns -ENOMEM if the
+ * request could not be issued, or the negative result of the request.
+ */
+static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task = {0};
+    struct stat st;
+
+    task.st = &st;
+    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+                        &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_aio_wait();
+    }
+
+    /* st_blocks counts 512 byte units, independent of st_blksize */
+    return (task.ret < 0 ? task.ret : (int64_t)st.st_blocks * 512);
+}
+
+/* .bdrv_truncate: resize the open file to @offset bytes; returns the
+ * result of nfs_ftruncate() unchanged */
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
+{
+    NFSClient *nfs = bs->opaque;
+    int rc = nfs_ftruncate(nfs->context, nfs->fh, offset);
+
+    return rc;
+}
+
+/* Driver description registered by nfs_block_init().  The driver handles
+ * the "nfs" protocol and keeps one NFSClient per BlockDriverState. */
+static BlockDriver bdrv_nfs = {
+    .format_name     = "nfs",
+    .protocol_name   = "nfs",
+
+    .instance_size   = sizeof(NFSClient),
+    .bdrv_needs_filename = true,
+    .bdrv_has_zero_init = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+    .bdrv_truncate = nfs_file_truncate,
+
+    .bdrv_file_open  = nfs_file_open,
+    .bdrv_close      = nfs_file_close,
+    .bdrv_create     = nfs_file_create,
+
+    .bdrv_co_readv         = nfs_co_readv,
+    .bdrv_co_writev        = nfs_co_writev,
+    .bdrv_co_flush_to_disk = nfs_co_flush,
+};
+
+/* Register the NFS driver with the block layer; hooked up through
+ * block_init() below. */
+static void nfs_block_init(void)
+{
+    bdrv_register(&bdrv_nfs);
+}
+
+block_init(nfs_block_init);
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -49,9 +49,9 @@ typedef struct BDRVParallelsState {
    CoMutex lock;

    uint32_t *catalog_bitmap;
-    int catalog_size;
+    unsigned int catalog_size;

-    int tracks;
+    unsigned int tracks;
 } BDRVParallelsState;

 static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -85,15 +85,26 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,

    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
        (le32_to_cpu(ph.version) != HEADER_VERSION)) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in Parallels format");
+        ret = -EINVAL;
        goto fail;
    }

    bs->total_sectors = le32_to_cpu(ph.nb_sectors);

    s->tracks = le32_to_cpu(ph.tracks);
+    if (s->tracks == 0) {
+        error_setg(errp, "Invalid image: Zero sectors per track");
+        ret = -EINVAL;
+        goto fail;
+    }

    s->catalog_size = le32_to_cpu(ph.catalog_entries);
+    if (s->catalog_size > INT_MAX / 4) {
+        error_setg(errp, "Catalog too large");
+        ret = -EFBIG;
+        goto fail;
+    }
    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

    ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -29,6 +29,60 @@
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"

+/* Return a g_malloc0()-allocated BlockDeviceInfo filled in from bs.
+ * bs->drv is dereferenced unconditionally, so it must not be NULL. */
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+{
+    BlockDeviceInfo *bdi = g_malloc0(sizeof(*bdi));
+    ThrottleConfig tc;
+
+    bdi->file = g_strdup(bs->filename);
+    bdi->ro = bs->read_only;
+    bdi->drv = g_strdup(bs->drv->format_name);
+    bdi->encrypted = bs->encrypted;
+    bdi->encryption_key_missing = bdrv_key_required(bs);
+
+    if (bs->node_name[0] != '\0') {
+        bdi->node_name = g_strdup(bs->node_name);
+        bdi->has_node_name = true;
+    }
+    if (bs->backing_file[0] != '\0') {
+        bdi->backing_file = g_strdup(bs->backing_file);
+        bdi->has_backing_file = true;
+    }
+    bdi->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+    /* Without I/O limits the throttling fields keep their zeroed values. */
+    if (!bs->io_limits_enabled) {
+        return bdi;
+    }
+    throttle_get_config(&bs->throttle_state, &tc);
+    bdi->bps = tc.buckets[THROTTLE_BPS_TOTAL].avg;
+    bdi->bps_rd = tc.buckets[THROTTLE_BPS_READ].avg;
+    bdi->bps_wr = tc.buckets[THROTTLE_BPS_WRITE].avg;
+    bdi->iops = tc.buckets[THROTTLE_OPS_TOTAL].avg;
+    bdi->iops_rd = tc.buckets[THROTTLE_OPS_READ].avg;
+    bdi->iops_wr = tc.buckets[THROTTLE_OPS_WRITE].avg;
+
+    /* Each has_* flag is assigned max itself: true only when max != 0. */
+    bdi->bps_max = tc.buckets[THROTTLE_BPS_TOTAL].max;
+    bdi->has_bps_max = tc.buckets[THROTTLE_BPS_TOTAL].max;
+    bdi->bps_rd_max = tc.buckets[THROTTLE_BPS_READ].max;
+    bdi->has_bps_rd_max = tc.buckets[THROTTLE_BPS_READ].max;
+    bdi->bps_wr_max = tc.buckets[THROTTLE_BPS_WRITE].max;
+    bdi->has_bps_wr_max = tc.buckets[THROTTLE_BPS_WRITE].max;
+    bdi->iops_max = tc.buckets[THROTTLE_OPS_TOTAL].max;
+    bdi->has_iops_max = tc.buckets[THROTTLE_OPS_TOTAL].max;
+    bdi->iops_rd_max = tc.buckets[THROTTLE_OPS_READ].max;
+    bdi->has_iops_rd_max = tc.buckets[THROTTLE_OPS_READ].max;
+    bdi->iops_wr_max = tc.buckets[THROTTLE_OPS_WRITE].max;
+    bdi->has_iops_wr_max = tc.buckets[THROTTLE_OPS_WRITE].max;
+
+    bdi->iops_size = tc.op_size;
+    bdi->has_iops_size = tc.op_size;
+    return bdi;
+}
+
 /*
 * Returns 0 on success, with *p_list either set to describe snapshot
 * information, or NULL because there are no snapshots.  Returns -errno on
@@ -211,66 +265,13 @@ void bdrv_query_info(BlockDriverState *bs,

    if (bs->drv) {
        info->has_inserted = true;
-        info->inserted = g_malloc0(sizeof(*info->inserted));
-        info->inserted->file = g_strdup(bs->filename);
-        info->inserted->ro = bs->read_only;
-        info->inserted->drv = g_strdup(bs->drv->format_name);
-        info->inserted->encrypted = bs->encrypted;
-        info->inserted->encryption_key_missing = bdrv_key_required(bs);
-
-        if (bs->backing_file[0]) {
-            info->inserted->has_backing_file = true;
-            info->inserted->backing_file = g_strdup(bs->backing_file);
-        }
-
-        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
-
-        if (bs->io_limits_enabled) {
-            ThrottleConfig cfg;
-            throttle_get_config(&bs->throttle_state, &cfg);
-            info->inserted->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
-            info->inserted->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
-            info->inserted->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
-
-            info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
-            info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
-            info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
-
-            info->inserted->has_bps_max     =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->bps_max         =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->has_bps_rd_max  =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->bps_rd_max      =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->has_bps_wr_max  =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-            info->inserted->bps_wr_max      =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-
-            info->inserted->has_iops_max    =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->iops_max        =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->has_iops_rd_max =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->iops_rd_max     =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->has_iops_wr_max =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-            info->inserted->iops_wr_max     =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-
-            info->inserted->has_iops_size = cfg.op_size;
-            info->inserted->iops_size = cfg.op_size;
-        }
+        info->inserted = bdrv_block_device_info(bs);

        bs0 = bs;
        p_image_info = &info->inserted->image;
        while (1) {
            bdrv_query_image_info(bs0, p_image_info, &local_err);
-            if (error_is_set(&local_err)) {
+            if (local_err) {
                error_propagate(errp, local_err);
                goto err;
            }
@@ -318,6 +319,11 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs)
        s->parent = bdrv_query_stats(bs->file);
    }

+    if (bs->backing_hd) {
+        s->has_backing = true;
+        s->backing = bdrv_query_stats(bs->backing_hd);
+    }
+
    return s;
 }

@@ -330,7 +336,7 @@ BlockInfoList *qmp_query_block(Error **errp)
     while ((bs = bdrv_next(bs))) {
        BlockInfoList *info = g_malloc0(sizeof(*info));
        bdrv_query_info(bs, &info->value, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            goto err;
        }
@@ -468,6 +474,7 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
        case QTYPE_QERROR: {
            QString *value = qerror_human((QError *)obj);
            func_fprintf(f, "%s", qstring_get_str(value));
+            QDECREF(value);
            break;
        }
        case QTYPE_NONE:
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -48,9 +48,10 @@ typedef struct QCowHeader {
    uint64_t size; /* in bytes */
    uint8_t cluster_bits;
    uint8_t l2_bits;
+    uint16_t padding;
    uint32_t crypt_method;
    uint64_t l1_table_offset;
-} QCowHeader;
+} QEMU_PACKED QCowHeader;

 #define L2_CACHE_SIZE 16

@@ -60,7 +61,7 @@ typedef struct BDRVQcowState {
    int cluster_sectors;
    int l2_bits;
    int l2_size;
-    int l1_size;
+    unsigned int l1_size;
    uint64_t cluster_offset_mask;
    uint64_t l1_table_offset;
    uint64_t *l1_table;
@@ -96,7 +97,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
    BDRVQcowState *s = bs->opaque;
-    int len, i, shift, ret;
+    unsigned int len, i, shift;
+    int ret;
    QCowHeader header;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -113,23 +115,40 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in qcow format");
+        ret = -EINVAL;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
        snprintf(version, sizeof(version), "QCOW version %d", header.version);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bs->device_name, "qcow", version);
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                  bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }

-    if (header.size <= 1 || header.cluster_bits < 9) {
+    if (header.size <= 1) {
+        error_setg(errp, "Image size is too small (must be at least 2 bytes)");
        ret = -EINVAL;
        goto fail;
    }
+    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
+        error_setg(errp, "Cluster size must be between 512 and 64k");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* l2_bits specifies number of entries; storing a uint64_t in each entry,
+     * so bytes = num_entries << 3. */
+    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
+        error_setg(errp, "L2 table size must be between 512 and 64k");
+        ret = -EINVAL;
+        goto fail;
+    }
+
    if (header.crypt_method > QCOW_CRYPT_AES) {
+        error_setg(errp, "invalid encryption method in qcow header");
        ret = -EINVAL;
        goto fail;
    }
@@ -147,7 +166,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,

    /* read the level 1 table */
    shift = s->cluster_bits + s->l2_bits;
-    s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+    if (header.size > UINT64_MAX - (1LL << shift)) {
+        error_setg(errp, "Image too large");
+        ret = -EINVAL;
+        goto fail;
+    } else {
+        uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
+        if (l1_size > INT_MAX / sizeof(uint64_t)) {
+            error_setg(errp, "Image too large");
+            ret = -EINVAL;
+            goto fail;
+        }
+        s->l1_size = l1_size;
+    }

    s->l1_table_offset = header.l1_table_offset;
    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
@@ -171,7 +202,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        if (len > 1023) {
-            len = 1023;
+            error_setg(errp, "Backing file name too long");
+            ret = -EINVAL;
+            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                   bs->backing_file, len);
@@ -686,15 +719,15 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,

    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    qcow_bs = NULL;
+    ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

@@ -720,7 +753,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
-                                    unmodifyed sectors */
+                                    unmodified sectors */
        header.l2_bits = 12; /* 32 KB L2 tables */
    } else {
        header.cluster_bits = 12; /* 4 KB clusters */
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -55,7 +55,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
        }
    }

-    if (new_l1_size > INT_MAX) {
+    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

@@ -359,15 +359,6 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
    struct iovec iov;
    int n, ret;

-    /*
-     * If this is the last cluster and it is only partially used, we must only
-     * copy until the end of the image, or bdrv_check_request will fail for the
-     * bdrv_read/write calls below.
-     */
-    if (start_sect + n_end > bs->total_sectors) {
-        n_end = bs->total_sectors - start_sect;
-    }
-
    n = n_end - n_start;
    if (n <= 0) {
        return 0;
@@ -380,6 +371,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
    /* Call .bdrv_co_readv() directly instead of using the public block-layer
     * interface.  This avoids double I/O throttling and request tracking,
     * which can lead to deadlock when block layer copy-on-read is enabled.
@@ -496,6 +491,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
        break;
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
+            qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
            return -EIO;
        }
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
@@ -1182,7 +1178,7 @@ fail:
 * Return 0 on success and -errno in error cases
 */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+    int *num, uint64_t *host_offset, QCowL2Meta **m)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t start, remaining;
@@ -1190,15 +1186,13 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
    uint64_t cur_bytes;
    int ret;

-    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
-                                      n_start, n_end);
+    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);

-    assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
-    offset = start_of_cluster(s, offset);
+    assert((offset & ~BDRV_SECTOR_MASK) == 0);

 again:
-    start = offset + (n_start << BDRV_SECTOR_BITS);
-    remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+    start = offset;
+    remaining = *num << BDRV_SECTOR_BITS;
    cluster_offset = 0;
    *host_offset = 0;
    cur_bytes = 0;
@@ -1284,7 +1278,7 @@ again:
        }
    }

-    *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+    *num -= remaining >> BDRV_SECTOR_BITS;
    assert(*num > 0);
    assert(*host_offset != 0);

@@ -1369,13 +1363,31 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        uint64_t old_offset;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);
-        if ((old_offset & L2E_OFFSET_MASK) == 0) {
+
+        /*
+         * Make sure that a discarded area reads back as zeroes for v3 images
+         * (we cannot do it for v2 without actually writing a zero-filled
+         * buffer). We can skip the operation if the cluster is already marked
+         * as zero, or if it's unallocated and we don't have a backing file.
+         *
+         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
+         * holding s->lock, so that doesn't work today.
+         */
+        if (old_offset & QCOW_OFLAG_ZERO) {
+            continue;
+        }
+
+        if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
            continue;
        }

        /* First remove L2 entries */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-        l2_table[l2_index + i] = cpu_to_be64(0);
+        if (s->qcow_version >= 3) {
+            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+        } else {
+            l2_table[l2_index + i] = cpu_to_be64(0);
+        }

        /* Then decrease the refcount */
        qcow2_free_any_clusters(bs, old_offset, 1, type);
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -28,7 +28,7 @@
 #include "qemu/range.h"
 #include "qapi/qmp/types.h"

-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
+static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend, enum qcow2_discard_type type);
@@ -40,8 +40,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
 int qcow2_refcount_init(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;
-    int ret, refcount_table_size2, i;
+    unsigned int refcount_table_size2, i;
+    int ret;

+    assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = g_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
@@ -87,7 +89,7 @@ static int load_refcount_block(BlockDriverState *bs,
 static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
 {
    BDRVQcowState *s = bs->opaque;
-    int refcount_table_index, block_index;
+    uint64_t refcount_table_index, block_index;
    int64_t refcount_block_offset;
    int ret;
    uint16_t *refcount_block;
@@ -96,7 +98,8 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size)
        return 0;
-    refcount_block_offset = s->refcount_table[refcount_table_index];
+    refcount_block_offset =
+        s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
    if (!refcount_block_offset)
        return 0;

@@ -191,10 +194,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
     *   they can describe them themselves.
     *
     * - We need to consider that at this point we are inside update_refcounts
-     *   and doing the initial refcount increase. This means that some clusters
-     *   have already been allocated by the caller, but their refcount isn't
-     *   accurate yet. free_cluster_index tells us where this allocation ends
-     *   as long as we don't overwrite it by freeing clusters.
+     *   and potentially doing an initial refcount increase. This means that
+     *   some clusters have already been allocated by the caller, but their
+     *   refcount isn't accurate yet. If we allocate clusters for metadata, we
+     *   need to return -EAGAIN to signal the caller that it needs to restart
+     *   the search for free clusters.
     *
     * - alloc_clusters_noref and qcow2_free_clusters may load a different
     *   refcount block into the cache
@@ -279,7 +283,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
        }

        s->refcount_table[refcount_table_index] = new_block;
-        return 0;
+
+        /* The new refcount block may be where the caller intended to put its
+         * data, so let it restart the search. */
+        return -EAGAIN;
    }

    ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@@ -302,8 +309,11 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Calculate the number of refcount blocks needed so far */
    uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
-    uint64_t blocks_used = (s->free_cluster_index +
-        refcount_block_clusters - 1) / refcount_block_clusters;
+    uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
+
+    if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
+        return -EFBIG;
+    }

    /* And now we need at least one block more for the new metadata */
    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@@ -336,8 +346,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
    uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
    uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));

-    assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
-
    /* Fill the new refcount table */
    memcpy(new_table, s->refcount_table,
        s->refcount_table_size * sizeof(uint64_t));
@@ -400,18 +408,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;

-    /* Free old table. Remember, we must not change free_cluster_index */
-    uint64_t old_free_cluster_index = s->free_cluster_index;
+    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);
-    s->free_cluster_index = old_free_cluster_index;

    ret = load_refcount_block(bs, new_block, (void**) refcount_block);
    if (ret < 0) {
        return ret;
    }

-    return 0;
+    /* If we were trying to do the initial refcount update for some cluster
+     * allocation, we might have used the same clusters to store newly
+     * allocated metadata. Make the caller search some new space. */
+    return -EAGAIN;

 fail_table:
    g_free(new_table);
@@ -626,15 +635,16 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,


 /* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
 {
    BDRVQcowState *s = bs->opaque;
-    int i, nb_clusters, refcount;
+    uint64_t i, nb_clusters;
+    int refcount;

    nb_clusters = size_to_clusters(s, size);
 retry:
    for(i = 0; i < nb_clusters; i++) {
-        int64_t next_cluster_index = s->free_cluster_index++;
+        uint64_t next_cluster_index = s->free_cluster_index++;
        refcount = get_refcount(bs, next_cluster_index);

        if (refcount < 0) {
@@ -651,18 +661,21 @@ retry:
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
 }

-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
 {
    int64_t offset;
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
-    offset = alloc_clusters_noref(bs, size);
-    if (offset < 0) {
-        return offset;
-    }
+    do {
+        offset = alloc_clusters_noref(bs, size);
+        if (offset < 0) {
+            return offset;
+        }
+
+        ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+    } while (ret == -EAGAIN);

-    ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
    if (ret < 0) {
        return ret;
    }
@@ -675,33 +688,36 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t cluster_index;
-    uint64_t old_free_cluster_index;
-    int i, refcount, ret;
+    uint64_t i;
+    int refcount, ret;

-    /* Check how many clusters there are free */
-    cluster_index = offset >> s->cluster_bits;
-    for(i = 0; i < nb_clusters; i++) {
-        refcount = get_refcount(bs, cluster_index++);
-
-        if (refcount < 0) {
-            return refcount;
-        } else if (refcount != 0) {
-            break;
-        }
+    assert(nb_clusters >= 0);
+    if (nb_clusters == 0) {
+        return 0;
    }

-    /* And then allocate them */
-    old_free_cluster_index = s->free_cluster_index;
-    s->free_cluster_index = cluster_index + i;
+    do {
+        /* Check how many clusters there are free */
+        cluster_index = offset >> s->cluster_bits;
+        for(i = 0; i < nb_clusters; i++) {
+            refcount = get_refcount(bs, cluster_index++);
+
+            if (refcount < 0) {
+                return refcount;
+            } else if (refcount != 0) {
+                break;
+            }
+        }
+
+        /* And then allocate them */
+        ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
+                              QCOW2_DISCARD_NEVER);
+    } while (ret == -EAGAIN);

-    ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
-                          QCOW2_DISCARD_NEVER);
    if (ret < 0) {
        return ret;
    }

-    s->free_cluster_index = old_free_cluster_index;
-
    return i;
 }

@@ -1004,8 +1020,7 @@ static void inc_refcounts(BlockDriverState *bs,
                          int64_t offset, int64_t size)
 {
    BDRVQcowState *s = bs->opaque;
-    int64_t start, last, cluster_offset;
-    int k;
+    uint64_t start, last, cluster_offset, k;

    if (size <= 0)
        return;
@@ -1015,11 +1030,7 @@ static void inc_refcounts(BlockDriverState *bs,
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size) {
        k = cluster_offset >> s->cluster_bits;
-        if (k < 0) {
-            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
-                cluster_offset);
-            res->corruptions++;
-        } else if (k >= refcount_table_size) {
+        if (k >= refcount_table_size) {
            fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
                "the end of the image file, can't properly check refcounts.\n",
                cluster_offset);
@@ -1376,7 +1387,7 @@ static int write_reftable_entry(BlockDriverState *bs, int rt_index)
 * does _not_ decrement the reference count for the currently occupied cluster.
 *
 * This function prints an informative message to stderr on error (and returns
- * -errno); on success, 0 is returned.
+ * -errno); on success, the offset of the newly allocated cluster is returned.
 */
 static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
                                      uint64_t offset)
@@ -1392,14 +1403,14 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
        fprintf(stderr, "Could not allocate new cluster: %s\n",
                strerror(-new_offset));
        ret = new_offset;
-        goto fail;
+        goto done;
    }

    /* fetch current refcount block content */
    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
    if (ret < 0) {
        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
-        goto fail;
+        goto fail_free_cluster;
    }

    /* new block has not yet been entered into refcount table, therefore it is
@@ -1410,8 +1421,7 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
                "check failed: %s\n", strerror(-ret));
        /* the image will be marked corrupt, so don't even attempt on freeing
         * the cluster */
-        new_offset = 0;
-        goto fail;
+        goto done;
    }

    /* write to new block */
@@ -1419,7 +1429,7 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
            s->cluster_sectors);
    if (ret < 0) {
        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
-        goto fail;
+        goto fail_free_cluster;
    }

    /* update refcount table */
@@ -1429,24 +1439,27 @@ static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
    if (ret < 0) {
        fprintf(stderr, "Could not update refcount table: %s\n",
                strerror(-ret));
-        goto fail;
+        goto fail_free_cluster;
    }

-fail:
-    if (new_offset && (ret < 0)) {
-        qcow2_free_clusters(bs, new_offset, s->cluster_size,
-                QCOW2_DISCARD_ALWAYS);
-    }
+    goto done;
+
+fail_free_cluster:
+    qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD_OTHER);
+
+done:
    if (refcount_block) {
-        if (ret < 0) {
-            qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-        } else {
-            ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-        }
+        /* This should never fail, as it would only do so if the given refcount
+         * block cannot be found in the cache. As this is impossible as long as
+         * there are no bugs, assert the success. */
+        int tmp = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+        assert(tmp == 0);
    }
+
    if (ret < 0) {
        return ret;
    }
+
    return new_offset;
 }

@@ -1460,14 +1473,19 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix)
 {
    BDRVQcowState *s = bs->opaque;
-    int64_t size, i, highest_cluster;
-    int nb_clusters, refcount1, refcount2;
+    int64_t size, i, highest_cluster, nb_clusters;
+    int refcount1, refcount2;
    QCowSnapshot *sn;
    uint16_t *refcount_table;
    int ret;

    size = bdrv_getlength(bs->file);
    nb_clusters = size_to_clusters(s, size);
+    if (nb_clusters > INT_MAX) {
+        res->check_errors++;
+        return -EFBIG;
+    }
+
    refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));

    res->bfi.total_clusters =
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -26,31 +26,6 @@
 #include "block/block_int.h"
 #include "block/qcow2.h"

-typedef struct QEMU_PACKED QCowSnapshotHeader {
-    /* header is 8 byte aligned */
-    uint64_t l1_table_offset;
-
-    uint32_t l1_size;
-    uint16_t id_str_size;
-    uint16_t name_size;
-
-    uint32_t date_sec;
-    uint32_t date_nsec;
-
-    uint64_t vm_clock_nsec;
-
-    uint32_t vm_state_size;
-    uint32_t extra_data_size; /* for extension */
-    /* extra data follows */
-    /* id_str follows */
-    /* name follows  */
-} QCowSnapshotHeader;
-
-typedef struct QEMU_PACKED QCowSnapshotExtraData {
-    uint64_t vm_state_size_large;
-    uint64_t disk_size;
-} QCowSnapshotExtraData;
-
 void qcow2_free_snapshots(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;
@@ -141,8 +116,14 @@ int qcow2_read_snapshots(BlockDriverState *bs)
        }
        offset += name_size;
        sn->name[name_size] = '\0';
+
+        if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
+            ret = -EFBIG;
+            goto fail;
+        }
    }

+    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

@@ -163,7 +144,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
-    int64_t offset, snapshots_offset;
+    int64_t offset, snapshots_offset = 0;
    int ret;

    /* compute the size of the snapshots */
@@ -175,7 +156,14 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        offset += sizeof(extra);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);
+
+        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
+            ret = -EFBIG;
+            goto fail;
+        }
    }
+
+    assert(offset <= INT_MAX);
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
@@ -357,6 +345,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

+    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
+        return -EFBIG;
+    }
+
    memset(sn, 0, sizeof(*sn));

    /* Generate an ID if it wasn't passed */
@@ -606,7 +598,8 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    s->nb_snapshots--;
    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
-        error_setg(errp, "Failed to remove snapshot from snapshot list");
+        error_setg_errno(errp, -ret,
+                         "Failed to remove snapshot from snapshot list");
        return ret;
    }

@@ -624,7 +617,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
                                         sn.l1_size, -1);
    if (ret < 0) {
-        error_setg(errp, "Failed to free the cluster and L1 table");
+        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
        return ret;
    }
    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
@@ -633,7 +626,8 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    /* must update the copied flag on the current cluster offsets */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
-        error_setg(errp, "Failed to update snapshot status in disk");
+        error_setg_errno(errp, -ret,
+                         "Failed to update snapshot status in disk");
        return ret;
    }

@@ -699,7 +693,11 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
    sn = &s->snapshots[snapshot_index];

    /* Allocate and read in the snapshot's L1 table */
-    new_l1_bytes = s->l1_size * sizeof(uint64_t);
+    if (sn->l1_size > QCOW_MAX_L1_SIZE) {
+        error_setg(errp, "Snapshot L1 table too large");
+        return -EFBIG;
+    }
+    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));

    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -269,12 +269,15 @@ static int qcow2_mark_clean(BlockDriverState *bs)
    BDRVQcowState *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
-        int ret = bdrv_flush(bs);
+        int ret;
+
+        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+
+        ret = bdrv_flush(bs);
        if (ret < 0) {
            return ret;
        }

-        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
        return qcow2_update_header(bs);
    }
    return 0;
@@ -329,6 +332,32 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
    return ret;
 }

+static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
+                                 uint64_t entries, size_t entry_len)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t size;                      /* table size in bytes */
+
+    /* Reject a table whose byte size or end offset exceeds signed INT64_MAX;
+     * uint64_t header fields are passed to qemu functions taking int64_t. */
+    if (entries > INT64_MAX / entry_len) {
+        return -EINVAL;                 /* entries * entry_len too large */
+    }
+
+    size = entries * entry_len;         /* bounded by the check above */
+
+    if (INT64_MAX - size < offset) {    /* offset + size > INT64_MAX */
+        return -EINVAL;
+    }
+
+    /* Tables must be cluster aligned */
+    if (offset & (s->cluster_size - 1)) {
+        return -EINVAL;
+    }
+
+    return 0;                           /* offset and size are acceptable */
+}
+
 static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
@@ -419,7 +448,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
    BDRVQcowState *s = bs->opaque;
-    int len, i, ret = 0;
+    unsigned int len, i;
+    int ret = 0;
    QCowHeader header;
    QemuOpts *opts;
    Error *local_err = NULL;
@@ -449,7 +479,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
-        ret = -EMEDIUMTYPE;
+        ret = -EINVAL;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
@@ -460,6 +490,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

    s->qcow_version = header.version;

+    /* Initialise cluster size */
+    if (header.cluster_bits < MIN_CLUSTER_BITS ||
+        header.cluster_bits > MAX_CLUSTER_BITS) {
+        error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->cluster_bits = header.cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+
    /* Initialise version 3 header fields */
    if (header.version == 2) {
        header.incompatible_features    = 0;
@@ -473,6 +515,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        be64_to_cpus(&header.autoclear_features);
        be32_to_cpus(&header.refcount_order);
        be32_to_cpus(&header.header_length);
+
+        if (header.header_length < 104) {
+            error_setg(errp, "qcow2 header too short");
+            ret = -EINVAL;
+            goto fail;
+        }
+    }
+
+    if (header.header_length > s->cluster_size) {
+        error_setg(errp, "qcow2 header exceeds cluster size");
+        ret = -EINVAL;
+        goto fail;
    }

    if (header.header_length > sizeof(header)) {
@@ -487,6 +541,12 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

+    if (header.backing_file_offset > s->cluster_size) {
+        error_setg(errp, "Invalid backing file offset");
+        ret = -EINVAL;
+        goto fail;
+    }
+
    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
@@ -506,6 +566,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
+        g_free(feature_table);
        goto fail;
    }

@@ -529,12 +590,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }
    s->refcount_order = header.refcount_order;

-    if (header.cluster_bits < MIN_CLUSTER_BITS ||
-        header.cluster_bits > MAX_CLUSTER_BITS) {
-        error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits);
-        ret = -EINVAL;
-        goto fail;
-    }
    if (header.crypt_method > QCOW_CRYPT_AES) {
        error_setg(errp, "Unsupported encryption method: %i",
                   header.crypt_method);
@@ -545,23 +600,52 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    if (s->crypt_method_header) {
        bs->encrypted = 1;
    }
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+
    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
    bs->total_sectors = header.size / 512;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+
    s->refcount_table_offset = header.refcount_table_offset;
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

-    s->snapshots_offset = header.snapshots_offset;
-    s->nb_snapshots = header.nb_snapshots;
+    if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
+        error_setg(errp, "Reference count table too large");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    ret = validate_table_offset(bs, s->refcount_table_offset,
+                                s->refcount_table_size, sizeof(uint64_t));
+    if (ret < 0) {
+        error_setg(errp, "Invalid reference count table offset");
+        goto fail;
+    }
+
+    /* Snapshot table offset/length */
+    if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
+        error_setg(errp, "Too many snapshots");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    ret = validate_table_offset(bs, header.snapshots_offset,
+                                header.nb_snapshots,
+                                sizeof(QCowSnapshotHeader));
+    if (ret < 0) {
+        error_setg(errp, "Invalid snapshot table offset");
+        goto fail;
+    }

    /* read the level 1 table */
+    if (header.l1_size > QCOW_MAX_L1_SIZE) {
+        error_setg(errp, "Active L1 table too large");
+        ret = -EFBIG;
+        goto fail;
+    }
    s->l1_size = header.l1_size;

    l1_vm_state_index = size_to_l1(s, header.size);
@@ -579,7 +663,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -EINVAL;
        goto fail;
    }
+
+    ret = validate_table_offset(bs, header.l1_table_offset,
+                                header.l1_size, sizeof(uint64_t));
+    if (ret < 0) {
+        error_setg(errp, "Invalid L1 table offset");
+        goto fail;
+    }
    s->l1_table_offset = header.l1_table_offset;
+
+
    if (s->l1_size > 0) {
        s->l1_table = g_malloc0(
            align_offset(s->l1_size * sizeof(uint64_t), 512));
@@ -625,8 +718,10 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
-        if (len > 1023) {
-            len = 1023;
+        if (len > MIN(1023, s->cluster_size - header.backing_file_offset)) {
+            error_setg(errp, "Backing file name too long");
+            ret = -EINVAL;
+            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
@@ -637,6 +732,10 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        bs->backing_file[len] = '\0';
    }

+    /* Internal snapshots */
+    s->snapshots_offset = header.snapshots_offset;
+    s->nb_snapshots = header.nb_snapshots;
+
    ret = qcow2_read_snapshots(bs);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read snapshots");
@@ -644,7 +743,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Clear unknown autoclear feature bits */
-    if (!bs->read_only && s->autoclear_features != 0) {
+    if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) {
        s->autoclear_features = 0;
        ret = qcow2_update_header(bs);
        if (ret < 0) {
@@ -657,7 +756,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_co_mutex_init(&s->lock);

    /* Repair image if dirty */
-    if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
+    if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

@@ -671,7 +770,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    /* Enable lazy_refcounts according to image and command line options */
    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -718,7 +817,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    }

    qemu_opts_del(opts);
-    bs->bl.write_zeroes_alignment = s->cluster_sectors;

    if (s->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
@@ -746,11 +844,23 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    if (s->l2_table_cache) {
        qcow2_cache_destroy(bs, s->l2_table_cache);
    }
+    if (s->refcount_block_cache) {
+        qcow2_cache_destroy(bs, s->refcount_block_cache);
+    }
    g_free(s->cluster_cache);
    qemu_vfree(s->cluster_data);
    return ret;
 }

+static int qcow2_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    bs->bl.write_zeroes_alignment = s->cluster_sectors; /* one cluster */
+
+    return 0;                           /* nothing here can fail */
+}
+
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
 {
    BDRVQcowState *s = bs->opaque;
@@ -793,11 +903,25 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
    return 0;
 }

-/* We have nothing to do for QCOW2 reopen, stubs just return
- * success */
+/* We have no actual commit/abort logic for qcow2, but we need to write out any
+ * unwritten data if we reopen read-only. */
 static int qcow2_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
 {
+    int ret;
+
+    if ((state->flags & BDRV_O_RDWR) == 0) {    /* new flags are read-only */
+        ret = bdrv_flush(state->bs);            /* write out pending data */
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = qcow2_mark_clean(state->bs);      /* drop the dirty flag */
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
    return 0;
 }

@@ -992,7 +1116,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 {
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
-    int n_end;
    int ret;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t cluster_offset;
@@ -1016,14 +1139,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n_end = index_in_cluster + remaining_sectors;
+        cur_nr_sectors = remaining_sectors;
        if (s->crypt_method &&
-            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
-            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+            cur_nr_sectors >
+            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
+            cur_nr_sectors =
+                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
        }

        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
+            &cur_nr_sectors, &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }
@@ -1128,10 +1253,12 @@ static void qcow2_close(BlockDriverState *bs)
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;

-    qcow2_cache_flush(bs, s->l2_table_cache);
-    qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (!(bs->open_flags & BDRV_O_INCOMING)) {
+        qcow2_cache_flush(bs, s->l2_table_cache);
+        qcow2_cache_flush(bs, s->refcount_block_cache);

-    qcow2_mark_clean(bs);
+        qcow2_mark_clean(bs);
+    }

    qcow2_cache_destroy(bs, s->l2_table_cache);
    qcow2_cache_destroy(bs, s->refcount_block_cache);
@@ -1145,7 +1272,7 @@ static void qcow2_close(BlockDriverState *bs)
    qcow2_free_snapshots(bs);
 }

-static void qcow2_invalidate_cache(BlockDriverState *bs)
+static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
    BDRVQcowState *s = bs->opaque;
    int flags = s->flags;
@@ -1153,6 +1280,8 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)
    AES_KEY aes_decrypt_key;
    uint32_t crypt_method = 0;
    QDict *options;
+    Error *local_err = NULL;
+    int ret;

    /*
     * Backing files are read-only which makes all of their metadata immutable,
@@ -1167,14 +1296,26 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)

    qcow2_close(bs);

-    options = qdict_new();
-    qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
-              qbool_from_int(s->use_lazy_refcounts));
+    bdrv_invalidate_cache(bs->file, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }

    memset(s, 0, sizeof(BDRVQcowState));
-    qcow2_open(bs, options, flags, NULL);
+    options = qdict_clone_shallow(bs->options);

+    ret = qcow2_open(bs, options, flags, &local_err);
    QDECREF(options);
+    if (local_err) {
+        error_setg(errp, "Could not reopen qcow2 layer: %s",
+                   error_get_pretty(local_err));
+        error_free(local_err);
+        return;
+    } else if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
+        return;
+    }

    if (crypt_method) {
        s->crypt_method = crypt_method;
@@ -1395,34 +1536,39 @@ static int preallocate(BlockDriverState *bs)
    int ret;
    QCowL2Meta *meta;

-    nb_sectors = bdrv_getlength(bs) >> 9;
+    nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    offset = 0;

    while (nb_sectors) {
-        num = MIN(nb_sectors, INT_MAX >> 9);
-        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+        num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &num,
                                         &host_offset, &meta);
        if (ret < 0) {
            return ret;
        }

-        ret = qcow2_alloc_cluster_link_l2(bs, meta);
-        if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
-                                    QCOW2_DISCARD_NEVER);
-            return ret;
-        }
+        while (meta) {
+            QCowL2Meta *next = meta->next;

-        /* There are no dependent requests, but we need to remove our request
-         * from the list of in-flight requests */
-        if (meta != NULL) {
+            ret = qcow2_alloc_cluster_link_l2(bs, meta);
+            if (ret < 0) {
+                qcow2_free_any_clusters(bs, meta->alloc_offset,
+                                        meta->nb_clusters, QCOW2_DISCARD_NEVER);
+                return ret;
+            }
+
+            /* There are no dependent requests, but we need to remove our
+             * request from the list of in-flight requests */
            QLIST_REMOVE(meta, next_in_flight);
+
+            g_free(meta);
+            meta = next;
        }

        /* TODO Preallocate data if requested */

        nb_sectors -= num;
-        offset += num << 9;
+        offset += num << BDRV_SECTOR_BITS;
    }

    /*
@@ -1431,9 +1577,10 @@ static int preallocate(BlockDriverState *bs)
     * EOF). Extend the image to the last allocated sector.
     */
    if (host_offset != 0) {
-        uint8_t buf[512];
-        memset(buf, 0, 512);
-        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
+        uint8_t buf[BDRV_SECTOR_SIZE];
+        memset(buf, 0, BDRV_SECTOR_SIZE);
+        ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+                         buf, 1);
        if (ret < 0) {
            return ret;
        }
@@ -1473,7 +1620,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     */
    BlockDriverState* bs;
    QCowHeader *header;
-    uint8_t* refcount_table;
+    uint64_t* refcount_table;
    Error *local_err = NULL;
    int ret;

@@ -1483,7 +1630,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        return ret;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return ret;
@@ -1523,9 +1672,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    /* Write an empty refcount table */
-    refcount_table = g_malloc0(cluster_size);
-    ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
+    /* Write a refcount table with one refcount block */
+    refcount_table = g_malloc0(2 * cluster_size);
+    refcount_table[0] = cpu_to_be64(2 * cluster_size);
+    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
    g_free(refcount_table);

    if (ret < 0) {
@@ -1533,7 +1683,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        goto out;
    }

-    bdrv_close(bs);
+    bdrv_unref(bs);
+    bs = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
@@ -1542,14 +1693,14 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     */
    BlockDriver* drv = bdrv_find_format("qcow2");
    assert(drv != NULL);
-    ret = bdrv_open(bs, filename, NULL,
+    ret = bdrv_open(&bs, filename, NULL, NULL,
        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

-    ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
+    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
@@ -1589,20 +1740,23 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        }
    }

-    bdrv_close(bs);
+    bdrv_unref(bs);
+    bs = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
-    ret = bdrv_open(bs, filename, NULL,
+    ret = bdrv_open(&bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
                    drv, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    ret = 0;
 out:
-    bdrv_unref(bs);
+    if (bs) {
+        bdrv_unref(bs);
+    }
    return ret;
 }

@@ -1675,32 +1829,12 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,

    ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
                        cluster_size, prealloc, options, version, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
 }

-static int qcow2_make_empty(BlockDriverState *bs)
-{
-#if 0
-    /* XXX: not correct */
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
-        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    l2_cache_reset(bs);
-#endif
-    return 0;
-}
-
 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
@@ -2244,7 +2378,6 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = qcow2_co_get_block_status,
    .bdrv_set_key       = qcow2_set_key,
-    .bdrv_make_empty    = qcow2_make_empty,

    .bdrv_co_readv          = qcow2_co_readv,
    .bdrv_co_writev         = qcow2_co_writev,
@@ -2268,6 +2401,7 @@ static BlockDriver bdrv_qcow2 = {

    .bdrv_change_backing_file   = qcow2_change_backing_file,

+    .bdrv_refresh_limits        = qcow2_refresh_limits,
    .bdrv_invalidate_cache      = qcow2_invalidate_cache,

    .create_options = qcow2_create_options,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -38,6 +38,19 @@
 #define QCOW_CRYPT_AES  1

 #define QCOW_MAX_CRYPT_CLUSTERS 32
+#define QCOW_MAX_SNAPSHOTS 65536
+
+/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
+ * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
+#define QCOW_MAX_REFTABLE_SIZE 0x800000
+
+/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
+ * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
+#define QCOW_MAX_L1_SIZE 0x2000000
+
+/* Allow for an average of 1k per snapshot table entry, should be plenty of
+ * space for snapshot names and IDs */
+#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)

 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
@@ -97,6 +110,32 @@ typedef struct QCowHeader {
    uint32_t header_length;
 } QEMU_PACKED QCowHeader;

+typedef struct QEMU_PACKED QCowSnapshotHeader {
+    /* header is 8 byte aligned */
+    uint64_t l1_table_offset;
+
+    uint32_t l1_size;
+    uint16_t id_str_size;       /* length of the id_str that follows */
+    uint16_t name_size;         /* length of the name that follows */
+
+    uint32_t date_sec;
+    uint32_t date_nsec;
+
+    uint64_t vm_clock_nsec;
+
+    uint32_t vm_state_size;
+    uint32_t extra_data_size; /* for extension */
+    /* extra data follows */
+    /* id_str follows */
+    /* name follows */
+} QCowSnapshotHeader;
+
+typedef struct QEMU_PACKED QCowSnapshotExtraData {
+    uint64_t vm_state_size_large;  /* 64-bit vm_state_size? TODO confirm */
+    uint64_t disk_size;
+} QCowSnapshotExtraData;
+
+
 typedef struct QCowSnapshot {
    uint64_t l1_table_offset;
    uint32_t l1_size;
@@ -191,8 +230,8 @@ typedef struct BDRVQcowState {
    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;
-    int64_t free_cluster_index;
-    int64_t free_byte_offset;
+    uint64_t free_cluster_index;
+    uint64_t free_byte_offset;

    CoMutex lock;

@@ -202,7 +241,7 @@ typedef struct BDRVQcowState {
    AES_KEY aes_decrypt_key;
    uint64_t snapshots_offset;
    int snapshots_size;
-    int nb_snapshots;
+    unsigned int nb_snapshots;
    QCowSnapshot *snapshots;

    int flags;
@@ -340,11 +379,11 @@ typedef enum QCow2MetadataOverlap {
 #define QCOW2_OL_ALL \
    (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)

-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL

-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL

 static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
 {
@@ -383,6 +422,11 @@ static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
    return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
 }

+static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
+{
+    return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits; /* bytes -> clusters */
+}
+
 static inline int qcow2_get_cluster_type(uint64_t l2_entry)
 {
    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
@@ -431,7 +475,7 @@ void qcow2_refcount_close(BlockDriverState *bs);
 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
                                  int addend, enum qcow2_discard_type type);

-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
 int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
    int nb_clusters);
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
@@ -468,7 +512,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
+    int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         uint64_t offset,
                                         int compressed_size);
--- a/block/qed.c
+++ b/block/qed.c
@@ -391,14 +391,15 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
-        return -EMEDIUMTYPE;
+        error_setg(errp, "Image not in QED format");
+        return -EINVAL;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
@@ -495,7 +496,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

-    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                            qed_need_check_timer_cb, s);

@@ -507,6 +507,15 @@ out:
    return ret;
 }

+static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+    /* The cluster size, scaled to sectors, is the write_zeroes alignment */
+    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
+
+    return 0;                           /* nothing here can fail */
+}
+
 /* We have nothing to do for QED reopen, stubs just return
 * success */
 static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -537,7 +546,8 @@ static void bdrv_qed_close(BlockDriverState *bs)

 static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
-                      const char *backing_file, const char *backing_fmt)
+                      const char *backing_file, const char *backing_fmt,
+                      Error **errp)
 {
    QEDHeader header = {
        .magic = QED_MAGIC,
@@ -554,20 +564,20 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    size_t l1_size = header.cluster_size * header.table_size;
    Error *local_err = NULL;
    int ret = 0;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;

    ret = bdrv_create_file(filename, NULL, &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB,
-                         &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
+                    &local_err);
    if (ret < 0) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+        error_propagate(errp, local_err);
        return ret;
    }

@@ -657,7 +667,7 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
    }

    return qed_create(filename, cluster_size, image_size, table_size,
-                      backing_file, backing_fmt);
+                      backing_file, backing_fmt, errp);
 }

 typedef struct {
@@ -723,11 +733,6 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
    return cb.status;
 }

-static int bdrv_qed_make_empty(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
 static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 {
    return acb->common.bs->opaque;
@@ -1553,13 +1558,31 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
    return ret;
 }

-static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
+    Error *local_err = NULL;
+    int ret;

    bdrv_qed_close(bs);
+
+    bdrv_invalidate_cache(bs->file, &local_err);  /* underlying file first */
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
    memset(s, 0, sizeof(BDRVQEDState));
-    bdrv_qed_open(bs, NULL, bs->open_flags, NULL);
+    ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
+    if (local_err) {                    /* open reported an Error: wrap it */
+        error_setg(errp, "Could not reopen qed layer: %s",
+                   error_get_pretty(local_err));
+        error_free(local_err);          /* text was formatted into errp above */
+        return;
+    } else if (ret < 0) {               /* failed without setting an Error */
+        error_setg_errno(errp, -ret, "Could not reopen qed layer");
+        return;
+    }
 }

 static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
@@ -1609,13 +1632,13 @@ static BlockDriver bdrv_qed = {
    .bdrv_create              = bdrv_qed_create,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
-    .bdrv_make_empty          = bdrv_qed_make_empty,
    .bdrv_aio_readv           = bdrv_qed_aio_readv,
    .bdrv_aio_writev          = bdrv_qed_aio_writev,
    .bdrv_co_write_zeroes     = bdrv_qed_co_write_zeroes,
    .bdrv_truncate            = bdrv_qed_truncate,
    .bdrv_getlength           = bdrv_qed_getlength,
    .bdrv_get_info            = bdrv_qed_get_info,
+    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
    .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
    .bdrv_check               = bdrv_qed_check,
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -0,0 +1,877 @@
+/*
+ * Quorum Block filter
+ *
+ * Copyright (C) 2012-2014 Nodalink, EURL.
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
+ * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+#include "block/block_int.h"
+#include "qapi/qmp/qjson.h"
+
+#define HASH_LENGTH 32
+
+#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
+#define QUORUM_OPT_BLKVERIFY      "blkverify"
+
+/* This union holds a vote hash value */
+typedef union QuorumVoteValue {
+    char h[HASH_LENGTH];       /* SHA-256 hash */
+    int64_t l;                 /* simpler 64 bits hash */
+} QuorumVoteValue;
+
+/* A vote item: records one child that voted for the enclosing version */
+typedef struct QuorumVoteItem {
+    int index;                          /* index of the voting child */
+    QLIST_ENTRY(QuorumVoteItem) next;   /* link in QuorumVoteVersion.items */
+} QuorumVoteItem;
+
+/* This structure is a vote version. A version is the set of votes sharing the
+ * same vote value.
+ * The set of votes is tracked with the items field and its cardinality is
+ * vote_count.
+ */
+typedef struct QuorumVoteVersion {
+    QuorumVoteValue value;               /* the value these votes agree on */
+    int index;                           /* child that first produced value */
+    int vote_count;                      /* number of entries in items */
+    QLIST_HEAD(, QuorumVoteItem) items;  /* one item per voting child */
+    QLIST_ENTRY(QuorumVoteVersion) next; /* link in QuorumVotes.vote_list */
+} QuorumVoteVersion;
+
+/* This structure holds a group of vote versions together */
+typedef struct QuorumVotes {
+    QLIST_HEAD(, QuorumVoteVersion) vote_list;   /* one entry per distinct value */
+    bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b); /* true if a equals b */
+} QuorumVotes;
+
+/* the following structure holds the state of one quorum instance */
+typedef struct BDRVQuorumState {
+    BlockDriverState **bs; /* children BlockDriverStates */
+    int num_children;      /* children count */
+    int threshold;         /* if less than threshold children reads gave the
+                            * same result a quorum error occurs.
+                            */
+    bool is_blkverify;     /* true if the driver is in blkverify mode
+                            * Writes are mirrored on two children devices.
+                            * On reads the two children devices' contents are
+                            * compared and if a difference is spotted its
+                            * location is printed and the code aborts.
+                            * It is useful to debug other block drivers by
+                            * comparing them with a reference one.
+                            */
+} BDRVQuorumState;
+
+typedef struct QuorumAIOCB QuorumAIOCB;
+
+/* Quorum will create one instance of the following structure per operation it
+ * performs on its children.
+ * So for each read/write operation coming from the upper layer there will be
+ * one QuorumChildRequest per child (num_children of them).
+ */
+typedef struct QuorumChildRequest {
+    BlockDriverAIOCB *aiocb;    /* handle of the request issued to the child */
+    QEMUIOVector qiov;          /* reads only: per-child vector backed by buf */
+    uint8_t *buf;               /* reads only: per-child data buffer */
+    int ret;                    /* completion code reported by the child */
+    QuorumAIOCB *parent;        /* the request this child request belongs to */
+} QuorumChildRequest;
+
+/* Quorum will use the following structure to track progress of each read/write
+ * operation received by the upper layer.
+ * This structure holds a pointer to the array of QuorumChildRequest instances
+ * used to do operations on each child and tracks overall progress.
+ */
+struct QuorumAIOCB {
+    BlockDriverAIOCB common;
+
+    /* Request metadata */
+    uint64_t sector_num;
+    int nb_sectors;
+
+    QEMUIOVector *qiov;         /* calling IOV */
+
+    QuorumChildRequest *qcrs;   /* individual child requests */
+    int count;                  /* number of completed AIOCB */
+    int success_count;          /* number of successfully completed AIOCB */
+
+    QuorumVotes votes;          /* hash votes collected while voting on a read */
+
+    bool is_read;               /* true for reads; writes leave it false */
+    int vote_ret;               /* result handed to the caller's callback */
+};
+
+static void quorum_vote(QuorumAIOCB *acb);
+
+/*
+ * Cancel callback for a quorum request: cancel the request issued to every
+ * child, then release the per-child request array and the parent AIOCB.
+ */
+static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;
+
+    /* cancel all callbacks */
+    for (i = 0; i < s->num_children; i++) {
+        /* qcrs[] is zero-allocated, so a slot whose request handle was never
+         * recorded still holds NULL; there is nothing to cancel for it. */
+        if (acb->qcrs[i].aiocb) {
+            bdrv_aio_cancel(acb->qcrs[i].aiocb);
+        }
+    }
+
+    g_free(acb->qcrs);
+    qemu_aio_release(acb);
+}
+
+static AIOCBInfo quorum_aiocb_info = {   /* passed to qemu_aio_get() by quorum_aio_get() */
+    .aiocb_size         = sizeof(QuorumAIOCB),
+    .cancel             = quorum_aio_cancel,
+};
+
+/*
+ * Complete a quorum request: hand the final result to the caller's
+ * completion callback, then free everything the request owned.
+ */
+static void quorum_aio_finalize(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *state = acb->common.bs->opaque;
+    int child;
+
+    /* vote_ret stays 0 unless a vote or an I/O failure recorded an error */
+    acb->common.cb(acb->common.opaque, acb->vote_ret);
+
+    /* only reads allocate a buffer and an I/O vector per child */
+    if (acb->is_read) {
+        for (child = 0; child < state->num_children; child++) {
+            QuorumChildRequest *req = &acb->qcrs[child];
+
+            qemu_vfree(req->buf);
+            qemu_iovec_destroy(&req->qiov);
+        }
+    }
+
+    g_free(acb->qcrs);
+    qemu_aio_release(acb);
+}
+
+/* Vote comparator for hash values: true when both HASH_LENGTH-byte hashes match */
+static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return memcmp(a->h, b->h, HASH_LENGTH) == 0;
+}
+
+/* Vote comparator for 64-bit values: true when both integers are equal */
+static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    bool same = (a->l == b->l);
+
+    return same;
+}
+
+/*
+ * Allocate and initialise the parent AIOCB tracking one incoming request,
+ * together with one QuorumChildRequest slot per child.  The request starts
+ * out as a write (is_read == false); the read path flips the flag itself.
+ */
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+                                   BlockDriverState *bs,
+                                   QEMUIOVector *qiov,
+                                   uint64_t sector_num,
+                                   int nb_sectors,
+                                   BlockDriverCompletionFunc *cb,
+                                   void *opaque)
+{
+    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    QuorumChildRequest *req, *end;
+
+    acb->common.bs->opaque = s;
+
+    /* request metadata */
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->qiov = qiov;
+    acb->is_read = false;
+
+    /* progress tracking */
+    acb->count = 0;
+    acb->success_count = 0;
+    acb->vote_ret = 0;
+
+    /* voting state: read votes compare hashes */
+    QLIST_INIT(&acb->votes.vote_list);
+    acb->votes.compare = quorum_sha256_compare;
+
+    /* per-child slots, each pointing back at its parent request */
+    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
+    end = acb->qcrs + s->num_children;
+    for (req = acb->qcrs; req < end; req++) {
+        req->buf = NULL;
+        req->ret = 0;
+        req->parent = acb;
+    }
+
+    return acb;
+}
+
+/*
+ * Emit a QEVENT_QUORUM_REPORT_BAD monitor event for the child @node_name,
+ * covering the sector range of @acb.  When @ret is negative its strerror()
+ * text is attached to the event under the "error" key.
+ */
+static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+{
+    QObject *event;
+
+    assert(node_name);
+
+    event = qobject_from_jsonf("{ 'node-name': %s"
+                               ", 'sector-num': %" PRId64
+                               ", 'sectors-count': %d }",
+                               node_name, acb->sector_num, acb->nb_sectors);
+    if (ret < 0) {
+        QDict *fields = qobject_to_qdict(event);
+
+        qdict_put(fields, "error", qstring_from_str(strerror(-ret)));
+    }
+
+    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, event);
+    qobject_decref(event);
+}
+
+/*
+ * Emit a QEVENT_QUORUM_FAILURE monitor event for the sector range of @acb.
+ * The quorum node is identified by its device name, or by its node name when
+ * the device name is empty.
+ */
+static void quorum_report_failure(QuorumAIOCB *acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+    const char *reference;
+    QObject *event;
+
+    if (bs->device_name[0]) {
+        reference = bs->device_name;
+    } else {
+        reference = bs->node_name;
+    }
+
+    event = qobject_from_jsonf("{ 'reference': %s"
+                               ", 'sector-num': %" PRId64
+                               ", 'sectors-count': %d }",
+                               reference, acb->sector_num, acb->nb_sectors);
+    monitor_protocol_event(QEVENT_QUORUM_FAILURE, event);
+    qobject_decref(event);
+}
+
+static int quorum_vote_error(QuorumAIOCB *acb);
+
+/*
+ * Check whether fewer children succeeded than the vote threshold requires.
+ * If so, store the most common child error in acb->vote_ret, emit a failure
+ * event and return true; otherwise return false and leave @acb untouched.
+ */
+static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *state = acb->common.bs->opaque;
+
+    if (acb->success_count >= state->threshold) {
+        return false;
+    }
+
+    acb->vote_ret = quorum_vote_error(acb);
+    quorum_report_failure(acb);
+    return true;
+}
+
+/*
+ * Completion callback shared by every child request.
+ *
+ * @opaque is the QuorumChildRequest slot of the child that completed and
+ * @ret its result.  The result is recorded and a failing child is reported;
+ * once all children have completed, reads are voted on, writes are checked
+ * against the threshold, and the parent request is finalized.
+ */
+static void quorum_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    sacb->ret = ret;
+    acb->count++;
+    if (ret == 0) {
+        acb->success_count++;
+    } else {
+        /* sacb is slot i of acb->qcrs and was issued to child s->bs[i].
+         * Identify the child by that position rather than through
+         * sacb->aiocb, which may still be NULL when this callback runs. */
+        int child = (int)(sacb - acb->qcrs);
+
+        assert(child >= 0 && child < s->num_children);
+        quorum_report_bad(acb, s->bs[child]->node_name, ret);
+    }
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+    if (acb->count < s->num_children) {
+        /* still waiting for other children */
+        return;
+    }
+
+    /* Do the vote on read */
+    if (acb->is_read) {
+        quorum_vote(acb);
+    } else {
+        quorum_has_too_much_io_failed(acb);
+    }
+
+    quorum_aio_finalize(acb);
+}
+
+/*
+ * Report every child whose vote differs from the winning @value.
+ * Each such child gets a report-bad event with a result code of 0.
+ */
+static void quorum_report_bad_versions(BDRVQuorumState *s,
+                                       QuorumAIOCB *acb,
+                                       QuorumVoteValue *value)
+{
+    QuorumVoteVersion *loser;
+    QuorumVoteItem *voter;
+
+    QLIST_FOREACH(loser, &acb->votes.vote_list, next) {
+        /* only versions that do not match the winner are bad */
+        if (!acb->votes.compare(&loser->value, value)) {
+            QLIST_FOREACH(voter, &loser->items, next) {
+                quorum_report_bad(acb, s->bs[voter->index]->node_name, 0);
+            }
+        }
+    }
+}
+
+/*
+ * Copy the contents of @source into @dest segment by segment.
+ * Both vectors must have the same segment count, total size and
+ * per-segment lengths; this is asserted.
+ */
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int seg = 0;
+
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+
+    while (seg < source->niov) {
+        struct iovec *to = &dest->iov[seg];
+        struct iovec *from = &source->iov[seg];
+
+        assert(to->iov_len == from->iov_len);
+        memcpy(to->iov_base, from->iov_base, from->iov_len);
+        seg++;
+    }
+}
+
+/*
+ * Record that child @index voted for @value.
+ * The vote is added to the version in @votes that already carries an equal
+ * value (per votes->compare); if there is none, a new version is created and
+ * remembers @index as its first child.
+ */
+static void quorum_count_vote(QuorumVotes *votes,
+                              QuorumVoteValue *value,
+                              int index)
+{
+    QuorumVoteVersion *cursor = NULL, *match = NULL;
+    QuorumVoteItem *ballot;
+
+    /* search for an existing version carrying the same value */
+    QLIST_FOREACH(cursor, &votes->vote_list, next) {
+        if (!votes->compare(&cursor->value, value)) {
+            continue;
+        }
+        match = cursor;
+        break;
+    }
+
+    /* first vote for this value: start a new version */
+    if (match == NULL) {
+        match = g_new0(QuorumVoteVersion, 1);
+        match->index = index;
+        match->vote_count = 0;
+        memcpy(&match->value, value, sizeof(match->value));
+        QLIST_INIT(&match->items);
+        QLIST_INSERT_HEAD(&votes->vote_list, match, next);
+    }
+
+    /* attach this child's ballot to the version */
+    ballot = g_new0(QuorumVoteItem, 1);
+    ballot->index = index;
+    QLIST_INSERT_HEAD(&match->items, ballot, next);
+
+    match->vote_count++;
+}
+
+/* Unlink and free every version in @votes along with all of its vote items */
+static void quorum_free_vote_list(QuorumVotes *votes)
+{
+    QuorumVoteVersion *ver, *ver_next;
+    QuorumVoteItem *ballot, *ballot_next;
+
+    QLIST_FOREACH_SAFE(ver, &votes->vote_list, next, ver_next) {
+        /* release the ballots first, then the version that owned them */
+        QLIST_FOREACH_SAFE(ballot, &ver->items, next, ballot_next) {
+            QLIST_REMOVE(ballot, next);
+            g_free(ballot);
+        }
+        QLIST_REMOVE(ver, next);
+        g_free(ver);
+    }
+}
+
+/*
+ * Compute the SHA-256 digest of the data child @i read and store it in @hash.
+ * Returns the negative gnutls error if initialising or feeding the digest
+ * fails; otherwise the result of the last gnutls call made.
+ */
+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
+{
+    QEMUIOVector *data = &acb->qcrs[i].qiov;
+    gnutls_hash_hd_t digest;
+    int seg = 0;
+    int rc;
+
+    rc = gnutls_hash_init(&digest, GNUTLS_DIG_SHA256);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* feed every segment; stop at the first failure */
+    while (seg < data->niov) {
+        rc = gnutls_hash(digest, data->iov[seg].iov_base,
+                         data->iov[seg].iov_len);
+        if (rc < 0) {
+            break;
+        }
+        seg++;
+    }
+
+    /* deinit always runs: it writes the digest out and frees the handle */
+    gnutls_hash_deinit(digest, (void *) hash);
+    return rc;
+}
+
+/*
+ * Return the version with the highest vote count in @votes.
+ * On a tie the version encountered first in the list wins; NULL is returned
+ * when no version has a positive vote count.
+ */
+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
+{
+    QuorumVoteVersion *best = NULL;
+    QuorumVoteVersion *cur;
+    int best_count = 0;
+
+    QLIST_FOREACH(cur, &votes->vote_list, next) {
+        if (cur->vote_count <= best_count) {
+            continue;
+        }
+        best = cur;
+        best_count = cur->vote_count;
+    }
+
+    return best;
+}
+
+/* qemu_iovec_compare is handy for blkverify mode because it returns the
+ * location of the first differing byte. However, it is hand-coded to compare
+ * vectors one byte after another, so it does not benefit from the libc SIMD
+ * optimizations.
+ * quorum_iovec_compare is written for speed and should be used in the
+ * non-blkverify mode of quorum. It returns true when both vectors hold the
+ * same data.
+ */
+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int seg;
+
+    assert(a->niov == b->niov);
+
+    for (seg = 0; seg < a->niov; seg++) {
+        struct iovec *lhs = &a->iov[seg];
+        struct iovec *rhs = &b->iov[seg];
+
+        assert(lhs->iov_len == rhs->iov_len);
+        if (memcmp(lhs->iov_base, rhs->iov_base, lhs->iov_len) != 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Print a fatal quorum error to stderr, prefixed with the sector range of
+ * @acb, and terminate the process with exit status 1.
+ */
+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
+                                          const char *fmt, ...)
+{
+    va_list args;
+
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fputc('\n', stderr);
+    exit(1);
+}
+
+/*
+ * Compare the data two children returned for the same read.
+ * In normal mode this returns whether the vectors are identical.  In
+ * blkverify mode a mismatch is fatal (quorum_err() exits), so the function
+ * only ever returns true there.
+ */
+static bool quorum_compare(QuorumAIOCB *acb,
+                           QEMUIOVector *a,
+                           QEMUIOVector *b)
+{
+    BDRVQuorumState *state = acb->common.bs->opaque;
+    ssize_t mismatch;
+
+    if (!state->is_blkverify) {
+        return quorum_iovec_compare(a, b);
+    }
+
+    /* This driver will replace blkverify in this particular case */
+    mismatch = qemu_iovec_compare(a, b);
+    if (mismatch != -1) {
+        quorum_err(acb, "contents mismatch in sector %" PRId64,
+                   acb->sector_num +
+                   (uint64_t)(mismatch / BDRV_SECTOR_SIZE));
+    }
+    return true;
+}
+
+/* Vote among the children's non-zero results and return the most common one;
+ * returns 0 when no child reported an error.
+ */
+static int quorum_vote_error(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *state = acb->common.bs->opaque;
+    QuorumVotes tally;
+    QuorumVoteValue vote;
+    bool saw_error = false;
+    int result = 0;
+    int child;
+
+    tally.compare = quorum_64bits_compare;
+    QLIST_INIT(&tally.vote_list);
+
+    /* only failing children take part in the error vote */
+    for (child = 0; child < state->num_children; child++) {
+        int child_ret = acb->qcrs[child].ret;
+
+        if (!child_ret) {
+            continue;
+        }
+        saw_error = true;
+        vote.l = child_ret;
+        quorum_count_vote(&tally, &vote, child);
+    }
+
+    if (saw_error) {
+        QuorumVoteVersion *winner = quorum_get_vote_winner(&tally);
+
+        result = winner->value.l;
+    }
+
+    quorum_free_vote_list(&tally);
+
+    return result;
+}
+
+/*
+ * Decide which data a completed read hands back to the caller.
+ *
+ * If too many children failed, the error vote result is already stored and
+ * nothing is copied.  If all successful reads are identical, the first one is
+ * copied out.  Otherwise every successful read is hashed, the hashes are
+ * voted on, and the winning version is copied out provided it reaches the
+ * threshold; if not, acb->vote_ret is set to -EIO.
+ */
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumVoteVersion *winner;
+    QuorumVoteValue hash;
+    bool unanimous = true;
+    int first, other, child, ret;
+
+    if (quorum_has_too_much_io_failed(acb)) {
+        return;
+    }
+
+    /* locate the first child whose read succeeded */
+    first = 0;
+    while (first < s->num_children && acb->qcrs[first].ret) {
+        first++;
+    }
+    assert(first < s->num_children);
+
+    /* check the remaining successful reads against it, stopping at the
+     * first mismatch
+     */
+    for (other = first + 1; unanimous && other < s->num_children; other++) {
+        if (!acb->qcrs[other].ret) {
+            unanimous = quorum_compare(acb, &acb->qcrs[first].qiov,
+                                       &acb->qcrs[other].qiov);
+        }
+    }
+
+    /* every successful read agrees: no vote needed */
+    if (unanimous) {
+        quorum_copy_qiov(acb->qiov, &acb->qcrs[first].qiov);
+        return;
+    }
+
+    /* hash each successful read and register it as a vote */
+    for (child = 0; child < s->num_children; child++) {
+        if (acb->qcrs[child].ret) {
+            continue;
+        }
+        ret = quorum_compute_hash(acb, child, &hash);
+        if (ret < 0) {
+            /* the hash computation itself failed */
+            acb->vote_ret = ret;
+            goto out;
+        }
+        quorum_count_vote(&acb->votes, &hash, child);
+    }
+
+    /* the most represented version wins, if it reaches the threshold */
+    winner = quorum_get_vote_winner(&acb->votes);
+    if (winner->vote_count >= s->threshold) {
+        quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov);
+
+        /* report the children that disagreed with the winner */
+        quorum_report_bad_versions(s, acb, &winner->value);
+    } else {
+        quorum_report_failure(acb);
+        acb->vote_ret = -EIO;
+    }
+
+out:
+    /* free lists */
+    quorum_free_vote_list(&acb->votes);
+}
+
+/*
+ * Start a read: the same sector range is read from every child into a
+ * private buffer, and quorum_aio_cb() votes on the results once all children
+ * have completed.
+ *
+ * Returns the parent request's AIOCB.
+ */
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
+    int i;
+
+    acb->is_read = true;
+
+    /* give every child its own buffer and a vector shaped like the caller's,
+     * so the results can be compared before anything reaches the caller */
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
+        qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
+        qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
+    }
+
+    /* keep each child's request handle, as the write path does, so that
+     * cancellation and error reporting can refer to it */
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->bs[i], sector_num,
+                                            &acb->qcrs[i].qiov, nb_sectors,
+                                            quorum_aio_cb, &acb->qcrs[i]);
+    }
+
+    return &acb->common;
+}
+
+/*
+ * Start a write: the caller's vector is written unchanged to every child.
+ * Each child's request handle is kept in its QuorumChildRequest slot.
+ * Returns the parent request's AIOCB.
+ */
+static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb;
+    int child;
+
+    acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, cb, opaque);
+
+    for (child = 0; child < s->num_children; child++) {
+        QuorumChildRequest *req = &acb->qcrs[child];
+
+        req->aiocb = bdrv_aio_writev(s->bs[child], sector_num, qiov,
+                                     nb_sectors, &quorum_aio_cb, req);
+    }
+
+    return &acb->common;
+}
+
+/*
+ * Return the length shared by all children.
+ * A child's negative error is returned as is; -EIO is returned when two
+ * children disagree on the length.
+ */
+static int64_t quorum_getlength(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int64_t expected, len;
+    int child = 1;
+
+    /* the first child provides the reference length */
+    expected = bdrv_getlength(s->bs[0]);
+    if (expected < 0) {
+        return expected;
+    }
+
+    /* check that all the other files have the same length */
+    while (child < s->num_children) {
+        len = bdrv_getlength(s->bs[child]);
+        if (len < 0) {
+            return len;
+        }
+        if (len != expected) {
+            return -EIO;
+        }
+        child++;
+    }
+
+    return expected;
+}
+
+/*
+ * Invalidate the cache of each child in turn.  The first child that fails
+ * stops the walk and its error is propagated to @errp.
+ */
+static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    BDRVQuorumState *state = bs->opaque;
+    Error *child_err = NULL;
+    int child;
+
+    for (child = 0; child < state->num_children; child++) {
+        bdrv_invalidate_cache(state->bs[child], &child_err);
+        if (!child_err) {
+            continue;
+        }
+        error_propagate(errp, child_err);
+        return;
+    }
+}
+
+/*
+ * Flush every child and return the result that most children agree on.
+ * Successful flushes (0) take part in the vote alongside error codes.
+ */
+static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+{
+    BDRVQuorumState *state = bs->opaque;
+    QuorumVoteVersion *winner;
+    QuorumVotes tally;
+    QuorumVoteValue vote;
+    int child;
+    int result = 0;
+
+    tally.compare = quorum_64bits_compare;
+    QLIST_INIT(&tally.vote_list);
+
+    for (child = 0; child < state->num_children; child++) {
+        result = bdrv_co_flush(state->bs[child]);
+        vote.l = result;
+        quorum_count_vote(&tally, &vote, child);
+    }
+
+    winner = quorum_get_vote_winner(&tally);
+    result = winner->value.l;
+
+    quorum_free_vote_list(&tally);
+
+    return result;
+}
+
+/*
+ * Forward the first-non-filter query to the children: returns true as soon
+ * as one child accepts @candidate, false if none does.
+ */
+static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
+                                               BlockDriverState *candidate)
+{
+    BDRVQuorumState *state = bs->opaque;
+    int child = 0;
+
+    while (child < state->num_children) {
+        if (bdrv_recurse_is_first_non_filter(state->bs[child], candidate)) {
+            return true;
+        }
+        child++;
+    }
+
+    return false;
+}
+
+/*
+ * Validate the vote threshold: it must be at least 1 and at most
+ * @num_children.  Returns 0 when valid; otherwise sets @errp and returns
+ * -ERANGE.
+ */
+static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
+{
+    int ret = 0;
+
+    if (threshold < 1) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "vote-threshold", "value >= 1");
+        ret = -ERANGE;
+    } else if (threshold > num_children) {
+        error_setg(errp, "threshold may not exceed children count");
+        ret = -ERANGE;
+    }
+
+    return ret;
+}
+
+static QemuOptsList quorum_runtime_opts = {   /* options absorbed by quorum_open() */
+    .name = "quorum",
+    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
+    .desc = {
+        {
+            .name = QUORUM_OPT_VOTE_THRESHOLD,   /* "vote-threshold" */
+            .type = QEMU_OPT_NUMBER,
+            .help = "The number of vote needed for reaching quorum",
+        },
+        {
+            .name = QUORUM_OPT_BLKVERIFY,        /* "blkverify" */
+            .type = QEMU_OPT_BOOL,
+            .help = "Trigger block verify mode if set",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Open a quorum node.
+ *
+ * The children are taken from the "children." entries of @options; each
+ * entry is either a dictionary of options for the child or a string handed
+ * to bdrv_open() as a reference.  "vote-threshold" and "blkverify" are read
+ * through quorum_runtime_opts.
+ *
+ * Returns 0 on success.  On failure a negative errno value is returned, the
+ * error is propagated to @errp and every child opened so far is released.
+ */
+static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
+                       Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    Error *local_err = NULL;
+    QemuOpts *opts = NULL;
+    bool *opened;
+    bool want_blkverify;
+    QDict *sub = NULL;
+    QList *list = NULL;
+    const QListEntry *lentry;
+    int i;
+    int ret = 0;
+
+    qdict_flatten(options);
+    qdict_extract_subqdict(options, &sub, "children.");
+    qdict_array_split(sub, &list);
+
+    /* anything left in sub is not an array element of "children" */
+    if (qdict_size(sub)) {
+        error_setg(&local_err, "Invalid option children.%s",
+                   qdict_first(sub)->key);
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* count how many different children are present */
+    s->num_children = qlist_size(list);
+    if (s->num_children < 2) {
+        error_setg(&local_err,
+                   "Number of provided children must be greater than 1");
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+
+    /* and validate it against s->num_children */
+    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* is the driver in blkverify mode */
+    want_blkverify = qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false);
+    if (want_blkverify && s->num_children == 2 && s->threshold == 2) {
+        s->is_blkverify = true;
+    } else if (want_blkverify) {
+        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                "and using two files with vote_threshold=2\n");
+    }
+
+    /* allocate the children BlockDriverState array */
+    s->bs = g_new0(BlockDriverState *, s->num_children);
+    opened = g_new0(bool, s->num_children);
+
+    for (i = 0, lentry = qlist_first(list); lentry;
+         lentry = qlist_next(lentry), i++) {
+        QDict *d;
+        QString *string;
+
+        switch (qobject_type(lentry->value)) {
+        /* List of options */
+        case QTYPE_QDICT:
+            d = qobject_to_qdict(lentry->value);
+            QINCREF(d);
+            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL,
+                            &local_err);
+            break;
+
+        /* QMP reference */
+        case QTYPE_QSTRING:
+            string = qobject_to_qstring(lentry->value);
+            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
+                            flags, NULL, &local_err);
+            break;
+
+        default:
+            error_setg(&local_err, "Specification of child block device %i "
+                       "is invalid", i);
+            ret = -EINVAL;
+        }
+
+        if (ret < 0) {
+            goto close_exit;
+        }
+        opened[i] = true;
+    }
+
+    g_free(opened);
+    goto exit;
+
+close_exit:
+    /* cleanup on error */
+    for (i = 0; i < s->num_children; i++) {
+        if (!opened[i]) {
+            continue;
+        }
+        bdrv_unref(s->bs[i]);
+    }
+    g_free(s->bs);
+    g_free(opened);
+exit:
+    /* The runtime options are only needed while opening.  opts is still
+     * NULL when we bailed out before creating them. */
+    if (opts) {
+        qemu_opts_del(opts);
+    }
+    /* propagate error */
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+    }
+    QDECREF(list);
+    QDECREF(sub);
+    return ret;
+}
+
+/* Drop the reference held on each child, then free the children array */
+static void quorum_close(BlockDriverState *bs)
+{
+    BDRVQuorumState *state = bs->opaque;
+    int child = 0;
+
+    while (child < state->num_children) {
+        bdrv_unref(state->bs[child]);
+        child++;
+    }
+
+    g_free(state->bs);
+}
+
+static BlockDriver bdrv_quorum = {   /* registered by bdrv_quorum_init() */
+    .format_name        = "quorum",
+    .protocol_name      = "quorum",
+
+    .instance_size      = sizeof(BDRVQuorumState),
+
+    .bdrv_file_open     = quorum_open,
+    .bdrv_close         = quorum_close,
+
+    .bdrv_co_flush_to_disk = quorum_co_flush,   /* votes on the children's results */
+
+    .bdrv_getlength     = quorum_getlength,     /* all children must agree */
+
+    .bdrv_aio_readv     = quorum_aio_readv,
+    .bdrv_aio_writev    = quorum_aio_writev,
+    .bdrv_invalidate_cache = quorum_invalidate_cache,
+
+    .is_filter           = true,
+    .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+};
+
+static void bdrv_quorum_init(void)
+{
+    bdrv_register(&bdrv_quorum);
+}
+
+block_init(bdrv_quorum_init);
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
    int fd;
    int type;
    int open_flags;
+    size_t buf_align;
+
 #if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif

+/*
+ * Work out the request alignment (bs->request_alignment) and the memory
+ * buffer alignment (s->buf_align) to use with this file descriptor.
+ *
+ * Both are 1 for /dev/sg devices and for buffered I/O.  Otherwise the
+ * request alignment is taken from whichever sector-size ioctls are available
+ * and succeed; anything still unknown afterwards is guessed by trying reads
+ * of increasing alignment.  The buffer alignment is always guessed this way,
+ * since none of the ioctl results are stored into it.
+ */
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    unsigned int sector_size;
+    size_t align;
+    char *probe;
+
+    /* For /dev/sg devices the alignment is not really used.
+       With buffered I/O, we don't have any restrictions. */
+    if (bs->sg || !(s->open_flags & O_DIRECT)) {
+        s->buf_align = 1;
+        bs->request_alignment = 1;
+        return;
+    }
+
+    /* Start from "unknown" and try a few ioctls to get the right size */
+    s->buf_align = 0;
+    bs->request_alignment = 0;
+
+#ifdef BLKSSZGET
+    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DIOCGSECTORSIZE
+    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef CONFIG_XFS
+    if (s->is_xfs) {
+        struct dioattr da;
+        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+            bs->request_alignment = da.d_miniosz;
+            /* The kernel returns wrong information for d_mem */
+            /* s->buf_align = da.d_mem; */
+        }
+    }
+#endif
+
+    /* If we could not get the sizes so far, we can only guess them */
+    if (!s->buf_align) {
+        /* offset a maximally aligned buffer by each candidate alignment and
+         * keep the first one for which the read does not fail */
+        probe = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+        align = 512;
+        while (align <= MAX_BLOCKSIZE) {
+            if (pread(s->fd, probe + align, MAX_BLOCKSIZE, 0) >= 0) {
+                s->buf_align = align;
+                break;
+            }
+            align <<= 1;
+        }
+        qemu_vfree(probe);
+    }
+
+    if (!bs->request_alignment) {
+        /* grow the read length until the read does not fail */
+        probe = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+        align = 512;
+        while (align <= MAX_BLOCKSIZE) {
+            if (pread(s->fd, probe, align, 0) >= 0) {
+                bs->request_alignment = align;
+                break;
+            }
+            align <<= 1;
+        }
+        qemu_vfree(probe);
+    }
+}
+
 static void raw_parse_flags(int bdrv_flags, int *open_flags)
 {
    assert(open_flags != NULL);
@@ -264,6 +336,17 @@ error:
 }
 #endif

+/*
+ * Store @filename in @options under the "filename" key, with a leading
+ * "file:" prefix removed if there is one.
+ */
+static void raw_parse_filename(const char *filename, QDict *options,
+                               Error **errp)
+{
+    QString *path;
+
+    /* "file" is the default protocol, so the prefix is optional and the
+     * result of strstart() does not need to be checked: without a match
+     * the whole string is used as the file name. */
+    strstart(filename, "file:", &filename);
+
+    path = qstring_from_str(filename);
+    qdict_put_obj(options, "filename", QOBJECT(path));
+}
+
 static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -289,7 +372,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,

    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -376,7 +459,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    s->type = FTYPE_FILE;
    ret = raw_open_common(bs, options, flags, 0, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -463,7 +546,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    return ret;
 }

-
 static void raw_reopen_commit(BDRVReopenState *state)
 {
    BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +581,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

+static int raw_refresh_limits(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;

-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
-        {
-            unsigned int sectorsize = 512;
-            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
-                sectorsize > bufsize)
-                bufsize = sectorsize;
-        }
-#endif
-#ifdef CONFIG_COCOA
-        uint32_t blockSize = 512;
-        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
-            bufsize = blockSize;
-        }
-#endif
-*/
+    raw_probe_alignment(bs);
+    bs->bl.opt_mem_alignment = s->buf_align;
+
+    return 0;
+}

 static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
 {
@@ -1167,6 +1241,8 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
    int result = 0;
    int64_t total_size = 0;

+    strstart(filename, "file:", &filename);
+
    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -1349,6 +1425,7 @@ static BlockDriver bdrv_file = {
    .instance_size = sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
    .bdrv_probe = NULL, /* no probe for protocols */
+    .bdrv_parse_filename = raw_parse_filename,
    .bdrv_file_open = raw_open,
    .bdrv_reopen_prepare = raw_reopen_prepare,
    .bdrv_reopen_commit = raw_reopen_commit,
@@ -1363,6 +1440,7 @@ static BlockDriver bdrv_file = {
    .bdrv_aio_writev = raw_aio_writev,
    .bdrv_aio_flush = raw_aio_flush,
    .bdrv_aio_discard = raw_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate = raw_truncate,
    .bdrv_getlength = raw_getlength,
@@ -1483,6 +1561,15 @@ static int check_hdev_writable(BDRVRawState *s)
    return 0;
 }

+static void hdev_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    /* Strip a leading "host_device:" if present; the prefix is optional, just
+     * as for "file", so the result of strstart() is not checked. */
+    strstart(filename, "host_device:", &filename);
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+}
+
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -1533,7 +1620,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,

    ret = raw_open_common(bs, options, flags, 0, &local_err);
    if (ret < 0) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -1689,6 +1776,18 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options,
    int ret = 0;
    struct stat stat_buf;
    int64_t total_size = 0;
+    bool has_prefix;
+
+    /* This function is used by all three protocol block drivers and therefore
+     * any of these three prefixes may be given.
+     * The return value has to be stored somewhere, otherwise this is an error
+     * due to -Werror=unused-value. */
+    has_prefix =
+        strstart(filename, "host_device:", &filename) ||
+        strstart(filename, "host_cdrom:" , &filename) ||
+        strstart(filename, "host_floppy:", &filename);
+
+    (void)has_prefix;

    /* Read out options */
    while (options && options->name) {
@@ -1727,6 +1826,7 @@ static BlockDriver bdrv_host_device = {
    .instance_size      = sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
    .bdrv_probe_device  = hdev_probe_device,
+    .bdrv_parse_filename = hdev_parse_filename,
    .bdrv_file_open     = hdev_open,
    .bdrv_close         = raw_close,
    .bdrv_reopen_prepare = raw_reopen_prepare,
@@ -1740,6 +1840,7 @@ static BlockDriver bdrv_host_device = {
    .bdrv_aio_writev	= raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_aio_discard   = hdev_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength	= raw_getlength,
@@ -1755,6 +1856,15 @@ static BlockDriver bdrv_host_device = {
 };

 #ifdef __linux__
+static void floppy_parse_filename(const char *filename, QDict *options,
+                                  Error **errp)
+{
+    /* Strip a leading "host_floppy:" if present; the prefix is optional, just
+     * as for "file", so the result of strstart() is not checked. */
+    strstart(filename, "host_floppy:", &filename);
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+}
+
 static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
                       Error **errp)
 {
@@ -1767,7 +1877,7 @@ static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
    /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
    if (ret) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -1860,6 +1970,7 @@ static BlockDriver bdrv_host_floppy = {
    .instance_size      = sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
    .bdrv_probe_device	= floppy_probe_device,
+    .bdrv_parse_filename = floppy_parse_filename,
    .bdrv_file_open     = floppy_open,
    .bdrv_close         = raw_close,
    .bdrv_reopen_prepare = raw_reopen_prepare,
@@ -1871,6 +1982,7 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -1883,7 +1995,20 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_media_changed = floppy_media_changed,
    .bdrv_eject         = floppy_eject,
 };
+#endif

+#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+static void cdrom_parse_filename(const char *filename, QDict *options,
+                                 Error **errp)
+{
+    /* Strip a leading "host_cdrom:" if present; the prefix is optional, just
+     * as for "file", so the result of strstart() is not checked. */
+    strstart(filename, "host_cdrom:", &filename);
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+}
+#endif
+
+#ifdef __linux__
 static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
@@ -1895,7 +2020,7 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,

    /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -1970,6 +2095,7 @@ static BlockDriver bdrv_host_cdrom = {
    .instance_size      = sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
    .bdrv_probe_device	= cdrom_probe_device,
+    .bdrv_parse_filename = cdrom_parse_filename,
    .bdrv_file_open     = cdrom_open,
    .bdrv_close         = raw_close,
    .bdrv_reopen_prepare = raw_reopen_prepare,
@@ -1981,6 +2107,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
@@ -2011,7 +2138,7 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,

    ret = raw_open_common(bs, options, flags, 0, &local_err);
    if (ret) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
        }
        return ret;
@@ -2099,6 +2226,7 @@ static BlockDriver bdrv_host_cdrom = {
    .instance_size      = sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
    .bdrv_probe_device	= cdrom_probe_device,
+    .bdrv_parse_filename = cdrom_parse_filename,
    .bdrv_file_open     = cdrom_open,
    .bdrv_close         = raw_close,
    .bdrv_reopen_prepare = raw_reopen_prepare,
@@ -2110,6 +2238,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,

    .bdrv_truncate      = raw_truncate,
    .bdrv_getlength      = raw_getlength,
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }

+/* Set bs->request_alignment to the host sector size: 2048 for CDs, otherwise
+ * whatever Windows reports.  It is left untouched if every query fails. */
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+    DISK_GEOMETRY_EX dg;
+
+    if (s->type == FTYPE_CD) {
+        bs->request_alignment = 2048;
+        return;
+    }
+    if (s->type == FTYPE_HARDDISK &&
+        DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+                        NULL, 0, &dg, sizeof(dg), &count, NULL)) {
+        bs->request_alignment = dg.Geometry.BytesPerSector;
+        return;
+    }
+
+    /* Fallback, also reached when the geometry ioctl fails.  dg is only valid
+     * if GetDiskFreeSpace() succeeds, so its result must be checked. */
+    if (s->drive_path[0] &&
+        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
+                         &dg.Geometry.BytesPerSector,
+                         &freeClusters, &totalClusters)) {
+        bs->request_alignment = dg.Geometry.BytesPerSector;
+    }
+}
+
 static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
 {
    assert(access_flags != NULL);
@@ -222,6 +251,17 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
    }
 }

+static void raw_parse_filename(const char *filename, QDict *options,
+                               Error **errp)
+{
+    /* Store filename in options under the "filename" key, minus a leading
+     * "file:" if there is one.  "file" is the default protocol, so the prefix
+     * may be absent and the return value of strstart() is not checked. */
+    strstart(filename, "file:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+}
+
 static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -250,7 +290,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -269,6 +309,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

+    if (filename[0] && filename[1] == ':') {
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+    } else if (filename[0] == '\\' && filename[1] == '\\') {
+        s->drive_path[0] = 0;
+    } else {
+        /* Relative path.  */
+        char buf[MAX_PATH];
+        GetCurrentDirectory(MAX_PATH, buf);
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+    }
+
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +344,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        s->aio = aio;
    }

+    raw_probe_alignment(bs);
    ret = 0;
 fail:
    qemu_opts_del(opts);
@@ -429,6 +481,8 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
    int fd;
    int64_t total_size = 0;

+    strstart(filename, "file:", &filename);
+
    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -463,6 +517,7 @@ static BlockDriver bdrv_file = {
    .protocol_name	= "file",
    .instance_size	= sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
+    .bdrv_parse_filename = raw_parse_filename,
    .bdrv_file_open	= raw_open,
    .bdrv_close		= raw_close,
    .bdrv_create	= raw_create,
@@ -538,6 +593,15 @@ static int hdev_probe_device(const char *filename)
    return 0;
 }

+static void hdev_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    /* Strip a leading "host_device:" if present; the prefix is optional, just
+     * as for "file", so the result of strstart() is not checked. */
+    strstart(filename, "host_device:", &filename);
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
+}
+
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -553,7 +617,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
                                      &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto done;
@@ -608,6 +672,7 @@ static BlockDriver bdrv_host_device = {
    .protocol_name	= "host_device",
    .instance_size	= sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
+    .bdrv_parse_filename = hdev_parse_filename,
    .bdrv_probe_device	= hdev_probe_device,
    .bdrv_file_open	= hdev_open,
    .bdrv_close		= raw_close,
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -90,6 +90,12 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    return bdrv_get_info(bs->file, bdi);
 }

+static int raw_refresh_limits(BlockDriverState *bs)
+{
+    bs->bl = bs->file->bl;  /* copy every limit from bs->file unchanged */
+    return 0;               /* nothing here can fail */
+}
+
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    return bdrv_truncate(bs->file, offset);
@@ -140,7 +146,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
    int ret;

    ret = bdrv_create_file(filename, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -150,7 +156,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    bs->sg = bs->file->sg;
-    bs->bl = bs->file->bl;
    return 0;
 }

@@ -182,6 +187,7 @@ static BlockDriver bdrv_raw = {
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
    .bdrv_get_info        = &raw_get_info,
+    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_is_inserted     = &raw_is_inserted,
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -95,18 +95,13 @@ typedef struct RADOSCB {
 #define RBD_FD_WRITE 1

 typedef struct BDRVRBDState {
-    int fds[2];
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char *snap;
-    int event_reader_pos;
-    RADOSCB *event_rcb;
 } BDRVRBDState;

-static void rbd_aio_bh_cb(void *opaque);
-
 static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
@@ -369,9 +364,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
 }

 /*
- * This aio completion is being called from qemu_rbd_aio_event_reader()
- * and runs in qemu context. It schedules a bh, but just in case the aio
- * was not cancelled before.
+ * This aio completion is being called from rbd_finish_bh() and runs in qemu
+ * BH context.
 */
 static void qemu_rbd_complete_aio(RADOSCB *rcb)
 {
@@ -401,36 +395,19 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
            acb->ret = r;
        }
    }
-    /* Note that acb->bh can be NULL in case where the aio was cancelled */
-    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
+
    g_free(rcb);
-}

-/*
- * aio fd read handler. It runs in the qemu context and calls the
- * completion handling of completed rados aio operations.
- */
-static void qemu_rbd_aio_event_reader(void *opaque)
-{
-    BDRVRBDState *s = opaque;
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+    acb->status = 0;

-    ssize_t ret;
-
-    do {
-        char *p = (char *)&s->event_rcb;
-
-        /* now read the rcb pointer that was sent from a non qemu thread */
-        ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_rcb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_rcb)) {
-                s->event_reader_pos = 0;
-                qemu_rbd_complete_aio(s->event_rcb);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }

 /* TODO Convert to fine grained options */
@@ -463,7 +440,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        qemu_opts_del(opts);
@@ -538,23 +515,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,

    bs->read_only = (s->snap != NULL);

-    s->event_reader_pos = 0;
-    r = qemu_pipe(s->fds);
-    if (r < 0) {
-        error_report("error opening eventfd");
-        goto failed;
-    }
-    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
-    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
-                            NULL, s);
-
-
    qemu_opts_del(opts);
    return 0;

-failed:
-    rbd_close(s->image);
 failed_open:
    rados_ioctx_destroy(s->io_ctx);
 failed_shutdown:
@@ -569,10 +532,6 @@ static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;

-    close(s->fds[0]);
-    close(s->fds[1]);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
-
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
@@ -600,34 +559,11 @@ static const AIOCBInfo rbd_aiocb_info = {
    .cancel = qemu_rbd_aio_cancel,
 };

-static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
+static void rbd_finish_bh(void *opaque)
 {
-    int ret = 0;
-    while (1) {
-        fd_set wfd;
-        int fd = s->fds[RBD_FD_WRITE];
-
-        /* send the op pointer to the qemu thread that is responsible
-           for the aio/op completion. Must do it in a qemu thread context */
-        ret = write(fd, (void *)&rcb, sizeof(rcb));
-        if (ret >= 0) {
-            break;
-        }
-        if (errno == EINTR) {
-            continue;
-        }
-        if (errno != EAGAIN) {
-            break;
-        }
-
-        FD_ZERO(&wfd);
-        FD_SET(fd, &wfd);
-        do {
-            ret = select(fd + 1, NULL, &wfd, NULL, NULL);
-        } while (ret < 0 && errno == EINTR);
-    }
-
-    return ret;
+    RADOSCB *rcb = opaque;
+    qemu_bh_delete(rcb->acb->bh);
+    qemu_rbd_complete_aio(rcb);
 }

 /*
@@ -635,40 +571,18 @@ static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
 *
 * Note: this function is being called from a non qemu thread so
 * we need to be careful about what we do here. Generally we only
- * write to the block notification pipe, and do the rest of the
- * io completion handling from qemu_rbd_aio_event_reader() which
- * runs in a qemu context.
+ * schedule a BH, and do the rest of the io completion handling
+ * from rbd_finish_bh() which runs in a qemu context.
 */
 static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
 {
-    int ret;
+    RBDAIOCB *acb = rcb->acb;
+
    rcb->ret = rbd_aio_get_return_value(c);
    rbd_aio_release(c);
-    ret = qemu_rbd_send_pipe(rcb->s, rcb);
-    if (ret < 0) {
-        error_report("failed writing to acb->s->fds");
-        g_free(rcb);
-    }
-}

-/* Callback when all queued rbd_aio requests are complete */
-
-static void rbd_aio_bh_cb(void *opaque)
-{
-    RBDAIOCB *acb = opaque;
-
-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    acb->status = 0;
-
-    if (!acb->cancelled) {
-        qemu_aio_release(acb);
-    }
+    acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+    qemu_bh_schedule(acb->bh);
 }

 static int rbd_aio_discard_wrapper(rbd_image_t image,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -161,7 +161,7 @@ typedef struct SheepdogVdiReq {
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;
-    uint32_t vdi_id;
+    uint32_t base_vdi_id;
    uint8_t copies;
    uint8_t copy_policy;
    uint8_t reserved[2];
@@ -909,9 +909,9 @@ static void co_write_request(void *opaque)
 }

 /*
- * Return a socket discriptor to read/write objects.
+ * Return a socket descriptor to read/write objects.
 *
- * We cannot use this discriptor for other operations because
+ * We cannot use this descriptor for other operations because
 * the block driver may be on waiting response from the server.
 */
 static int get_sheep_fd(BDRVSheepdogState *s)
@@ -1385,7 +1385,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        ret = -EINVAL;
@@ -1493,7 +1493,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)

    memset(&hdr, 0, sizeof(hdr));
    hdr.opcode = SD_OP_NEW_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;

    wlen = SD_MAX_VDI_LEN;

@@ -1534,7 +1534,8 @@ static int sd_prealloc(const char *filename)
    Error *local_err = NULL;
    int ret;

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
@@ -1684,7 +1685,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,

    if (backing_file) {
        BlockDriverState *bs;
-        BDRVSheepdogState *s;
+        BDRVSheepdogState *base;
        BlockDriver *drv;

        /* Currently, only Sheepdog backing image is supported. */
@@ -1695,22 +1696,24 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
            goto out;
        }

-        ret = bdrv_file_open(&bs, backing_file, NULL, 0, &local_err);
+        bs = NULL;
+        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
+                        &local_err);
        if (ret < 0) {
            qerror_report_err(local_err);
            error_free(local_err);
            goto out;
        }

-        s = bs->opaque;
+        base = bs->opaque;

-        if (!is_snapshot(&s->inode)) {
+        if (!is_snapshot(&base->inode)) {
            error_report("cannot clone from a non snapshot vdi");
            bdrv_unref(bs);
            ret = -EINVAL;
            goto out;
        }
-
+        s->inode.vdi_id = base->inode.vdi_id;
        bdrv_unref(bs);
    }

@@ -1743,7 +1746,7 @@ static void sd_close(BlockDriverState *bs)
    memset(&hdr, 0, sizeof(hdr));

    hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;
    wlen = strlen(s->name) + 1;
    hdr.data_length = wlen;
    hdr.flags = SD_FLAG_CMD_WRITE;
@@ -1846,7 +1849,7 @@ static bool sd_delete(BDRVSheepdogState *s)
    unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
    SheepdogVdiReq hdr = {
        .opcode = SD_OP_DEL_VDI,
-        .vdi_id = s->inode.vdi_id,
+        .base_vdi_id = s->inode.vdi_id,
        .data_length = wlen,
        .flags = SD_FLAG_CMD_WRITE,
    };
@@ -1893,7 +1896,7 @@ static int sd_create_branch(BDRVSheepdogState *s)

    /*
     * Even If deletion fails, we will just create extra snapshot based on
-     * the workding VDI which was supposed to be deleted. So no need to
+     * the working VDI which was supposed to be deleted. So no need to
     * false bail out.
     */
    deleted = sd_delete(s);
@@ -2146,6 +2149,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
    /* we don't need to update entire object */
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+    inode = g_malloc(datalen);

    /* refresh inode. */
    fd = connect_to_sdog(s);
@@ -2168,8 +2172,6 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }

-    inode = (SheepdogInode *)g_malloc(datalen);
-
    ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
                      s->inode.nr_copies, datalen, 0, s->cache_flags);

@@ -2183,6 +2185,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
            s->inode.name, s->inode.snap_id, s->inode.vdi_id);

 cleanup:
+    g_free(inode);
    closesocket(fd);
    return ret;
 }
@@ -2191,7 +2194,7 @@ cleanup:
 * We implement rollback(loadvm) operation to the specified snapshot by
 * 1) switch to the snapshot
 * 2) rely on sd_create_branch to delete working VDI and
- * 3) create a new working VDI based on the speicified snapshot
+ * 3) create a new working VDI based on the specified snapshot
 */
 static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 {
@@ -2442,11 +2445,12 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 {
    BDRVSheepdogState *s = bs->opaque;
    SheepdogInode *inode = &s->inode;
-    unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE,
+    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+    unsigned long start = offset / SD_DATA_OBJ_SIZE,
                  end = DIV_ROUND_UP((sector_num + nb_sectors) *
                                     BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
    unsigned long idx;
-    int64_t ret = BDRV_BLOCK_DATA;
+    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;

    for (idx = start; idx < end; idx++) {
        if (inode->data_vdi_id[idx] == 0) {
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -345,7 +345,7 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
        ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
    }

--- a/block/stream.c
+++ b/block/stream.c
@@ -75,6 +75,8 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
        unused->backing_hd = NULL;
        bdrv_unref(unused);
    }
+
+    bdrv_refresh_limits(top);
 }

 static void coroutine_fn stream_run(void *opaque)
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -31,7 +31,7 @@
 * Allocation of blocks could be optimized (less writes to block map and
 * header).
 *
- * Read and write of adjacents blocks could be done in one operation
+ * Read and write of adjacent blocks could be done in one operation
 * (current code uses one operation per block (1 MiB).
 *
 * The code is not thread safe (missing locks for changes in header and
@@ -120,6 +120,11 @@ typedef unsigned char uuid_t[16];

 #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)

+/* Upper bounds enforced by vdi_open() and vdi_create() */
+#define VDI_BLOCKS_IN_IMAGE_MAX  0x3fffffff     /* 0xffffffff / 4 */
+#define VDI_DISK_SIZE_MAX        ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
+                                  (uint64_t)DEFAULT_CLUSTER_SIZE)
+
 #if !defined(CONFIG_UUID)
 static inline void uuid_generate(uuid_t out)
 {
@@ -385,6 +390,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    vdi_header_print(&header);
 #endif

+    if (header.disk_size > VDI_DISK_SIZE_MAX) {
+        error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
+                          ", max supported is 0x%" PRIx64 ")",
+                          header.disk_size, VDI_DISK_SIZE_MAX);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
    if (header.disk_size % SECTOR_SIZE != 0) {
        /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
           We accept them but round the disk size to the next multiple of
@@ -395,43 +408,56 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (header.signature != VDI_SIGNATURE) {
-        logout("bad vdi signature %08x\n", header.signature);
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature);
+        ret = -EINVAL;
        goto fail;
    } else if (header.version != VDI_VERSION_1_1) {
-        logout("unsupported version %u.%u\n",
-               header.version >> 16, header.version & 0xffff);
+        error_setg(errp, "unsupported VDI image (version %u.%u)",
+                   header.version >> 16, header.version & 0xffff);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.offset_bmap % SECTOR_SIZE != 0) {
        /* We only support block maps which start on a sector boundary. */
-        logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
+        error_setg(errp, "unsupported VDI image (unaligned block map offset "
+                   "0x%x)", header.offset_bmap);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.offset_data % SECTOR_SIZE != 0) {
        /* We only support data blocks which start on a sector boundary. */
-        logout("unsupported data offset 0x%x B\n", header.offset_data);
+        error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)",
+                   header.offset_data);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.sector_size != SECTOR_SIZE) {
-        logout("unsupported sector size %u B\n", header.sector_size);
+        error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
+                   header.sector_size, SECTOR_SIZE);
        ret = -ENOTSUP;
        goto fail;
-    } else if (header.block_size != 1 * MiB) {
-        logout("unsupported block size %u B\n", header.block_size);
+    } else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
+        error_setg(errp, "unsupported VDI image (block size %u is not %u)",
+                   header.block_size, DEFAULT_CLUSTER_SIZE);
        ret = -ENOTSUP;
        goto fail;
    } else if (header.disk_size >
               (uint64_t)header.blocks_in_image * header.block_size) {
-        logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
+        error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", "
+                   "image bitmap has room for %" PRIu64 ")",
+                   header.disk_size,
+                   (uint64_t)header.blocks_in_image * header.block_size);
        ret = -ENOTSUP;
        goto fail;
    } else if (!uuid_is_null(header.uuid_link)) {
-        logout("link uuid != 0, unsupported\n");
+        error_setg(errp, "unsupported VDI image (non-NULL link UUID)");
        ret = -ENOTSUP;
        goto fail;
    } else if (!uuid_is_null(header.uuid_parent)) {
-        logout("parent uuid != 0, unsupported\n");
+        error_setg(errp, "unsupported VDI image (non-NULL parent UUID)");
+        ret = -ENOTSUP;
+        goto fail;
+    } else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) {
+        error_setg(errp, "unsupported VDI image "
+                         "(too many blocks %u, max is %u)",
+                          header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -682,11 +708,20 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
        options++;
    }

+    if (bytes > VDI_DISK_SIZE_MAX) {
+        result = -ENOTSUP;
+        error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
+                          ", max supported is 0x%" PRIx64 ")",
+                          bytes, VDI_DISK_SIZE_MAX);
+        goto exit;
+    }
+
    fd = qemu_open(filename,
                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
                   0644);
    if (fd < 0) {
-        return -errno;
+        result = -errno;
+        goto exit;
    }

    /* We need enough blocks to store the given disk size,
@@ -747,6 +782,7 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
        result = -errno;
    }

+exit:
    return result;
 }

--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -578,7 +578,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
    total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;


-    /* read_desc() will incrememnt the read idx */
+    /* read_desc() will increment the read idx */
    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
    if (ret < 0) {
        goto free_and_exit;
@@ -965,8 +965,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
    cpu_to_le32s((uint32_t *)(buffer + 4));

    /* now write to the log */
-    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
-                           desc_sectors + sectors);
+    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                                 desc_sectors + sectors);
    if (ret < 0) {
        goto exit;
    }
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -374,7 +374,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
        inactive_header->log_guid = *log_guid;
    }

-    vhdx_write_header(bs->file, inactive_header, header_offset, true);
+    ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
    if (ret < 0) {
        goto exit;
    }
@@ -402,9 +402,10 @@ int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
 }

 /* opens the specified header block from the VHDX file header section */
-static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
+static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
+                              Error **errp)
 {
-    int ret = 0;
+    int ret;
    VHDXHeader *header1;
    VHDXHeader *header2;
    bool h1_valid = false;
@@ -462,7 +463,6 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
    } else if (!h1_valid && h2_valid) {
        s->curr_header = 1;
    } else if (!h1_valid && !h2_valid) {
-        ret = -EINVAL;
        goto fail;
    } else {
        /* If both headers are valid, then we choose the active one by the
@@ -473,27 +473,22 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
        } else if (h2_seq > h1_seq) {
            s->curr_header = 1;
        } else {
-            ret = -EINVAL;
            goto fail;
        }
    }

    vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
                            s->headers[s->curr_header]->log_length);
-
-    ret = 0;
-
    goto exit;

 fail:
-    qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
+    error_setg_errno(errp, -ret, "No valid VHDX header found");
    qemu_vfree(header1);
    qemu_vfree(header2);
    s->headers[0] = NULL;
    s->headers[1] = NULL;
 exit:
    qemu_vfree(buffer);
-    return ret;
 }


@@ -785,12 +780,20 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    le32_to_cpus(&s->logical_sector_size);
    le32_to_cpus(&s->physical_sector_size);

-    if (s->logical_sector_size == 0 || s->params.block_size == 0) {
+    if (s->params.block_size < VHDX_BLOCK_SIZE_MIN ||
+        s->params.block_size > VHDX_BLOCK_SIZE_MAX) {
        ret = -EINVAL;
        goto exit;
    }

-    /* both block_size and sector_size are guaranteed powers of 2 */
+    /* only 2 supported sector sizes */
+    if (s->logical_sector_size != 512 && s->logical_sector_size != 4096) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* Both block_size and sector_size are guaranteed powers of 2, below.
+       Due to range checks above, s->sectors_per_block can never be < 256 */
    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
                     (uint64_t)s->logical_sector_size /
@@ -878,7 +881,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    int ret = 0;
    uint32_t i;
    uint64_t signature;
-
+    Error *local_err = NULL;

    s->bat = NULL;
    s->first_visible_write = true;
@@ -901,8 +904,10 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     * header update */
    vhdx_guid_generate(&s->session_guid);

-    ret = vhdx_parse_header(bs, s);
-    if (ret < 0) {
+    vhdx_parse_header(bs, s, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto fail;
    }

@@ -1797,7 +1802,9 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
        goto exit;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    bs = NULL;
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto exit;
@@ -1810,13 +1817,13 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                              &creator_items, NULL);
    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
    if (ret < 0) {
        goto delete_and_exit;
    }
    if (creator) {
-        bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator,
-                    creator_items * sizeof(gunichar2));
+        ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
+                          creator, creator_items * sizeof(gunichar2));
        if (ret < 0) {
            goto delete_and_exit;
        }
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -61,7 +61,7 @@
 /* These structures are ones that are defined in the VHDX specification
 * document */

-#define VHDX_FILE_SIGNATURE 0x656C696678646876  /* "vhdxfile" in ASCII */
+#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */
 typedef struct VHDXFileIdentifier {
    uint64_t    signature;              /* "vhdxfile" in ASCII */
    uint16_t    creator[256];           /* optional; utf-16 string to identify
@@ -238,7 +238,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
 /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state
   other bits are reserved */
 #define VHDX_BAT_STATE_BIT_MASK 0x07
-#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000 /* upper 44 bits */
+#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000ULL /* upper 44 bits */
 typedef uint64_t VHDXBatEntry;

 /* ---- METADATA REGION STRUCTURES ---- */
@@ -247,7 +247,7 @@ typedef uint64_t VHDXBatEntry;
 #define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
 #define VHDX_METADATA_TABLE_MAX_SIZE \
    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
-#define VHDX_METADATA_SIGNATURE 0x617461646174656D  /* "metadata" in ASCII */
+#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
 typedef struct QEMU_PACKED VHDXMetadataTableHeader {
    uint64_t    signature;              /* "metadata" in ASCII */
    uint16_t    reserved;
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -428,10 +428,6 @@ static int vmdk_add_extent(BlockDriverState *bs,
    extent->l2_size = l2_size;
    extent->cluster_sectors = flat ? sectors : cluster_sectors;

-    if (!flat) {
-        bs->bl.write_zeroes_alignment =
-            MAX(bs->bl.write_zeroes_alignment, cluster_sectors);
-    }
    if (s->num_extents > 1) {
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
    } else {
@@ -530,8 +526,34 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    return ret;
 }

-static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset, Error **errp);
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
+                               Error **errp);
+
+/*
+ * Read the start of a VMDK descriptor / extent from @file.
+ *
+ * Reads at most 1 MiB, beginning at byte @desc_offset, into a freshly
+ * allocated buffer.  The buffer is zero-filled and one byte larger than the
+ * amount read, so it can be parsed as a NUL-terminated string.
+ *
+ * Returns the buffer (to be released with g_free()) on success.  On failure
+ * sets @errp and returns NULL.
+ */
+static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
+                            Error **errp)
+{
+    int64_t size;
+    char *buf;
+    int ret;
+
+    size = bdrv_getlength(file);
+    if (size < 0) {
+        error_setg_errno(errp, -size, "Could not access file");
+        return NULL;
+    }
+
+    if (size < 4) {
+        /* Callers fetch a 4-byte magic from the start of the buffer
+         * (ldl_be_p(buf)); a shorter file would make them read past the
+         * end of the allocation, and cannot be a valid image anyway. */
+        error_setg(errp, "File is too small, not a valid image");
+        return NULL;
+    }
+
+    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
+    buf = g_malloc0(size + 1);  /* +1 keeps a terminating NUL byte */
+
+    ret = bdrv_pread(file, desc_offset, buf, size);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not read from file");
+        g_free(buf);
+        return NULL;
+    }
+
+    return buf;
+}

 static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
@@ -550,11 +572,18 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
                         file->filename);
+        return -EINVAL;
    }
    if (header.capacity == 0) {
        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
-            return vmdk_open_desc_file(bs, flags, desc_offset << 9, errp);
+            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+            if (!buf) {
+                return -EINVAL;
+            }
+            ret = vmdk_open_desc_file(bs, flags, buf, errp);
+            g_free(buf);
+            return ret;
        }
    }

@@ -613,8 +642,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        char buf[64];
        snprintf(buf, sizeof(buf), "VMDK version %d",
                 le32_to_cpu(header.version));
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                bs->device_name, "vmdk", buf);
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                  bs->device_name, "vmdk", buf);
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
        /* VMware KB 2064959 explains that version 3 added support for
@@ -626,7 +655,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    }

    if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
-        error_report("L2 table size too big");
+        error_setg(errp, "L2 table size too big");
        return -EINVAL;
    }

@@ -640,6 +669,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
+    if (bdrv_getlength(file) <
+            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+        error_setg(errp, "File truncated, expecting at least %lld bytes",
+                   le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE);
+        return -EINVAL;
+    }
+
    ret = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
@@ -654,6 +690,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    }
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
+    if (extent->compressed) {
+        g_free(s->create_type);
+        s->create_type = g_strdup("streamOptimized");
+    }
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
    extent->version = le32_to_cpu(header.version);
    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
@@ -694,16 +734,12 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,

 /* Open an extent file and append to bs array */
 static int vmdk_open_sparse(BlockDriverState *bs,
-                            BlockDriverState *file,
-                            int flags, Error **errp)
+                            BlockDriverState *file, int flags,
+                            char *buf, Error **errp)
 {
    uint32_t magic;

-    if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
-        return -EIO;
-    }
-
-    magic = be32_to_cpu(magic);
+    magic = ldl_be_p(buf);
    switch (magic) {
        case VMDK3_MAGIC:
            return vmdk_open_vmfs_sparse(bs, file, flags, errp);
@@ -712,7 +748,8 @@ static int vmdk_open_sparse(BlockDriverState *bs,
            return vmdk_open_vmdk4(bs, file, flags, errp);
            break;
        default:
-            return -EMEDIUMTYPE;
+            error_setg(errp, "Image not in VMDK format");
+            return -EINVAL;
            break;
    }
 }
@@ -769,8 +806,9 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,

        path_combine(extent_path, sizeof(extent_path),
                desc_file_path, fname);
-        ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags,
-                             errp);
+        extent_file = NULL;
+        ret = bdrv_open(&extent_file, extent_path, NULL, NULL,
+                        bs->open_flags | BDRV_O_PROTOCOL, NULL, errp);
        if (ret) {
            return ret;
        }
@@ -787,8 +825,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, errp);
+            char *buf = vmdk_read_desc(extent_file, 0, errp);
+            if (!buf) {
+                ret = -EINVAL;
+            } else {
+                ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
+            }
            if (ret) {
+                g_free(buf);
                bdrv_unref(extent_file);
                return ret;
            }
@@ -811,29 +855,16 @@ next_line:
    return 0;
 }

-static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset, Error **errp)
+static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
+                               Error **errp)
 {
    int ret;
-    char *buf = NULL;
    char ct[128];
    BDRVVmdkState *s = bs->opaque;
-    int64_t size;

-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        return -EINVAL;
-    }
-
-    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
-    buf = g_malloc0(size + 1);
-
-    ret = bdrv_pread(bs->file, desc_offset, buf, size);
-    if (ret < 0) {
-        goto exit;
-    }
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        ret = -EMEDIUMTYPE;
+        error_setg(errp, "invalid VMDK image descriptor");
+        ret = -EINVAL;
        goto exit;
    }
    if (strcmp(ct, "monolithicFlat") &&
@@ -849,24 +880,37 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
    s->desc_offset = 0;
    ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
 exit:
-    g_free(buf);
    return ret;
 }

 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
+    char *buf = NULL;
    int ret;
    BDRVVmdkState *s = bs->opaque;
+    uint32_t magic;

-    if (vmdk_open_sparse(bs, bs->file, flags, errp) == 0) {
-        s->desc_offset = 0x200;
-    } else {
-        ret = vmdk_open_desc_file(bs, flags, 0, errp);
-        if (ret) {
-            goto fail;
-        }
+    buf = vmdk_read_desc(bs->file, 0, errp);
+    if (!buf) {
+        return -EINVAL;
    }
+
+    magic = ldl_be_p(buf);
+    switch (magic) {
+        case VMDK3_MAGIC:
+        case VMDK4_MAGIC:
+            ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp);
+            s->desc_offset = 0x200;
+            break;
+        default:
+            ret = vmdk_open_desc_file(bs, flags, buf, errp);
+            break;
+    }
+    if (ret) {
+        goto fail;
+    }
+
    /* try to open parent images, if exist */
    ret = vmdk_parent_open(bs);
    if (ret) {
@@ -881,16 +925,34 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "vmdk", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);
-
+    g_free(buf);
    return 0;

 fail:
+    g_free(buf);
    g_free(s->create_type);
    s->create_type = NULL;
    vmdk_free_extents(bs);
    return ret;
 }

+
+/*
+ * Raise bs->bl.write_zeroes_alignment to the largest cluster size (in
+ * sectors) found among the non-flat extents of the image.  Flat extents do
+ * not contribute.  Always returns 0.
+ */
+static int vmdk_refresh_limits(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int idx;
+
+    for (idx = 0; idx < s->num_extents; idx++) {
+        VmdkExtent *extent = &s->extents[idx];
+
+        if (extent->flat) {
+            continue;
+        }
+        bs->bl.write_zeroes_alignment =
+            MAX(bs->bl.write_zeroes_alignment, extent->cluster_sectors);
+    }
+
+    return 0;
+}
+
 static int get_whole_cluster(BlockDriverState *bs,
                VmdkExtent *extent,
                uint64_t cluster_offset,
@@ -1122,7 +1184,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
        break;
    case VMDK_OK:
        ret = BDRV_BLOCK_DATA;
-        if (extent->file == bs->file) {
+        if (extent->file == bs->file && !extent->compressed) {
            ret |= BDRV_BLOCK_OFFSET_VALID | offset;
        }

@@ -1325,8 +1387,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
 {
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
-    int n, ret;
-    int64_t index_in_cluster;
+    int ret;
+    int64_t index_in_cluster, n;
    uint64_t extent_begin_sector, extent_relative_sector_num;
    uint64_t cluster_offset;
    VmdkMetaData m_data;
@@ -1469,7 +1531,9 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    assert(bs == NULL);
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto exit;
@@ -1478,7 +1542,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    if (flat) {
        ret = bdrv_truncate(bs, filesize);
        if (ret < 0) {
-            error_setg(errp, "Could not truncate file");
+            error_setg_errno(errp, -ret, "Could not truncate file");
        }
        goto exit;
    }
@@ -1538,7 +1602,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,

    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
-        error_setg(errp, "Could not truncate file");
+        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
    }

@@ -1731,10 +1795,10 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
        goto exit;
    }
    if (backing_file) {
-        BlockDriverState *bs = bdrv_new("");
-        ret = bdrv_open(bs, backing_file, NULL, BDRV_O_NO_BACKING, NULL, errp);
+        BlockDriverState *bs = NULL;
+        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+                        errp);
        if (ret != 0) {
-            bdrv_unref(bs);
            goto exit;
        }
        if (strcmp(bs->drv->format_name, "vmdk")) {
@@ -1807,7 +1871,9 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
            goto exit;
        }
    }
-    ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    assert(new_bs == NULL);
+    ret = bdrv_open(&new_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write description");
        goto exit;
@@ -1822,7 +1888,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
    if (desc_offset == 0) {
        ret = bdrv_truncate(new_bs, desc_len);
        if (ret < 0) {
-            error_setg(errp, "Could not truncate file");
+            error_setg_errno(errp, -ret, "Could not truncate file");
        }
    }
 exit:
@@ -1918,6 +1984,53 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
    return info;
 }

+/*
+ * Read-only consistency check of a VMDK image.
+ *
+ * Walks the virtual disk one cluster at a time and verifies that every
+ * sector maps to an extent, that its cluster offset can be looked up, and
+ * that an allocated cluster does not lie beyond the end of the extent file.
+ * The first problem found is reported on stderr and counted once in
+ * result->corruptions; the walk stops there.
+ *
+ * Returns 0 when the walk completed or a corruption was recorded,
+ * -ENOTSUP when a repair mode is requested (@fix != 0), or the negative
+ * value returned by bdrv_getlength() when a file length cannot be
+ * determined.
+ */
+static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
+                      BdrvCheckMode fix)
+{
+    BDRVVmdkState *s = bs->opaque;
+    VmdkExtent *extent = NULL;
+    int64_t sector_num = 0;
+    int64_t length, extent_length, total_sectors;
+    int ret;
+    uint64_t cluster_offset;
+
+    if (fix) {
+        return -ENOTSUP;
+    }
+
+    /* bdrv_getlength() reports failure as a negative value; do not turn
+     * that into a bogus sector count. */
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        return length;
+    }
+    total_sectors = length / BDRV_SECTOR_SIZE;
+
+    for (;;) {
+        if (sector_num >= total_sectors) {
+            return 0;
+        }
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            fprintf(stderr,
+                    "ERROR: could not find extent for sector %" PRId64 "\n",
+                    sector_num);
+            break;
+        }
+        ret = get_cluster_offset(bs, extent, NULL,
+                                 sector_num << BDRV_SECTOR_BITS,
+                                 0, &cluster_offset);
+        if (ret == VMDK_ERROR) {
+            fprintf(stderr,
+                    "ERROR: could not get cluster_offset for sector %"
+                    PRId64 "\n", sector_num);
+            break;
+        }
+        if (ret == VMDK_OK) {
+            /* Check the length before comparing: a negative error value
+             * compared against the unsigned cluster_offset would convert
+             * to a huge number and silently skip the EOF check. */
+            extent_length = bdrv_getlength(extent->file);
+            if (extent_length < 0) {
+                fprintf(stderr,
+                        "ERROR: could not get extent file length for sector %"
+                        PRId64 "\n", sector_num);
+                return extent_length;
+            }
+            if (cluster_offset >= (uint64_t)extent_length) {
+                fprintf(stderr,
+                        "ERROR: cluster offset for sector %"
+                        PRId64 " points after EOF\n", sector_num);
+                break;
+            }
+        }
+        sector_num += extent->cluster_sectors;
+    }
+
+    result->corruptions++;
+    return 0;
+}
+
 static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
 {
    int i;
@@ -1991,6 +2104,7 @@ static BlockDriver bdrv_vmdk = {
    .instance_size                = sizeof(BDRVVmdkState),
    .bdrv_probe                   = vmdk_probe,
    .bdrv_open                    = vmdk_open,
+    .bdrv_check                   = vmdk_check,
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
    .bdrv_read                    = vmdk_co_read,
    .bdrv_write                   = vmdk_co_write,
@@ -2002,6 +2116,7 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
    .bdrv_has_zero_init           = vmdk_has_zero_init,
    .bdrv_get_specific_info       = vmdk_get_specific_info,
+    .bdrv_refresh_limits          = vmdk_refresh_limits,

    .create_options               = vmdk_create_options,
 };
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -45,6 +45,8 @@ enum vhd_type {
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
 #define VHD_TIMESTAMP_BASE 946684800

+#define VHD_MAX_SECTORS       (65535LL * 255 * 255)
+
 // always big-endian
 typedef struct vhd_footer {
    char        creator[8]; // "conectix"
@@ -164,6 +166,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    VHDDynDiskHeader *dyndisk_header;
    uint8_t buf[HEADER_SIZE];
    uint32_t checksum;
+    uint64_t computed_size;
    int disk_type = VHD_DYNAMIC;
    int ret;

@@ -190,7 +193,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }
        if (strncmp(footer->creator, "conectix", 8)) {
-            ret = -EMEDIUMTYPE;
+            error_setg(errp, "invalid VPC image");
+            ret = -EINVAL;
            goto fail;
        }
        disk_type = VHD_FIXED;
@@ -221,7 +225,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Allow a maximum disk size of approximately 2 TB */
-    if (bs->total_sectors >= 65535LL * 255 * 255) {
+    if (bs->total_sectors >= VHD_MAX_SECTORS) {
        ret = -EFBIG;
        goto fail;
    }
@@ -241,10 +245,31 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        }

        s->block_size = be32_to_cpu(dyndisk_header->block_size);
+        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
+            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
+            ret = -EINVAL;
+            goto fail;
+        }
        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;

        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
-        s->pagetable = g_malloc(s->max_table_entries * 4);
+
+        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
+            ret = -EINVAL;
+            goto fail;
+        }
+        if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        computed_size = (uint64_t) s->max_table_entries * s->block_size;
+        if (computed_size < bs->total_sectors * 512) {
+            ret = -EINVAL;
+            goto fail;
+        }
+
+        s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);

        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);

@@ -297,7 +322,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    return 0;

 fail:
-    g_free(s->pagetable);
+    qemu_vfree(s->pagetable);
 #ifdef CACHE
    g_free(s->pageentry_u8);
 #endif
@@ -832,7 +857,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
 static void vpc_close(BlockDriverState *bs)
 {
    BDRVVPCState *s = bs->opaque;
-    g_free(s->pagetable);
+    qemu_vfree(s->pagetable);
 #ifdef CACHE
    g_free(s->pageentry_u8);
 #endif
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -787,7 +787,9 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
 	    s->current_mapping->path=buffer;
 	    s->current_mapping->read_only =
 		(st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0;
-	}
+        } else {
+            g_free(buffer);
+        }
    }
    closedir(dir);

@@ -1085,17 +1087,15 @@ DLOG(if (stderr == NULL) {

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    dirname = qemu_opt_get(opts, "dir");
    if (!dirname) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "vvfat block driver requires "
-                      "a 'dir' option");
+        error_setg(errp, "vvfat block driver requires a 'dir' option");
        ret = -EINVAL;
        goto fail;
    }
@@ -1121,6 +1121,7 @@ DLOG(if (stderr == NULL) {
        if (!s->fat_type) {
            s->fat_type = 16;
        }
+        s->first_sectors_number = 0x40;
        cyls = s->fat_type == 12 ? 64 : 1024;
        heads = 16;
        secs = 63;
@@ -1135,8 +1136,7 @@ DLOG(if (stderr == NULL) {
    case 12:
        break;
    default:
-        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Valid FAT types are only "
-                      "12, 16 and 32");
+        error_setg(errp, "Valid FAT types are only 12, 16 and 32");
        ret = -EINVAL;
        goto fail;
    }
@@ -1149,7 +1149,6 @@ DLOG(if (stderr == NULL) {

    s->current_cluster=0xffffffff;

-    s->first_sectors_number=0x40;
    /* read only is the default for safety */
    bs->read_only = 1;
    s->qcow = s->write_target = NULL;
@@ -1867,7 +1866,7 @@ static int check_directory_consistency(BDRVVVFATState *s,

 	if (s->used_clusters[cluster_num] & USED_ANY) {
 	    fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num);
-	    return 0;
+            goto fail;
 	}
 	s->used_clusters[cluster_num] = USED_DIRECTORY;

@@ -2936,15 +2935,13 @@ static int enable_write_target(BDRVVVFATState *s)
        goto err;
    }

-    s->qcow = bdrv_new("");
-
-    ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
+    s->qcow = NULL;
+    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
            BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow,
            &local_err);
    if (ret < 0) {
        qerror_report_err(local_err);
        error_free(local_err);
-        bdrv_unref(s->qcow);
        goto err;
    }

--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,8 +27,9 @@ static void nbd_accept(void *opaque)
    socklen_t addr_len = sizeof(addr);

    int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
-    if (fd >= 0) {
-        nbd_client_new(NULL, fd, nbd_client_put);
+    if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) {
+        shutdown(fd, 2);
+        close(fd);
    }
 }

@@ -91,6 +92,10 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
        return;
    }
+    if (!bdrv_is_inserted(bs)) {
+        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+        return;
+    }

    if (!has_writable) {
        writable = false;
--- a/blockdev.c
+++ b/blockdev.c
@@ -307,12 +307,10 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;

 /* Takes the ownership of bs_opts */
-static DriveInfo *blockdev_init(QDict *bs_opts,
-                                BlockInterfaceType type,
+static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
                                Error **errp)
 {
    const char *buf;
-    const char *file = NULL;
    const char *serial;
    int ro = 0;
    int bdrv_flags = 0;
@@ -332,13 +330,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
     * stay in bs_opts for processing by bdrv_open(). */
    id = qdict_get_try_str(bs_opts, "id");
    opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, &error);
-    if (error_is_set(&error)) {
+    if (error) {
        error_propagate(errp, error);
-        return NULL;
+        goto err_no_opts;
    }

    qemu_opts_absorb_qdict(opts, bs_opts, &error);
-    if (error_is_set(&error)) {
+    if (error) {
        error_propagate(errp, error);
        goto early_err;
    }
@@ -354,7 +352,6 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
    ro = qemu_opt_get_bool(opts, "read-only", 0);
    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);

-    file = qemu_opt_get(opts, "file");
    serial = qemu_opt_get(opts, "serial");

    if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
@@ -439,13 +436,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts,

    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
-        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
-            error_setg(errp, "werror is not supported by this bus type");
-            goto early_err;
-        }
-
        on_write_error = parse_block_error_action(buf, 0, &error);
-        if (error_is_set(&error)) {
+        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
@@ -453,25 +445,25 @@ static DriveInfo *blockdev_init(QDict *bs_opts,

    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
-        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
-            error_report("rerror is not supported by this bus type");
-            goto early_err;
-        }
-
        on_read_error = parse_block_error_action(buf, 1, &error);
-        if (error_is_set(&error)) {
+        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
    }

+    if (bdrv_find_node(qemu_opts_id(opts))) {
+        error_setg(errp, "device id=%s is conflicting with a node-name",
+                   qemu_opts_id(opts));
+        goto early_err;
+    }
+
    /* init */
    dinfo = g_malloc0(sizeof(*dinfo));
    dinfo->id = g_strdup(qemu_opts_id(opts));
    dinfo->bdrv = bdrv_new(dinfo->id);
    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
    dinfo->bdrv->read_only = ro;
-    dinfo->type = type;
    dinfo->refcount = 1;
    if (serial != NULL) {
        dinfo->serial = g_strdup(serial);
@@ -512,7 +504,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
    bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

    QINCREF(bs_opts);
-    ret = bdrv_open(dinfo->bdrv, file, bs_opts, bdrv_flags, drv, &error);
+    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);

    if (ret < 0) {
        error_setg(errp, "could not open disk image %s: %s",
@@ -535,8 +527,9 @@ err:
    QTAILQ_REMOVE(&drives, dinfo, next);
    g_free(dinfo);
 early_err:
-    QDECREF(bs_opts);
    qemu_opts_del(opts);
+err_no_opts:
+    QDECREF(bs_opts);
    return NULL;
 }

@@ -599,6 +592,10 @@ QemuOptsList qemu_legacy_drive_opts = {
            .name = "addr",
            .type = QEMU_OPT_STRING,
            .help = "pci address (virtio only)",
+        },{
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+            .help = "file name",
        },

        /* Options that are passed on, but have special semantics with -drive */
@@ -606,6 +603,14 @@ QemuOptsList qemu_legacy_drive_opts = {
            .name = "read-only",
            .type = QEMU_OPT_BOOL,
            .help = "open drive file as read-only",
+        },{
+            .name = "rerror",
+            .type = QEMU_OPT_STRING,
+            .help = "read error action",
+        },{
+            .name = "werror",
+            .type = QEMU_OPT_STRING,
+            .help = "write error action",
        },{
            .name = "copy-on-read",
            .type = QEMU_OPT_BOOL,
@@ -627,8 +632,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    int cyls, heads, secs, translation;
    int max_devs, bus_id, unit_id, index;
    const char *devaddr;
+    const char *werror, *rerror;
    bool read_only = false;
    bool copy_on_read;
+    const char *filename;
    Error *local_err = NULL;

    /* Change legacy command line options into QMP ones */
@@ -685,7 +692,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
                                   &error_abort);
    qemu_opts_absorb_qdict(legacy_opts, bs_opts, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        qerror_report_err(local_err);
        error_free(local_err);
        goto fail;
@@ -773,6 +780,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
            translation = BIOS_ATA_TRANSLATION_NONE;
        } else if (!strcmp(value, "lba")) {
            translation = BIOS_ATA_TRANSLATION_LBA;
+        } else if (!strcmp(value, "large")) {
+            translation = BIOS_ATA_TRANSLATION_LARGE;
+        } else if (!strcmp(value, "rechs")) {
+            translation = BIOS_ATA_TRANSLATION_RECHS;
        } else if (!strcmp(value, "auto")) {
            translation = BIOS_ATA_TRANSLATION_AUTO;
        } else {
@@ -867,16 +878,40 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        }
    }

+    filename = qemu_opt_get(legacy_opts, "file");
+
+    /* Check werror/rerror compatibility with if=... */
+    werror = qemu_opt_get(legacy_opts, "werror");
+    if (werror != NULL) {
+        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
+            type != IF_NONE) {
+            error_report("werror is not supported by this bus type");
+            goto fail;
+        }
+        qdict_put(bs_opts, "werror", qstring_from_str(werror));
+    }
+
+    rerror = qemu_opt_get(legacy_opts, "rerror");
+    if (rerror != NULL) {
+        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
+            type != IF_NONE) {
+            error_report("rerror is not supported by this bus type");
+            goto fail;
+        }
+        qdict_put(bs_opts, "rerror", qstring_from_str(rerror));
+    }
+
    /* Actual block device init: Functionality shared with blockdev-add */
-    dinfo = blockdev_init(bs_opts, type, &local_err);
+    dinfo = blockdev_init(filename, bs_opts, &local_err);
+    bs_opts = NULL;
    if (dinfo == NULL) {
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            qerror_report_err(local_err);
            error_free(local_err);
        }
        goto fail;
    } else {
-        assert(!error_is_set(&local_err));
+        assert(!local_err);
    }

    /* Set legacy DriveInfo fields */
@@ -888,6 +923,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    dinfo->secs = secs;
    dinfo->trans = translation;

+    dinfo->type = type;
    dinfo->bus = bus_id;
    dinfo->unit = unit_id;
    dinfo->devaddr = devaddr;
@@ -905,6 +941,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)

 fail:
    qemu_opts_del(legacy_opts);
+    QDECREF(bs_opts);
    return dinfo;
 }

@@ -942,14 +979,22 @@ static void blockdev_do_action(int kind, void *data, Error **errp)
    qmp_transaction(&list, errp);
 }

-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
+                                bool has_node_name, const char *node_name,
+                                const char *snapshot_file,
+                                bool has_snapshot_node_name,
+                                const char *snapshot_node_name,
                                bool has_format, const char *format,
-                                bool has_mode, enum NewImageMode mode,
-                                Error **errp)
+                                bool has_mode, NewImageMode mode, Error **errp)
 {
    BlockdevSnapshot snapshot = {
+        .has_device = has_device,
        .device = (char *) device,
+        .has_node_name = has_node_name,
+        .node_name = (char *) node_name,
        .snapshot_file = (char *) snapshot_file,
+        .has_snapshot_node_name = has_snapshot_node_name,
+        .snapshot_node_name = (char *) snapshot_node_name,
        .has_format = has_format,
        .format = (char *) format,
        .has_mode = has_mode,
@@ -1004,7 +1049,7 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
    }

    ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -1017,7 +1062,7 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
    }

    bdrv_snapshot_delete(bs, id, name, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -1187,8 +1232,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
 {
    BlockDriver *drv;
    int flags, ret;
+    QDict *options = NULL;
    Error *local_err = NULL;
+    bool has_device = false;
    const char *device;
+    bool has_node_name = false;
+    const char *node_name;
+    bool has_snapshot_node_name = false;
+    const char *snapshot_node_name;
    const char *new_image_file;
    const char *format = "qcow2";
    enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1199,7 +1250,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
    /* get parameters */
    g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);

+    has_device = action->blockdev_snapshot_sync->has_device;
    device = action->blockdev_snapshot_sync->device;
+    has_node_name = action->blockdev_snapshot_sync->has_node_name;
+    node_name = action->blockdev_snapshot_sync->node_name;
+    has_snapshot_node_name =
+        action->blockdev_snapshot_sync->has_snapshot_node_name;
+    snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name;
+
    new_image_file = action->blockdev_snapshot_sync->snapshot_file;
    if (action->blockdev_snapshot_sync->has_format) {
        format = action->blockdev_snapshot_sync->format;
@@ -1215,9 +1273,21 @@ static void external_snapshot_prepare(BlkTransactionState *common,
        return;
    }

-    state->old_bs = bdrv_find(device);
-    if (!state->old_bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
+                                   has_node_name ? node_name : NULL,
+                                   &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (has_node_name && !has_snapshot_node_name) {
+        error_setg(errp, "New snapshot node name missing");
+        return;
+    }
+
+    if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) {
+        error_setg(errp, "New snapshot node name already existing");
        return;
    }

@@ -1238,7 +1308,7 @@ static void external_snapshot_prepare(BlkTransactionState *common,
        }
    }

-    if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) {
+    if (!bdrv_is_first_non_filter(state->old_bs)) {
        error_set(errp, QERR_FEATURE_DISABLED, "snapshot");
        return;
    }
@@ -1251,18 +1321,24 @@ static void external_snapshot_prepare(BlkTransactionState *common,
                        state->old_bs->filename,
                        state->old_bs->drv->format_name,
                        NULL, -1, flags, &local_err, false);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

-    /* We will manually add the backing_hd field to the bs later */
-    state->new_bs = bdrv_new("");
+    if (has_snapshot_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name",
+                  qstring_from_str(snapshot_node_name));
+    }
+
    /* TODO Inherit bs->options or only take explicit options with an
     * extended QMP command? */
-    ret = bdrv_open(state->new_bs, new_image_file, NULL,
+    assert(state->new_bs == NULL);
+    ret = bdrv_open(&state->new_bs, new_image_file, NULL, options,
                    flags | BDRV_O_NO_BACKING, drv, &local_err);
+    /* We will manually add the backing_hd field to the bs later */
    if (ret != 0) {
        error_propagate(errp, local_err);
    }
@@ -1314,7 +1390,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                     backup->has_on_source_error, backup->on_source_error,
                     backup->has_on_target_error, backup->on_target_error,
                     &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        state->bs = NULL;
        state->job = NULL;
@@ -1406,7 +1482,7 @@ void qmp_transaction(TransactionActionList *dev_list, Error **errp)
        QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry);

        state->ops->prepare(state, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            error_propagate(errp, local_err);
            goto delete_and_fail;
        }
@@ -1476,14 +1552,19 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
    eject_device(bs, force, errp);
 }

-void qmp_block_passwd(const char *device, const char *password, Error **errp)
+void qmp_block_passwd(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      const char *password, Error **errp)
 {
+    Error *local_err = NULL;
    BlockDriverState *bs;
    int err;

-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
        return;
    }

@@ -1504,7 +1585,7 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
    Error *local_err = NULL;
    int ret;

-    ret = bdrv_open(bs, filename, NULL, bdrv_flags, drv, &local_err);
+    ret = bdrv_open(&bs, filename, NULL, NULL, bdrv_flags, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return;
@@ -1547,7 +1628,7 @@ void qmp_change_blockdev(const char *device, const char *filename,
    }

    eject_device(bs, 0, &err);
-    if (error_is_set(&err)) {
+    if (err) {
        error_propagate(errp, err);
        return;
    }
@@ -1673,14 +1754,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
    return 0;
 }

-void qmp_block_resize(const char *device, int64_t size, Error **errp)
+void qmp_block_resize(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      int64_t size, Error **errp)
 {
+    Error *local_err = NULL;
    BlockDriverState *bs;
    int ret;

-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (!bdrv_is_first_non_filter(bs)) {
+        error_set(errp, QERR_FEATURE_DISABLED, "resize");
        return;
    }

@@ -1767,7 +1858,7 @@ void qmp_block_stream(const char *device, bool has_base,

    stream_start(bs, base_bs, base, has_speed ? speed : 0,
                 on_error, block_job_cb, bs, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
@@ -1776,7 +1867,8 @@ void qmp_block_stream(const char *device, bool has_base,
 }

 void qmp_block_commit(const char *device,
-                      bool has_base, const char *base, const char *top,
+                      bool has_base, const char *base,
+                      bool has_top, const char *top,
                      bool has_speed, int64_t speed,
                      Error **errp)
 {
@@ -1788,9 +1880,18 @@ void qmp_block_commit(const char *device,
     */
    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;

+    if (!has_speed) {
+        speed = 0;
+    }
+
    /* drain all i/o before commits */
    bdrv_drain_all();

+    /* Important Note:
+     *  libvirt relies on the DeviceNotFound error class in order to probe for
+     *  live commit feature versions; for this to work, we must make sure to
+     *  perform the device lookup before any generic errors that may occur in a
+     *  scenario in which all optional arguments are omitted. */
    bs = bdrv_find(device);
    if (!bs) {
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1800,7 +1901,7 @@ void qmp_block_commit(const char *device,
    /* default top_bs is the active layer */
    top_bs = bs;

-    if (top) {
+    if (has_top && top) {
        if (strcmp(bs->filename, top) != 0) {
            top_bs = bdrv_find_backing_image(bs, top);
        }
@@ -1822,6 +1923,12 @@ void qmp_block_commit(const char *device,
        return;
    }

+    /* Do not allow attempts to commit an image into itself */
+    if (top_bs == base_bs) {
+        error_setg(errp, "cannot commit an image into itself");
+        return;
+    }
+
    if (top_bs == bs) {
        commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
                            bs, &local_err);
@@ -1925,15 +2032,14 @@ void qmp_drive_backup(const char *device, const char *target,
        }
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

-    target_bs = bdrv_new("");
-    ret = bdrv_open(target_bs, target, NULL, flags, drv, &local_err);
+    target_bs = NULL;
+    ret = bdrv_open(&target_bs, target, NULL, NULL, flags, drv, &local_err);
    if (ret < 0) {
-        bdrv_unref(target_bs);
        error_propagate(errp, local_err);
        return;
    }
@@ -1947,6 +2053,11 @@ void qmp_drive_backup(const char *device, const char *target,
    }
 }

+BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
+{
+    return bdrv_named_nodes_list();
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)

 void qmp_drive_mirror(const char *device, const char *target,
@@ -2061,7 +2172,7 @@ void qmp_drive_mirror(const char *device, const char *target,
        }
    }

-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
@@ -2069,11 +2180,10 @@ void qmp_drive_mirror(const char *device, const char *target,
    /* Mirroring takes care of copy-on-write using the source's backing
     * file.
     */
-    target_bs = bdrv_new("");
-    ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv,
-                    &local_err);
+    target_bs = NULL;
+    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
+                    drv, &local_err);
    if (ret < 0) {
-        bdrv_unref(target_bs);
        error_propagate(errp, local_err);
        return;
    }
@@ -2175,6 +2285,7 @@ void qmp_block_job_complete(const char *device, Error **errp)
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
    QmpOutputVisitor *ov = qmp_output_visitor_new();
+    DriveInfo *dinfo;
    QObject *obj;
    QDict *qdict;
    Error *local_err = NULL;
@@ -2191,8 +2302,10 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
     *
     * For now, simply forbidding the combination for all drivers will do. */
    if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) {
-        bool direct = options->cache->has_direct && options->cache->direct;
-        if (!options->has_cache && !direct) {
+        bool direct = options->has_cache &&
+                      options->cache->has_direct &&
+                      options->cache->direct;
+        if (!direct) {
            error_setg(errp, "aio=native requires cache.direct=true");
            goto fail;
        }
@@ -2200,7 +2313,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    visit_type_BlockdevOptions(qmp_output_get_visitor(ov),
                               &options, NULL, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        goto fail;
    }
@@ -2210,12 +2323,18 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    qdict_flatten(qdict);

-    blockdev_init(qdict, IF_NONE, &local_err);
-    if (error_is_set(&local_err)) {
+    dinfo = blockdev_init(NULL, qdict, &local_err);
+    if (local_err) {
        error_propagate(errp, local_err);
        goto fail;
    }

+    if (bdrv_key_required(dinfo->bdrv)) {
+        drive_uninit(dinfo);
+        error_setg(errp, "blockdev-add doesn't support encrypted devices");
+        goto fail;
+    }
+
 fail:
    qmp_output_visitor_cleanup(ov);
 }
@@ -2250,10 +2369,6 @@ QemuOptsList qemu_common_drive_opts = {
            .name = "snapshot",
            .type = QEMU_OPT_BOOL,
            .help = "enable/disable snapshot mode",
-        },{
-            .name = "file",
-            .type = QEMU_OPT_STRING,
-            .help = "disk image",
        },{
            .name = "discard",
            .type = QEMU_OPT_STRING,
--- a/blockjob.c
+++ b/blockjob.c
@@ -61,7 +61,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
-        if (error_is_set(&local_err)) {
+        if (local_err) {
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
@@ -92,7 +92,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
        return;
    }
    job->driver->set_speed(job, speed, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
@@ -206,6 +206,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
    job->busy = true;
 }

+void block_job_yield(BlockJob *job)
+{
+    assert(job->busy);
+
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    qemu_coroutine_yield();
+    job->busy = true;
+}
+
 BlockJobInfo *block_job_query(BlockJob *job)
 {
    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1000,7 +1000,7 @@ int main(int argc, char **argv)
    memset(ts, 0, sizeof(TaskState));
    init_task_state(ts);
    ts->info = info;
-    env->opaque = ts;
+    cpu->opaque = ts;

 #if defined(TARGET_I386)
    cpu_x86_set_cpl(env, 3);
--- a/683
+++ b/683
--- a/coroutine-win32.c
+++ b/coroutine-win32.c
@@ -36,8 +36,17 @@ typedef struct
 static __thread CoroutineWin32 leader;
 static __thread Coroutine *current;

-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                                      CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
 {
    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -23,29 +23,22 @@
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"

-bool qemu_cpu_has_work(CPUState *cpu)
+void cpu_loop_exit(CPUState *cpu)
 {
-    return cpu_has_work(cpu);
-}
-
-void cpu_loop_exit(CPUArchState *env)
-{
-    CPUState *cpu = ENV_GET_CPU(env);
-
    cpu->current_tb = NULL;
-    siglongjmp(env->jmp_env, 1);
+    siglongjmp(cpu->jmp_env, 1);
 }

 /* exit the current TB from a signal handler. The host registers are
   restored in a state compatible with the CPU emulator
 */
 #if defined(CONFIG_SOFTMMU)
-void cpu_resume_from_signal(CPUArchState *env, void *puc)
+void cpu_resume_from_signal(CPUState *cpu, void *puc)
 {
    /* XXX: restore cpu registers saved in host registers */

-    env->exception_index = -1;
-    siglongjmp(env->jmp_env, 1);
+    cpu->exception_index = -1;
+    siglongjmp(cpu->jmp_env, 1);
 }
 #endif

@@ -108,7 +101,7 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
    if (max_cycles > CF_COUNT_MASK)
        max_cycles = CF_COUNT_MASK;

-    tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
+    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                     max_cycles);
    cpu->current_tb = tb;
    /* execute the generated code */
@@ -123,6 +116,7 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
                                      target_ulong cs_base,
                                      uint64_t flags)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
    TranslationBlock *tb, **ptb1;
    unsigned int h;
    tb_page_addr_t phys_pc, phys_page1;
@@ -160,7 +154,7 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
    }
 not_found:
   /* if no translated code available, then translate it now */
-    tb = tb_gen_code(env, pc, cs_base, flags, 0);
+    tb = tb_gen_code(cpu, pc, cs_base, flags, 0);

 found:
    /* Move the last found TB to the head of the list */
@@ -170,12 +164,13 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
    }
    /* we add the TB in the virtual pc hash table */
-    env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
    return tb;
 }

 static inline TranslationBlock *tb_find_fast(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    int flags;
@@ -184,7 +179,7 @@ static inline TranslationBlock *tb_find_fast(CPUArchState *env)
       always be the same before a given translated block
       is executed. */
    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-    tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
    if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                 tb->flags != flags)) {
        tb = tb_find_slow(env, pc, cs_base, flags);
@@ -201,10 +196,11 @@ void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)

 static void cpu_handle_debug_exception(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
    CPUWatchpoint *wp;

-    if (!env->watchpoint_hit) {
-        QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+    if (!cpu->watchpoint_hit) {
+        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
@@ -231,6 +227,8 @@ int cpu_exec(CPUArchState *env)
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    uintptr_t next_tb;
+    /* This must be volatile so it is not trashed by longjmp() */
+    volatile bool have_tb_lock = false;

    if (cpu->halted) {
        if (!cpu_has_work(cpu)) {
@@ -283,16 +281,16 @@ int cpu_exec(CPUArchState *env)
 #else
 #error unsupported target CPU
 #endif
-    env->exception_index = -1;
+    cpu->exception_index = -1;

    /* prepare setjmp context for exception handling */
    for(;;) {
-        if (sigsetjmp(env->jmp_env, 0) == 0) {
+        if (sigsetjmp(cpu->jmp_env, 0) == 0) {
            /* if an exception is pending, we execute it here */
-            if (env->exception_index >= 0) {
-                if (env->exception_index >= EXCP_INTERRUPT) {
+            if (cpu->exception_index >= 0) {
+                if (cpu->exception_index >= EXCP_INTERRUPT) {
                    /* exit request from the cpu execution loop */
-                    ret = env->exception_index;
+                    ret = cpu->exception_index;
                    if (ret == EXCP_DEBUG) {
                        cpu_handle_debug_exception(env);
                    }
@@ -305,11 +303,11 @@ int cpu_exec(CPUArchState *env)
 #if defined(TARGET_I386)
                    cc->do_interrupt(cpu);
 #endif
-                    ret = env->exception_index;
+                    ret = cpu->exception_index;
                    break;
 #else
                    cc->do_interrupt(cpu);
-                    env->exception_index = -1;
+                    cpu->exception_index = -1;
 #endif
                }
            }
@@ -324,8 +322,8 @@ int cpu_exec(CPUArchState *env)
                    }
                    if (interrupt_request & CPU_INTERRUPT_DEBUG) {
                        cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
-                        env->exception_index = EXCP_DEBUG;
-                        cpu_loop_exit(env);
+                        cpu->exception_index = EXCP_DEBUG;
+                        cpu_loop_exit(cpu);
                    }
 #if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
    defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
@@ -333,8 +331,8 @@ int cpu_exec(CPUArchState *env)
                    if (interrupt_request & CPU_INTERRUPT_HALT) {
                        cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
                        cpu->halted = 1;
-                        env->exception_index = EXCP_HLT;
-                        cpu_loop_exit(env);
+                        cpu->exception_index = EXCP_HLT;
+                        cpu_loop_exit(cpu);
                    }
 #endif
 #if defined(TARGET_I386)
@@ -348,8 +346,8 @@ int cpu_exec(CPUArchState *env)
                            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
                                                          0);
                            do_cpu_init(x86_cpu);
-                            env->exception_index = EXCP_HALTED;
-                            cpu_loop_exit(env);
+                            cpu->exception_index = EXCP_HALTED;
+                            cpu_loop_exit(cpu);
                    } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
                            do_cpu_sipi(x86_cpu);
                    } else if (env->hflags2 & HF2_GIF_MASK) {
@@ -395,7 +393,10 @@ int cpu_exec(CPUArchState *env)
                            /* FIXME: this should respect TPR */
                            cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR,
                                                          0);
-                            intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                            intno = ldl_phys(cpu->as,
+                                             env->vm_vmcb
+                                             + offsetof(struct vmcb,
+                                                        control.int_vector));
                            qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
                            do_interrupt_x86_hardirq(env, intno, 1);
                            cpu->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
@@ -417,7 +418,7 @@ int cpu_exec(CPUArchState *env)
 #elif defined(TARGET_LM32)
                    if ((interrupt_request & CPU_INTERRUPT_HARD)
                        && (env->ie & IE_IE)) {
-                        env->exception_index = EXCP_IRQ;
+                        cpu->exception_index = EXCP_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -426,7 +427,7 @@ int cpu_exec(CPUArchState *env)
                        && (env->sregs[SR_MSR] & MSR_IE)
                        && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))
                        && !(env->iflags & (D_FLAG | IMM_FLAG))) {
-                        env->exception_index = EXCP_IRQ;
+                        cpu->exception_index = EXCP_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -434,7 +435,7 @@ int cpu_exec(CPUArchState *env)
                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
                        cpu_mips_hw_interrupts_pending(env)) {
                        /* Raise it */
-                        env->exception_index = EXCP_EXT_INTERRUPT;
+                        cpu->exception_index = EXCP_EXT_INTERRUPT;
                        env->error_code = 0;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
@@ -451,7 +452,7 @@ int cpu_exec(CPUArchState *env)
                            idx = EXCP_TICK;
                        }
                        if (idx >= 0) {
-                            env->exception_index = idx;
+                            cpu->exception_index = idx;
                            cc->do_interrupt(cpu);
                            next_tb = 0;
                        }
@@ -466,7 +467,7 @@ int cpu_exec(CPUArchState *env)
                            if (((type == TT_EXTINT) &&
                                  cpu_pil_allowed(env, pil)) ||
                                  type != TT_EXTINT) {
-                                env->exception_index = env->interrupt_index;
+                                cpu->exception_index = env->interrupt_index;
                                cc->do_interrupt(cpu);
                                next_tb = 0;
                            }
@@ -474,8 +475,8 @@ int cpu_exec(CPUArchState *env)
                    }
 #elif defined(TARGET_ARM)
                    if (interrupt_request & CPU_INTERRUPT_FIQ
-                        && !(env->uncached_cpsr & CPSR_F)) {
-                        env->exception_index = EXCP_FIQ;
+                        && !(env->daif & PSTATE_F)) {
+                        cpu->exception_index = EXCP_FIQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -490,15 +491,15 @@ int cpu_exec(CPUArchState *env)
                       pc contains a magic address.  */
                    if (interrupt_request & CPU_INTERRUPT_HARD
                        && ((IS_M(env) && env->regs[15] < 0xfffffff0)
-                            || !(env->uncached_cpsr & CPSR_I))) {
-                        env->exception_index = EXCP_IRQ;
+                            || !(env->daif & PSTATE_I))) {
+                        cpu->exception_index = EXCP_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
 #elif defined(TARGET_UNICORE32)
                    if (interrupt_request & CPU_INTERRUPT_HARD
                        && !(env->uncached_asr & ASR_I)) {
-                        env->exception_index = UC32_EXCP_INTR;
+                        cpu->exception_index = UC32_EXCP_INTR;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -533,7 +534,7 @@ int cpu_exec(CPUArchState *env)
                            }
                        }
                        if (idx >= 0) {
-                            env->exception_index = idx;
+                            cpu->exception_index = idx;
                            env->error_code = 0;
                            cc->do_interrupt(cpu);
                            next_tb = 0;
@@ -543,7 +544,7 @@ int cpu_exec(CPUArchState *env)
                    if (interrupt_request & CPU_INTERRUPT_HARD
                        && (env->pregs[PR_CCS] & I_FLAG)
                        && !env->locked_irq) {
-                        env->exception_index = EXCP_IRQ;
+                        cpu->exception_index = EXCP_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -555,7 +556,7 @@ int cpu_exec(CPUArchState *env)
                            m_flag_archval = M_FLAG_V32;
                        }
                        if ((env->pregs[PR_CCS] & m_flag_archval)) {
-                            env->exception_index = EXCP_NMI;
+                            cpu->exception_index = EXCP_NMI;
                            cc->do_interrupt(cpu);
                            next_tb = 0;
                        }
@@ -569,7 +570,7 @@ int cpu_exec(CPUArchState *env)
                           hardware doesn't rely on this, so we
                           provide/save the vector when the interrupt is
                           first signalled.  */
-                        env->exception_index = env->pending_vector;
+                        cpu->exception_index = env->pending_vector;
                        do_interrupt_m68k_hardirq(env);
                        next_tb = 0;
                    }
@@ -581,7 +582,7 @@ int cpu_exec(CPUArchState *env)
                    }
 #elif defined(TARGET_XTENSA)
                    if (interrupt_request & CPU_INTERRUPT_HARD) {
-                        env->exception_index = EXC_IRQ;
+                        cpu->exception_index = EXC_IRQ;
                        cc->do_interrupt(cpu);
                        next_tb = 0;
                    }
@@ -597,10 +598,11 @@ int cpu_exec(CPUArchState *env)
                }
                if (unlikely(cpu->exit_request)) {
                    cpu->exit_request = 0;
-                    env->exception_index = EXCP_INTERRUPT;
-                    cpu_loop_exit(env);
+                    cpu->exception_index = EXCP_INTERRUPT;
+                    cpu_loop_exit(cpu);
                }
                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
+                have_tb_lock = true;
                tb = tb_find_fast(env);
                /* Note: we do it here to avoid a gcc bug on Mac OS X when
                   doing it in tb_find_slow */
@@ -622,6 +624,7 @@ int cpu_exec(CPUArchState *env)
                    tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
                                next_tb & TB_EXIT_MASK, tb);
                }
+                have_tb_lock = false;
                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);

                /* cpu_interrupt might be called while translating the
@@ -651,25 +654,25 @@ int cpu_exec(CPUArchState *env)
                        /* Instruction counter expired.  */
                        int insns_left;
                        tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
-                        insns_left = env->icount_decr.u32;
-                        if (env->icount_extra && insns_left >= 0) {
+                        insns_left = cpu->icount_decr.u32;
+                        if (cpu->icount_extra && insns_left >= 0) {
                            /* Refill decrementer and continue execution.  */
-                            env->icount_extra += insns_left;
-                            if (env->icount_extra > 0xffff) {
+                            cpu->icount_extra += insns_left;
+                            if (cpu->icount_extra > 0xffff) {
                                insns_left = 0xffff;
                            } else {
-                                insns_left = env->icount_extra;
+                                insns_left = cpu->icount_extra;
                            }
-                            env->icount_extra -= insns_left;
-                            env->icount_decr.u16.low = insns_left;
+                            cpu->icount_extra -= insns_left;
+                            cpu->icount_decr.u16.low = insns_left;
                        } else {
                            if (insns_left > 0) {
                                /* Execute remaining instructions.  */
                                cpu_exec_nocache(env, insns_left, tb);
                            }
-                            env->exception_index = EXCP_INTERRUPT;
+                            cpu->exception_index = EXCP_INTERRUPT;
                            next_tb = 0;
-                            cpu_loop_exit(env);
+                            cpu_loop_exit(cpu);
                        }
                        break;
                    }
@@ -693,6 +696,10 @@ int cpu_exec(CPUArchState *env)
 #ifdef TARGET_I386
            x86_cpu = X86_CPU(cpu);
 #endif
+            if (have_tb_lock) {
+                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
+                have_tb_lock = false;
+            }
        }
    } /* for(;;) */

--- a/cpus.c
+++ b/cpus.c
@@ -76,7 +76,7 @@ static bool cpu_thread_is_idle(CPUState *cpu)
    if (cpu_is_stopped(cpu)) {
        return true;
    }
-    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
+    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
@@ -139,11 +139,10 @@ static int64_t cpu_get_icount_locked(void)

    icount = qemu_icount;
    if (cpu) {
-        CPUArchState *env = cpu->env_ptr;
-        if (!can_do_io(env)) {
+        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        }
-        icount -= (env->icount_decr.u16.low + env->icount_extra);
+        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
 }
@@ -1117,16 +1116,25 @@ void resume_all_vcpus(void)
    }
 }

+/* For temporary buffers for forming a name */
+#define VCPU_THREAD_NAME_SIZE 16
+
 static void qemu_tcg_init_vcpu(CPUState *cpu)
 {
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    tcg_cpu_address_space_init(cpu, cpu->as);
+
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
-        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
-                           QEMU_THREAD_JOINABLE);
+        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+                 cpu->cpu_index);
+        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
 #ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 #endif
@@ -1142,11 +1150,15 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)

 static void qemu_kvm_start_vcpu(CPUState *cpu)
 {
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
-    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
-                       QEMU_THREAD_JOINABLE);
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
+             cpu->cpu_index);
+    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
@@ -1154,10 +1166,14 @@ static void qemu_kvm_start_vcpu(CPUState *cpu)

 static void qemu_dummy_start_vcpu(CPUState *cpu)
 {
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
-    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
+             cpu->cpu_index);
+    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
@@ -1219,6 +1235,7 @@ int vm_stop_force_state(RunState state)

 static int tcg_cpu_exec(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
 #ifdef CONFIG_PROFILER
    int64_t ti;
@@ -1231,9 +1248,9 @@ static int tcg_cpu_exec(CPUArchState *env)
        int64_t count;
        int64_t deadline;
        int decr;
-        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
-        env->icount_decr.u16.low = 0;
-        env->icount_extra = 0;
+        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
+        cpu->icount_decr.u16.low = 0;
+        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
@@ -1249,8 +1266,8 @@ static int tcg_cpu_exec(CPUArchState *env)
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
-        env->icount_decr.u16.low = decr;
-        env->icount_extra = count;
+        cpu->icount_decr.u16.low = decr;
+        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
 #ifdef CONFIG_PROFILER
@@ -1259,10 +1276,9 @@ static int tcg_cpu_exec(CPUArchState *env)
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
-        qemu_icount -= (env->icount_decr.u16.low
-                        + env->icount_extra);
-        env->icount_decr.u32 = 0;
-        env->icount_extra = 0;
+        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
+        cpu->icount_decr.u32 = 0;
+        cpu->icount_extra = 0;
    }
    return ret;
 }
--- a/cputlb.c
+++ b/cputlb.c
@@ -46,9 +46,9 @@ int tlb_flush_count;
 * entries from the TLB at any time, so flushing more entries than
 * required is only an efficiency issue, not a correctness issue.
 */
-void tlb_flush(CPUArchState *env, int flush_global)
+void tlb_flush(CPUState *cpu, int flush_global)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
+    CPUArchState *env = cpu->env_ptr;

 #if defined(DEBUG_TLB)
    printf("tlb_flush:\n");
@@ -58,7 +58,7 @@ void tlb_flush(CPUArchState *env, int flush_global)
    cpu->current_tb = NULL;

    memset(env->tlb_table, -1, sizeof(env->tlb_table));
-    memset(env->tb_jmp_cache, 0, sizeof(env->tb_jmp_cache));
+    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));

    env->tlb_flush_addr = -1;
    env->tlb_flush_mask = 0;
@@ -77,9 +77,9 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
    }
 }

-void tlb_flush_page(CPUArchState *env, target_ulong addr)
+void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
+    CPUArchState *env = cpu->env_ptr;
    int i;
    int mmu_idx;

@@ -93,7 +93,7 @@ void tlb_flush_page(CPUArchState *env, target_ulong addr)
               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
               env->tlb_flush_addr, env->tlb_flush_mask);
 #endif
-        tlb_flush(env, 1);
+        tlb_flush(cpu, 1);
        return;
    }
    /* must reset current TB so that interrupts cannot modify the
@@ -106,7 +106,7 @@ void tlb_flush_page(CPUArchState *env, target_ulong addr)
        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
    }

-    tb_flush_jmp_cache(env, addr);
+    tb_flush_jmp_cache(cpu, addr);
 }

 /* update the TLBs so that writes to code in the virtual page 'addr'
@@ -119,7 +119,7 @@ void tlb_protect_code(ram_addr_t ram_addr)

 /* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
-void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
+void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr,
                             target_ulong vaddr)
 {
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
@@ -221,10 +221,11 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
 /* Add a new TLB entry. At most one entry for a given virtual address
   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
   supplied size is only used by tlb_flush_page.  */
-void tlb_set_page(CPUArchState *env, target_ulong vaddr,
+void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
 {
+    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
@@ -239,7 +240,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    }

    sz = size;
-    section = address_space_translate_for_iotlb(&address_space_memory, paddr,
+    section = address_space_translate_for_iotlb(cpu->as, paddr,
                                                &xlat, &sz);
    assert(sz >= TARGET_PAGE_SIZE);

@@ -260,7 +261,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    }

    code_address = address;
-    iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, xlat,
+    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr, paddr, xlat,
                                            prot, &address);

    index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
@@ -305,6 +306,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    int mmu_idx, page_index, pd;
    void *p;
    MemoryRegion *mr;
+    CPUState *cpu = ENV_GET_CPU(env1);

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
@@ -313,15 +315,14 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
        cpu_ldub_code(env1, addr);
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(pd);
+    mr = iotlb_to_region(cpu->as, pd);
    if (memory_region_is_unassigned(mr)) {
-        CPUState *cpu = ENV_GET_CPU(env1);
        CPUClass *cc = CPU_GET_CLASS(cpu);

        if (cc->do_unassigned_access) {
            cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
        } else {
-            cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
+            cpu_abort(cpu, "Trying to execute code outside RAM or ROM at 0x"
                      TARGET_FMT_lx "\n", addr);
        }
    }
@@ -330,8 +331,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 }

 #define MMUSUFFIX _cmmu
-#undef GETPC
-#define GETPC() ((uintptr_t)0)
+#undef GETPC_ADJ
+#define GETPC_ADJ 0
+#undef GETRA
+#define GETRA() ((uintptr_t)0)
 #define SOFTMMU_CODE_ACCESS

 #define SHIFT 0
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -27,6 +27,7 @@ CONFIG_SSI_SD=y
 CONFIG_SSI_M25P80=y
 CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
+CONFIG_ALLWINNER_EMAC=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -41,8 +41,11 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
+CONFIG_PREP=y
+CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
+CONFIG_ETSEC=y
 CONFIG_ISA_TESTDEV=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -42,6 +42,8 @@ CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
 CONFIG_PSERIES=y
+CONFIG_PREP=y
+CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 # For pSeries
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -3,32 +3,12 @@
 include pci.mak
 include sound.mak
 include usb.mak
-CONFIG_ISA_MMIO=y
-CONFIG_ESCC=y
 CONFIG_M48T59=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
 CONFIG_SERIAL=y
-CONFIG_I8254=y
-CONFIG_PCKBD=y
-CONFIG_FDC=y
 CONFIG_I8257=y
 CONFIG_OPENPIC=y
-CONFIG_PREP_PCI=y
-CONFIG_MACIO=y
-CONFIG_CUDA=y
-CONFIG_ADB=y
-CONFIG_MAC_NVRAM=y
-CONFIG_MAC_DBDMA=y
-CONFIG_HEATHROW_PIC=y
-CONFIG_GRACKLE_PCI=y
-CONFIG_UNIN_PCI=y
-CONFIG_DEC_PCI=y
-CONFIG_PPCE500_PCI=y
-CONFIG_IDE_ISA=y
-CONFIG_IDE_CMD646=y
-CONFIG_IDE_MACIO=y
-CONFIG_NE2000_ISA=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
 CONFIG_PTIMER=y
@@ -36,8 +16,3 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
-CONFIG_E500=y
-CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
-# For PReP
-CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,2 +1,3 @@
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
+CONFIG_S390_FLIC=$(CONFIG_KVM)
--- a/default-configs/sparc-softmmu.mak
+++ b/default-configs/sparc-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_EMPTY_SLOT=y
 CONFIG_PCNET_COMMON=y
 CONFIG_LANCE=y
 CONFIG_TCX=y
+CONFIG_CG3=y
 CONFIG_SLAVIO=y
 CONFIG_CS4231=y
 CONFIG_GRLIB=y
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -33,12 +33,14 @@ DriveInfo *add_init_drive(const char *optstr)
 {
    DriveInfo *dinfo;
    QemuOpts *opts;
+    MachineClass *mc;

    opts = drive_def(optstr);
    if (!opts)
        return NULL;

-    dinfo = drive_init(opts, current_machine->block_default_type);
+    mc = MACHINE_GET_CLASS(current_machine);
+    dinfo = drive_init(opts, mc->qemu_machine->block_default_type);
    if (!dinfo) {
        qemu_opts_del(opts);
        return NULL;
--- a/disas.c
+++ b/disas.c
@@ -190,7 +190,7 @@ static int print_insn_od_target(bfd_vma pc, disassemble_info *info)
 /* Disassemble this for me please... (debugging). 'flags' has the following
   values:
    i386 - 1 means 16 bit code, 2 means 64 bit code
-    arm  - bit 0 = thumb, bit 1 = reverse endian
+    arm  - bit 0 = thumb, bit 1 = reverse endian, bit 2 = A64
    ppc  - nonzero means little endian
    other targets - unused
 */
@@ -225,7 +225,15 @@ void target_disas(FILE *out, CPUArchState *env, target_ulong code,
    }
    print_insn = print_insn_i386;
 #elif defined(TARGET_ARM)
-    if (flags & 1) {
+    if (flags & 4) {
+        /* We might not be compiled with the A64 disassembler
+         * because it needs a C++ compiler; in that case we will
+         * fall through to the default print_insn_od case.
+         */
+#if defined(CONFIG_ARM_A64_DIS)
+        print_insn = print_insn_arm_a64;
+#endif
+    } else if (flags & 1) {
        print_insn = print_insn_thumb1;
    } else {
        print_insn = print_insn_arm;
@@ -356,6 +364,8 @@ void disas(FILE *out, void *code, unsigned long size)
 #elif defined(_ARCH_PPC)
    s.info.disassembler_options = (char *)"any";
    print_insn = print_insn_ppc;
+#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
+    print_insn = print_insn_arm_a64;
 #elif defined(__alpha__)
    print_insn = print_insn_alpha;
 #elif defined(__sparc__)
--- a/disas/Makefile.objs
+++ b/disas/Makefile.objs
@@ -1,5 +1,10 @@
+
 common-obj-$(CONFIG_ALPHA_DIS) += alpha.o
 common-obj-$(CONFIG_ARM_DIS) += arm.o
+common-obj-$(CONFIG_ARM_A64_DIS) += arm-a64.o
+common-obj-$(CONFIG_ARM_A64_DIS) += libvixl/
+libvixldir = $(SRC_PATH)/disas/libvixl
+$(obj)/arm-a64.o: QEMU_CFLAGS := -I$(libvixldir) $(QEMU_CFLAGS)
 common-obj-$(CONFIG_CRIS_DIS) += cris.o
 common-obj-$(CONFIG_HPPA_DIS) += hppa.o
 common-obj-$(CONFIG_I386_DIS) += i386.o
--- a/disas/arm-a64.cc
+++ b/disas/arm-a64.cc
@@ -0,0 +1,87 @@
+/*
+ * ARM A64 disassembly output wrapper to libvixl
+ * Copyright (c) 2013 Linaro Limited
+ * Written by Claudio Fontana
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "a64/disasm-a64.h"
+
+extern "C" {
+#include "disas/bfd.h"
+}
+
+using namespace vixl;
+
+static Decoder *vixl_decoder = NULL;
+static Disassembler *vixl_disasm = NULL;
+
+/* We don't use libvixl's PrintDisassembler because its output
+ * is a little unhelpful (trailing newlines, for example).
+ * Instead we use our own very similar variant so we have
+ * control over the format.
+ *
+ * Each decoded instruction is written to the current stream as the
+ * 8-hex-digit encoding, six spaces, then the disassembly text, with
+ * no trailing newline.  The stream can be retargeted with SetStream().
+ */
+class QEMUDisassembler : public Disassembler {
+public:
+    explicit QEMUDisassembler(FILE *stream) : stream_(stream) { }
+    ~QEMUDisassembler() { }
+
+    /* Direct all subsequent output to 'stream'. */
+    void SetStream(FILE *stream) {
+        stream_ = stream;
+    }
+
+protected:
+    void ProcessOutput(Instruction *instr) {
+        fprintf(stream_, "%08" PRIx32 "      %s",
+                instr->InstructionBits(), GetOutput());
+    }
+
+private:
+    FILE *stream_;
+};
+
+/* Nonzero once vixl_init() has created the decoder. */
+static int vixl_is_initialized(void)
+{
+    return vixl_decoder != NULL;
+}
+
+/* Create the decoder and our disassembler visitor (initially printing
+ * to 'f') and attach the visitor to the decoder.
+ */
+static void vixl_init(FILE *f) {
+    vixl_decoder = new Decoder();
+    vixl_disasm = new QEMUDisassembler(f);
+    vixl_decoder->AppendVisitor(vixl_disasm);
+}
+
+#define INSN_SIZE 4
+
+/* Disassemble ARM A64 instruction. This is our only entry
+ * point from QEMU's C code.
+ *
+ * Reads INSN_SIZE bytes at 'addr' through info->read_memory_func and
+ * prints the decoded instruction to info->stream.  Returns INSN_SIZE on
+ * success; on a read failure reports it via info->memory_error_func and
+ * returns -1.
+ */
+int print_insn_arm_a64(uint64_t addr, disassemble_info *info)
+{
+    uint8_t bytes[INSN_SIZE];
+    uint32_t instr;
+    int status;
+
+    status = info->read_memory_func(addr, bytes, INSN_SIZE, info);
+    if (status != 0) {
+        info->memory_error_func(status, addr, info);
+        return -1;
+    }
+
+    if (!vixl_is_initialized()) {
+        vixl_init(info->stream);
+    }
+
+    /* The decoder objects are created once and shared by every caller,
+     * so point the output at this caller's stream on each call rather
+     * than reusing whichever stream was seen first.  vixl_disasm is
+     * only ever assigned a QEMUDisassembler by vixl_init().
+     */
+    static_cast<QEMUDisassembler *>(vixl_disasm)->SetStream(info->stream);
+
+    /* Assemble the little-endian instruction word in unsigned arithmetic
+     * so that a top byte >= 0x80 is never shifted into the sign bit of
+     * a promoted int.
+     */
+    instr = static_cast<uint32_t>(bytes[0])
+          | static_cast<uint32_t>(bytes[1]) << 8
+          | static_cast<uint32_t>(bytes[2]) << 16
+          | static_cast<uint32_t>(bytes[3]) << 24;
+    vixl_decoder->Decode(reinterpret_cast<Instruction*>(&instr));
+
+    return INSN_SIZE;
+}
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -171,6 +171,7 @@ static void print_operand_value (char *buf, size_t bufsize, int hex, bfd_vma dis
 static void print_displacement (char *, bfd_vma);
 static void OP_E (int, int);
 static void OP_G (int, int);
+static void OP_vvvv (int, int);
 static bfd_vma get64 (void);
 static bfd_signed_vma get32 (void);
 static bfd_signed_vma get32s (void);
@@ -264,6 +265,9 @@ static int rex_used;
   current instruction.  */
 static int used_prefixes;

+/* The VEX.vvvv register, unencoded.  */
+static int vex_reg;
+
 /* Flags stored in PREFIXES.  */
 #define PREFIX_REPZ 1
 #define PREFIX_REPNZ 2
@@ -278,6 +282,10 @@ static int used_prefixes;
 #define PREFIX_ADDR 0x400
 #define PREFIX_FWAIT 0x800

+#define PREFIX_VEX_0F    0x1000
+#define PREFIX_VEX_0F38  0x2000
+#define PREFIX_VEX_0F3A  0x4000
+
 /* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
   to ADDR (exclusive) are valid.  Returns 1 for success, longjmps
   on error.  */
@@ -323,6 +331,7 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)

 #define XX { NULL, 0 }

+#define Bv { OP_vvvv, v_mode }
 #define Eb { OP_E, b_mode }
 #define Ev { OP_E, v_mode }
 #define Ed { OP_E, d_mode }
@@ -671,7 +680,8 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
 #define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
 #define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
 #define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
-
+#define PREGRP105 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 105 } }
+#define PREGRP106 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 106 } }

 #define X86_64_0  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
 #define X86_64_1  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 1 } }
@@ -1449,7 +1459,7 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -1473,7 +1483,7 @@ static const unsigned char threebyte_0x38_uses_REPNZ_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -1497,7 +1507,7 @@ static const unsigned char threebyte_0x38_uses_REPZ_prefix[256] = {
  /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
  /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
-  /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
+  /* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
  /*       -------------------------------        */
  /*       0 1 2 3 4 5 6 7 8 9 a b c d e f        */
 };
@@ -2632,17 +2642,17 @@ static const struct dis386 prefix_user_table[][4] = {

  /* PREGRP87 */
  {
+    { "movbe",	{ Gv, Ev } },
    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Gv, Ev } },
    { "crc32",	{ Gdq, { CRC32_Fixup, b_mode } } },
  },

  /* PREGRP88 */
  {
+    { "movbe",	{ Ev, Gv } },
    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
-    { "(bad)",	{ XX } },
+    { "movbe",	{ Ev, Gv } },
    { "crc32",	{ Gdq, { CRC32_Fixup, v_mode } } },
  },

@@ -2774,6 +2784,22 @@ static const struct dis386 prefix_user_table[][4] = {
    { "(bad)",	{ XX } },
  },

+  /* PREGRP105 */
+  {
+    { "andnS",	{ Gv, Bv, Ev } },
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+    { "(bad)",	{ XX } },
+  },
+
+  /* PREGRP106 */
+  {
+    { "bextrS",	{ Gv, Ev, Bv } },
+    { "sarxS",	{ Gv, Ev, Bv } },
+    { "shlxS",	{ Gv, Ev, Bv } },
+    { "shrxS",	{ Gv, Ev, Bv } },
+  },
+
 };

 static const struct dis386 x86_64_table[][2] = {
@@ -3071,12 +3097,12 @@ static const struct dis386 three_byte_table[][256] = {
    /* f0 */
    { PREGRP87 },
    { PREGRP88 },
+    { PREGRP105 },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP106 },
    /* f8 */
    { "(bad)", { XX } },
    { "(bad)", { XX } },
@@ -3477,6 +3503,74 @@ ckprefix (void)
    }
 }

+/* Recognise an optional VEX prefix at codep and fold it into the
+ * decoder state.
+ *
+ * Nothing is consumed unless the byte at codep is 0xc4 (three-byte form)
+ * or 0xc5 (two-byte form).  In 16-bit mode the prefix is never accepted,
+ * and in 32-bit mode it is only accepted when the top two bits of the
+ * following byte are both set.
+ *
+ * On a match:
+ *   - codep is advanced past the prefix (3 or 2 bytes);
+ *   - rex is replaced by the REX bits derived from the inverted
+ *     R/X/B fields and from W;
+ *   - prefixes gains PREFIX_VEX_0F (plus PREFIX_VEX_0F38 or
+ *     PREFIX_VEX_0F3A) for the selected opcode map, and PREFIX_DATA,
+ *     PREFIX_REPZ or PREFIX_REPNZ according to the pp field;
+ *   - vex_reg is set to the un-inverted vvvv register number.
+ */
+static void
+ckvexprefix (void)
+{
+    int op, vex2, vex3, newrex = 0, newpfx = prefixes;
+
+    if (address_mode == mode_16bit) {
+        return;
+    }
+
+    fetch_data(the_info, codep + 1);
+    op = *codep;
+
+    if (op != 0xc4 && op != 0xc5) {
+        return;
+    }
+
+    fetch_data(the_info, codep + 2);
+    vex2 = codep[1];
+
+    if (address_mode == mode_32bit && (vex2 & 0xc0) != 0xc0) {
+        return;
+    }
+
+    if (op == 0xc4) {
+        /* Three byte VEX prefix.  */
+        fetch_data(the_info, codep + 3);
+        vex3 = codep[2];
+
+        /* R, X and B are stored inverted; W is stored as-is.  */
+        newrex |= (vex2 & 0x80 ? 0 : REX_R);
+        newrex |= (vex2 & 0x40 ? 0 : REX_X);
+        newrex |= (vex2 & 0x20 ? 0 : REX_B);
+        newrex |= (vex3 & 0x80 ? REX_W : 0);
+        switch (vex2 & 0x1f) {      /* VEX.m-mmmm */
+        case 1:
+            newpfx |= PREFIX_VEX_0F;
+            break;
+        case 2:
+            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F38;
+            break;
+        case 3:
+            newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F3A;
+            break;
+        }
+        /* vvvv and pp live in the last prefix byte; reuse vex2 for it.  */
+        vex2 = vex3;
+        codep += 3;
+    } else {
+        /* Two byte VEX prefix.  It carries no map field: the 0F opcode
+           map is implied, so select it just as m-mmmm == 1 does above.  */
+        newrex |= (vex2 & 0x80 ? 0 : REX_R);
+        newpfx |= PREFIX_VEX_0F;
+        codep += 2;
+    }
+
+    vex_reg = (~vex2 >> 3) & 15;     /* VEX.vvvv */
+    switch (vex2 & 3) {              /* VEX.pp */
+    case 1:
+        newpfx |= PREFIX_DATA;     /* 0x66 */
+        break;
+    case 2:
+        newpfx |= PREFIX_REPZ;     /* 0xf3 */
+        break;
+    case 3:
+        newpfx |= PREFIX_REPNZ;    /* 0xf2 */
+        break;
+    }
+
+    rex = newrex;
+    prefixes = newpfx;
+}
+
 /* Return the name of the prefix byte PREF, or NULL if PREF is not a
   prefix byte.  */

@@ -3598,6 +3692,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
  const char *p;
  struct dis_private priv;
  unsigned char op;
+  unsigned char threebyte;

  if (info->mach == bfd_mach_x86_64_intel_syntax
      || info->mach == bfd_mach_x86_64)
@@ -3752,6 +3847,7 @@ print_insn (bfd_vma pc, disassemble_info *info)

  obufp = obuf;
  ckprefix ();
+  ckvexprefix ();

  insn_codep = codep;
  sizeflag = priv.orig_sizeflag;
@@ -3775,18 +3871,29 @@ print_insn (bfd_vma pc, disassemble_info *info)
    }

  op = 0;
+  if (prefixes & PREFIX_VEX_0F)
+    {
+      used_prefixes |= PREFIX_VEX_0F | PREFIX_VEX_0F38 | PREFIX_VEX_0F3A;
+      if (prefixes & PREFIX_VEX_0F38)
+        threebyte = 0x38;
+      else if (prefixes & PREFIX_VEX_0F3A)
+        threebyte = 0x3a;
+      else
+        threebyte = *codep++;
+      goto vex_opcode;
+    }
  if (*codep == 0x0f)
    {
-      unsigned char threebyte;
      fetch_data(info, codep + 2);
-      threebyte = *++codep;
+      threebyte = codep[1];
+      codep += 2;
+    vex_opcode:
      dp = &dis386_twobyte[threebyte];
-      need_modrm = twobyte_has_modrm[*codep];
-      uses_DATA_prefix = twobyte_uses_DATA_prefix[*codep];
-      uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[*codep];
-      uses_REPZ_prefix = twobyte_uses_REPZ_prefix[*codep];
-      uses_LOCK_prefix = (*codep & ~0x02) == 0x20;
-      codep++;
+      need_modrm = twobyte_has_modrm[threebyte];
+      uses_DATA_prefix = twobyte_uses_DATA_prefix[threebyte];
+      uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[threebyte];
+      uses_REPZ_prefix = twobyte_uses_REPZ_prefix[threebyte];
+      uses_LOCK_prefix = (threebyte & ~0x02) == 0x20;
      if (dp->name == NULL && dp->op[0].bytemode == IS_3BYTE_OPCODE)
 	{
          fetch_data(info, codep + 2);
@@ -5291,6 +5398,17 @@ OP_G (int bytemode, int sizeflag)
    }
 }

+/* Operand printer for the register encoded in VEX.vvvv (vex_reg):
+   appends its 64-bit name when REX.W is set and its 32-bit name
+   otherwise.  Neither argument is consulted.  */
+static void
+OP_vvvv (int bytemode, int sizeflags)
+{
+    const char *regname;
+
+    USED_REX (REX_W);
+    regname = (rex & REX_W) ? names64[vex_reg] : names32[vex_reg];
+    oappend(regname);
+}
+
 static bfd_vma
 get64 (void)
 {
--- a/disas/libvixl/LICENCE
+++ b/disas/libvixl/LICENCE
@@ -0,0 +1,30 @@
+LICENCE
+=======
+
+The software in this repository is covered by the following licence.
+
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/disas/libvixl/Makefile.objs
+++ b/disas/libvixl/Makefile.objs
@@ -0,0 +1,8 @@
+libvixl_OBJS = utils.o \
+               a64/instructions-a64.o \
+               a64/decoder-a64.o \
+               a64/disasm-a64.o
+# For these objects only, put -I$(SRC_PATH)/disas/libvixl at the front of QEMU_CFLAGS.
+$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS := -I$(SRC_PATH)/disas/libvixl $(QEMU_CFLAGS)
+# Add the objects to the common-obj list keyed by CONFIG_ARM_A64_DIS.
+common-obj-$(CONFIG_ARM_A64_DIS) += $(libvixl_OBJS)
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -0,0 +1,12 @@
+
+The code in this directory is a subset of libvixl:
+ https://github.com/armvixl/vixl
+(specifically, it is the set of files needed for disassembly only,
+taken from libvixl 1.1).
+Bugfixes should preferably be sent upstream initially.
+
+The disassembler does not currently support the entire A64 instruction
+set. Notably:
+ * No Advanced SIMD support.
+ * Limited support for system instructions.
+ * A few miscellaneous integer and floating point instructions are missing.
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
--- a/disas/libvixl/a64/constants-a64.h
+++ b/disas/libvixl/a64/constants-a64.h
--- a/disas/libvixl/a64/cpu-a64.h
+++ b/disas/libvixl/a64/cpu-a64.h
@@ -0,0 +1,56 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_CPU_A64_H
+#define VIXL_CPU_A64_H
+
+#include "globals.h"
+
+namespace vixl {
+
+class CPU {  // Cache-maintenance helpers; every member is static.
+ public:
+  // Initialise CPU support.
+  static void SetUp();
+
+  // Ensures the data at a given address and with a given size is the same for
+  // the I and D caches. I and D caches are not automatically coherent on ARM
+  // so this operation is required before any dynamically generated code can
+  // safely run.
+  static void EnsureIAndDCacheCoherency(void *address, size_t length);
+
+ private:
+  // Return the content of the cache type register.
+  static uint32_t GetCacheType();
+
+  // I and D cache line size in bytes.
+  static unsigned icache_line_size_;
+  static unsigned dcache_line_size_;
+};
+
+}  // namespace vixl
+
+#endif  // VIXL_CPU_A64_H
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -0,0 +1,712 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "globals.h"
+#include "utils.h"
+#include "a64/decoder-a64.h"
+
+namespace vixl {
+// Top-level instruction decode function: dispatches on bits <27:24>.
+void Decoder::Decode(Instruction *instr) {
+  if (instr->Bits(28, 27) == 0) {  // <28:27> == 0 is not decoded any further.
+    VisitUnallocated(instr);
+  } else {
+    switch (instr->Bits(27, 24)) {  // All sixteen values have a case below.
+      // 0:   PC relative addressing.
+      case 0x0: DecodePCRelAddressing(instr); break;
+
+      // 1:   Add/sub immediate.
+      case 0x1: DecodeAddSubImmediate(instr); break;
+
+      // A:   Logical shifted register.
+      //      Add/sub with carry.
+      //      Conditional compare register.
+      //      Conditional compare immediate.
+      //      Conditional select.
+      //      Data processing 1 source.
+      //      Data processing 2 source.
+      // B:   Add/sub shifted register.
+      //      Add/sub extended register.
+      //      Data processing 3 source.
+      case 0xA:
+      case 0xB: DecodeDataProcessing(instr); break;
+
+      // 2:   Logical immediate.
+      //      Move wide immediate.
+      case 0x2: DecodeLogical(instr); break;
+
+      // 3:   Bitfield.
+      //      Extract.
+      case 0x3: DecodeBitfieldExtract(instr); break;
+
+      // 4:   Unconditional branch immediate.
+      //      Exception generation.
+      //      Compare and branch immediate.
+      // 5:   Compare and branch immediate.
+      //      Conditional branch.
+      //      System.
+      // 6,7: Unconditional branch.
+      //      Test and branch immediate.
+      case 0x4:
+      case 0x5:
+      case 0x6:
+      case 0x7: DecodeBranchSystemException(instr); break;
+
+      // 8,9: Load/store register pair post-index.
+      //      Load register literal.
+      //      Load/store register unscaled immediate.
+      //      Load/store register immediate post-index.
+      //      Load/store register immediate pre-index.
+      //      Load/store register offset.
+      //      Load/store exclusive.
+      // C,D: Load/store register pair offset.
+      //      Load/store register pair pre-index.
+      //      Load/store register unsigned immediate.
+      //      Advanced SIMD.
+      case 0x8:
+      case 0x9:
+      case 0xC:
+      case 0xD: DecodeLoadStore(instr); break;
+
+      // E:   FP fixed point conversion.
+      //      FP integer conversion.
+      //      FP data processing 1 source.
+      //      FP compare.
+      //      FP immediate.
+      //      FP data processing 2 source.
+      //      FP conditional compare.
+      //      FP conditional select.
+      //      Advanced SIMD.
+      // F:   FP data processing 3 source.
+      //      Advanced SIMD.
+      case 0xE:
+      case 0xF: DecodeFP(instr); break;
+    }
+  }
+}
+
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+  visitors_.remove(new_visitor);  // A visitor is registered at most once.
+  visitors_.push_back(new_visitor);  // Back of the list: called last.
+}
+
+
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+  visitors_.remove(new_visitor);  // A visitor is registered at most once.
+  visitors_.push_front(new_visitor);  // Front of the list: called first.
+}
+
+
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+                                  DecoderVisitor* registered_visitor) {
+  visitors_.remove(new_visitor);  // Re-inserting moves an existing entry.
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      visitors_.insert(it, new_visitor);  // Lands directly before the match.
+      return;
+    }
+  }
+  // registered_visitor was not found, so 'it' is end() and must not be
+  // dereferenced. Fall back to adding new_visitor at the end of the list.
+  ASSERT(it == visitors_.end());
+  visitors_.insert(it, new_visitor);
+}
+
+
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+                                 DecoderVisitor* registered_visitor) {
+  visitors_.remove(new_visitor);  // Re-inserting moves an existing entry.
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      it++;  // insert() places the element before 'it': step past the match.
+      visitors_.insert(it, new_visitor);
+      return;
+    }
+  }
+  // registered_visitor was not found, so 'it' is end() and must not be
+  // dereferenced. Fall back to adding new_visitor at the end of the list.
+  ASSERT(it == visitors_.end());
+  visitors_.push_back(new_visitor);
+}
+
+
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+  visitors_.remove(visitor);  // Drops every list entry equal to visitor.
+}
+
+
+void Decoder::DecodePCRelAddressing(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x0);
+  // We know bit 28 is set, as <b28:b27> = 0 is filtered out at the top level
+  // decode.
+  ASSERT(instr->Bit(28) == 0x1);
+  VisitPCRelAddressing(instr);  // No further sub-decoding for this group.
+}
+
+
+void Decoder::DecodeBranchSystemException(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0x4) ||
+         (instr->Bits(27, 24) == 0x5) ||
+         (instr->Bits(27, 24) == 0x6) ||
+         (instr->Bits(27, 24) == 0x7) );
+
+  switch (instr->Bits(31, 29)) {  // All eight values have a case below.
+    case 0:
+    case 4: {
+      VisitUnconditionalBranch(instr);
+      break;
+    }
+    case 1:
+    case 5: {
+      if (instr->Bit(25) == 0) {  // Bit 25: compare-branch vs test-branch.
+        VisitCompareBranch(instr);
+      } else {
+        VisitTestBranch(instr);
+      }
+      break;
+    }
+    case 2: {  // Conditional branch, or unallocated.
+      if (instr->Bit(25) == 0) {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Mask(0x01000010) == 0x00000010)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitConditionalBranch(instr);
+        }
+      } else {
+        VisitUnallocated(instr);
+      }
+      break;
+    }
+    case 6: {  // Exception, system or branch-to-register, by bits 25 and 24.
+      if (instr->Bit(25) == 0) {
+        if (instr->Bit(24) == 0) {
+          if ((instr->Bits(4, 2) != 0) ||
+              (instr->Mask(0x00E0001D) == 0x00200001) ||
+              (instr->Mask(0x00E0001D) == 0x00400001) ||
+              (instr->Mask(0x00E0001E) == 0x00200002) ||
+              (instr->Mask(0x00E0001E) == 0x00400002) ||
+              (instr->Mask(0x00E0001C) == 0x00600000) ||
+              (instr->Mask(0x00E0001C) == 0x00800000) ||
+              (instr->Mask(0x00E0001F) == 0x00A00000) ||
+              (instr->Mask(0x00C0001C) == 0x00C00000)) {
+            VisitUnallocated(instr);
+          } else {
+            VisitException(instr);
+          }
+        } else {
+          if (instr->Bits(23, 22) == 0) {
+            const Instr masked_003FF0E0 = instr->Mask(0x003FF0E0);  // Reused.
+            if ((instr->Bits(21, 19) == 0x4) ||
+                (masked_003FF0E0 == 0x00033000) ||
+                (masked_003FF0E0 == 0x003FF020) ||
+                (masked_003FF0E0 == 0x003FF060) ||
+                (masked_003FF0E0 == 0x003FF0E0) ||
+                (instr->Mask(0x00388000) == 0x00008000) ||
+                (instr->Mask(0x0038E000) == 0x00000000) ||
+                (instr->Mask(0x0039E000) == 0x00002000) ||
+                (instr->Mask(0x003AE000) == 0x00002000) ||
+                (instr->Mask(0x003CE000) == 0x00042000) ||
+                (instr->Mask(0x003FFFC0) == 0x000320C0) ||
+                (instr->Mask(0x003FF100) == 0x00032100) ||
+                (instr->Mask(0x003FF200) == 0x00032200) ||
+                (instr->Mask(0x003FF400) == 0x00032400) ||
+                (instr->Mask(0x003FF800) == 0x00032800) ||
+                (instr->Mask(0x0038F000) == 0x00005000) ||
+                (instr->Mask(0x0038E000) == 0x00006000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitSystem(instr);
+            }
+          } else {
+            VisitUnallocated(instr);
+          }
+        }
+      } else {
+        if ((instr->Bit(24) == 0x1) ||
+            (instr->Bits(20, 16) != 0x1F) ||
+            (instr->Bits(15, 10) != 0) ||
+            (instr->Bits(4, 0) != 0) ||
+            (instr->Bits(24, 21) == 0x3) ||
+            (instr->Bits(24, 22) == 0x3)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitUnconditionalBranchToRegister(instr);
+        }
+      }
+      break;
+    }
+    case 3:
+    case 7: {
+      VisitUnallocated(instr);
+      break;
+    }
+  }
+}
+
+
+void Decoder::DecodeLoadStore(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0x8) ||
+         (instr->Bits(27, 24) == 0x9) ||
+         (instr->Bits(27, 24) == 0xC) ||
+         (instr->Bits(27, 24) == 0xD) );
+
+  if (instr->Bit(24) == 0) {  // Groups 0x8 and 0xC.
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        if (instr->Bit(26) == 0) {
+          // TODO: VisitLoadStoreExclusive.
+          VisitUnimplemented(instr);
+        } else {
+          DecodeAdvSIMDLoadStore(instr);
+        }
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            if (instr->Mask(0xC4400000) == 0xC0400000) {
+              VisitUnallocated(instr);
+            } else {
+              VisitLoadStorePairNonTemporal(instr);
+            }
+          } else {
+            VisitLoadStorePairPostIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        if (instr->Mask(0xC4000000) == 0xC4000000) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadLiteral(instr);
+        }
+      } else {
+        if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+            (instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(21) == 0) {
+            switch (instr->Bits(11, 10)) {  // All four values are handled.
+              case 0: {
+                VisitLoadStoreUnscaledOffset(instr);
+                break;
+              }
+              case 1: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePostIndex(instr);
+                }
+                break;
+              }
+              case 2: {
+                // TODO: VisitLoadStoreRegisterOffsetUnpriv.
+                VisitUnimplemented(instr);
+                break;
+              }
+              case 3: {
+                if (instr->Mask(0xC4C00000) == 0xC0800000) {
+                  VisitUnallocated(instr);
+                } else {
+                  VisitLoadStorePreIndex(instr);
+                }
+                break;
+              }
+            }
+          } else {
+            if (instr->Bits(11, 10) == 0x2) {
+              if (instr->Bit(14) == 0) {
+                VisitUnallocated(instr);
+              } else {
+                VisitLoadStoreRegisterOffset(instr);
+              }
+            } else {
+              VisitUnallocated(instr);
+            }
+          }
+        }
+      }
+    }
+  } else {  // Groups 0x9 and 0xD.
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Bits(31, 30) == 0x3) ||
+            (instr->Mask(0xC4400000) == 0x40000000)) {
+          VisitUnallocated(instr);
+        } else {
+          if (instr->Bit(23) == 0) {
+            VisitLoadStorePairOffset(instr);
+          } else {
+            VisitLoadStorePairPreIndex(instr);
+          }
+        }
+      }
+    } else {
+      if (instr->Bit(29) == 0) {
+        VisitUnallocated(instr);
+      } else {
+        if ((instr->Mask(0x84C00000) == 0x80C00000) ||
+            (instr->Mask(0x44800000) == 0x44800000) ||
+            (instr->Mask(0x84800000) == 0x84800000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitLoadStoreUnsignedOffset(instr);
+        }
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeLogical(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x2);
+
+  if (instr->Mask(0x80400000) == 0x00400000) {  // Bit 31 clear, bit 22 set.
+    VisitUnallocated(instr);
+    return;
+  }
+  if (instr->Bit(23) == 0) {  // Bit 23 clear: logical immediate.
+    VisitLogicalImmediate(instr);
+    return;
+  }
+  if (instr->Bits(30, 29) != 0x1) {  // Bit 23 set: move wide immediate.
+    VisitMoveWideImmediate(instr);
+    return;
+  }
+  VisitUnallocated(instr);  // <30:29> == 1 is unallocated for move wide.
+}
+
+
+void Decoder::DecodeBitfieldExtract(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x3);
+
+  if ((instr->Mask(0x80400000) == 0x80000000) ||
+      (instr->Mask(0x80400000) == 0x00400000) ||
+      (instr->Mask(0x80008000) == 0x00008000)) {
+    VisitUnallocated(instr);  // Rejected for both bitfield and extract.
+    return;
+  }
+  const bool is_bitfield = (instr->Bit(23) == 0);  // Otherwise: extract.
+  const bool allocated = is_bitfield
+      ? ((instr->Mask(0x80200000) != 0x00200000) &&
+         (instr->Mask(0x60000000) != 0x60000000))
+      : ((instr->Mask(0x60200000) != 0x00200000) &&
+         (instr->Mask(0x60000000) == 0x00000000));
+  if (!allocated) {
+    VisitUnallocated(instr);
+  } else if (is_bitfield) {
+    VisitBitfield(instr);
+  } else {
+    VisitExtract(instr);
+  }
+}
+
+
+void Decoder::DecodeAddSubImmediate(Instruction* instr) {
+  ASSERT(instr->Bits(27, 24) == 0x1);
+  if (instr->Bit(23) != 1) {  // Only encodings without bit 23 are visited.
+    VisitAddSubImmediate(instr);
+    return;
+  }
+  VisitUnallocated(instr);  // Bit 23 set.
+}
+
+
+void Decoder::DecodeDataProcessing(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0xA) ||
+         (instr->Bits(27, 24) == 0xB) );
+  // Sub-decodes groups 0xA and 0xB; every path makes exactly one Visit* call.
+  if (instr->Bit(24) == 0) {  // Group 0xA.
+    if (instr->Bit(28) == 0) {
+      if (instr->Mask(0x80008000) == 0x00008000) {
+        VisitUnallocated(instr);
+      } else {
+        VisitLogicalShifted(instr);
+      }
+    } else {
+      switch (instr->Bits(23, 21)) {  // All eight values have a case below.
+        case 0: {
+          if (instr->Mask(0x0000FC00) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitAddSubWithCarry(instr);
+          }
+          break;
+        }
+        case 2: {
+          if ((instr->Bit(29) == 0) ||
+              (instr->Mask(0x00000410) != 0)) {
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(11) == 0) {
+              VisitConditionalCompareRegister(instr);
+            } else {
+              VisitConditionalCompareImmediate(instr);
+            }
+          }
+          break;
+        }
+        case 4: {
+          if (instr->Mask(0x20000800) != 0x00000000) {
+            VisitUnallocated(instr);
+          } else {
+            VisitConditionalSelect(instr);
+          }
+          break;
+        }
+        case 6: {
+          if (instr->Bit(29) == 0x1) {
+            VisitUnallocated(instr);
+          } else {
+            if (instr->Bit(30) == 0) {
+              if ((instr->Bit(15) == 0x1) ||
+                  (instr->Bits(15, 11) == 0) ||
+                  (instr->Bits(15, 12) == 0x1) ||
+                  (instr->Bits(15, 12) == 0x3) ||
+                  (instr->Bits(15, 13) == 0x3) ||
+                  (instr->Mask(0x8000EC00) == 0x00004C00) ||
+                  (instr->Mask(0x8000E800) == 0x80004000) ||
+                  (instr->Mask(0x8000E400) == 0x80004000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing2Source(instr);
+              }
+            } else {
+              if ((instr->Bit(13) == 1) ||
+                  (instr->Bits(20, 16) != 0) ||
+                  (instr->Bits(15, 14) != 0) ||
+                  (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+                  (instr->Mask(0x201FF800) == 0x00001800)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitDataProcessing1Source(instr);
+              }
+            }
+          }
+          break;  // Must cover the bit-29 path too, or it is visited twice.
+        }
+        case 1:
+        case 3:
+        case 5:
+        case 7: VisitUnallocated(instr); break;
+      }
+    }
+  } else {  // Group 0xB.
+    if (instr->Bit(28) == 0) {
+      if (instr->Bit(21) == 0) {
+        if ((instr->Bits(23, 22) == 0x3) ||
+            (instr->Mask(0x80008000) == 0x00008000)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubShifted(instr);
+        }
+      } else {
+        if ((instr->Mask(0x00C00000) != 0x00000000) ||
+            (instr->Mask(0x00001400) == 0x00001400) ||
+            (instr->Mask(0x00001800) == 0x00001800)) {
+          VisitUnallocated(instr);
+        } else {
+          VisitAddSubExtended(instr);
+        }
+      }
+    } else {
+      if ((instr->Bit(30) == 0x1) ||
+          (instr->Bits(30, 29) == 0x1) ||
+          (instr->Mask(0xE0600000) == 0x00200000) ||
+          (instr->Mask(0xE0608000) == 0x00400000) ||
+          (instr->Mask(0x60608000) == 0x00408000) ||
+          (instr->Mask(0x60E00000) == 0x00E00000) ||
+          (instr->Mask(0x60E00000) == 0x00800000) ||
+          (instr->Mask(0x60E00000) == 0x00600000)) {
+        VisitUnallocated(instr);
+      } else {
+        VisitDataProcessing3Source(instr);
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeFP(Instruction* instr) {
+  ASSERT((instr->Bits(27, 24) == 0xE) ||
+         (instr->Bits(27, 24) == 0xF) );
+
+  if (instr->Bit(28) == 0) {
+    DecodeAdvSIMDDataProcessing(instr);
+  } else {
+    if (instr->Bit(29) == 1) {
+      VisitUnallocated(instr);
+    } else {
+      if (instr->Bits(31, 30) == 0x3) {
+        VisitUnallocated(instr);
+      } else if (instr->Bits(31, 30) == 0x1) {
+        DecodeAdvSIMDDataProcessing(instr);
+      } else {
+        if (instr->Bit(24) == 0) {  // Group 0xE.
+          if (instr->Bit(21) == 0) {
+            if ((instr->Bit(23) == 1) ||
+                (instr->Bit(18) == 1) ||
+                (instr->Mask(0x80008000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x00000000) ||
+                (instr->Mask(0x000E0000) == 0x000A0000) ||
+                (instr->Mask(0x00160000) == 0x00000000) ||
+                (instr->Mask(0x00160000) == 0x00120000)) {
+              VisitUnallocated(instr);
+            } else {
+              VisitFPFixedPointConvert(instr);
+            }
+          } else {
+            if (instr->Bits(15, 10) == 32) {
+              VisitUnallocated(instr);
+            } else if (instr->Bits(15, 10) == 0) {
+              if ((instr->Bits(23, 22) == 0x3) ||
+                  (instr->Mask(0x000E0000) == 0x000A0000) ||
+                  (instr->Mask(0x000E0000) == 0x000C0000) ||
+                  (instr->Mask(0x00160000) == 0x00120000) ||
+                  (instr->Mask(0x00160000) == 0x00140000) ||
+                  (instr->Mask(0x20C40000) == 0x00800000) ||
+                  (instr->Mask(0x20C60000) == 0x00840000) ||
+                  (instr->Mask(0xA0C60000) == 0x80060000) ||
+                  (instr->Mask(0xA0C60000) == 0x00860000) ||
+                  (instr->Mask(0xA0C60000) == 0x00460000) ||
+                  (instr->Mask(0xA0CE0000) == 0x80860000) ||
+                  (instr->Mask(0xA0CE0000) == 0x804E0000) ||
+                  (instr->Mask(0xA0CE0000) == 0x000E0000) ||
+                  (instr->Mask(0xA0D60000) == 0x00160000) ||
+                  (instr->Mask(0xA0D60000) == 0x80560000) ||
+                  (instr->Mask(0xA0D60000) == 0x80960000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPIntegerConvert(instr);
+              }
+            } else if (instr->Bits(14, 10) == 16) {
+              const Instr masked_A0DF8000 = instr->Mask(0xA0DF8000);  // Reused.
+              if ((instr->Mask(0x80180000) != 0) ||
+                  (masked_A0DF8000 == 0x00020000) ||
+                  (masked_A0DF8000 == 0x00030000) ||
+                  (masked_A0DF8000 == 0x00068000) ||
+                  (masked_A0DF8000 == 0x00428000) ||
+                  (masked_A0DF8000 == 0x00430000) ||
+                  (masked_A0DF8000 == 0x00468000) ||
+                  (instr->Mask(0xA0D80000) == 0x00800000) ||
+                  (instr->Mask(0xA0DE0000) == 0x00C00000) ||
+                  (instr->Mask(0xA0DF0000) == 0x00C30000) ||
+                  (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPDataProcessing1Source(instr);
+              }
+            } else if (instr->Bits(13, 10) == 8) {
+              if ((instr->Bits(15, 14) != 0) ||
+                  (instr->Bits(2, 0) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPCompare(instr);
+              }
+            } else if (instr->Bits(12, 10) == 4) {
+              if ((instr->Bits(9, 5) != 0) ||
+                  (instr->Mask(0x80800000) != 0x00000000)) {
+                VisitUnallocated(instr);
+              } else {
+                VisitFPImmediate(instr);
+              }
+            } else {
+              if (instr->Mask(0x80800000) != 0x00000000) {
+                VisitUnallocated(instr);
+              } else {
+                switch (instr->Bits(11, 10)) {
+                  case 1: {
+                    VisitFPConditionalCompare(instr);
+                    break;
+                  }
+                  case 2: {
+                    if ((instr->Bits(15, 14) == 0x3) ||
+                        (instr->Mask(0x00009000) == 0x00009000) ||
+                        (instr->Mask(0x0000A000) == 0x0000A000)) {
+                      VisitUnallocated(instr);
+                    } else {
+                      VisitFPDataProcessing2Source(instr);
+                    }
+                    break;
+                  }
+                  case 3: {
+                    VisitFPConditionalSelect(instr);
+                    break;
+                  }
+                  default: UNREACHABLE();  // <11:10> == 0 matched a branch above.
+                }
+              }
+            }
+          }
+        } else {  // Group 0xF.
+          // Bit 30 == 1 has been handled earlier.
+          ASSERT(instr->Bit(30) == 0);
+          if (instr->Mask(0xA0800000) != 0) {
+            VisitUnallocated(instr);
+          } else {
+            VisitFPDataProcessing3Source(instr);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+void Decoder::DecodeAdvSIMDLoadStore(Instruction* instr) {
+  // TODO: Implement Advanced SIMD load/store instruction decode.
+  ASSERT(instr->Bits(29, 25) == 0x6);
+  VisitUnimplemented(instr);  // Placeholder until the decode above exists.
+}
+
+
+void Decoder::DecodeAdvSIMDDataProcessing(Instruction* instr) {
+  // TODO: Implement Advanced SIMD data processing instruction decode.
+  ASSERT(instr->Bits(27, 25) == 0x7);
+  VisitUnimplemented(instr);  // Placeholder until the decode above exists.
+}
+
+
+#define DEFINE_VISITOR_CALLERS(A)                                              \
+  void Decoder::Visit##A(Instruction *instr) {                                 \
+    ASSERT(instr->Mask(A##FMask) == A##Fixed);                                 \
+    std::list<DecoderVisitor*>::iterator it;                                   \
+    for (it = visitors_.begin(); it != visitors_.end(); it++) {                \
+      (*it)->Visit##A(instr);                                                  \
+    }                                                                          \
+  }
+VISITOR_LIST(DEFINE_VISITOR_CALLERS)  // One Decoder::Visit<A>() per list entry; each forwards to every visitor in list order.
+#undef DEFINE_VISITOR_CALLERS  // The macro is only needed for the expansion above.
+}  // namespace vixl
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -0,0 +1,198 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DECODER_A64_H_
+#define VIXL_A64_DECODER_A64_H_
+
+#include <list>
+
+#include "globals.h"
+#include "a64/instructions-a64.h"
+
+
+// List macro containing all visitors needed by the decoder class.
+//
+// Expanding VISITOR_LIST(V) applies V to each instruction class name below, in
+// order. This header uses it to declare one Visit<Name>(Instruction*) method
+// per entry, so adding an entry here adds a method to every visitor.
+
+#define VISITOR_LIST(V)             \
+  V(PCRelAddressing)                \
+  V(AddSubImmediate)                \
+  V(LogicalImmediate)               \
+  V(MoveWideImmediate)              \
+  V(Bitfield)                       \
+  V(Extract)                        \
+  V(UnconditionalBranch)            \
+  V(UnconditionalBranchToRegister)  \
+  V(CompareBranch)                  \
+  V(TestBranch)                     \
+  V(ConditionalBranch)              \
+  V(System)                         \
+  V(Exception)                      \
+  V(LoadStorePairPostIndex)         \
+  V(LoadStorePairOffset)            \
+  V(LoadStorePairPreIndex)          \
+  V(LoadStorePairNonTemporal)       \
+  V(LoadLiteral)                    \
+  V(LoadStoreUnscaledOffset)        \
+  V(LoadStorePostIndex)             \
+  V(LoadStorePreIndex)              \
+  V(LoadStoreRegisterOffset)        \
+  V(LoadStoreUnsignedOffset)        \
+  V(LogicalShifted)                 \
+  V(AddSubShifted)                  \
+  V(AddSubExtended)                 \
+  V(AddSubWithCarry)                \
+  V(ConditionalCompareRegister)     \
+  V(ConditionalCompareImmediate)    \
+  V(ConditionalSelect)              \
+  V(DataProcessing1Source)          \
+  V(DataProcessing2Source)          \
+  V(DataProcessing3Source)          \
+  V(FPCompare)                      \
+  V(FPConditionalCompare)           \
+  V(FPConditionalSelect)            \
+  V(FPImmediate)                    \
+  V(FPDataProcessing1Source)        \
+  V(FPDataProcessing2Source)        \
+  V(FPDataProcessing3Source)        \
+  V(FPIntegerConvert)               \
+  V(FPFixedPointConvert)            \
+  V(Unallocated)                    \
+  V(Unimplemented)
+
+namespace vixl {
+
+// The Visitor interface. Disassembler and simulator (and other tools)
+// must provide implementations for all of these functions.
+class DecoderVisitor {
+ public:
+  // One pure virtual Visit<Name>(Instruction*) per VISITOR_LIST entry.
+  #define DECLARE(A) virtual void Visit##A(Instruction* instr) = 0;
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+  virtual ~DecoderVisitor() {}
+
+ private:
+  // Visitors are registered in a list.
+  // NOTE(review): the list is a member of the interface, so every visitor
+  // object carries one. It is private and Decoder is the only friend declared
+  // here; confirm whether it was meant to be a member of Decoder instead.
+  std::list<DecoderVisitor*> visitors_;
+
+  friend class Decoder;
+};
+
+
+// Instruction decoder. Decoder is itself a DecoderVisitor: it declares a
+// Visit<Name>() method for every VISITOR_LIST entry, alongside the functions
+// used to register and unregister the visitors that Decode() calls.
+class Decoder: public DecoderVisitor {
+ public:
+  Decoder() {}
+
+  // Top-level instruction decoder function. Decodes an instruction and calls
+  // the visitor functions registered with the Decoder class.
+  void Decode(Instruction *instr);
+
+  // Register a new visitor class with the decoder.
+  // Decode() will call the corresponding visitor method from all registered
+  // visitor classes when decoding reaches the leaf node of the instruction
+  // decode tree.
+  // Visitors are called in the order in which they appear in the list.
+  // A visitor can only be registered once.
+  // Registering an already registered visitor will update its position.
+  //
+  //   d.AppendVisitor(V1);
+  //   d.AppendVisitor(V2);
+  //   d.PrependVisitor(V2);            // Move V2 to the start of the list.
+  //   d.InsertVisitorBefore(V3, V2);
+  //   d.AppendVisitor(V4);
+  //   d.AppendVisitor(V4);             // No effect.
+  //
+  //   d.Decode(i);
+  //
+  // will call in order visitor methods in V3, V2, V1, V4.
+  void AppendVisitor(DecoderVisitor* visitor);
+  void PrependVisitor(DecoderVisitor* visitor);
+  void InsertVisitorBefore(DecoderVisitor* new_visitor,
+                           DecoderVisitor* registered_visitor);
+  void InsertVisitorAfter(DecoderVisitor* new_visitor,
+                          DecoderVisitor* registered_visitor);
+
+  // Remove a previously registered visitor class from the list of visitors
+  // stored by the decoder.
+  void RemoveVisitor(DecoderVisitor* visitor);
+
+  // One Visit<Name>(Instruction*) declaration per VISITOR_LIST entry; these
+  // provide the pure virtual functions inherited from DecoderVisitor.
+  #define DECLARE(A) void Visit##A(Instruction* instr);
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+ private:
+  // Decode the PC relative addressing instruction, and call the corresponding
+  // visitors.
+  // On entry, instruction bits 27:24 = 0x0.
+  void DecodePCRelAddressing(Instruction* instr);
+
+  // Decode the add/subtract immediate instruction, and call the corresponding
+  // visitors.
+  // On entry, instruction bits 27:24 = 0x1.
+  void DecodeAddSubImmediate(Instruction* instr);
+
+  // Decode the branch, system command, and exception generation parts of
+  // the instruction tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x4, 0x5, 0x6, 0x7}.
+  void DecodeBranchSystemException(Instruction* instr);
+
+  // Decode the load and store parts of the instruction tree, and call
+  // the corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x8, 0x9, 0xC, 0xD}.
+  void DecodeLoadStore(Instruction* instr);
+
+  // Decode the logical immediate and move wide immediate parts of the
+  // instruction tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = 0x2.
+  void DecodeLogical(Instruction* instr);
+
+  // Decode the bitfield and extraction parts of the instruction tree,
+  // and call the corresponding visitors.
+  // On entry, instruction bits 27:24 = 0x3.
+  void DecodeBitfieldExtract(Instruction* instr);
+
+  // Decode the data processing parts of the instruction tree, and call the
+  // corresponding visitors.
+  // On entry, instruction bits 27:24 = {0x1, 0xA, 0xB}.
+  void DecodeDataProcessing(Instruction* instr);
+
+  // Decode the floating point parts of the instruction tree, and call the
+  // corresponding visitors.
+  // On entry, instruction bits 27:24 = {0xE, 0xF}.
+  void DecodeFP(Instruction* instr);
+
+  // Decode the Advanced SIMD (NEON) load/store part of the instruction tree,
+  // and call the corresponding visitors.
+  // On entry, instruction bits 29:25 = 0x6.
+  void DecodeAdvSIMDLoadStore(Instruction* instr);
+
+  // Decode the Advanced SIMD (NEON) data processing part of the instruction
+  // tree, and call the corresponding visitors.
+  // On entry, instruction bits 27:25 = 0x7.
+  void DecodeAdvSIMDDataProcessing(Instruction* instr);
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_DECODER_A64_H_
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -0,0 +1,109 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_DISASM_A64_H
+#define VIXL_A64_DISASM_A64_H
+
+#include "globals.h"
+#include "utils.h"
+#include "instructions-a64.h"
+#include "decoder-a64.h"
+
+namespace vixl {
+
+// DecoderVisitor implementation for disassembly. Only declarations are
+// visible here; the notes below marked "presumably" are inferred from names
+// and should be confirmed against disasm-a64.cc.
+class Disassembler: public DecoderVisitor {
+ public:
+  // The second constructor takes a caller-provided text buffer and its size.
+  // NOTE(review): presumably the default constructor allocates its own buffer
+  // (see own_buffer_) - confirm in the implementation.
+  Disassembler();
+  Disassembler(char* text_buffer, int buffer_size);
+  virtual ~Disassembler();
+  char* GetOutput();
+
+  // Declare all Visitor functions.
+  #define DECLARE(A)  void Visit##A(Instruction* instr);
+  VISITOR_LIST(DECLARE)
+  #undef DECLARE
+
+ protected:
+  // Virtual hook that subclasses may override (PrintDisassembler does).
+  // NOTE(review): presumably called once the text for 'instr' is complete.
+  virtual void ProcessOutput(Instruction* instr);
+
+ private:
+  // Formatting helpers.
+  // NOTE(review): the int-returning Substitute*Field functions presumably
+  // return the number of format characters consumed - confirm.
+  void Format(Instruction* instr, const char* mnemonic, const char* format);
+  void Substitute(Instruction* instr, const char* string);
+  int SubstituteField(Instruction* instr, const char* format);
+  int SubstituteRegisterField(Instruction* instr, const char* format);
+  int SubstituteImmediateField(Instruction* instr, const char* format);
+  int SubstituteLiteralField(Instruction* instr, const char* format);
+  int SubstituteBitfieldImmediateField(Instruction* instr, const char* format);
+  int SubstituteShiftField(Instruction* instr, const char* format);
+  int SubstituteExtendField(Instruction* instr, const char* format);
+  int SubstituteConditionField(Instruction* instr, const char* format);
+  int SubstitutePCRelAddressField(Instruction* instr, const char* format);
+  int SubstituteBranchTargetField(Instruction* instr, const char* format);
+  int SubstituteLSRegOffsetField(Instruction* instr, const char* format);
+  int SubstitutePrefetchField(Instruction* instr, const char* format);
+
+  // Each of the four predicates below is true when the named register field
+  // holds kZeroRegCode. They do not distinguish the zero register from the
+  // stack pointer.
+  inline bool RdIsZROrSP(Instruction* instr) const {
+    return (instr->Rd() == kZeroRegCode);
+  }
+
+  inline bool RnIsZROrSP(Instruction* instr) const {
+    return (instr->Rn() == kZeroRegCode);
+  }
+
+  inline bool RmIsZROrSP(Instruction* instr) const {
+    return (instr->Rm() == kZeroRegCode);
+  }
+
+  inline bool RaIsZROrSP(Instruction* instr) const {
+    return (instr->Ra() == kZeroRegCode);
+  }
+
+  bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
+
+  void ResetOutput();
+  void AppendToOutput(const char* string, ...);
+
+  // Output buffer state.
+  // NOTE(review): presumably buffer_pos_ is the write offset into buffer_ and
+  // own_buffer_ records whether the destructor must free it - confirm.
+  char* buffer_;
+  uint32_t buffer_pos_;
+  uint32_t buffer_size_;
+  bool own_buffer_;
+};
+
+
+// Disassembler subclass that remembers a FILE* stream and overrides
+// ProcessOutput().
+// NOTE(review): presumably the override writes each disassembled instruction
+// to stream_ - confirm in disasm-a64.cc.
+class PrintDisassembler: public Disassembler {
+ public:
+  // Stores 'stream' as given; the base class is default-constructed.
+  explicit PrintDisassembler(FILE* stream) : stream_(stream) { }
+  ~PrintDisassembler() { }
+
+ protected:
+  virtual void ProcessOutput(Instruction* instr);
+
+ private:
+  FILE *stream_;
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_DISASM_A64_H
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -0,0 +1,238 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "a64/instructions-a64.h"
+#include "a64/assembler-a64.h"
+
+namespace vixl {
+
+
+// Rotate the low 'width' bits of 'value' right by 'rotate' positions.
+//
+// 'rotate' is first reduced modulo 64. The result is only meaningful when
+// 'value' fits in 'width' bits and the reduced rotation is smaller than
+// 'width'.
+static uint64_t RotateRight(uint64_t value,
+                            unsigned int rotate,
+                            unsigned int width) {
+  ASSERT(width <= 64);
+  rotate &= 63;
+  if (rotate == 0) {
+    // Nothing to rotate. Returning early also avoids shifting a 64-bit value
+    // by (width - 0), which is undefined behaviour when width is 64.
+    return value;
+  }
+  // Build the mask from a 64-bit one: 'unsigned long' is only 32 bits wide on
+  // some hosts, where shifting 1UL by 32 or more would be undefined.
+  const uint64_t wrapped_mask = (static_cast<uint64_t>(1) << rotate) - 1;
+  return ((value & wrapped_mask) << (width - rotate)) | (value >> rotate);
+}
+
+
+// Replicate the low 'width' bits of 'value' across a register of 'reg_size'
+// bits and return the result.
+//
+// 'width' must be 2, 4, 8, 16 or 32, and 'reg_size' must be kWRegSize or
+// kXRegSize.
+static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
+                                    uint64_t value,
+                                    unsigned width) {
+  ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) ||
+         (width == 32));
+  ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
+  // Shift a 64-bit one: 'unsigned long' is only 32 bits wide on some hosts,
+  // where 1UL << 32 would be undefined.
+  uint64_t result = value & ((static_cast<uint64_t>(1) << width) - 1);
+  // Each pass doubles the number of populated bits.
+  for (unsigned i = width; i < reg_size; i *= 2) {
+    result |= (result << i);
+  }
+  return result;
+}
+
+
+// Decode the immediate of a logical (immediate) instruction from its N, imms
+// and immr fields.
+//
+// Logical immediates can't encode zero, so a return value of zero is used to
+// indicate a failure case. Specifically, where the constraints on imm_s are
+// not met.
+uint64_t Instruction::ImmLogical() {
+  unsigned reg_size = SixtyFourBits() ? kXRegSize : kWRegSize;
+  int64_t n = BitN();
+  int64_t imm_s = ImmSetBits();
+  int64_t imm_r = ImmRotate();
+  // All shifts below start from a 64-bit one: 'unsigned long' is only 32 bits
+  // wide on some hosts, where 1UL << 32 (or more) would be undefined.
+  const uint64_t one = 1;
+
+  // An integer is constructed from the n, imm_s and imm_r bits according to
+  // the following table:
+  //
+  //  N   imms    immr    size        S             R
+  //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
+  //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
+  //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
+  //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
+  //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
+  //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
+  // (s bits must not be all set)
+  //
+  // A pattern is constructed of size bits, where the least significant S+1
+  // bits are set. The pattern is rotated right by R, and repeated across a
+  // 32 or 64-bit value, depending on destination register width.
+  //
+
+  if (n == 1) {
+    if (imm_s == 0x3F) {
+      return 0;
+    }
+    uint64_t bits = (one << (imm_s + 1)) - 1;
+    return RotateRight(bits, imm_r, 64);
+  } else {
+    if ((imm_s >> 1) == 0x1F) {
+      return 0;
+    }
+    // The highest clear bit of imm_s selects the element size.
+    for (int width = 0x20; width >= 0x2; width >>= 1) {
+      if ((imm_s & width) == 0) {
+        int mask = width - 1;
+        if ((imm_s & mask) == mask) {
+          return 0;
+        }
+        uint64_t bits = (one << ((imm_s & mask) + 1)) - 1;
+        return RepeatBitsAcrossReg(reg_size,
+                                   RotateRight(bits, imm_r & mask, width),
+                                   width);
+      }
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
+// Expand the 8-bit ImmFP field into a single-precision value.
+float Instruction::ImmFP32() {
+  //  ImmFP: abcdefgh (8 bits)
+  // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
+  // where B is b ^ 1
+  const uint32_t imm8 = ImmFP();
+  const uint32_t a = (imm8 >> 7) & 0x1;
+  const uint32_t b = (imm8 >> 6) & 0x1;
+  const uint32_t cdefgh = imm8 & 0x3f;
+
+  uint32_t raw = a << 31;
+  // (32 - b) is 100000 when b is 0 and 011111 when b is 1, i.e. "Bbbbbb".
+  raw |= (32 - b) << 25;
+  raw |= cdefgh << 19;
+
+  return rawbits_to_float(raw);
+}
+
+
+// Expand the 8-bit ImmFP field into a double-precision value.
+double Instruction::ImmFP64() {
+  //  ImmFP: abcdefgh (8 bits)
+  // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+  //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
+  // where B is b ^ 1
+  const uint32_t imm8 = ImmFP();
+  const uint64_t a = (imm8 >> 7) & 0x1;
+  const uint64_t b = (imm8 >> 6) & 0x1;
+  const uint64_t cdefgh = imm8 & 0x3f;
+
+  uint64_t raw = a << 63;
+  // (256 - b) is 1.0000.0000 when b is 0 and 0.1111.1111 when b is 1.
+  raw |= (256 - b) << 54;
+  raw |= cdefgh << 48;
+
+  return rawbits_to_double(raw);
+}
+
+
+// Return the per-register data size for a load/store pair opcode:
+// LSDoubleWord for the x and d variants of STP/LDP, LSWord for anything else.
+LSDataSize CalcLSPairDataSize(LoadStorePairOp op) {
+  const bool is_double_word =
+      (op == STP_x) || (op == LDP_x) || (op == STP_d) || (op == LDP_d);
+  return is_double_word ? LSDoubleWord : LSWord;
+}
+
+
+// Return the address this instruction refers to. 'this' must be either a
+// PC-relative addressing instruction or a PC-relative branch.
+Instruction* Instruction::ImmPCOffsetTarget() {
+  ptrdiff_t offset;
+  if (IsPCRelAddressing()) {
+    // PC-relative addressing. Only ADR is supported.
+    offset = ImmPCRel();
+  } else {
+    // All PC-relative branches.
+    ASSERT(BranchType() != UnknownBranchType);
+    // Relative branch offsets are instruction-size-aligned. Scale with a
+    // signed multiplication rather than a left shift: backward branches have
+    // a negative immediate, and left-shifting a negative value is undefined.
+    offset = static_cast<ptrdiff_t>(ImmBranch()) *
+             static_cast<ptrdiff_t>(kInstructionSize);
+  }
+  return this + offset;
+}
+
+
+// Return the branch immediate field that matches this instruction's branch
+// type. Reaching the default case means 'this' is not an immediate branch.
+inline int Instruction::ImmBranch() const {
+  int imm = 0;
+  switch (BranchType()) {
+    case CondBranchType:
+      imm = ImmCondBranch();
+      break;
+    case UncondBranchType:
+      imm = ImmUncondBranch();
+      break;
+    case CompareBranchType:
+      imm = ImmCmpBranch();
+      break;
+    case TestBranchType:
+      imm = ImmTestBranch();
+      break;
+    default:
+      UNREACHABLE();
+  }
+  return imm;
+}
+
+
+// Patch this instruction's PC-relative offset so that it refers to 'target'.
+// Anything that is not a PC-relative addressing instruction is treated as a
+// branch.
+void Instruction::SetImmPCOffsetTarget(Instruction* target) {
+  if (!IsPCRelAddressing()) {
+    SetBranchImmTarget(target);
+    return;
+  }
+  SetPCRelImmTarget(target);
+}
+
+
+// Rewrite the immediate of an ADR instruction so that it addresses 'target'.
+void Instruction::SetPCRelImmTarget(Instruction* target) {
+  // ADRP is not supported, so 'this' must point to an ADR instruction.
+  ASSERT(Mask(PCRelAddressingMask) == ADR);
+
+  const Instr encoded_offset = Assembler::ImmPCRelAddress(target - this);
+  const Instr other_bits = Mask(~ImmPCRel_mask);
+
+  SetInstructionBits(other_bits | encoded_offset);
+}
+
+
+// Rewrite the immediate of a PC-relative branch so that it jumps to 'target'.
+// The distance between 'target' and 'this' must be a multiple of four.
+void Instruction::SetBranchImmTarget(Instruction* target) {
+  ASSERT(((target - this) & 3) == 0);
+  // The branch immediate counts instructions.
+  int instr_offset = (target - this) >> kInstructionSizeLog2;
+  Instr encoded_imm = 0;
+  uint32_t field_mask = 0;
+  switch (BranchType()) {
+    case CondBranchType:
+      encoded_imm = Assembler::ImmCondBranch(instr_offset);
+      field_mask = ImmCondBranch_mask;
+      break;
+    case UncondBranchType:
+      encoded_imm = Assembler::ImmUncondBranch(instr_offset);
+      field_mask = ImmUncondBranch_mask;
+      break;
+    case CompareBranchType:
+      encoded_imm = Assembler::ImmCmpBranch(instr_offset);
+      field_mask = ImmCmpBranch_mask;
+      break;
+    case TestBranchType:
+      encoded_imm = Assembler::ImmTestBranch(instr_offset);
+      field_mask = ImmTestBranch_mask;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  // Keep every bit outside the immediate field and splice the new one in.
+  SetInstructionBits(Mask(~field_mask) | encoded_imm);
+}
+
+
+// Patch a literal load so that it loads from 'source'. The distance between
+// 'source' and 'this' must be a multiple of four.
+void Instruction::SetImmLLiteral(Instruction* source) {
+  ASSERT(((source - this) & 3) == 0);
+  // The literal immediate counts literal-pool entries.
+  int entry_offset = (source - this) >> kLiteralEntrySizeLog2;
+  Instr encoded_imm = Assembler::ImmLLiteral(entry_offset);
+  Instr field_mask = ImmLLiteral_mask;
+
+  Instr other_bits = Mask(~field_mask);
+  SetInstructionBits(other_bits | encoded_imm);
+}
+}  // namespace vixl
+
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -0,0 +1,344 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_A64_INSTRUCTIONS_A64_H_
+#define VIXL_A64_INSTRUCTIONS_A64_H_
+
+#include "globals.h"
+#include "utils.h"
+#include "a64/constants-a64.h"
+
+namespace vixl {
+// ISA constants. --------------------------------------------------------------
+
+// Raw 32-bit encoding of a single instruction.
+typedef uint32_t Instr;
+// Sizes of an instruction and of a literal entry, each with its log2.
+const unsigned kInstructionSize = 4;
+const unsigned kInstructionSizeLog2 = 2;
+const unsigned kLiteralEntrySize = 4;
+const unsigned kLiteralEntrySizeLog2 = 2;
+const unsigned kMaxLoadLiteralRange = 1 * MBytes;
+
+// Register sizes in bits for W, X, S and D registers, each with its log2 and
+// its size in bytes.
+const unsigned kWRegSize = 32;
+const unsigned kWRegSizeLog2 = 5;
+const unsigned kWRegSizeInBytes = kWRegSize / 8;
+const unsigned kXRegSize = 64;
+const unsigned kXRegSizeLog2 = 6;
+const unsigned kXRegSizeInBytes = kXRegSize / 8;
+const unsigned kSRegSize = 32;
+const unsigned kSRegSizeLog2 = 5;
+const unsigned kSRegSizeInBytes = kSRegSize / 8;
+const unsigned kDRegSize = 64;
+const unsigned kDRegSizeLog2 = 6;
+const unsigned kDRegSizeInBytes = kDRegSize / 8;
+// Value masks, sign-bit masks and integer limits for the sizes above.
+const int64_t kWRegMask = 0x00000000ffffffffLL;
+const int64_t kXRegMask = 0xffffffffffffffffLL;
+const int64_t kSRegMask = 0x00000000ffffffffLL;
+const int64_t kDRegMask = 0xffffffffffffffffLL;
+const int64_t kXSignMask = 0x1LL << 63;
+const int64_t kWSignMask = 0x1LL << 31;
+const int64_t kByteMask = 0xffL;
+const int64_t kHalfWordMask = 0xffffL;
+const int64_t kWordMask = 0xffffffffLL;
+const uint64_t kXMaxUInt = 0xffffffffffffffffULL;
+const uint64_t kWMaxUInt = 0xffffffffULL;
+const int64_t kXMaxInt = 0x7fffffffffffffffLL;
+const int64_t kXMinInt = 0x8000000000000000LL;
+const int32_t kWMaxInt = 0x7fffffff;
+const int32_t kWMinInt = 0x80000000;
+// Register codes. kRegCodeMask keeps the low five bits of a code.
+// NOTE(review): kSPRegInternalCode presumably lets the library tell SP apart
+// from the zero register, which share code 31 in encodings - confirm.
+const unsigned kLinkRegCode = 30;
+const unsigned kZeroRegCode = 31;
+const unsigned kSPRegInternalCode = 63;
+const unsigned kRegCodeMask = 0x1f;
+
+// AArch64 floating-point specifics. These match IEEE-754.
+const unsigned kDoubleMantissaBits = 52;
+const unsigned kDoubleExponentBits = 11;
+const unsigned kFloatMantissaBits = 23;
+const unsigned kFloatExponentBits = 8;
+
+const float kFP32PositiveInfinity = rawbits_to_float(0x7f800000);
+const float kFP32NegativeInfinity = rawbits_to_float(0xff800000);
+const double kFP64PositiveInfinity = rawbits_to_double(0x7ff0000000000000ULL);
+const double kFP64NegativeInfinity = rawbits_to_double(0xfff0000000000000ULL);
+
+// This value is a signalling NaN as both a double and as a float (taking the
+// least-significant word).
+static const double kFP64SignallingNaN = rawbits_to_double(0x7ff000007f800001ULL);
+static const float kFP32SignallingNaN = rawbits_to_float(0x7f800001);
+
+// A similar value, but as a quiet NaN.
+static const double kFP64QuietNaN = rawbits_to_double(0x7ff800007fc00001ULL);
+static const float kFP32QuietNaN = rawbits_to_float(0x7fc00001);
+
+// Data size of a load/store access.
+// NOTE(review): the values look like log2 of the access size in bytes -
+// confirm before relying on that.
+enum LSDataSize {
+  LSByte        = 0,
+  LSHalfword    = 1,
+  LSWord        = 2,
+  LSDoubleWord  = 3
+};
+
+// Map a load/store pair opcode to the data size of each register in the pair.
+LSDataSize CalcLSPairDataSize(LoadStorePairOp op);
+
+// Kinds of branch that carry an immediate offset. UnknownBranchType is what
+// Instruction::BranchType() returns when none of the others match.
+enum ImmBranchType {
+  UnknownBranchType = 0,
+  CondBranchType    = 1,
+  UncondBranchType  = 2,
+  CompareBranchType = 3,
+  TestBranchType    = 4
+};
+
+// Addressing modes.
+enum AddrMode {
+  Offset,
+  PreIndex,
+  PostIndex
+};
+
+// Floating-point rounding modes.
+enum FPRounding {
+  // The first four values are encodable directly by FPCR<RMode>.
+  FPTieEven = 0x0,
+  FPPositiveInfinity = 0x1,
+  FPNegativeInfinity = 0x2,
+  FPZero = 0x3,
+
+  // The final rounding mode is only available when explicitly specified by the
+  // instruction (such as with fcvta). It cannot be set in FPCR.
+  FPTieAway
+};
+
+// Interpretation of register code 31: stack pointer or zero register. See
+// Instruction::RdMode() and Instruction::RnMode().
+enum Reg31Mode {
+  Reg31IsStackPointer,
+  Reg31IsZeroRegister
+};
+
+// Instructions. ---------------------------------------------------------------
+
+// View of one encoded instruction in memory. The class declares no data
+// members: every accessor reads or writes the 32-bit word at the address of
+// 'this', so an Instruction* is the address of the instruction itself.
+// Pointer arithmetic on Instruction* is used as byte arithmetic below, which
+// relies on sizeof(Instruction) being 1.
+class Instruction {
+ public:
+  // Read the 32-bit word stored at 'this'.
+  inline Instr InstructionBits() const {
+    return *(reinterpret_cast<const Instr*>(this));
+  }
+
+  // Overwrite the 32-bit word stored at 'this'.
+  inline void SetInstructionBits(Instr new_instr) {
+    *(reinterpret_cast<Instr*>(this)) = new_instr;
+  }
+
+  // Return bit 'pos' of the instruction as 0 or 1.
+  inline int Bit(int pos) const {
+    return (InstructionBits() >> pos) & 1;
+  }
+
+  // Return the field msb:lsb as extracted by unsigned_bitextract_32().
+  inline uint32_t Bits(int msb, int lsb) const {
+    return unsigned_bitextract_32(msb, lsb, InstructionBits());
+  }
+
+  // Return the field msb:lsb as extracted by signed_bitextract_32().
+  inline int32_t SignedBits(int msb, int lsb) const {
+    int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+    return signed_bitextract_32(msb, lsb, bits);
+  }
+
+  // Return the instruction bits ANDed with 'mask' (not shifted down).
+  inline Instr Mask(uint32_t mask) const {
+    return InstructionBits() & mask;
+  }
+
+  // One getter per INSTRUCTION_FIELDS_LIST entry; each calls Func (a member
+  // such as Bits or SignedBits) on the field's bit range.
+  #define DEFINE_GETTER(Name, HighBit, LowBit, Func)             \
+  inline int64_t Name() const { return Func(HighBit, LowBit); }
+  INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+  #undef DEFINE_GETTER
+
+  // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+  // formed from ImmPCRelLo and ImmPCRelHi.
+  int ImmPCRel() const {
+    int const offset = ((ImmPCRelHi() << ImmPCRelLo_width) | ImmPCRelLo());
+    int const width = ImmPCRelLo_width + ImmPCRelHi_width;
+    return signed_bitextract_32(width-1, 0, offset);
+  }
+
+  // Decoded immediates; defined in instructions-a64.cc.
+  uint64_t ImmLogical();
+  float ImmFP32();
+  double ImmFP64();
+
+  // Data size of a load/store pair instruction, from CalcLSPairDataSize().
+  inline LSDataSize SizeLSPair() const {
+    return CalcLSPairDataSize(
+             static_cast<LoadStorePairOp>(Mask(LoadStorePairMask)));
+  }
+
+  // Helpers.
+  // Each Is*() predicate compares the bits selected by the class's FMask with
+  // the class's Fixed pattern.
+  inline bool IsCondBranchImm() const {
+    return Mask(ConditionalBranchFMask) == ConditionalBranchFixed;
+  }
+
+  inline bool IsUncondBranchImm() const {
+    return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed;
+  }
+
+  inline bool IsCompareBranch() const {
+    return Mask(CompareBranchFMask) == CompareBranchFixed;
+  }
+
+  inline bool IsTestBranch() const {
+    return Mask(TestBranchFMask) == TestBranchFixed;
+  }
+
+  inline bool IsPCRelAddressing() const {
+    return Mask(PCRelAddressingFMask) == PCRelAddressingFixed;
+  }
+
+  inline bool IsLogicalImmediate() const {
+    return Mask(LogicalImmediateFMask) == LogicalImmediateFixed;
+  }
+
+  inline bool IsAddSubImmediate() const {
+    return Mask(AddSubImmediateFMask) == AddSubImmediateFixed;
+  }
+
+  inline bool IsAddSubExtended() const {
+    return Mask(AddSubExtendedFMask) == AddSubExtendedFixed;
+  }
+
+  inline bool IsLoadOrStore() const {
+    return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed;
+  }
+
+  // True for both the 64-bit (MOVN_x) and 32-bit (MOVN_w) forms.
+  inline bool IsMovn() const {
+    return (Mask(MoveWideImmediateMask) == MOVN_x) ||
+           (Mask(MoveWideImmediateMask) == MOVN_w);
+  }
+
+  // Indicate whether Rd can be the stack pointer or the zero register. This
+  // does not check that the instruction actually has an Rd field.
+  inline Reg31Mode RdMode() const {
+    // The following instructions use sp or wsp as Rd:
+    //  Add/sub (immediate) when not setting the flags.
+    //  Add/sub (extended) when not setting the flags.
+    //  Logical (immediate) when not setting the flags.
+    // Otherwise, r31 is the zero register.
+    if (IsAddSubImmediate() || IsAddSubExtended()) {
+      if (Mask(AddSubSetFlagsBit)) {
+        return Reg31IsZeroRegister;
+      } else {
+        return Reg31IsStackPointer;
+      }
+    }
+    if (IsLogicalImmediate()) {
+      // Of the logical (immediate) instructions, only ANDS (and its aliases)
+      // can set the flags. The others can all write into sp.
+      // Note that some logical operations are not available to
+      // immediate-operand instructions, so we have to combine two masks here.
+      if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) {
+        return Reg31IsZeroRegister;
+      } else {
+        return Reg31IsStackPointer;
+      }
+    }
+    return Reg31IsZeroRegister;
+  }
+
+  // Indicate whether Rn can be the stack pointer or the zero register. This
+  // does not check that the instruction actually has an Rn field.
+  inline Reg31Mode RnMode() const {
+    // The following instructions use sp or wsp as Rn:
+    //  All loads and stores.
+    //  Add/sub (immediate).
+    //  Add/sub (extended).
+    // Otherwise, r31 is the zero register.
+    if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) {
+      return Reg31IsStackPointer;
+    }
+    return Reg31IsZeroRegister;
+  }
+
+  // Classify the instruction as one of the immediate branch kinds, testing
+  // them in the order conditional, unconditional, compare, test. Returns
+  // UnknownBranchType when none matches.
+  inline ImmBranchType BranchType() const {
+    if (IsCondBranchImm()) {
+      return CondBranchType;
+    } else if (IsUncondBranchImm()) {
+      return UncondBranchType;
+    } else if (IsCompareBranch()) {
+      return CompareBranchType;
+    } else if (IsTestBranch()) {
+      return TestBranchType;
+    } else {
+      return UnknownBranchType;
+    }
+  }
+
+  // Find the target of this instruction. 'this' may be a branch or a
+  // PC-relative addressing instruction.
+  Instruction* ImmPCOffsetTarget();
+
+  // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or
+  // a PC-relative addressing instruction.
+  void SetImmPCOffsetTarget(Instruction* target);
+  // Patch a literal load instruction to load from 'source'.
+  void SetImmLLiteral(Instruction* source);
+
+  // Address of the literal referenced by a load-literal instruction: the
+  // ImmLLiteral field scaled by the literal entry size, added to the address
+  // of this instruction.
+  inline uint8_t* LiteralAddress() {
+    int offset = ImmLLiteral() << kLiteralEntrySizeLog2;
+    return reinterpret_cast<uint8_t*>(this) + offset;
+  }
+
+  // Read the literal with memcpy, so LiteralAddress() is never dereferenced
+  // through a wider pointer type.
+  inline uint32_t Literal32() {
+    uint32_t literal;
+    memcpy(&literal, LiteralAddress(), sizeof(literal));
+
+    return literal;
+  }
+
+  inline uint64_t Literal64() {
+    uint64_t literal;
+    memcpy(&literal, LiteralAddress(), sizeof(literal));
+
+    return literal;
+  }
+
+  // The same literals reinterpreted as floating-point values.
+  inline float LiteralFP32() {
+    return rawbits_to_float(Literal32());
+  }
+
+  inline double LiteralFP64() {
+    return rawbits_to_double(Literal64());
+  }
+
+  // Pointer to the instruction kInstructionSize past this one.
+  inline Instruction* NextInstruction() {
+    return this + kInstructionSize;
+  }
+
+  // Pointer 'offset' past this instruction; the result must be word aligned.
+  inline Instruction* InstructionAtOffset(int64_t offset) {
+    ASSERT(IsWordAligned(this + offset));
+    return this + offset;
+  }
+
+  // Reinterpret an arbitrary value (typically an address) as an Instruction*.
+  template<typename T> static inline Instruction* Cast(T src) {
+    return reinterpret_cast<Instruction*>(src);
+  }
+
+ private:
+  // Branch immediate for whichever branch type this instruction is.
+  inline int ImmBranch() const;
+
+  // Implementation halves of SetImmPCOffsetTarget().
+  void SetPCRelImmTarget(Instruction* target);
+  void SetBranchImmTarget(Instruction* target);
+};
+}  // namespace vixl
+
+#endif  // VIXL_A64_INSTRUCTIONS_A64_H_
--- a/disas/libvixl/globals.h
+++ b/disas/libvixl/globals.h
@@ -0,0 +1,65 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_GLOBALS_H
+#define VIXL_GLOBALS_H
+
+// Get the standard printf format macros for C99 stdint types.
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include "platform.h"
+
+
+typedef uint8_t byte;
+
+const int KBytes = 1024;
+const int MBytes = 1024 * KBytes;
+
+  // Print the current source location, then terminate via abort(). The body
+  // is wrapped in do { } while (0) so the macro expands to exactly one
+  // statement and behaves correctly under an unbraced if/else.
+  #define ABORT() \
+    do { printf("in %s, line %i", __FILE__, __LINE__); abort(); } while (0)
+#ifdef DEBUG
+  // DEBUG builds: ASSERT and CHECK both go through assert(); UNIMPLEMENTED
+  // and UNREACHABLE print a tag and then ABORT().
+  #define ASSERT(condition) assert(condition)
+  #define CHECK(condition) ASSERT(condition)
+  #define UNIMPLEMENTED() \
+    do { printf("UNIMPLEMENTED\t"); ABORT(); } while (0)
+  #define UNREACHABLE() \
+    do { printf("UNREACHABLE\t"); ABORT(); } while (0)
+#else
+  // Non-DEBUG builds: ASSERT, UNIMPLEMENTED and UNREACHABLE expand to a no-op
+  // expression; CHECK still uses assert().
+  #define ASSERT(condition) ((void) 0)
+  #define CHECK(condition) assert(condition)
+  #define UNIMPLEMENTED() ((void) 0)
+  #define UNREACHABLE() ((void) 0)
+#endif
+
+// No-op helper that accepts one argument by value and ignores it. Presumably
+// used to mark values as intentionally unused -- confirm against callers.
+template <typename T> inline void USE(T) {}
+
+// Print an "ALIGNMENT EXCEPTION" tag and then ABORT(). Wrapped in
+// do { } while (0) so the macro is a single statement even under an unbraced
+// if/else.
+#define ALIGNMENT_EXCEPTION() \
+  do { printf("ALIGNMENT EXCEPTION\t"); ABORT(); } while (0)
+
+#endif  // VIXL_GLOBALS_H
--- a/disas/libvixl/platform.h
+++ b/disas/libvixl/platform.h
@@ -0,0 +1,43 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef PLATFORM_H
+#define PLATFORM_H
+
+// Define platform specific functionalities.
+
+namespace vixl {
+// Trigger a breakpoint on the host. Only implemented when USE_SIMULATOR is
+// defined; otherwise this is an empty function.
+inline void HostBreakpoint() {
+#ifdef USE_SIMULATOR
+  // Currently we assume running the simulator implies running on x86 hardware.
+  asm("int3");
+#else
+  // TODO: Implement HostBreakpoint on a64.
+#endif
+}
+}  // namespace vixl
+
+#endif
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -0,0 +1,126 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "utils.h"
+#include <stdio.h>
+
+namespace vixl {
+
+// Return the bit pattern of 'value' as a 32-bit unsigned integer by copying
+// its 4 bytes.
+uint32_t float_to_rawbits(float value) {
+  uint32_t raw = 0;
+  memcpy(&raw, &value, sizeof(raw));
+  return raw;
+}
+
+
+// Return the bit pattern of 'value' as a 64-bit unsigned integer by copying
+// its 8 bytes.
+uint64_t double_to_rawbits(double value) {
+  uint64_t raw = 0;
+  memcpy(&raw, &value, sizeof(raw));
+  return raw;
+}
+
+
+// Build a float from the 32-bit pattern 'bits' by copying its 4 bytes.
+float rawbits_to_float(uint32_t bits) {
+  float result = 0.0;
+  memcpy(&result, &bits, sizeof(bits));
+  return result;
+}
+
+
+// Build a double from the 64-bit pattern 'bits' by copying its 8 bytes.
+double rawbits_to_double(uint64_t bits) {
+  double result = 0.0;
+  memcpy(&result, &bits, sizeof(bits));
+  return result;
+}
+
+
+// Count the zero bits above the most significant set bit of 'value', treating
+// it as a 'width'-bit quantity (32 or 64). Returns 'width' when none of the
+// low 'width' bits is set.
+int CountLeadingZeros(uint64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  int count = 0;
+  // The probe must be built from a 64-bit one: '1UL' is only 32 bits wide
+  // where long is 32 bits, and shifting it by 63 would be undefined.
+  uint64_t bit_test = static_cast<uint64_t>(1) << (width - 1);
+  while ((count < width) && ((bit_test & value) == 0)) {
+    count++;
+    bit_test >>= 1;
+  }
+  return count;
+}
+
+
+// Count the bits below the top bit of the 'width'-bit field that are equal to
+// it, computed as the leading-zero count of 'value' (or of ~value when 'value'
+// is negative) minus one.
+int CountLeadingSignBits(int64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  // Negative inputs are inverted so that leading ones become leading zeros.
+  const uint64_t probe = (value >= 0) ? value : ~value;
+  return CountLeadingZeros(probe, width) - 1;
+}
+
+
+// Count the zero bits below the least significant set bit of 'value', looking
+// at no more than 'width' bits. Returns 'width' if none of them is set.
+int CountTrailingZeros(uint64_t value, int width) {
+  ASSERT((width == 32) || (width == 64));
+  int count = 0;
+  for (; count < width; count++) {
+    if (((value >> count) & 1) != 0) {
+      break;
+    }
+  }
+  return count;
+}
+
+
+// Count the set bits in the low 'width' bits of 'value' (width is 32 or 64).
+int CountSetBits(uint64_t value, int width) {
+  // TODO: Other widths could be added here, as the implementation already
+  // supports them.
+  ASSERT((width == 32) || (width == 64));
+
+  // Clear everything above 'width' so that those bits are not counted.
+  value &= (0xffffffffffffffffULL >> (64-width));
+
+  // Each round adds neighbouring bit fields together, and the field size
+  // doubles from one round to the next (1, 2, 4, 8, 16, 32 bits).
+  // An example for an 8-bit value:
+  // Bits:  h  g  f  e  d  c  b  a
+  //         \ |   \ |   \ |   \ |
+  // value = h+g   f+e   d+c   b+a
+  //            \    |      \    |
+  // value =   h+g+f+e     d+c+b+a
+  //                  \          |
+  // value =       h+g+f+e+d+c+b+a
+  static const uint64_t kFieldMasks[] = {
+    0x5555555555555555ULL,
+    0x3333333333333333ULL,
+    0x0f0f0f0f0f0f0f0fULL,
+    0x00ff00ff00ff00ffULL,
+    0x0000ffff0000ffffULL,
+    0x00000000ffffffffULL
+  };
+  for (int round = 0; round < 6; round++) {
+    const int shift = 1 << round;
+    const uint64_t mask = kFieldMasks[round];
+    value = ((value >> shift) & mask) + (value & mask);
+  }
+
+  return value;
+}
+}  // namespace vixl
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -0,0 +1,126 @@
+// Copyright 2013, ARM Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_UTILS_H
+#define VIXL_UTILS_H
+
+
+#include <string.h>
+#include "globals.h"
+
+namespace vixl {
+
+// Check number width.
+// True when 'x' lies in the signed n-bit range [-2^(n-1), 2^(n-1)).
+// 'n' must be between 1 and 63 (asserted in debug builds).
+inline bool is_intn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  const int64_t bound = 1ULL << (n - 1);
+  return (x >= -bound) && (x < bound);
+}
+
+// True when shifting 'x' right by 'n' leaves zero, i.e. nothing is set at or
+// above bit n. 'n' must be between 1 and 63 (asserted in debug builds).
+inline bool is_uintn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  return (x >> n) == 0;
+}
+
+// Keep only the low 'n' bits of 'x'. The result is returned as 'unsigned', so
+// it is further narrowed to the width of that type. 'n' must be between 1 and
+// 63 (asserted in debug builds).
+inline unsigned truncate_to_intn(unsigned n, int64_t x) {
+  ASSERT((0 < n) && (n < 64));
+  const uint64_t low_bits_mask = (1ULL << n) - 1;
+  return (x & low_bits_mask);
+}
+
+// X-macro list: applies V to every integer from 1 to 63.
+#define INT_1_TO_63_LIST(V)                                                    \
+V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)                                 \
+V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16)                                \
+V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24)                                \
+V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)                                \
+V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40)                                \
+V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48)                                \
+V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56)                                \
+V(57) V(58) V(59) V(60) V(61) V(62) V(63)
+
+// Generators for the fixed-width wrappers is_int<N>, is_uint<N> and
+// truncate_to_int<N>, each forwarding to the generic *_intn helper with N.
+// NOTE(review): truncate_to_int<N> takes and returns plain 'int', unlike the
+// int64_t-based is_* wrappers -- confirm this is intended for large N.
+#define DECLARE_IS_INT_N(N)                                                    \
+inline bool is_int##N(int64_t x) { return is_intn(N, x); }
+#define DECLARE_IS_UINT_N(N)                                                   \
+inline bool is_uint##N(int64_t x) { return is_uintn(N, x); }
+#define DECLARE_TRUNCATE_TO_INT_N(N)                                           \
+inline int truncate_to_int##N(int x) { return truncate_to_intn(N, x); }
+// Instantiate the wrappers for N = 1..63, then drop the generator macros.
+INT_1_TO_63_LIST(DECLARE_IS_INT_N)
+INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
+INT_1_TO_63_LIST(DECLARE_TRUNCATE_TO_INT_N)
+#undef DECLARE_IS_INT_N
+#undef DECLARE_IS_UINT_N
+#undef DECLARE_TRUNCATE_TO_INT_N
+
+// Bit field extraction.
+// Extract bits [msb:lsb] of 'x' as an unsigned value (zero-extended).
+// Expects 0 <= lsb <= msb <= 31.
+inline uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x) {
+  // Build the mask in 64 bits: for a full-width field (msb == 31, lsb == 0)
+  // the field width is 32, and shifting a 32-bit int by 32 is undefined.
+  uint64_t mask = (static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1;
+  return (x >> lsb) & static_cast<uint32_t>(mask);
+}
+
+// Extract bits [msb:lsb] of 'x' as an unsigned value (zero-extended).
+// Expects 0 <= lsb <= msb <= 63.
+inline uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x) {
+  int width = 1 + msb - lsb;
+  // A full-width field would need a shift by 64, which is undefined for a
+  // 64-bit type, so that case uses an all-ones mask directly.
+  uint64_t mask = (width >= 64) ? ~static_cast<uint64_t>(0)
+                                : ((static_cast<uint64_t>(1) << width) - 1);
+  return (x >> lsb) & mask;
+}
+
+// Extract bits [msb:lsb] of 'x' and sign-extend the field from bit 'msb'.
+// Expects 0 <= lsb <= msb <= 31.
+inline int32_t signed_bitextract_32(int msb, int lsb, int32_t x) {
+  // Move the field's top bit into bit 31 using unsigned arithmetic, because
+  // left-shifting a signed value into or past the sign bit is undefined.
+  uint32_t aligned = static_cast<uint32_t>(x) << (31 - msb);
+  // The signed right shift then brings the field down to bit 0; it relies on
+  // the compiler implementing it as an arithmetic shift, as the original did.
+  return static_cast<int32_t>(aligned) >> (lsb + 31 - msb);
+}
+
+// Extract bits [msb:lsb] of 'x' and sign-extend the field from bit 'msb'.
+// Expects 0 <= lsb <= msb <= 63.
+inline int64_t signed_bitextract_64(int msb, int lsb, int64_t x) {
+  // Move the field's top bit into bit 63 using unsigned arithmetic, because
+  // left-shifting a signed value into or past the sign bit is undefined.
+  uint64_t aligned = static_cast<uint64_t>(x) << (63 - msb);
+  // The signed right shift then brings the field down to bit 0; it relies on
+  // the compiler implementing it as an arithmetic shift, as the original did.
+  return static_cast<int64_t>(aligned) >> (lsb + 63 - msb);
+}
+
+// floating point representation
+uint32_t float_to_rawbits(float value);
+uint64_t double_to_rawbits(double value);
+float rawbits_to_float(uint32_t bits);
+double rawbits_to_double(uint64_t bits);
+
+// Bits counting.
+int CountLeadingZeros(uint64_t value, int width);
+int CountLeadingSignBits(int64_t value, int width);
+int CountTrailingZeros(uint64_t value, int width);
+int CountSetBits(uint64_t value, int width);
+
+// Pointer alignment
+// TODO: rename/refactor to make it specific to instructions.
+// True when the two low bits of the address held in 'pointer' are clear,
+// i.e. the address is a multiple of 4.
+template<typename T>
+bool IsWordAligned(T pointer) {
+  ASSERT(sizeof(pointer) == sizeof(intptr_t));   // NOLINT(runtime/sizeof)
+  const intptr_t address = reinterpret_cast<intptr_t>(pointer);
+  return (address & 3) == 0;
+}
+
+// Increment a pointer until it has the specified alignment. Returns 'pointer'
+// unchanged when it is already a multiple of 'alignment'. 'alignment' must be
+// non-zero; it does not have to be a power of two.
+template<class T>
+T AlignUp(T pointer, size_t alignment) {
+  ASSERT(sizeof(pointer) == sizeof(uintptr_t));
+  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  // Derive the step from the remainder. The former expression
+  // (alignment - pointer_raw) % alignment relied on unsigned wrap-around and
+  // is only correct when 'alignment' is a power of two.
+  size_t misalignment = pointer_raw % alignment;
+  size_t align_step = (misalignment == 0) ? 0 : (alignment - misalignment);
+  ASSERT((pointer_raw + align_step) % alignment == 0);
+  return reinterpret_cast<T>(pointer_raw + align_step);
+}
+
+
+}  // namespace vixl
+
+#endif  // VIXL_UTILS_H
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -213,6 +213,7 @@ BlockDriverAIOCB *dma_bdrv_io(
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
+    dbs->in_cancel = false;
    dbs->io_func = io_func;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -123,11 +123,12 @@ And it looks like this on the wire:

 Flat union types avoid the nesting on the wire. They are used whenever a
 specific field of the base type is declared as the discriminator ('type' is
-then no longer generated). The discriminator must always be a string field.
+then no longer generated). The discriminator must be of enumeration type.
 The above example can then be modified as follows:

+ { 'enum': 'BlockdevDriver', 'data': [ 'raw', 'qcow2' ] }
 { 'type': 'BlockdevCommonOptions',
-   'data': { 'driver': 'str', 'readonly': 'bool' } }
+   'data': { 'driver': 'BlockdevDriver', 'readonly': 'bool' } }
 { 'union': 'BlockdevOptions',
   'base': 'BlockdevCommonOptions',
   'discriminator': 'driver',
--- a/docs/qemupciserial.inf
+++ b/docs/qemupciserial.inf
@@ -11,99 +11,92 @@
 ; (Com+Lpt)" from the list.  Click "Have a disk".  Select this file.
 ; Procedure may vary a bit depending on the windows version.

-; FIXME: This file covers the single port version only.
+; This file covers all options: pci-serial, pci-serial-2x, pci-serial-4x
+; for both 32 and 64 bit platforms.

 [Version]
-Signature="$CHICAGO$"
-Class=Ports
-ClassGuid={4D36E978-E325-11CE-BFC1-08002BE10318}
+Signature="$Windows NT$"
+Class=MultiFunction
+ClassGUID={4d36e971-e325-11ce-bfc1-08002be10318}
 Provider=%QEMU%
-DriverVer=09/24/2012,1.3.0
-
-[SourceDisksNames]
-3426=windows cd
-
-[SourceDisksFiles]
-serial.sys 		= 3426
-serenum.sys 		= 3426
-
-[DestinationDirs]
-DefaultDestDir  = 11        ;LDID_SYS
-ComPort.NT.Copy = 12        ;DIRID_DRIVERS
-SerialEnumerator.NT.Copy=12 ;DIRID_DRIVERS
-
-; Drivers
-;----------------------------------------------------------
+DriverVer=12/29/2013,1.3.0
+[ControlFlags]
+ExcludeFromSelect=*
 [Manufacturer]
-%QEMU%=QEMU,NTx86
+%QEMU%=QEMU,NTx86,NTAMD64

 [QEMU.NTx86]
-%QEMU-PCI_SERIAL.DeviceDesc% = ComPort, "PCI\VEN_1b36&DEV_0002&CC_0700"
+%QEMU-PCI_SERIAL_1_PORT%=ComPort_inst1, PCI\VEN_1B36&DEV_0002
+%QEMU-PCI_SERIAL_2_PORT%=ComPort_inst2, PCI\VEN_1B36&DEV_0003
+%QEMU-PCI_SERIAL_4_PORT%=ComPort_inst4, PCI\VEN_1B36&DEV_0004

-; COM sections
-;----------------------------------------------------------
-[ComPort.AddReg]
-HKR,,PortSubClass,1,01
+[QEMU.NTAMD64]
+%QEMU-PCI_SERIAL_1_PORT%=ComPort_inst1, PCI\VEN_1B36&DEV_0002
+%QEMU-PCI_SERIAL_2_PORT%=ComPort_inst2, PCI\VEN_1B36&DEV_0003
+%QEMU-PCI_SERIAL_4_PORT%=ComPort_inst4, PCI\VEN_1B36&DEV_0004

-[ComPort.NT]
-AddReg=ComPort.AddReg, ComPort.NT.AddReg
-LogConfig=caa
-SyssetupPnPFlags = 1
+[ComPort_inst1]
+Include=mf.inf
+Needs=MFINSTALL.mf

-[ComPort.NT.HW]
-AddReg=ComPort.NT.HW.AddReg
+[ComPort_inst2]
+Include=mf.inf
+Needs=MFINSTALL.mf

-[ComPort.NT.AddReg]
-HKR,,EnumPropPages32,,"MsPorts.dll,SerialPortPropPageProvider"
+[ComPort_inst4]
+Include=mf.inf
+Needs=MFINSTALL.mf

-[ComPort.NT.HW.AddReg]
-HKR,,"UpperFilters",0x00010000,"serenum"
+[ComPort_inst1.HW]
+AddReg=ComPort_inst1.RegHW

-;-------------- Service installation
-; Port Driver (function driver for this device)
-[ComPort.NT.Services]
-AddService = Serial, 0x00000002, Serial_Service_Inst, Serial_EventLog_Inst
-AddService = Serenum,,Serenum_Service_Inst
+[ComPort_inst2.HW]
+AddReg=ComPort_inst2.RegHW

-; -------------- Serial Port Driver install sections
-[Serial_Service_Inst]
-DisplayName    = %Serial.SVCDESC%
-ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
-StartType      = 1               ; SERVICE_SYSTEM_START (this driver may do detection)
-ErrorControl   = 0               ; SERVICE_ERROR_IGNORE
-ServiceBinary  = %12%\serial.sys
-LoadOrderGroup = Extended base
+[ComPort_inst4.HW]
+AddReg=ComPort_inst4.RegHW

-; -------------- Serenum Driver install section
-[Serenum_Service_Inst]
-DisplayName    = %Serenum.SVCDESC%
-ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
-StartType      = 3               ; SERVICE_DEMAND_START
-ErrorControl   = 1               ; SERVICE_ERROR_NORMAL
-ServiceBinary  = %12%\serenum.sys
-LoadOrderGroup = PNP Filter
+[ComPort_inst1.Services]
+Include=mf.inf
+Needs=MFINSTALL.mf.Services

-[Serial_EventLog_Inst]
-AddReg = Serial_EventLog_AddReg
+[ComPort_inst2.Services]
+Include=mf.inf
+Needs=MFINSTALL.mf.Services

-[Serial_EventLog_AddReg]
-HKR,,EventMessageFile,0x00020000,"%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\serial.sys"
-HKR,,TypesSupported,0x00010001,7
+[ComPort_inst4.Services]
+Include=mf.inf
+Needs=MFINSTALL.mf.Services

-; The following sections are COM port resource configs.
-; Section name format means:
-; Char 1 = c (COM port)
-; Char 2 = I/O config: 1 (3f8), 2 (2f8), 3 (3e8), 4 (2e8), a (any)
-; Char 3 = IRQ config: #, a (any)
+[ComPort_inst1.RegHW]
+HKR,Child0000,HardwareID,,*PNP0501
+HKR,Child0000,VaryingResourceMap,1,00, 00,00,00,00, 08,00,00,00
+HKR,Child0000,ResourceMap,1,02

-[caa]                   ; Any base, any IRQ
-ConfigPriority=HARDRECONFIG
-IOConfig=8@100-ffff%fff8(3ff::)
-IRQConfig=S:3,4,5,7,9,10,11,12,14,15
+[ComPort_inst2.RegHW]
+HKR,Child0000,HardwareID,,*PNP0501
+HKR,Child0000,VaryingResourceMap,1,00, 00,00,00,00, 08,00,00,00
+HKR,Child0000,ResourceMap,1,02
+HKR,Child0001,HardwareID,,*PNP0501
+HKR,Child0001,VaryingResourceMap,1,00, 08,00,00,00, 08,00,00,00
+HKR,Child0001,ResourceMap,1,02
+
+[ComPort_inst4.RegHW]
+HKR,Child0000,HardwareID,,*PNP0501
+HKR,Child0000,VaryingResourceMap,1,00, 00,00,00,00, 08,00,00,00
+HKR,Child0000,ResourceMap,1,02
+HKR,Child0001,HardwareID,,*PNP0501
+HKR,Child0001,VaryingResourceMap,1,00, 08,00,00,00, 08,00,00,00
+HKR,Child0001,ResourceMap,1,02
+HKR,Child0002,HardwareID,,*PNP0501
+HKR,Child0002,VaryingResourceMap,1,00, 10,00,00,00, 08,00,00,00
+HKR,Child0002,ResourceMap,1,02
+HKR,Child0003,HardwareID,,*PNP0501
+HKR,Child0003,VaryingResourceMap,1,00, 18,00,00,00, 08,00,00,00
+HKR,Child0003,ResourceMap,1,02

 [Strings]
 QEMU="QEMU"
-QEMU-PCI_SERIAL.DeviceDesc="QEMU Serial PCI Card"
-
-Serial.SVCDESC   = "Serial port driver"
-Serenum.SVCDESC = "Serenum Filter Driver"
+QEMU-PCI_SERIAL_1_PORT="1x QEMU PCI Serial Card"
+QEMU-PCI_SERIAL_2_PORT="2x QEMU PCI Serial Card"
+QEMU-PCI_SERIAL_4_PORT="4x QEMU PCI Serial Card"
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -225,6 +225,45 @@ Data:
  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
 }

+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference":    device name if defined else node name.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+     "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error":        Error message (json-string, optional)
+                  Only present on failure.  This field contains a human-readable
+                  error message.  There are no semantics other than that the
+                  block layer reported an error and clients should not try to
+                  interpret the error string.
+- "node-name":    The graph node name of the block driver state.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
 RESET
 -----

@@ -479,7 +518,7 @@ Data: None.

 Example:

-{ "event": "WATCHDOG",
+{ "event": "WAKEUP",
     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }

 WATCHDOG
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
 high-performance RDMA hardware using the following command:

 QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default

 Performing this action will cause all 8GB to be pinned, so if that's
 not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device

 Next, on the destination machine, add the following to the QEMU command line:

-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port

 Finally, perform the actual migration on the source machine:

 QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port

 PERFORMANCE
 ===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
 active use and the VM itself is completely idle using the same 40 gbps
 infiniband link:

-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps

 These numbers would of course scale up to whatever size virtual machine
 you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.

 TODO:
 =====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
   are not compatible with infinband memory pinning and will result in
   an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
   the use of KSM and ballooning while using RDMA.
+3. Some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
   help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
   requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
   hints about application behavior.
--- a/docs/specs/acpi_cpu_hotplug.txt
+++ b/docs/specs/acpi_cpu_hotplug.txt
@@ -10,7 +10,9 @@ ACPI GPE block (IO ports 0xafe0-0xafe3, byte access):
 Generic ACPI GPE block. Bit 2 (GPE.2) used to notify CPU
 hot-add/remove event to ACPI BIOS, via SCI interrupt.

-CPU present bitmap (IO port 0xaf00-0xaf1f, 1-byte access):
+CPU present bitmap for:
+  ICH9-LPC (IO port 0x0cd8-0x0cf7, 1-byte access)
+  PIIX-PM  (IO port 0xaf00-0xaf1f, 1-byte access)
 ---------------------------------------------------------------
 One bit per CPU. Bit position reflects corresponding CPU APIC ID.
 Read-only.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -214,6 +214,42 @@ The "ust" backend uses the LTTng Userspace Tracer library.  There are no
 monitor commands built into QEMU, instead UST utilities should be used to list,
 enable/disable, and dump traces.

+Package lttng-tools is required for userspace tracing. You must ensure that the
+current user belongs to the "tracing" group, or manually launch the
+lttng-sessiond daemon for the current user prior to running any instance of
+QEMU.
+
+While running an instrumented QEMU, LTTng should be able to list all available
+events:
+
+    lttng list -u
+
+Create tracing session:
+
+    lttng create mysession
+
+Enable events:
+
+    lttng enable-event qemu:g_malloc -u
+
+The event argument can be either a comma-separated list of event names or "-a"
+to enable all tracepoint events. Start and stop tracing as needed:
+
+    lttng start
+    lttng stop
+
+View the trace:
+
+    lttng view
+
+Destroy tracing session:
+
+    lttng destroy
+
+Babeltrace can be used at any later time to view the trace:
+
+    babeltrace $HOME/lttng-traces/mysession-<date>-<time>
+
 === SystemTap ===

 The "dtrace" backend uses DTrace sdt probes but has only been tested with
--- a/dump.c
+++ b/dump.c
--- a/exec.c
+++ b/exec.c
@@ -17,9 +17,7 @@
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "config.h"
-#ifdef _WIN32
-#include <windows.h>
-#else
+#ifndef _WIN32
 #include <sys/types.h>
 #include <sys/mman.h>
 #endif
@@ -35,6 +33,7 @@
 #include "hw/xen/xen.h"
 #include "qemu/timer.h"
 #include "qemu/config-file.h"
+#include "qemu/error-report.h"
 #include "exec/memory.h"
 #include "sysemu/dma.h"
 #include "exec/address-spaces.h"
@@ -138,6 +137,7 @@ typedef struct subpage_t {

 static void io_mem_init(void);
 static void memory_map_init(void);
+static void tcg_commit(MemoryListener *listener);

 static MemoryRegion io_mem_watch;
 #endif
@@ -339,6 +339,18 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x
    return section;
 }

+/*
+ * Decide whether an access to @mr is "direct".
+ *
+ * RAM regions qualify unless the access is a write and the region is marked
+ * readonly.  Regions for which memory_region_is_romd() is true qualify for
+ * reads only.  Every other region does not qualify.
+ */
+static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
+{
+    if (memory_region_is_ram(mr)) {
+        return !is_write || !mr->readonly;
+    }
+
+    return memory_region_is_romd(mr) && !is_write;
+}
+}
+
 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
@@ -368,6 +380,11 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
        as = iotlb.target_as;
    }

+    if (memory_access_is_direct(mr, is_write)) {
+        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
+        len = MIN(page, len);
+    }
+
    *plen = len;
    *xlat = addr;
    return mr;
@@ -403,7 +420,7 @@ static int cpu_common_post_load(void *opaque, int version_id)
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
-    tlb_flush(cpu->env_ptr, 1);
+    tlb_flush(cpu, 1);

    return 0;
 }
@@ -436,6 +453,22 @@ CPUState *qemu_get_cpu(int index)
    return NULL;
 }

+#if !defined(CONFIG_USER_ONLY)
+/*
+ * (Re)register the per-CPU memory listener on @as.
+ *
+ * An already existing listener is unregistered first; otherwise a zeroed one
+ * is allocated.  In both cases its commit hook is set to tcg_commit before it
+ * is registered on @as.
+ */
+void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
+{
+    /* We only support one address space per cpu at the moment.  */
+    assert(cpu->as == as);
+
+    if (!cpu->tcg_as_listener) {
+        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
+    } else {
+        memory_listener_unregister(cpu->tcg_as_listener);
+    }
+
+    cpu->tcg_as_listener->commit = tcg_commit;
+    memory_listener_register(cpu->tcg_as_listener, as);
+}
+#endif
+
 void cpu_exec_init(CPUArchState *env)
 {
    CPUState *cpu = ENV_GET_CPU(env);
@@ -452,9 +485,10 @@ void cpu_exec_init(CPUArchState *env)
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
-    QTAILQ_INIT(&env->breakpoints);
-    QTAILQ_INIT(&env->watchpoints);
+    QTAILQ_INIT(&cpu->breakpoints);
+    QTAILQ_INIT(&cpu->watchpoints);
 #ifndef CONFIG_USER_ONLY
+    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
 #endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
@@ -486,36 +520,37 @@ static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
 {
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
-        tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
+        tb_invalidate_phys_addr(cpu->as,
+                                phys | (pc & ~TARGET_PAGE_MASK));
    }
 }
 #endif
 #endif /* TARGET_HAS_ICE */

 #if defined(CONFIG_USER_ONLY)
-void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
+void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

 {
 }

-int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
+int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
 {
    return -ENOSYS;
 }
 #else
 /* Add a watchpoint.  */
-int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
+int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
 {
-    target_ulong len_mask = ~(len - 1);
+    vaddr len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
-        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
-                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
+        error_report("tried to set invalid watchpoint at %"
+                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));
@@ -525,12 +560,13 @@ int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
-    if (flags & BP_GDB)
-        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
-    else
-        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
+    if (flags & BP_GDB) {
+        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
+    } else {
+        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
+    }

-    tlb_flush_page(env, addr);
+    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
@@ -538,16 +574,16 @@ int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len
 }

 /* Remove a specific watchpoint.  */
-int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
+int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
 {
-    target_ulong len_mask = ~(len - 1);
+    vaddr len_mask = ~(len - 1);
    CPUWatchpoint *wp;

-    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
-            cpu_watchpoint_remove_by_ref(env, wp);
+            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
@@ -555,29 +591,30 @@ int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len
 }

 /* Remove a specific watchpoint by reference.  */
-void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
+void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
 {
-    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
+    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

-    tlb_flush_page(env, watchpoint->vaddr);
+    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
 }

 /* Remove all matching watchpoints.  */
-void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
+void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 {
    CPUWatchpoint *wp, *next;

-    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
-        if (wp->flags & mask)
-            cpu_watchpoint_remove_by_ref(env, wp);
+    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
+        if (wp->flags & mask) {
+            cpu_watchpoint_remove_by_ref(cpu, wp);
+        }
    }
 }
 #endif

 /* Add a breakpoint.  */
-int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
+int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
 {
 #if defined(TARGET_HAS_ICE)
@@ -590,12 +627,12 @@ int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
-        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
+        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
-        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
+        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

-    breakpoint_invalidate(ENV_GET_CPU(env), pc);
+    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
@@ -607,14 +644,14 @@ int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
 }

 /* Remove a specific breakpoint.  */
-int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
+int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
 {
 #if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

-    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
-            cpu_breakpoint_remove_by_ref(env, bp);
+            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
@@ -625,26 +662,27 @@ int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
 }

 /* Remove a specific breakpoint by reference.  */
-void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
+void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
 {
 #if defined(TARGET_HAS_ICE)
-    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
+    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

-    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
+    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
 #endif
 }

 /* Remove all matching breakpoints. */
-void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
+void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
 {
 #if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

-    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
-        if (bp->flags & mask)
-            cpu_breakpoint_remove_by_ref(env, bp);
+    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
+        if (bp->flags & mask) {
+            cpu_breakpoint_remove_by_ref(cpu, bp);
+        }
    }
 #endif
 }
@@ -668,9 +706,8 @@ void cpu_single_step(CPUState *cpu, int enabled)
 #endif
 }

-void cpu_abort(CPUArchState *env, const char *fmt, ...)
+void cpu_abort(CPUState *cpu, const char *fmt, ...)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
    va_list ap;
    va_list ap2;

@@ -758,7 +795,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable)
    in_migration = enable;
 }

-hwaddr memory_region_section_get_iotlb(CPUArchState *env,
+hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
@@ -778,13 +815,13 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
-        iotlb = section - address_space_memory.dispatch->map.sections;
+        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
-    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
@@ -874,6 +911,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
+        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
@@ -994,7 +1032,7 @@ static void *file_ram_alloc(RAMBlock *block,

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
-        return NULL;
+        goto error;
    }

    if (memory < hpagesize) {
@@ -1003,7 +1041,7 @@ static void *file_ram_alloc(RAMBlock *block,

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
-        return NULL;
+        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
@@ -1021,7 +1059,7 @@ static void *file_ram_alloc(RAMBlock *block,
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
-        return NULL;
+        goto error;
    }
    unlink(filename);
    g_free(filename);
@@ -1041,7 +1079,7 @@ static void *file_ram_alloc(RAMBlock *block,
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
-        return (NULL);
+        goto error;
    }

    if (mem_prealloc) {
@@ -1070,7 +1108,7 @@ static void *file_ram_alloc(RAMBlock *block,
        }

        /* MAP_POPULATE silently ignores failures */
-        for (i = 0; i < (memory/hpagesize)-1; i++) {
+        for (i = 0; i < (memory/hpagesize); i++) {
            memset(area + (hpagesize*i), 0, 1);
        }

@@ -1085,6 +1123,12 @@ static void *file_ram_alloc(RAMBlock *block,

    block->fd = fd;
    return area;
+
+error:
+    if (mem_prealloc) {
+        exit(1);
+    }
+    return NULL;
 }
 #else
 static void *file_ram_alloc(RAMBlock *block,
@@ -1512,7 +1556,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        CPUArchState *env = current_cpu->env_ptr;
-        tlb_set_dirty(env, env->mem_io_vaddr);
+        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
    }
 }

@@ -1531,34 +1575,35 @@ static const MemoryRegionOps notdirty_mem_ops = {
 /* Generate a debug exception if a watchpoint has been hit.  */
 static void check_watchpoint(int offset, int len_mask, int flags)
 {
-    CPUArchState *env = current_cpu->env_ptr;
+    CPUState *cpu = current_cpu;
+    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

-    if (env->watchpoint_hit) {
+    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that is will trigger after the
         * current instruction. */
-        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
+        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
-    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
-    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
+    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
-            if (!env->watchpoint_hit) {
-                env->watchpoint_hit = wp;
-                tb_check_watchpoint(env);
+            if (!cpu->watchpoint_hit) {
+                cpu->watchpoint_hit = wp;
+                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
-                    env->exception_index = EXCP_DEBUG;
-                    cpu_loop_exit(env);
+                    cpu->exception_index = EXCP_DEBUG;
+                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
-                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
-                    cpu_resume_from_signal(env, NULL);
+                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
+                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
@@ -1575,9 +1620,9 @@ static uint64_t watch_mem_read(void *opaque, hwaddr addr,
 {
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
-    case 1: return ldub_phys(addr);
-    case 2: return lduw_phys(addr);
-    case 4: return ldl_phys(addr);
+    case 1: return ldub_phys(&address_space_memory, addr);
+    case 2: return lduw_phys(&address_space_memory, addr);
+    case 4: return ldl_phys(&address_space_memory, addr);
    default: abort();
    }
 }
@@ -1588,13 +1633,13 @@ static void watch_mem_write(void *opaque, hwaddr addr,
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
-        stb_phys(addr, val);
+        stb_phys(&address_space_memory, addr, val);
        break;
    case 2:
-        stw_phys(addr, val);
+        stw_phys(&address_space_memory, addr, val);
        break;
    case 4:
-        stl_phys(addr, val);
+        stl_phys(&address_space_memory, addr, val);
        break;
    default: abort();
    }
@@ -1719,6 +1764,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
 {
    MemoryRegionSection section = {
+        .address_space = &address_space_memory,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
@@ -1728,10 +1774,9 @@ static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
    return phys_section_add(map, &section);
 }

-MemoryRegion *iotlb_to_region(hwaddr index)
+MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
 {
-    return address_space_memory.dispatch->map.sections[
-           index & ~TARGET_PAGE_MASK].mr;
+    return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
 }

 static void io_mem_init(void)
@@ -1789,9 +1834,12 @@ static void tcg_commit(MemoryListener *listener)
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
-        CPUArchState *env = cpu->env_ptr;
-
-        tlb_flush(env, 1);
+        /* FIXME: Disentangle the cpu.h circular files deps so we can
+           directly get the right CPU from listener.  */
+        if (cpu->tcg_as_listener != listener) {
+            continue;
+        }
+        tlb_flush(cpu, 1);
    }
 }

@@ -1811,10 +1859,6 @@ static MemoryListener core_memory_listener = {
    .priority = 1,
 };

-static MemoryListener tcg_memory_listener = {
-    .commit = tcg_commit,
-};
-
 void address_space_init_dispatch(AddressSpace *as)
 {
    as->dispatch = NULL;
@@ -1850,9 +1894,6 @@ static void memory_map_init(void)
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
-    if (tcg_enabled()) {
-        memory_listener_register(&tcg_memory_listener, &address_space_memory);
-    }
 }

 MemoryRegion *get_system_memory(void)
@@ -1923,18 +1964,6 @@ static void invalidate_and_set_dirty(hwaddr addr,
    xen_modified_memory(addr, length);
 }

-static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
-{
-    if (memory_region_is_ram(mr)) {
-        return !(is_write && mr->readonly);
-    }
-    if (memory_region_is_romd(mr)) {
-        return !is_write;
-    }
-
-    return false;
-}
-
 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
 {
    unsigned access_size_max = mr->ops->valid.max_access_size;
@@ -2079,7 +2108,7 @@ enum write_rom_type {
    FLUSH_CACHE,
 };

-static inline void cpu_physical_memory_write_rom_internal(
+static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
 {
    hwaddr l;
@@ -2089,8 +2118,7 @@ static inline void cpu_physical_memory_write_rom_internal(

    while (len > 0) {
        l = len;
-        mr = address_space_translate(&address_space_memory,
-                                     addr, &addr1, &l, true);
+        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
@@ -2116,10 +2144,10 @@ static inline void cpu_physical_memory_write_rom_internal(
 }

 /* used for ROM loading : can write in RAM and ROM */
-void cpu_physical_memory_write_rom(hwaddr addr,
+void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
 {
-    cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
+    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
 }

 void cpu_flush_icache_range(hwaddr start, int len)
@@ -2134,7 +2162,8 @@ void cpu_flush_icache_range(hwaddr start, int len)
        return;
    }

-    cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
+    cpu_physical_memory_write_rom_internal(&address_space_memory,
+                                           start, NULL, len, FLUSH_CACHE);
 }

 typedef struct {
@@ -2325,7 +2354,7 @@ void cpu_physical_memory_unmap(void *buffer, hwaddr len,
 }

 /* warning: addr must be aligned */
-static inline uint32_t ldl_phys_internal(hwaddr addr,
+static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
 {
    uint8_t *ptr;
@@ -2334,8 +2363,7 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
    hwaddr l = 4;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                 false);
+    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
@@ -2368,23 +2396,23 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
    return val;
 }

-uint32_t ldl_phys(hwaddr addr)
+uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+    return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
 }

-uint32_t ldl_le_phys(hwaddr addr)
+uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+    return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
 }

-uint32_t ldl_be_phys(hwaddr addr)
+uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
+    return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
 }

 /* warning: addr must be aligned */
-static inline uint64_t ldq_phys_internal(hwaddr addr,
+static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
 {
    uint8_t *ptr;
@@ -2393,7 +2421,7 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
    hwaddr l = 8;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
@@ -2427,31 +2455,31 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
    return val;
 }

-uint64_t ldq_phys(hwaddr addr)
+uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+    return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
 }

-uint64_t ldq_le_phys(hwaddr addr)
+uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+    return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
 }

-uint64_t ldq_be_phys(hwaddr addr)
+uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
+    return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
 }

 /* XXX: optimize */
-uint32_t ldub_phys(hwaddr addr)
+uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
 {
    uint8_t val;
-    cpu_physical_memory_read(addr, &val, 1);
+    address_space_rw(as, addr, &val, 1, 0);
    return val;
 }

 /* warning: addr must be aligned */
-static inline uint32_t lduw_phys_internal(hwaddr addr,
+static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
                                          enum device_endian endian)
 {
    uint8_t *ptr;
@@ -2460,7 +2488,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
    hwaddr l = 2;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
@@ -2494,32 +2522,32 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
    return val;
 }

-uint32_t lduw_phys(hwaddr addr)
+uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
+    return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
 }

-uint32_t lduw_le_phys(hwaddr addr)
+uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
+    return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
 }

-uint32_t lduw_be_phys(hwaddr addr)
+uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
+    return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
 }

 /* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
-void stl_phys_notdirty(hwaddr addr, uint32_t val)
+void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
 {
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
@@ -2542,7 +2570,8 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
 }

 /* warning: addr must be aligned */
-static inline void stl_phys_internal(hwaddr addr, uint32_t val,
+static inline void stl_phys_internal(AddressSpace *as,
+                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
 {
    uint8_t *ptr;
@@ -2550,7 +2579,7 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
    hwaddr l = 4;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
@@ -2582,30 +2611,31 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
    }
 }

-void stl_phys(hwaddr addr, uint32_t val)
+void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+    stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
 }

-void stl_le_phys(hwaddr addr, uint32_t val)
+void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+    stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
 }

-void stl_be_phys(hwaddr addr, uint32_t val)
+void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
+    stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
 }

 /* XXX: optimize */
-void stb_phys(hwaddr addr, uint32_t val)
+void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
    uint8_t v = val;
-    cpu_physical_memory_write(addr, &v, 1);
+    address_space_rw(as, addr, &v, 1, 1);
 }

 /* warning: addr must be aligned */
-static inline void stw_phys_internal(hwaddr addr, uint32_t val,
+static inline void stw_phys_internal(AddressSpace *as,
+                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
 {
    uint8_t *ptr;
@@ -2613,8 +2643,7 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
    hwaddr l = 2;
    hwaddr addr1;

-    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                 true);
+    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2645,38 +2674,38 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
    }
 }

-void stw_phys(hwaddr addr, uint32_t val)
+void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
+    stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
 }

-void stw_le_phys(hwaddr addr, uint32_t val)
+void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
+    stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
 }

-void stw_be_phys(hwaddr addr, uint32_t val)
+void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
+    stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
 }

 /* XXX: optimize */
-void stq_phys(hwaddr addr, uint64_t val)
+void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
    val = tswap64(val);
-    cpu_physical_memory_write(addr, &val, 8);
+    address_space_rw(as, addr, (void *) &val, 8, 1);
 }

-void stq_le_phys(hwaddr addr, uint64_t val)
+void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
    val = cpu_to_le64(val);
-    cpu_physical_memory_write(addr, &val, 8);
+    address_space_rw(as, addr, (void *) &val, 8, 1);
 }

-void stq_be_phys(hwaddr addr, uint64_t val)
+void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
    val = cpu_to_be64(val);
-    cpu_physical_memory_write(addr, &val, 8);
+    address_space_rw(as, addr, (void *) &val, 8, 1);
 }

 /* virtual memory access for debug (includes writing to ROM) */
@@ -2697,10 +2726,11 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
-        if (is_write)
-            cpu_physical_memory_write_rom(phys_addr, buf, l);
-        else
-            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
+        if (is_write) {
+            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
+        } else {
+            address_space_rw(cpu->as, phys_addr, buf, l, 0);
+        }
        len -= l;
        buf += l;
        addr += l;
--- a/fpu/softfloat-macros.h
+++ b/fpu/softfloat-macros.h
@@ -55,7 +55,7 @@ these four paragraphs for those parts of this code that are retained.
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/

-INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
+static inline void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
 {
    uint32_t z;

@@ -81,7 +81,7 @@ INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/

-INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
+static inline void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
 {
    uint64_t z;

@@ -115,7 +115,7 @@ INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
 | described above, and is returned at the location pointed to by `z1Ptr'.)
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shift64ExtraRightJamming(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
@@ -152,7 +152,7 @@ INLINE void
 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shift128Right(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
@@ -187,7 +187,7 @@ INLINE void
 | the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shift128RightJamming(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
@@ -238,7 +238,7 @@ INLINE void
 | `z2Ptr'.)
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
@@ -296,7 +296,7 @@ INLINE void
 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
@@ -315,7 +315,7 @@ INLINE void
 | `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 shortShift192Left(
     uint64_t a0,
     uint64_t a1,
@@ -350,7 +350,7 @@ INLINE void
 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 add128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
@@ -370,7 +370,7 @@ INLINE void
 | `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 add192(
     uint64_t a0,
     uint64_t a1,
@@ -408,7 +408,7 @@ INLINE void
 | `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 sub128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
@@ -426,7 +426,7 @@ INLINE void
 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 sub192(
     uint64_t a0,
     uint64_t a1,
@@ -462,7 +462,7 @@ INLINE void
 | `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
    uint32_t aHigh, aLow, bHigh, bLow;
    uint64_t z0, zMiddleA, zMiddleB, z1;
@@ -492,7 +492,7 @@ INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr
 | `z2Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 mul128By64To192(
     uint64_t a0,
     uint64_t a1,
@@ -520,7 +520,7 @@ INLINE void
 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 *----------------------------------------------------------------------------*/

-INLINE void
+static inline void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
@@ -702,7 +702,7 @@ static int8 countLeadingZeros64( uint64_t a )
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/

-INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {

    return ( a0 == b0 ) && ( a1 == b1 );
@@ -715,7 +715,7 @@ INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/

-INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {

    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
@@ -728,7 +728,7 @@ INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 | returns 0.
 *----------------------------------------------------------------------------*/

-INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {

    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
@@ -741,7 +741,7 @@ INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/

-INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {

    return ( a0 != b0 ) || ( a1 != b1 );
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .7.50
 .0.2