Compare commits


89 Commits

Author SHA1 Message Date
Michael Roth
e82ee0845c Update VERSION for 1.6.2 release
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 18:21:01 -06:00
Laszlo Ersek
192d2f4cc7 scsi_target_send_command(): amend stable-1.6 port of the CVE-2013-4344 fix
The originally suggested fix for CVE-2013-4344 introduced a regression in
scsi_target_send_command() / REQUEST_SENSE; the third argument passed to
scsi_device_get_sense() -- for the "len" parameter -- ignored the
possibility of the guest SCSI driver requesting truncated (or shorter than
full) sense data.

This could result in (r->len > req->cmd.xfer) on return, which is not
valid SCSI.

The problem was addressed in the second round, and the commit on the
master branch (84642435) is correct. However the stable-1.6 branch (the
v1.6.1 release) has the original, regressive fix (commit fdcbe7d5); let's
update it.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 14:49:49 -06:00
Tomoki Sekiyama
9388fdb603 qemu-ga: execute fsfreeze-freeze in reverse order of mounts
Currently, fsfreeze-freeze may cause a deadlock if a guest has loopback mounts
of image files in its disk; e.g.:

    # mount | grep ^/
    /dev/vda1 / type ext4 (rw,noatime,seclabel,data=ordered)
    /tmp/disk.img on /mnt type ext4 (rw,relatime,seclabel)

To avoid the deadlock, this freezes filesystems in reverse order of mounts.
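
A minimal sketch of the idea, assuming a plain array of mount points collected in mount order (the helper and names below are illustrative, not qemu-ga's actual code):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   /* FIFREEZE */

    /* Freeze innermost (most recently mounted) filesystems first, so a
     * loopback mount is frozen before the filesystem holding its image. */
    static void freeze_mounts_reversed(char **mount_points, int n)
    {
        for (int i = n - 1; i >= 0; i--) {
            int fd = open(mount_points[i], O_RDONLY);
            if (fd >= 0) {
                ioctl(fd, FIFREEZE);   /* error handling omitted */
                close(fd);
            }
        }
    }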

Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama@hds.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
*fix up commit msg
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit e5d9adbdab)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 14:45:41 -06:00
Petar Jovanovic
68a1ac19a1 target-mips: fix 64-bit FPU config for user-mode emulation
The FR bit should be initialized to 1 for MIPS64, under the condition that
this bit is writable and that the CPU has an FPU unit. It should be
initialized to zero for MIPS32.
This fixes various MIPS32 issues with FPU instructions whose behaviour
defaulted to 64-bit FPU mode.

Signed-off-by: Petar Jovanovic <petar.jovanovic@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 4d66261f71)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 14:42:33 -06:00
Paul Moore
08dde5b17b seccomp: add kill() to the syscall whitelist
The kill() syscall is triggered with the following command:

 # qemu -sandbox on -monitor stdio \
        -device intel-hda -device hda-duplex -vnc :0

The resulting syslog/audit message:

 # ausearch -m SECCOMP
 ----
 time->Wed Nov 20 09:52:08 2013
 type=SECCOMP msg=audit(1384912328.482:6656): auid=0 uid=0 gid=0 ses=854
  subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 pid=12087
  comm="qemu-kvm" sig=31 syscall=62 compat=0 ip=0x7f7a1d2abc67 code=0x0
 # scmp_sys_resolver 62
 kill
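
The whitelist entry itself boils down to a single libseccomp rule; a hedged sketch of the usual form (ctx being the filter context QEMU's sandbox already creates):

    #include <seccomp.h>

    static int whitelist_kill(scmp_filter_ctx ctx)
    {
        /* allow kill(2) unconditionally within the sandbox filter */
        return seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(kill), 0);
    }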

Reported-by: CongLi <coli@redhat.com>
Tested-by: CongLi <coli@redhat.com>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Acked-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
(cherry picked from commit e9eecb5bf8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Fam Zheng
12f741031f vmdk: Fix creating big description file
The buffer for the description file was 4096 bytes, which only covers a few
hundred extents. This changes the buffer to be dynamically allocated with
g_strdup_printf in order to support bigger cases.
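
A sketch of the shape of the change, using GLib's g_strdup_printf so the buffer is sized by the formatted text itself (the format string and names are illustrative, not the exact VMDK descriptor template):

    #include <glib.h>

    /* Before: a fixed char desc[4096] truncated large extent lists. */
    static gchar *make_descriptor(unsigned cid, const char *extent_lines)
    {
        return g_strdup_printf("# Disk DescriptorFile\nCID=%x\n%s",
                               cid, extent_lines);   /* caller g_free()s */
    }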

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit af057fe740)

Conflicts:

	block/vmdk.c
	tests/qemu-iotests/059
	tests/qemu-iotests/059.out

*removed dependencies on 4823970b and 4f6fd349

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Kevin Wolf
982c8c5e20 qcow2: Zero-initialise first cluster for new images
Strictly speaking, this is only required for has_zero_init() == false,
but it's easy enough to just do a cluster-aligned write that is padded
with zeros after the header.

This fixes a problem where, after 'qemu-img create', QEMU attempted to parse
header extensions that were really just random leftover data.
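
A rough sketch of the approach (names illustrative, GLib allocation as used throughout QEMU):

    /* Pad the header write to a full zero-filled cluster so the bytes
     * after the header are guaranteed zeros rather than stale data that
     * could be misparsed as header extensions. */
    uint8_t *buf = g_malloc0(cluster_size);   /* zero-initialised */
    memcpy(buf, header, header_size);
    /* ... write all cluster_size bytes of buf at offset 0 ... */
    g_free(buf);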

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f8413b3c23)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Alex Williamson
d90ff19d0a vfio-pci: Release all MSI-X vectors when disabled
We were relying on msix_unset_vector_notifiers() to release all the
vectors when we disable MSI-X, but this only happens when MSI-X is
still enabled on the device.  Perform further cleanup by releasing
any remaining vectors listed as in-use after this call.  This caused
a leak of IRQ routes on hotplug depending on how the guest OS prepared
the device for removal.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: qemu-stable@nongnu.org
(cherry picked from commit 3e40ba0faf)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Amos Kong
7cfd037403 rng-egd: offset the point when repeatedly read from the buffer
The buffer content might be read out more than once, but currently
we just repeatedly read the first data block; the buffer offset is
missing.
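
The fix amounts to reading from a moving cursor into the buffer instead of always from its start; a hedged sketch with illustrative field names:

    /* Before: memcpy(dest, s->buf, len);  -- always the first block */
    size_t n = MIN(len, s->buf_len - s->offset);
    memcpy(dest, s->buf + s->offset, n);
    s->offset += n;   /* the next read continues where this one stopped */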

Cc: qemu-stable@nongnu.org
Signed-off-by: Amos Kong <akong@redhat.com>
Message-id: 1385023371-8198-3-git-send-email-akong@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
(cherry picked from commit 1eb1bd9eaf)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Bandan Das
0f6298786f pci: unregister vmstate_pcibus on unplug
PCIBus registers a vmstate during init. Unregister it upon
removal/unplug.

Signed-off-by: Bandan Das <bsd@redhat.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 5c397242d5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Stefan Hajnoczi
03060dc086 qdev-monitor: Unref device when device_add fails
qdev_device_add() leaks the created device upon failure.  I suspect this
problem crept in because qdev_free() unparents the device but does not
drop a reference - confusing name.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit ee6abeb6ec)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Igor Mammedov
2cafbbf1e0 qdev-monitor: Fix crash when device_add is called with abstract driver
A user is able to crash a running QEMU instance when the following
monitor command is called:

 device_add intel-hda-generic

The crash is caused by an assertion in object_initialize_with_type()
when the type is abstract.

Checking whether the type is abstract before the instance is created in
qdev_device_add() prevents the crash on incorrect user input.
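
A sketch of such a guard, using QOM's class lookup (object_class_by_name() and object_class_is_abstract() are real QOM calls; the error reporting here is simplified):

    ObjectClass *oc = object_class_by_name(driver);
    if (oc && object_class_is_abstract(oc)) {
        error_report("'%s' is an abstract type and cannot be instantiated",
                     driver);
        return NULL;   /* bail out before object_initialize_with_type() asserts */
    }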

Cc: qemu-stable@nongnu.org
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 2fa4e56d88)

Conflicts:

	qdev-monitor.c

*updated to reflect different 1.6 variable names

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Vlad Yasevich
991a5f3782 qom: Fix memory leak in object_property_set_link()
Save the result of the call to object_get_canonical_path()
so we can free it.
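
The fix follows the standard GLib ownership rule: the returned string belongs to the caller. A minimal sketch:

    gchar *path = object_get_canonical_path(obj);   /* caller owns the string */
    /* ... hand path to the visitor ... */
    g_free(path);   /* previously leaked */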

Cc: qemu-stable@nongnu.org
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 2d3aa28cc2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Amos Kong
817a872cc6 virtio-net: fix the memory leak in rxfilter_notify()
object_get_canonical_path() returns a gchar*, it should be freed by the
caller.

Signed-off-by: Amos Kong <akong@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 96e35046e4)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Alex Williamson
37da395651 vfio-pci: Fix multifunction=on
When an assigned device is initialized it copies the device config
space into the emulated config space.  Unfortunately multifunction is
setup prior to the device initfn and gets clobbered.  We need to
restore it just like pci-assign does.

Cc: qemu-stable@nongnu.org
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8d07d6c465)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Peter Lieven
4d2f39a918 qcow2: fix possible corruption when reading multiple clusters
If multiple sectors spanning multiple clusters are read, the
function count_contiguous_clusters should ensure that the
cluster type does not change between the clusters.

In particular, the for-loop should break when we have one
or more normal clusters followed by a compressed cluster.

Unfortunately the wrong macro was used in the mask to
compare the flags.

This was discovered while debugging a data corruption
issue when converting a compressed qcow2 image to raw.
qemu-img reads 2MB chunks which span multiple clusters.
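
The essence of the fix: the compressed flag must be among the bits required to stay constant across the scan, so the loop stops at the first compressed cluster. A hedged sketch (QCOW_OFLAG_COMPRESSED is qcow2's flag; the loop is simplified):

    uint64_t mask = stop_flags | QCOW_OFLAG_COMPRESSED;
    uint64_t expected = first_l2_entry & mask;

    for (i = 0; i < nb_clusters; i++) {
        if ((be64_to_cpu(l2_table[i]) & mask) != expected) {
            break;   /* cluster type changed, e.g. normal -> compressed */
        }
    }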

CC: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 78a52ad5ac)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Max Reitz
4a5bf69f78 qcow2: count_contiguous_clusters and compression
The function is not intended to be used on compressed clusters and will
not work correctly if used anyway, since L2E_OFFSET_MASK is not the
right mask for determining the offset of compressed clusters. Therefore,
assert that the first cluster is not compressed and always include the
compression flag in the mask of significant flags, i.e., stop the search
as soon as a compressed cluster occurs.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 15684a4742)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:30 -06:00
Cole Robinson
53e8cf93b3 Fix pc migration from qemu <= 1.5
The following commit introduced a migration incompatibility:

commit 568f0690fd
Author: David Gibson <david@gibson.dropbear.id.au>
Date:   Thu Jun 6 18:48:49 2013 +1000

    pci: Replace pci_find_domain() with more general pci_root_bus_path()

The issue is that i440fx savevm idstr went from 0000:00:00.0/I440FX to
0000:00.0/I440FX. Unfortunately we are stuck with the breakage for
1.6 machine types.

Add a compat property to maintain the busted idstr for the 1.6 machine
types, but revert to the old style format for 1.7+, and <= 1.5.

Tested with migration from qemu 1.5, qemu 1.6, and qemu.git.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cole Robinson <crobinso@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 04c7d8b8de)

Conflicts:

	include/hw/i386/pc.h

*removed 1.6 compat properties
*enabled short_root_bus by default to enable for 1.6 (no 1.6 compat
 fields to do so in 1.6.x)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-09 11:40:20 -06:00
Max Filippov
2c8187434f exec: fix breakpoint_invalidate when pc may not be translated
This fixes qemu abort with the following message:

    include/qemu/int128.h:22: int128_get64: Assertion `!a.hi' failed.

which happens due to an attempt to invalidate a breakpoint by a virtual
address for which get_phys_page_debug couldn't find a mapping.

For more details see
http://lists.nongnu.org/archive/html/qemu-devel/2013-09/msg04582.html

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e8262a1b5b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 19:59:05 -06:00
Jason Wang
5326636ff2 virtio-net: only delete bh that existed
We delete the bh during exit without checking whether it exists. This will
lead to a NULL pointer dereference, since the bh is created conditionally
depending on guest driver status and features. So add an existence check
before trying to delete it.

Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1383728288-28469-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
(cherry picked from commit fe2dafa02d)

Conflicts:

	hw/net/virtio-net.c

*modified to reflect timer function names for 1.6

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 19:56:56 -06:00
Stefan Weil
3bd74d1712 linux-user: Fix stat64 syscall for SPARC64
Some targets use a stat64 structure for the stat64 syscall while others
use a stat structure. SPARC64 used the wrong kind.

Instead of extending the conditional compilation in syscall.c, now a
macro TARGET_HAS_STRUCT_STAT64 is defined whenever a target has a
target_stat64.
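
A sketch of how such a per-target guard reads in syscall.c (placement and surrounding code illustrative):

    #ifdef TARGET_HAS_STRUCT_STAT64
        struct target_stat64 st;   /* targets that define target_stat64 */
    #else
        struct target_stat st;     /* all others reuse plain target_stat */
    #endif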

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Erik de Castro Lopo <erikd@mega-nerd.com>
(cherry picked from commit 20d155bc90)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 19:53:12 -06:00
Peter Maydell
c554ddb901 configure: Explicitly set ARFLAGS so we can build with GNU Make 4.0
Our rules.mak adds '-rR' to MAKEFLAGS to indicate that we will be
explicitly specifying everything and not relying on any default
variables or rules. However we were accidentally relying on the
default ARFLAGS ("rv"). This went unnoticed because of a bug in
GNU Make 3.82 and earlier which meant that adding -rR to MAKEFLAGS
only affected submakes, not the currently running instance.
Explicitly set ARFLAGS in config-host.mak, in the same way we
handle CFLAGS and LDFLAGS; this will allow us to work with
Make 4.0.

Thanks to Paul Smith for analyzing this bug for us.

Cc: qemu-stable@nongnu.org
Reported-by: Ken Moffat <zarniwhoop@ntlworld.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 45d285abd7)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 19:46:18 -06:00
Wenchao Xia
c1fecf2085 tests: fix memleak in error path test for input visitor
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1383676551-18806-3-git-send-email-xiawenc@linux.vnet.ibm.com
Cc: qemu-stable@nongnu.org
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
(cherry picked from commit 8aa15b6e52)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 18:07:32 -06:00
Wenchao Xia
08e2f35c65 qapi: fix memleak by adding implicit struct functions in dealloc visitor
Otherwise member "base" is leaked in a qapi_free_STRUCTURE() call.

Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1383676551-18806-2-git-send-email-xiawenc@linux.vnet.ibm.com
Cc: qemu-stable@nongnu.org
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
(cherry picked from commit 3dce9cad5a)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 18:07:13 -06:00
Mike Frysinger
ba3b29e04b configure: detect endian via compile test
This avoids needing to execute a program and keeping an (incomplete)
list when cross-compiling.
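
The probe lets the compiler bake the byte order into string constants and then inspects the object file, so nothing has to run; a generic sketch of the technique (the exact magic values in QEMU's configure may differ):

    /* On a big-endian target, ascii_mm spells "BIGenDianSyS" in the object
     * file; on little-endian, ascii_ii spells "LiTTleEnDian".  configure
     * then greps the compiled object for one of the two strings. */
    short ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
    short ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
    int main(void) { return (int)(ascii_mm[0] + ascii_ii[0]); }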

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Tested-by: James Hogan <james.hogan@imgtec.com> [mips]
Message-id: 1372649418-4987-1-git-send-email-vapier@gentoo.org
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
(cherry picked from commit 61cc919f73)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 18:06:05 -06:00
Richard Henderson
0ca1774b61 Adjust qapi-visit for python-2.4.3
We say we support python 2.4, but python 2.4.3 does not
support the "expr if test else expr" syntax used here.

This allows QEMU to compile on RHEL 5.3, the last release for ia64.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 7b75d9d61b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 16:09:50 -06:00
Alexey Kardashevskiy
bd821a9bc3 memory: fix 128 arithmetic in info mtree
mtree_print_mr() calls int128_get64() in 3 places but only 2 places
handle 2^64 correctly.

This fixes the third call of int128_get64().

Cc: qemu-stable@nongnu.org
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a66670c79c)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 16:05:41 -06:00
Paolo Bonzini
7bda8555de monitor: eliminate monitor_event_state_lock
This lock does not protect anything that the BQL does not already
protect.  Furthermore, with -nodefaults and no monitor, the mutex
is not initialized but monitor_protocol_event_queue is called
anyway, which causes a crash under mingw (and only works by luck
under Linux or other POSIX OSes).

Reported-by: Orx Goshen <orx.goshen@intel.com>
Cc: Daniel Berrange <berrange@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit c20b7fa4b2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 16:04:30 -06:00
Hans de Goede
91a2cf3d0b audio: honor QEMU_AUDIO_TIMER_PERIOD instead of waking up every *nano* second
Now that we no longer have MIN_REARM_TIMER_NS, a bug in the audio subsystem
has clearly shown itself by trying to make a timer fire every nanosecond.

Note we have a similar problem in 1.6, 1.5 and older, but there
MIN_REARM_TIMER_NS limits the wakeups caused by audio being active to
4000 times per second. This still causes a host CPU load of 50% for simply
playing audio, whereas with this patch git master is at 13%, so we should
backport this to 1.5 and 1.6 too.

Note this will not apply to 1.5 and 1.6 as is.

Cc: qemu-stable@nongnu.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit b4350deed6)

Conflicts:

	audio/audio.c

*fixed to reflect 1.6 timer function/clock names

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 15:59:25 -06:00
Fam Zheng
b685f6af6f vmdk: Fix vmdk_parse_extents
An extra 'p++' after the while loop, when *p == '\n', will move p to an
unknown data position, risking parsing junk data or a memory access violation.

Cc: qemu-stable@nongnu.org
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 899f1ae219)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 14:08:26 -06:00
Matthew Daley
99b5b999a4 xen_disk: mark ioreq as mapped before unmapping in error case
Commit 4472beae modified the semantics of ioreq_{un,}map so that they are
idempotent if called when they're not needed (i.e., twice in a row). However,
it neglected to handle the case where batch mapping is not being used (the
default), and one of the grants fails to map. In this case, ioreq_unmap will
be called to unwind and unmap any mappings already performed, but ioreq_unmap
simply returns due to the aforementioned change (the ioreq has not already
been marked as mapped).

The frontend user can therefore force xen_disk to leak grant mappings, a
per-domain limited resource.

Fix by marking the ioreq as mapped before calling ioreq_unmap in this
situation.

Signed-off-by: Matthew Daley <mattjd@gmail.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
(cherry picked from commit a76f48e533)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-03 14:07:20 -06:00
Stefan Weil
78bd79fac3 qemu-char: Fix potential out of bounds access to local arrays
Latest gcc-4.8 supports a new option -fsanitize=address which activates
an AddressSanitizer. This AddressSanitizer stops the QEMU system emulation
very early because two character arrays of size 8 are potentially written
with 9 bytes.

Commit 6ea314d914 added the code.

There is no obvious reason why width or height could need 8 characters,
so reduce it to 7 characters which together with the terminating '\0'
fit into the arrays.
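
An illustrative reconstruction of the off-by-one (the exact parse in qemu-char.c may differ): a "%8[...]" scanset stores up to 8 characters plus the terminating NUL, i.e. 9 bytes, into an 8-byte array:

    char width[8], height[8];
    /* buggy: "%8[0-9]" can write 9 bytes into char[8];
     * fixed: 7 characters plus '\0' fit exactly */
    sscanf(p, "%7[0-9]x%7[0-9]", width, height);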

Cc: qemu-stable <qemu-stable@nongnu.org>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Alex Bennée <alex@bennee.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 49aa4058ac)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:43:54 -06:00
Stefan Weil
a2c9dc504a misc: Use new rotate functions
Signed-off-by: Stefan Weil <sw@weilnetz.de>
(cherry picked from commit 3df2b8fde9)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:31:11 -06:00
Stefan Weil
927fab3e06 bitops: Add rotate functions (rol8, ror8, ...)
These functions were copied from include/linux/bitops.h.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
(cherry picked from commit 6aa25b4a7b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:30:55 -06:00
Stefan Weil
2c342444a0 tci: Add implementation of rotl_i64, rotr_i64
It is used by qemu-ppc64 when running Debian's busybox-static.

Cc: qemu-stable <qemu-stable@nongnu.org>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
(cherry picked from commit d285bf784b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:30:40 -06:00
Markus Armbruster
98384a92cc tests: Update .gitignore for test-int128 and test-bitops
Forgotten in commit 6046c62 and 3464700.

Cc: qemu-stable@nongnu.org
Reviewed-by: Andreas Färber <afaerber@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 9dbb52e862)

Conflicts:

	tests/.gitignore

*removed post-1.6 additions from diff

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:25:59 -06:00
Markus Armbruster
7038fe818e tests: Fix schema parser test for in-tree build
Commit 4f193e3 added the test, but screwed up in-tree builds
(SRCDIR=.): the tests's output overwrites the expected output, and is
thus compared to itself.

Cc: qemu-stable@nongnu.org
Reported-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit d8039e58b1)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:23:18 -06:00
Amit Shah
8501da4b93 char: remove watch callback on chardev detach from frontend
If a frontend device releases the chardev (via unplug), the chr handlers
are set to NULL via qdev's exit callbacks invoking
qemu_chr_add_handlers().  If the chardev had a pending operation, a
callback will be invoked, which will try to access data in the
just-released frontend, causing a segfault.

Ensure the callbacks are disabled when frontends release chardevs.

This was seen when a virtio-serial port was unplugged when heavy
guest->host IO was in progress (causing a callback to be registered).
In the window in which the throttling was active, unplugging ports
caused a qemu segfault.

https://bugzilla.redhat.com/show_bug.cgi?id=985205

CC: <qemu-stable@nongnu.org>
Reported-by: Sibiao Luo <sluo@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 386a5a1e00)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:18:10 -06:00
Amit Shah
aeefaaac83 char: use common function to disable callbacks on chardev close
This deduplicates code that was repeated in many places.

CC: <qemu-stable@nongnu.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 26da70c725)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:17:59 -06:00
Amit Shah
205cff77af char: move backends' io watch tag to CharDriverState
All the backends implement an io watcher tag for callbacks.  Move it to
CharDriverState from each backend's struct to make accessing the tag from
backend-neutral functions easier.

This will be used later to cancel a callback on chardev detach from a
frontend.

CC: <qemu-stable@nongnu.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 7ba9addc16)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-12-02 21:17:50 -06:00
Michael Roth
62ecc3a0e3 Update VERSION for 1.6.1 release
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-10-04 10:21:43 -05:00
Asias He
fdcbe7d587 scsi: Allocate SCSITargetReq r->buf dynamically
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1007330
Brew: https://brewweb.devel.redhat.com/taskinfo?taskID=6282465

This is the backport of the following commit. The patch was not
sent publicly since it concerns an embargoed bug.

   r->buf is hardcoded to 2056 which is (256 + 1) * 8, allowing 256 luns at
   most. If more than 256 luns are specified by the user, we have a buffer
   overflow in scsi_target_emulate_report_luns.

   To fix, we allocate the buffer dynamically.

   Signed-off-by: Asias He <asias@redhat.com>

Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

*s/&r->buf/r->buf/ due to type change
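
A sketch of the allocation change's shape (field and size names illustrative; the point is that the buffer is sized from the requested LUN count instead of the fixed 2056 bytes):

    /* Before: uint8_t buf[2056];  -- (256 + 1) * 8, at most 256 LUNs */
    r->buf_len = (n_luns + 1) * 8;    /* 8-byte header + one entry per LUN */
    r->buf = g_malloc0(r->buf_len);   /* grows with the LUN count */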

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-10-04 10:18:56 -05:00
Liu, Jinsong
1b5f770941 qemu: Add qemu xen logic for Xen HVM S3 resume
This patch is qemu patch 2 of the Xen HVM S3 fix, adding the qemu
Xen logic. When qemu wakes up, the qemu Xen logic is notified and
issues a hypercall to the Xen hypervisor to unpause the domain.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit 11addd0ab9)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-10-01 10:50:42 -05:00
Liu, Jinsong
bc05a488b4 qemu: Adjust qemu wakeup
Currently Xen hvm s3 has a bug coming from the difference between
qemu-traditional and qemu-xen. For qemu-traditional, the way to
resume from hvm s3 is via the 'xl trigger' command. However, for
qemu-xen, the way to resume from hvm s3 is inherited from standard
qemu, i.e. via QMP, and it doesn't work under Xen.

The root cause is that, for qemu-xen, the 'xl trigger' command didn't
reset devices, while QMP didn't unpause the hvm domain even though both
did a qemu system reset.

We have two qemu patches and one xl patch to fix the Xen hvm s3 bug.
This patch is qemu patch 1. It adjusts qemu wakeup so that the
Xen s3 resume logic (which will be implemented in qemu patch 2)
will be notified after qemu system reset.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit 4bc78a8772)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-10-01 10:50:35 -05:00
Stefan Hajnoczi
ba20326a93 coroutine: add ./configure --disable-coroutine-pool
The 'gthread' coroutine backend was written before the freelist (aka
pool) existed in qemu-coroutine.c.

This means that every thread is expected to exit when its coroutine
terminates.  It is not possible to reuse threads from a pool.

This patch automatically disables the pool when 'gthread' is used.  This
allows the 'gthread' backend to work again (for example,
tests/test-coroutine completes successfully instead of hanging).

I considered implementing thread reuse but I don't want quirks like CPU
affinity differences due to coroutine threads being recycled.  The
'gthread' backend is a reference backend and it's therefore okay to skip
the pool optimization.

Note this patch also makes it easy to toggle the pool for benchmarking
purposes:

  ./configure --with-coroutine-backend=ucontext \
              --disable-coroutine-pool

Reported-by: Gabriel Kerneis <gabriel@kerneis.info>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Gabriel Kerneis <gabriel@kerneis.info>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 70c60c089f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-30 22:57:15 -05:00
Michael S. Tsirkin
ae00a27fea piix4: disable io on reset
io base register at 0x40 is cleared on reset,
but io is not disabled until some other event
happens to call pm_io_space_update.

Invoke pm_io_space_update directly to make this
consistent.

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit c046e8c4a2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-30 22:48:18 -05:00
Fam Zheng
61fbeb6e81 vmdk: fix cluster size check for flat extents
We use the extent size as the cluster size for flat extents (where no L1/L2
table is allocated, so it's safe) in order to reuse the sector-calculation
code shared with sparse extents.

Don't pass in the cluster size when adding a flat extent; just set it to the
extent's sector count later, so the cluster size check will not fail.

The cluster_sectors field is changed to int64_t to allow big flat extents.

Without this, flat extent opening is broken:

    # qemu-img create -f vmdk -o subformat=monolithicFlat /tmp/a.vmdk 100G
    Formatting '/tmp/a.vmdk', fmt=vmdk size=107374182400 compat6=off subformat='monolithicFlat' zeroed_grain=off
    # qemu-img info /tmp/a.vmdk
    image: /tmp/a.vmdk
    file format: raw
    virtual size: 0 (0 bytes)
    disk size: 4.0K

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 301c7d38a0)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-30 22:34:36 -05:00
Stefan Hajnoczi
fc06b43094 rbd: avoid qemu_rbd_snap_list() memory leaks
When there are no snapshots qemu_rbd_snap_list() returns 0 and the
snapshot table pointer is NULL.  Don't forget to free the snaps buffer
we allocated for librbd rbd_snap_list().

When the function succeeds don't forget to free the snaps buffer after
calling rbd_snap_list_end().
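
Both leaks follow the same shape; a sketch against the librbd calls named above (error handling simplified):

    rbd_snap_info_t *snaps = g_new0(rbd_snap_info_t, max_snaps);
    int n = rbd_snap_list(image, snaps, &max_snaps);
    if (n <= 0) {
        g_free(snaps);   /* previously leaked when there were no snapshots */
        return n;
    }
    /* ... copy the entries into QEMU's snapshot table ... */
    rbd_snap_list_end(snaps);
    g_free(snaps);       /* also required on the success path */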

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9e6337d081)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-30 22:32:04 -05:00
Brad Smith
6bbb9d8100 tap: Use numbered tap/tun devices on all *BSD OS's
The following patch simplifies the *BSD tap/tun code and makes use of numbered
tap/tun interfaces on all *BSD OS's. NetBSD has a patch in their pkgsrc tree
to make use of this feature, and DragonFly supports it as well.

Signed-off-by: Brad Smith <brad@comstyle.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit aa4f082f75)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-26 20:53:51 -05:00
Michael Tokarev
b314120afd iov: avoid "orig_len may be used uninitialized" warning
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 2be178a475)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-26 20:35:32 -05:00
Gerd Hoffmann
dc6fbaa832 xhci: emulate intr endpoint intervals correctly
Respect the interval for interrupt endpoints, so we don't finish
transfers as fast as possible but at the rate configured by the guest.

Fixes guest deadlocks triggered by interrupt storms.

Cc:
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 4d7a81c06f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-25 15:33:02 -05:00
Paolo Bonzini
c8adc0db7e virtio-blk: do not relay a previous driver's WCE configuration to the current
The following sequence happens:
- the SeaBIOS virtio-blk driver does not support the WCE feature, which
causes QEMU to disable writeback caching

- the Linux virtio-blk driver resets the device, finds WCE is available
but writeback caching is disabled; tells block layer to not send cache
flush commands

- the Linux virtio-blk driver sets the DRIVER_OK bit, which causes
writeback caching to be re-enabled, but the Linux virtio-blk driver does
not know of this side effect and cache flushes remain disabled

The bug is at the third step.  If the guest does know about CONFIG_WCE,
QEMU should ignore the WCE feature's state.  The guest will control the
cache mode solely using configuration space.  This change makes Linux
do flushes correctly, but Linux will keep SeaBIOS's writethrough mode.

Hence, whenever the guest is reset, the cache mode of the disk should
be reset to whatever was specified in the "-drive" option.  With this
change, the Linux virtio-blk driver finds that writeback caching is
enabled, and tells the block layer to send cache flush commands
appropriately.

Reported-by: Rusty Russell <rusty@au1.ibm.com
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ef5bc96268)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 23:03:09 -05:00
Paolo Bonzini
aeab582580 blockdev: do not default cache.no-flush to true
That's why all my VMs were so fast lately. :)

This changed in 1.6.0 by mistake in patch 29c4e2b (blockdev: Split up
'cache' option, 2013-07-18).

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 1df6fa4bc6)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 23:02:51 -05:00
Stefan Weil
5c20c1ffe7 tci: Fix qemu-alpha on 32 bit hosts (wrong assertions)
Debian busybox-static for alpha has a load address of 0x0000000120000000
which is mapped to 0x0000000020000000 for 32 bit hosts.

qemu-alpha uses the TCG opcodes qemu_ld32, qemu_ld64, qemu_st32 and
qemu_st64 which all raise the assertion (taddr == host_addr).

Remove all assertions of this type because they are either wrong or
unnecessary (when sizeof(tcg_target_ulong) >= sizeof(target_ulong)).

Cc: qemu-stable <qemu-stable@nongnu.org>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 07ac4dc5db)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:56:25 -05:00
Jan Kiszka
5d2de77798 kvmvapic: Clear also physical ROM address when entering INACTIVE state
To avoid misinterpreting INACTIVE after migration as old qemu-kvm's
STANDBY, also clear rom_state_paddr when going back to this state.

CC: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4357930b8a)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:54:39 -05:00
Jan Kiszka
7ea8a3c12a kvmvapic: Enter inactive state on hardware reset
ROM layout may change after reset if devices were hotplugged, so we have
to pick up the physical address again when the ROM is initialized. This
is best achieved by resetting the state to INACTIVE.

CC: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c056bc3f34)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:54:18 -05:00
Jan Kiszka
50b31e8052 kvmvapic: Catch invalid ROM size
If not caught early, a zero-length ROM will cause a NULL-pointer access
later on in patch_hypercalls when allocating a zero-length ROM copy and
trying to read from it.

CC: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 18e5eec4db)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:53:49 -05:00
Gerd Hoffmann
4b5b472146 chardev: fix pty_chr_timer
pty_chr_timer first calls pty_chr_update_read_handler(), then clears
timer_tag (because it is a one-shot timer).  This is the wrong order
though: pty_chr_update_read_handler might re-arm the timer, and the
new timer_tag gets overwritten in that case.

This leads to crashes when unplugging a pty chardev: pty_chr_close
thinks no timer is running -> timer isn't canceled -> pty_chr_timer gets
called with a stale CharDriverState -> BOOM.

This patch fixes the ordering.
Kill the pointless goto while being at it.

https://bugzilla.redhat.com/show_bug.cgi?id=994414

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit b0d768c35e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:52:23 -05:00
Aurelien Jarno
76f6989487 pcnet-pci: mark I/O and MMIO as LITTLE_ENDIAN
Now that the memory subsystem is propagating the endianness correctly,
the pcnet-pci device should have its I/O ports and MMIO memory marked
as LITTLE_ENDIAN, as PCI devices are little endian.

This makes the pcnet-pci NIC work again on big-endian MIPS Malta
(default NIC).

Cc: qemu-stable@nongnu.org
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a26405b350)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:51:04 -05:00
Cole Robinson
8b4b3a71fd qapi-types.py: Fix enum struct sizes on i686
Unlike other list types, enum wasn't adding any padding, which caused
a mismatch between the generated struct size and GenericList struct
size. More details in a678e26cbe

This crashed qemu if calling qmp query-tpm-types for example, which
upsets libvirt capabilities probing. Reproducer on i686:

(sleep 5; printf '{"execute":"qmp_capabilities"}\n{"execute":"query-tpm-types"}\n') | ./i386-softmmu/qemu-system-i386 -S -nodefaults -nographic -M none -qmp stdio

https://bugs.launchpad.net/qemu/+bug/1219207

Cc: qemu-stable@nongnu.org
Signed-off-by: Cole Robinson <crobinso@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 02dc4bf568)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:50:13 -05:00
Anthony PERARD
41900b0857 pc_q35: Initialize Xen.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 254c12825f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:47:27 -05:00
Anthony PERARD
755ec4ca0f pc: Initializing ram_memory under Xen.
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
CC: qemu-stable@nongnu.org
(cherry picked from commit 04d7bad8a4)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:47:03 -05:00
Gerd Hoffmann
dc0973b588 qxl: fix local renderer
The local spice renderer assumes the primary surface is located at the
start of the "ram" bar.  This used to be a requirement in qxl hardware
revision 1.  In revision 2+ this is relaxed.  Nevertheless guest drivers
continued to use the traditional location, for historical and backward
compatibility reasons.  The qxl kms driver doesn't though as it depends
on qxl revision 4+ anyway.

The result is that local rendering is hosed for recent linux guests: you'll
get pixel garbage with non-spice ui (gtk, sdl, vnc) and when doing
screendumps.  Fix that by doing a proper mapping of the guest-specified
memory location.

https://bugzilla.redhat.com/show_bug.cgi?id=948717

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c58c7b959b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:46:18 -05:00
Gerd Hoffmann
b6d163fdd8 ehci: save device pointer in EHCIState
We'll need a pointer to the actual pci/sysbus device, so
stick a pointer to it into the EHCIState struct.

https://bugzilla.redhat.com/show_bug.cgi?id=1005495

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit adbecc8973)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:45:32 -05:00
Aurelien Jarno
a1991d05d3 ne2000: mark I/O as LITTLE_ENDIAN
Now that the memory subsystem is propagating the endianness correctly,
the ne2000 device should have its I/O ports marked as LITTLE_ENDIAN, as
PCI devices are little endian.

This makes the ne2000 NIC work again on PowerPC.

Cc: qemu-stable@nongnu.org
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 45d883dcf2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:40:40 -05:00
Hu Tao
1110014801 exec: check offset_within_address_space for register subpage
If offset_within_address_space falls in a page, then we register a
subpage. So check offset_within_address_space rather than
offset_within_region.

Cc: qemu-stable@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: "Andreas Färber" <afaerber@suse.de>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8826624970)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:37:49 -05:00
Jan Kiszka
2a93d3dd32 Revert "memory: Return -1 again on reads from unsigned regions"
This reverts commit 9b8c692435.

The commit was wrong: We only return -1 on invalid accesses, not on
valid but unbacked ones. This broke various corner cases.

Cc: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 68a7439a15)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:31:53 -05:00
Jan Kiszka
7ab1044eb1 memory: Provide separate handling of unassigned io ports accesses
Accesses to unassigned io ports shall return -1 on read and be ignored
on write. Ensure these properties via dedicated ops, decoupling us from
the memory core's handling of unassigned accesses.

Cc: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3bb28b7208)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:31:12 -05:00
Stefan Weil
e8601a4e31 w32: Fix access to host devices (regression)
QEMU failed to open host devices like \\.\PhysicalDrive0 (first hard disk)
for some time (commit 8a79380b8ef1b02d2abd705dd026a18863b09020?).

Those devices use hdev_open which did not use the latest API for options.
This resulted in a fatal runtime error:

  Block protocol 'host_device' doesn't support the option 'filename'

Duplicate code from raw_open to fix this.

Cc: qemu-stable@nongnu.org
Reported-by: David Brenner <david.brenner3@gmail.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 68dc036488)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:30:07 -05:00
Gerd Hoffmann
96b14d0db1 usb: parallelize usb3 streams
usb3 bulk endpoints with streams are implicitly pipelined now,
so the requests will actually be processed in parallel.  Also
allow them to complete out-of-order.

Fixes stalls in the uas driver.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c96c41ed0d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:28:02 -05:00
Gerd Hoffmann
9dbfbb89b2 xhci: reset port when disabling slot
Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 5c67dd7b48)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:25:14 -05:00
Andrea Arcangeli
57ea2d21ae exec: always use MADV_DONTFORK
MADV_DONTFORK prevents fork from failing with -ENOMEM if the default
overcommit heuristics decide there's too much anonymous virtual
memory allocated. Whether or not the KVM secondary MMU is synchronized
with MMU notifiers makes no difference in that regard.

Secondly, it's always more efficient to avoid copying the guest
physical address space in the fork child (so we avoid marking all the
guest memory readonly in the parent and skip the establishment
and teardown of lots of pagetables in the child).

In the common case we can ignore the error if MADV_DONTFORK is not
available. Leave a second invocation that errors out in the KVM path
if MMU notifiers are missing and KVM is enabled, to abort in such
case.
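
The call itself is a one-liner; a sketch of the tolerant invocation described above:

    #include <sys/mman.h>

    if (madvise(start, length, MADV_DONTFORK) != 0) {
        /* ignored here: only the KVM-without-MMU-notifiers path treats a
         * missing MADV_DONTFORK as fatal */
    }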

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Tested-By: Benoit Canet <benoit@irqsave.net>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 3e469dbfe4)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:19:42 -05:00
Michael S. Tsirkin
1cd7138d49 virtio_pci: fix level interrupts with irqfd
commit 62c96360ae
    virtio-pci: fix level interrupts
only helps systems without irqfd: on systems with irqfd support we
passed in a flag requesting irqfd even when MSI-X is disabled.

As a result, for level interrupts we didn't install an fd handler so
unmasking an fd had no effect.

Fix this up.

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 23fe2b3f9e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:16:54 -05:00
Paolo Bonzini
9fab8e1fe1 exec: fix writing to MMIO area with non-power-of-two length
The problem is introduced by commit 2332616 (exec: Support 64-bit
operations in address_space_rw, 2013-07-08).  Before that commit,
memory_access_size would only return 1/2/4.

Since alignment is already handled above, reduce l to the largest
power of two that is smaller than l.
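
QEMU carries a pow2floor() helper for this, but the open-coded form shows the reduction (a generic sketch):

    /* reduce l to the largest power of two not exceeding it (l > 0) */
    while (l & (l - 1)) {
        l &= l - 1;   /* clear the lowest set bit until one remains */
    }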

Cc: qemu-stable@nongnu.org
Reported-by: Oleksii Shevchuk <alxchk@gmail.com>
Tested-by: Oleksii Shevchuk <alxchk@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 098178f274)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:12:44 -05:00
Hervé Poussineau
2ffbe03e8b adlib: sort offsets in portio registration
This fixes the following assert when -device adlib is used:
ioport.c:240: portio_list_add: Assertion `pio->offset >= off_last' failed.

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 2b21fb57af)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:11:51 -05:00
Paolo Bonzini
f9fd82ee93 target-i386: fix disassembly with PAE=1, PG=0
CR4.PAE=1 will not enable paging if CR0.PG=0, but the "if" chain
in x86_cpu_get_phys_page_debug says otherwise.  Check CR0.PG
before everything else.

Fixes "-d in_asm" for a code section at the beginning of OVMF.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
(cherry picked from commit f2f8560c7a)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 22:05:30 -05:00
Paolo Bonzini
da4e203efa block: expect errors from bdrv_co_is_allocated
Some bdrv_is_allocated callers do not expect errors, but the fallback
in qcow2.c might make other callers trip on assertion failures or
infinite loops.

Fix the callers to always look for errors.

Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d663640c04)

Conflicts:

	block/cow.c

*modified to avoid dependency on upstream's e641c1e8

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 20:59:51 -05:00
Gerd Hoffmann
c09a4634d9 Revert "usb-hub: report status changes only once"
This reverts commit a309ee6e0a.

This isn't in line with the usb specification and adds regressions;
win7 fails to drive the usb hub, for example.

It was added because it "solved" the issue of hubs interacting badly
with the xhci host controller.  Now that the root cause has been fixed
in xhci (commit <FIXME>) we can revert this one.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit bdebd6ee81)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:53:40 -05:00
Gerd Hoffmann
c0a5eb81b4 xhci: fix endpoint interval calculation
Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit ca7162782a)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:51:32 -05:00
yinyin
358bb0daa1 virtio: virtqueue_get_avail_bytes: fix desc_pa when loop over the indirect descriptor table
virtqueue_get_avail_bytes: when an indirect descriptor is found, we need to loop over it.
           /* loop over the indirect descriptor table */
           indirect = 1;
           max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
           num_bufs = i = 0;
           desc_pa = vring_desc_addr(desc_pa, i);
But it initializes i to 0, then uses i to update desc_pa, so we will always get:
desc_pa = vring_desc_addr(desc_pa, 0);
The last two lines should be swapped.
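
The corrected order, using the same helpers the message quotes: read the indirect table's address while i still indexes the outer descriptor, then reset the counters:

    max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
    desc_pa = vring_desc_addr(desc_pa, i);   /* consume the old i first */
    num_bufs = i = 0;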

Cc: qemu-stable@nongnu.org
Signed-off-by: Yin Yin <yin.yin@cs2c.com.cn>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1ae2757c6c)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:36:15 -05:00
Anton Blanchard
3fe494efc5 pseries: Fix stalls on hypervisor virtual console
A number of users are reporting stalls when using the pseries
hypervisor virtual console.

A simple test case is to paste 15 or 17 characters at a time
into the console. Pasting 15 characters at a time works fine
but pasting 17 characters hangs for a random amount of time.
Other activity (network, qemu monitor etc) unblocks it.

If qemu-char tries to send more than 16 characters at once,
vty_can_receive returns false. At this point we have to
wait for the guest to consume that output. Everything is good
so far.

The problem occurs when the guest does consume the output.
We need to signal back to the qemu-char layer that we are
ready for more input. Without this we block until something
else kicks us (e.g. network activity).
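
A sketch of the shape of the fix, assuming the kick goes through qemu_chr_accept_input(), the chardev call of this era for signalling readiness (surrounding names illustrative):

    /* in the vty's get-chars path, once the guest has drained the output */
    if (drained) {
        qemu_chr_accept_input(dev->chardev);   /* tell qemu-char: send more */
    }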

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 7770b6f78a)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:23:18 -05:00
Michael S. Tsirkin
a73c74f63a pc: fix regression for 64 bit PCI memory
commit 3984890181
    pc: limit 64 bit hole to 2G by default
introduced a way for management to control
the window allocated to the 64 bit PCI hole.

This is useful, but existing management tools do not know how to set
this property.  As a result, e.g. specifying a large ivshmem device with
size > 4G is broken by default.  For example this configuration no
longer works:

-device ivshmem,size=4294967296,chardev=cfoo
-chardev socket,path=/tmp/sock,id=cfoo,server,nowait

Fix this by detecting that hole size was not specified
and defaulting to the backwards-compatible value of 1 << 62.

Cc: qemu-stable@nongnu.org
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1466cef32d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:12:03 -05:00
Markus Armbruster
964e0d4ec5 scsi: Fix scsi_bus_legacy_add_drive() scsi-generic with serial
scsi_bus_legacy_add_drive() creates either a scsi-disk or a
scsi-generic device.  It sets property "serial" to argument serial
unless null.  Crashes with scsi-generic, because it doesn't have such
a property.

Only usb_msd_initfn_storage() passes non-null serial.  Reproducer:

    $ qemu-system-x86_64 -nodefaults -display none -S -usb \
    -drive if=none,file=/dev/sg1,id=usb-drv0 \
    -device usb-storage,id=usb-msd0,drive=usb-drv0,serial=123
    qemu-system-x86_64: -device usb-storage,id=usb-msd0,drive=usb-drv0,serial=123: Property '.serial' not found
    Aborted (core dumped)

Fix by handling exactly like "removable": set the property only when
it exists.

Cc: qemu-stable@nongnu.org
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c24e7517ee)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:10:35 -05:00
Marcel Apfelbaum
11b0ab70a5 usb/dev-hid: Modified usb-tablet category from Misc to Input
The usb-tablet device was wrongly assigned to the Misc category.

Reported-by: Markus Armbruster <armbru@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 31efd2e883)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 19:09:19 -05:00
Peter Maydell
d6dcfd69f8 scripts/qapi.py: Avoid syntax not supported by Python 2.4
The Python "except Foo as x" syntax was only introduced in
Python 2.6, but we aim to support Python 2.4 and later.
Use the old-style "except Foo, x" syntax instead, thus
fixing configure/compile on systems with older Python.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 21e0043bad)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 18:56:54 -05:00
Michael R. Hines
260790645e rdma: silly ipv6 bugfix
My bad - but it's very important for us to warn the user that
IPv6 is broken on RoCE in linux right now, until linux releases
a fixed version.

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit c89aa2f185)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 18:54:58 -05:00
Aurelien Jarno
52f99b02e5 target-ppc: fix bit extraction for FPBF and FPL
Bit extraction for the FP BF and L field of the MTFSFI and MTFSF
instructions is wrong and doesn't match the reference manual (which
explain the bit number in big endian format). It has been broken in
commit 7d08d85645.

This patch fixes this, which in turn fixes the problem reported by
Khem Raj about the floor() function of libm.

Reported-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
CC: qemu-stable@nongnu.org (1.6)
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 779f659021)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 18:46:32 -05:00
Andreas Färber
c0c080c5d1 gdbstub: Fix gdb_register_coprocessor() register counting
Commit a0e372f0c4 reorganized the register
counting for GDB. While it seems correct not to let the total number of
registers skyrocket in an SMP scenario through a static variable, the
distinction between total register count and 'g' packet register count
(last_reg vs. num_g_regs) got lost along the way.

Fix this by introducing CPUState::gdb_num_g_regs and using that in
gdb_handle_packet().

Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org (stable-1.6)
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tested-by: Max Filippov <jcmvbkbc@gmail.com>
Tested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 35143f0164)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 17:12:22 -05:00
Stefan Hajnoczi
670599a08c block: ensure bdrv_drain_all() works during bdrv_delete()
In bdrv_delete() make sure to call bdrv_make_anon() *after* bdrv_close()
so that the device is still seen by bdrv_drain_all() when iterating
bdrv_states.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e1b5c52e04)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-09-24 16:31:10 -05:00
1693 changed files with 117249 additions and 212836 deletions

.gitignore

@@ -1,75 +1,65 @@
/config-devices.*
/config-all-devices.*
/config-all-disas.*
/config-host.*
/config-target.*
/config.status
/trace/generated-tracers.h
/trace/generated-tracers.c
/trace/generated-tracers-dtrace.h
/trace/generated-tracers.dtrace
/trace/generated-events.h
/trace/generated-events.c
/trace/generated-ust-provider.h
/trace/generated-ust.c
/libcacard/trace/generated-tracers.c
config-devices.*
config-all-devices.*
config-all-disas.*
config-host.*
config-target.*
trace/generated-tracers.h
trace/generated-tracers.c
trace/generated-tracers-dtrace.h
trace/generated-tracers.dtrace
trace/generated-events.h
trace/generated-events.c
libcacard/trace/generated-tracers.c
*-timestamp
/*-softmmu
/*-darwin-user
/*-linux-user
/*-bsd-user
*-softmmu
*-darwin-user
*-linux-user
*-bsd-user
libdis*
libuser
/linux-headers/asm
/qga/qapi-generated
/qapi-generated
/qapi-types.[ch]
/qapi-visit.[ch]
/qmp-commands.h
/qmp-marshal.c
/qemu-doc.html
/qemu-tech.html
/qemu-doc.info
/qemu-tech.info
/qemu.1
/qemu.pod
/qemu-img.1
/qemu-img.pod
/qemu-img
/qemu-nbd
/qemu-nbd.8
/qemu-nbd.pod
/qemu-options.def
/qemu-options.texi
/qemu-img-cmds.texi
/qemu-img-cmds.h
/qemu-io
/qemu-ga
/qemu-bridge-helper
/qemu-monitor.texi
/qmp-commands.txt
/vscclient
/test-bitops
/test-coroutine
/test-int128
/test-opts-visitor
/test-qmp-input-visitor
/test-qmp-output-visitor
/test-string-input-visitor
/test-string-output-visitor
/test-visitor-serialization
/fsdev/virtfs-proxy-helper
/fsdev/virtfs-proxy-helper.1
/fsdev/virtfs-proxy-helper.pod
/.gdbinit
linux-headers/asm
qapi-generated
qapi-types.[ch]
qapi-visit.[ch]
qmp-commands.h
qmp-marshal.c
qemu-doc.html
qemu-tech.html
qemu-doc.info
qemu-tech.info
qemu.1
qemu.pod
qemu-img.1
qemu-img.pod
qemu-img
qemu-nbd
qemu-nbd.8
qemu-nbd.pod
qemu-options.def
qemu-options.texi
qemu-img-cmds.texi
qemu-img-cmds.h
qemu-io
qemu-ga
qemu-bridge-helper
qemu-monitor.texi
vscclient
QMP/qmp-commands.txt
test-coroutine
test-qmp-input-visitor
test-qmp-output-visitor
test-string-input-visitor
test-string-output-visitor
test-visitor-serialization
fsdev/virtfs-proxy-helper
fsdev/virtfs-proxy-helper.1
fsdev/virtfs-proxy-helper.pod
.gdbinit
*.a
*.aux
*.cp
*.dvi
*.exe
*.dll
*.so
*.mo
*.fn
*.ky
*.log
@@ -83,35 +73,34 @@ libuser
*.tp
*.vr
*.d
!/scripts/qemu-guest-agent/fsfreeze-hook.d
!scripts/qemu-guest-agent/fsfreeze-hook.d
*.o
*.lo
*.la
*.pc
.libs
.sdk
*.swp
*.orig
.pc
*.gcda
*.gcno
patches
/pc-bios/bios-pq/status
/pc-bios/vgabios-pq/status
/pc-bios/optionrom/linuxboot.asm
/pc-bios/optionrom/linuxboot.bin
/pc-bios/optionrom/linuxboot.raw
/pc-bios/optionrom/linuxboot.img
/pc-bios/optionrom/multiboot.asm
/pc-bios/optionrom/multiboot.bin
/pc-bios/optionrom/multiboot.raw
/pc-bios/optionrom/multiboot.img
/pc-bios/optionrom/kvmvapic.asm
/pc-bios/optionrom/kvmvapic.bin
/pc-bios/optionrom/kvmvapic.raw
/pc-bios/optionrom/kvmvapic.img
/pc-bios/s390-ccw/s390-ccw.elf
/pc-bios/s390-ccw/s390-ccw.img
pc-bios/bios-pq/status
pc-bios/vgabios-pq/status
pc-bios/optionrom/linuxboot.asm
pc-bios/optionrom/linuxboot.bin
pc-bios/optionrom/linuxboot.raw
pc-bios/optionrom/linuxboot.img
pc-bios/optionrom/multiboot.asm
pc-bios/optionrom/multiboot.bin
pc-bios/optionrom/multiboot.raw
pc-bios/optionrom/multiboot.img
pc-bios/optionrom/kvmvapic.asm
pc-bios/optionrom/kvmvapic.bin
pc-bios/optionrom/kvmvapic.raw
pc-bios/optionrom/kvmvapic.img
pc-bios/s390-ccw/s390-ccw.elf
pc-bios/s390-ccw/s390-ccw.img
.stgit-*
cscope.*
tags

.gitmodules

@@ -1,30 +1,27 @@
[submodule "roms/vgabios"]
path = roms/vgabios
url = git://git.qemu-project.org/vgabios.git/
url = git://git.qemu.org/vgabios.git/
[submodule "roms/seabios"]
path = roms/seabios
url = git://git.qemu-project.org/seabios.git/
url = git://git.qemu.org/seabios.git/
[submodule "roms/SLOF"]
path = roms/SLOF
url = git://git.qemu-project.org/SLOF.git
url = git://git.qemu.org/SLOF.git
[submodule "roms/ipxe"]
path = roms/ipxe
url = git://git.qemu-project.org/ipxe.git
url = git://git.qemu.org/ipxe.git
[submodule "roms/openbios"]
path = roms/openbios
url = git://git.qemu-project.org/openbios.git
[submodule "roms/openhackware"]
path = roms/openhackware
url = git://git.qemu-project.org/openhackware.git
url = git://git.qemu.org/openbios.git
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = git://github.com/rth7680/qemu-palcode.git
[submodule "roms/sgabios"]
path = roms/sgabios
url = git://git.qemu-project.org/sgabios.git
url = git://git.qemu.org/sgabios.git
[submodule "pixman"]
path = pixman
url = git://anongit.freedesktop.org/pixman
[submodule "dtc"]
path = dtc
url = git://git.qemu-project.org/dtc.git
url = git://git.qemu.org/dtc.git

.mailmap

@@ -2,8 +2,7 @@
# into proper addresses so that they are counted properly in git shortlog output.
#
Andrzej Zaborowski <balrogg@gmail.com> balrog <balrog@c046a42c-6fe2-441c-8c8c-71466251a162>
Anthony Liguori <anthony@codemonkey.ws> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Anthony Liguori <anthony@codemonkey.ws> Anthony Liguori <aliguori@us.ibm.com>
Anthony Liguori <aliguori@us.ibm.com> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Aurelien Jarno <aurelien@aurel32.net> aurel32 <aurel32@c046a42c-6fe2-441c-8c8c-71466251a162>
Blue Swirl <blauwirbel@gmail.com> blueswir1 <blueswir1@c046a42c-6fe2-441c-8c8c-71466251a162>
Edgar E. Iglesias <edgar.iglesias@gmail.com> edgar_igl <edgar_igl@c046a42c-6fe2-441c-8c8c-71466251a162>

.travis.yml

@@ -1,81 +0,0 @@
language: c
python:
- "2.4"
compiler:
- gcc
- clang
notifications:
irc:
channels:
- "irc.oftc.net#qemu"
on_success: change
on_failure: always
env:
global:
- TEST_CMD="make check"
- EXTRA_CONFIG=""
# Development packages, EXTRA_PKGS saved for additional builds
- CORE_PKGS="libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev"
- NET_PKGS="libseccomp-dev libgnutls-dev libssh2-1-dev libspice-server-dev libspice-protocol-dev libnss3-dev"
- GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
- EXTRA_PKGS=""
matrix:
- TARGETS=alpha-softmmu,alpha-linux-user
- TARGETS=arm-softmmu,arm-linux-user
- TARGETS=aarch64-softmmu,aarch64-linux-user
- TARGETS=cris-softmmu
- TARGETS=i386-softmmu,x86_64-softmmu
- TARGETS=lm32-softmmu
- TARGETS=m68k-softmmu
- TARGETS=microblaze-softmmu,microblazeel-softmmu
- TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
- TARGETS=moxie-softmmu
- TARGETS=or32-softmmu,
- TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
- TARGETS=s390x-softmmu
- TARGETS=sh4-softmmu,sh4eb-softmmu
- TARGETS=sparc-softmmu,sparc64-softmmu
- TARGETS=unicore32-softmmu
- TARGETS=xtensa-softmmu,xtensaeb-softmmu
before_install:
- git submodule update --init --recursive
- sudo apt-get update -qq
- sudo apt-get install -qq ${CORE_PKGS} ${NET_PKGS} ${GUI_PKGS} ${EXTRA_PKGS}
script: "./configure --target-list=${TARGETS} ${EXTRA_CONFIG} && make && ${TEST_CMD}"
matrix:
# We manually include a number of additional build for non-standard bits
include:
# Debug related options
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-debug"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
compiler: gcc
# All the extra -dev packages
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_PKGS="libaio-dev libcap-ng-dev libattr1-dev libbrlapi-dev uuid-dev libusb-1.0.0-dev"
compiler: gcc
# Currently configure doesn't force --disable-pie
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_PKGS="sparse"
EXTRA_CONFIG="--enable-sparse"
compiler: gcc
# All the trace backends (apart from dtrace)
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=stderr"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=simple"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=ftrace"
TEST_CMD=""
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
EXTRA_CONFIG="--enable-trace-backend=ust"
compiler: gcc

CODING_STYLE

@@ -84,10 +84,3 @@ and clarity it comes on a line by itself:
Rationale: a consistent (except for functions...) bracing style reduces
ambiguity and avoids needless churn when lines are added or removed.
Furthermore, it is the QEMU coding style.
5. Declarations
Mixed declarations (interleaving statements and declarations within blocks)
are not allowed; declarations should be at the beginning of blocks. In other
words, the code should not generate warnings if using GCC's
-Wdeclaration-after-statement option.

Changelog

@@ -1,6 +1,6 @@
This file documents changes for QEMU releases 0.12 and earlier.
For changelog information for later releases, see
http://wiki.qemu-project.org/ChangeLog or look at the git history for
http://wiki.qemu.org/ChangeLog or look at the git history for
more detailed information.

MAINTAINERS

@@ -50,7 +50,8 @@ Descriptions of section entries:
General Project Administration
------------------------------
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
M: Paul Brook <paul@codesourcery.com>
Guest CPU cores (TCG):
----------------------
@@ -61,6 +62,7 @@ F: target-alpha/
F: hw/alpha/
ARM
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: target-arm/
@@ -81,7 +83,8 @@ F: hw/lm32/
F: hw/char/lm32_*
M68K
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: target-m68k/
F: hw/m68k/
@@ -158,6 +161,7 @@ Guest CPU Cores (KVM):
----------------------
Overall
M: Gleb Natapov <gleb@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
@@ -175,14 +179,12 @@ S: Maintained
F: target-ppc/kvm.c
S390
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target-s390x/kvm.c
F: hw/intc/s390_flic.[hc]
X86
M: Gleb Natapov <gleb@redhat.com>
M: Marcelo Tosatti <mtosatti@redhat.com>
L: kvm@vger.kernel.org
S: Supported
@@ -220,13 +222,6 @@ F: *win32*
ARM Machines
------------
Allwinner-a10
M: Li Guang <lig.fnst@cn.fujitsu.com>
S: Maintained
F: hw/*/allwinner-a10*
F: include/hw/*/allwinner-a10*
F: hw/arm/cubieboard.c
Exynos
M: Evgeny Voevodin <e.voevodin@samsung.com>
M: Maksim Kozlov <m.kozlov@samsung.com>
@@ -241,12 +236,6 @@ S: Supported
F: hw/arm/highbank.c
F: hw/net/xgmac.c
Canon DIGIC
M: Antony Pavlov <antonynpavlov@gmail.com>
S: Maintained
F: include/hw/arm/digic.h
F: hw/*/digic*
Gumstix
M: qemu-devel@nongnu.org
S: Orphan
@@ -259,6 +248,7 @@ F: hw/*/imx*
F: hw/arm/kzm.c
Integrator CP
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/arm/integratorcp.c
@@ -284,6 +274,7 @@ S: Maintained
F: hw/arm/palm.c
Real View
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/arm/realview*
@@ -294,17 +285,19 @@ S: Maintained
F: hw/arm/spitz.c
Stellaris
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/stellaris*
Versatile PB
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/versatile*
Xilinx Zynq
M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/arm/xilinx_zynq.c
F: hw/misc/zynq_slcr.c
@@ -334,15 +327,18 @@ F: hw/lm32/milkymist.c
M68K Machines
-------------
an5206
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/m68k/an5206.c
dummy_m68k
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/m68k/dummy_m68k.c
mcf5208
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/m68k/mcf5208.c
MicroBlaze Machines
@@ -353,7 +349,7 @@ S: Maintained
F: hw/microblaze/petalogix_s3adsp1800_mmu.c
petalogix_ml605
M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/microblaze/petalogix_ml605_mmu.c
@@ -496,13 +492,10 @@ F: hw/s390x/s390-*.c
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Supported
F: hw/s390x/s390-virtio-ccw.c
F: hw/s390x/css.[hc]
F: hw/s390x/sclp*.[hc]
F: hw/s390x/ipl*.[hc]
T: git git://github.com/cohuck/qemu virtio-ccw-upstr
UniCore32 Machines
@@ -516,24 +509,10 @@ F: hw/unicore32/
X86 Machines
------------
PC
M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: include/hw/i386/
F: hw/i386/
F: hw/pci-host/piix.c
F: hw/pci-host/q35.c
F: hw/pci-host/pam.c
F: include/hw/pci-host/q35.h
F: include/hw/pci-host/pam.h
F: hw/isa/piix4.c
F: hw/isa/lpc_ich9.c
F: hw/i2c/smbus_ich9.c
F: hw/acpi/piix4.c
F: hw/acpi/ich9.c
F: include/hw/acpi/ich9.h
F: include/hw/acpi/piix.h
F: hw/i386/pc.[ch]
F: hw/i386/pc_piix.c
Xtensa Machines
---------------
@@ -588,11 +567,12 @@ F: hw/scsi/*
T: git git://github.com/bonzini/qemu.git scsi-next
LSI53C895A
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: hw/scsi/lsi53c895a.c
SSI
M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/ssi/*
F: hw/block/m25p80.c
@@ -613,8 +593,7 @@ S: Supported
F: hw/*/*vhost*
virtio
M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: hw/*/virtio*
@@ -623,7 +602,6 @@ M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
S: Supported
F: hw/9pfs/
F: fsdev/
F: tests/virtio-9p-test.c
T: git git://github.com/kvaneesh/QEMU.git
virtio-blk
@@ -634,7 +612,6 @@ F: hw/block/virtio-blk.c
virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
S: Supported
F: hw/s390x/virtio-ccw.[hc]
T: git git://github.com/cohuck/qemu virtio-ccw-upstr
@@ -649,10 +626,9 @@ nvme
M: Keith Busch <keith.busch@intel.com>
S: Supported
F: hw/block/nvme*
F: tests/nvme-test.c
Xilinx EDK
M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/*/xilinx_*
@@ -662,7 +638,6 @@ Subsystems
----------
Audio
M: Vassili Karpov (malc) <av1474@comtv.ru>
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: audio/
F: hw/audio/
@@ -674,11 +649,9 @@ S: Supported
F: block*
F: block/
F: hw/block/
T: git git://repo.or.cz/qemu/kevin.git block
T: git git://github.com/stefanha/qemu.git block
Character Devices
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Maintained
F: qemu-char.c
@@ -696,7 +669,7 @@ F: include/hw/cpu/icc_bus.h
F: hw/cpu/icc_bus.c
Device Tree
M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device_tree.[ch]
@@ -716,45 +689,34 @@ F: audio/spiceaudio.c
F: hw/display/qxl*
Graphics
M: Anthony Liguori <aliguori@amazon.com>
M: Gerd Hoffmann <kraxel@redhat.com>
S: Odd Fixes
M: Anthony Liguori <aliguori@us.ibm.com>
S: Maintained
F: ui/
Cocoa graphics
M: Andreas Färber <andreas.faerber@web.de>
M: Peter Maydell <peter.maydell@linaro.org>
S: Odd Fixes
F: ui/cocoa.m
Main loop
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: vl.c
Human Monitor (HMP)
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Maintained
S: Supported
F: monitor.c
F: hmp.c
F: hmp-commands.hx
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
Network device layer
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Maintained
F: net/
T: git git://github.com/stefanha/qemu.git net
Netmap network backend
M: Luigi Rizzo <rizzo@iet.unipi.it>
M: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
M: Vincenzo Maffione <v.maffione@gmail.com>
W: http://info.iet.unipi.it/~luigi/netmap/
S: Maintained
F: net/netmap.c
Network Block Device (NBD)
M: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
@@ -766,9 +728,8 @@ T: git git://github.com/bonzini/qemu.git nbd-next
QAPI
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
S: Maintained
S: Supported
F: qapi/
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
QAPI Schema
M: Eric Blake <eblake@redhat.com>
@@ -776,16 +737,14 @@ M: Luiz Capitulino <lcapitulino@redhat.com>
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: qapi-schema.json
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
QMP
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Maintained
S: Supported
F: qmp.c
F: monitor.c
F: qmp-commands.hx
F: QMP/
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
SLIRP
M: Jan Kiszka <jan.kiszka@siemens.com>
@@ -807,12 +766,6 @@ M: Blue Swirl <blauwirbel@gmail.com>
S: Odd Fixes
F: scripts/checkpatch.pl
Seccomp
M: Eduardo Otubo <otubo@linux.vnet.ibm.com>
S: Supported
F: qemu-seccomp.c
F: include/sysemu/seccomp.h
Usermode Emulation
------------------
BSD user
@@ -844,6 +797,11 @@ M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: tcg/arm/
HPPA target
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/hppa/
i386 target
M: qemu-devel@nongnu.org
S: Maintained
@@ -884,73 +842,25 @@ TCI target
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: tcg/tci/
F: tci.c
Stable branches
---------------
Stable 1.0
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-1.0.git
T: git git://git.qemu.org/qemu-stable-1.0.git
S: Orphan
Stable 0.15
L: qemu-stable@nongnu.org
M: Andreas Färber <afaerber@suse.de>
T: git git://git.qemu-project.org/qemu-stable-0.15.git
S: Supported
T: git git://git.qemu.org/qemu-stable-0.15.git
S: Orphan
Stable 0.14
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-0.14.git
T: git git://git.qemu.org/qemu-stable-0.14.git
S: Orphan
Stable 0.10
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-0.10.git
T: git git://git.qemu.org/qemu-stable-0.10.git
S: Orphan
Block drivers
-------------
VMDK
M: Fam Zheng <famz@redhat.com>
S: Supported
F: block/vmdk.c
RBD
M: Josh Durgin <josh.durgin@inktank.com>
S: Supported
F: block/rbd.c
Sheepdog
M: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
M: Liu Yuan <namei.unix@gmail.com>
L: sheepdog@lists.wpkg.org
S: Supported
F: block/sheepdog.c
VHDX
M: Jeff Cody <jcody@redhat.com>
S: Supported
F: block/vhdx*
VDI
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: block/vdi.c
iSCSI
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Lieven <pl@kamp.de>
S: Supported
F: block/iscsi.c
NFS
M: Peter Lieven <pl@kamp.de>
S: Maintained
F: block/nfs.c
SSH
M: Richard W.M. Jones <rjones@redhat.com>
S: Supported
F: block/ssh.c

Makefile

@@ -28,14 +28,7 @@ CONFIG_ALL=y
include $(SRC_PATH)/rules.mak
config-host.mak: $(SRC_PATH)/configure
@echo $@ is out-of-date, running configure
@# TODO: The next lines include code which supports a smooth
@# transition from old configurations without config.status.
@# This code can be removed after QEMU 1.7.
@if test -x config.status; then \
./config.status; \
else \
sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh; \
fi
@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
else
config-host.mak:
ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
@@ -57,11 +50,6 @@ GENERATED_HEADERS += trace/generated-tracers-dtrace.h
endif
GENERATED_SOURCES += trace/generated-tracers.c
ifeq ($(TRACE_BACKEND),ust)
GENERATED_HEADERS += trace/generated-ust-provider.h
GENERATED_SOURCES += trace/generated-ust.c
endif
# Don't try to regenerate Makefile or configure
# We don't generate any of them
Makefile: ;
@@ -77,7 +65,7 @@ LIBS+=-lz $(LIBS_TOOLS)
HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qmp-commands.txt
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 QMP/qmp-commands.txt
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -127,30 +115,13 @@ defconfig:
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/Makefile.objs
endif
dummy := $(call unnest-vars,, \
stub-obj-y \
util-obj-y \
qga-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
common-obj-y \
common-obj-m)
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/tests/Makefile
endif
ifeq ($(CONFIG_SMARTCARD_NSS),y)
include $(SRC_PATH)/libcacard/Makefile
endif
all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
@@ -160,7 +131,6 @@ qemu-options.def: $(SRC_PATH)/qemu-options.hx
SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))
$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
$(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
subdir-%:
@@ -198,9 +168,7 @@ recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)
$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.o")
$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.lo")
Makefile: $(version-obj-y) $(version-lobj-y)
@@ -210,9 +178,6 @@ Makefile: $(version-obj-y) $(version-lobj-y)
libqemustub.a: $(stub-obj-y)
libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o
block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
######################################################################
qemu-img.o: qemu-img-cmds.h
@@ -266,10 +231,10 @@ clean:
# avoid old build problems by removing potentially incorrect old files
rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
rm -f qemu-options.def
find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -f fsdev/*.pod
rm -rf .libs */.libs
find . -name '*.[oda]' -type f -exec rm -f {} +
find . -name '*.l[oa]' -type f -exec rm -f {} +
rm -f $(TOOLS) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -Rf .libs
rm -f qemu-img-cmds.h
@# May not be present in GENERATED_HEADERS
rm -f trace/generated-tracers-dtrace.dtrace*
@@ -278,6 +243,7 @@ clean:
rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
rm -rf qapi-generated
rm -rf qga/qapi-generated
$(MAKE) -C tests/tcg clean
for d in $(ALL_SUBDIRS); do \
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
rm -f $$d/qemu-options.def; \
@@ -293,7 +259,6 @@ qemu-%.tar.bz2:
distclean: clean
rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
rm -f config-all-devices.mak config-all-disas.mak
rm -f po/*.mo
rm -f roms/seabios/config.mak roms/vgabios/config.mak
rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
@@ -305,20 +270,19 @@ distclean: clean
for d in $(TARGET_DIRS); do \
rm -rf $$d || exit 1 ; \
done
rm -Rf .sdk
if test -f pixman/config.log; then make -C pixman distclean; fi
if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi
KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
ar de en-us fi fr-be hr it lv nl pl ru th \
common de-ch es fo fr-ca hu ja mk nl-be pt sl tr \
bepo cz
bepo
ifdef INSTALL_BLOBS
BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
acpi-dsdt.aml q35-acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
@@ -337,7 +301,7 @@ endif
install-doc: $(DOCS)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) QMP/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
ifdef CONFIG_POSIX
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
@@ -374,12 +338,6 @@ install-datadir install-localstatedir
ifneq ($(TOOLS),)
$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
endif
ifneq ($(CONFIG_MODULES),)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \
$(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(qemu_moddir)/$$(echo $$s | tr / -)"; \
done
endif
ifneq ($(HELPERS-y),)
$(INSTALL_DIR) "$(DESTDIR)$(libexecdir)"
$(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)"
@@ -397,7 +355,7 @@ endif
$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
done
for d in $(TARGET_DIRS); do \
$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
$(MAKE) -C $$d $@ || exit 1 ; \
done
# various test targets
@@ -437,7 +395,7 @@ qemu-options.texi: $(SRC_PATH)/qemu-options.hx
qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
QMP/qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@," GEN $@")
qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx

Makefile.objs

@@ -19,8 +19,11 @@ block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
block-obj-y += qemu-coroutine-sleep.o
block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
block-obj-m = block/
ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
# only pull in the actual virtio-9p device if we also enabled virtio.
CONFIG_REALLY_VIRTFS=y
endif
######################################################################
# smartcard
@@ -38,9 +41,9 @@ libcacard-y += libcacard/vcardt.o
# single QEMU executable should support all CPUs and machines.
ifeq ($(CONFIG_SOFTMMU),y)
common-obj-y = blockdev.o blockdev-nbd.o block/
common-obj-y += iothread.o
common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
common-obj-y += net/
common-obj-y += readline.o
common-obj-y += qdev-monitor.o device-hotplug.o
common-obj-$(CONFIG_WIN32) += os-win32.o
common-obj-$(CONFIG_POSIX) += os-posix.o
@@ -48,8 +51,6 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
common-obj-$(CONFIG_LINUX) += fsdev/
common-obj-y += migration.o migration-tcp.o
common-obj-y += vmstate.o
common-obj-y += qemu-file.o
common-obj-$(CONFIG_RDMA) += migration-rdma.o
common-obj-y += qemu-char.o #aio.o
common-obj-y += block-migration.o
@@ -108,4 +109,17 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
# FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
# by libqemuutil.a. These should be moved to a separate .json schema.
qga-obj-y = qga/ qapi-types.o qapi-visit.o
qga-vss-dll-obj-y = qga/
vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
QEMU_CFLAGS+=$(GLIB_CFLAGS)
nested-vars += \
stub-obj-y \
util-obj-y \
qga-obj-y \
block-obj-y \
common-obj-y
dummy := $(call unnest-vars)

Makefile.target

@@ -70,6 +70,10 @@ all: $(PROGS) stap
# Dummy command so that make thinks it has done something
@true
CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y)
CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y)
CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
#########################################################
# cpu emulator library
obj-y = exec.o translate-all.o cpu-exec.o
@@ -79,8 +83,8 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(CONFIG_GDBSTUB_XML) += gdbstub-xml.o
obj-$(CONFIG_NO_KVM) += kvm-stub.o
#########################################################
# Linux user emulator target
@@ -121,7 +125,7 @@ LIBS+=$(libs_softmmu)
# xen support
obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
obj-$(call lnot,$(CONFIG_XEN)) += xen-stub.o
obj-$(CONFIG_NO_XEN) += xen-stub.o
# Hardware support
ifeq ($(TARGET_NAME), sparc64)
@@ -130,6 +134,8 @@ else
obj-y += hw/$(TARGET_BASE_ARCH)/
endif
main.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h
endif # CONFIG_SOFTMMU
@@ -137,26 +143,13 @@ endif # CONFIG_SOFTMMU
# Workaround for http://gcc.gnu.org/PR55489, see configure.
%/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)
dummy := $(call unnest-vars,,obj-y)
nested-vars += obj-y
# we are making another call to unnest-vars with different vars, protect obj-y,
# it can be overriden in subdir Makefile.objs
obj-y-save := $(obj-y)
block-obj-y :=
common-obj-y :=
# This resolves all nested paths, so it must come last
include $(SRC_PATH)/Makefile.objs
dummy := $(call unnest-vars,.., \
block-obj-y \
block-obj-m \
common-obj-y \
common-obj-m)
# Now restore obj-y
obj-y := $(obj-y-save)
all-obj-y = $(obj-y) $(common-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
all-obj-y = $(obj-y)
all-obj-y += $(addprefix ../, $(common-obj-y))
ifndef CONFIG_HAIKU
LIBS+=-lm

QMP/README (new file)

@@ -0,0 +1,88 @@
QEMU Monitor Protocol
=====================
Introduction
-------------
The QEMU Monitor Protocol (QMP) allows applications to communicate with
QEMU's Monitor.
QMP is JSON[1] based and currently has the following features:
- Lightweight, text-based, easy to parse data format
- Asynchronous messages support (ie. events)
- Capabilities Negotiation
For detailed information on QMP's usage, please, refer to the following files:
o qmp-spec.txt QEMU Monitor Protocol current specification
o qmp-commands.txt QMP supported commands (auto-generated at build-time)
o qmp-events.txt List of available asynchronous events
There is also a simple Python script called 'qmp-shell' available.
IMPORTANT: It's strongly recommended to read the 'Stability Considerations'
section in the qmp-commands.txt file before making any serious use of QMP.
[1] http://www.json.org
Usage
-----
To enable QMP, you need a QEMU monitor instance in "control mode". There are
two ways of doing this.
The simplest one is using the '-qmp' command-line option. The following
example makes QMP available on localhost port 4444:
$ qemu [...] -qmp tcp:localhost:4444,server
However, in order to have more complex combinations, like multiple monitors,
the '-mon' command-line option should be used along with the '-chardev' one.
For instance, the following example creates one user monitor on stdio and one
QMP monitor on localhost port 4444.
$ qemu [...] -chardev stdio,id=mon0 -mon chardev=mon0,mode=readline \
-chardev socket,id=mon1,host=localhost,port=4444,server \
-mon chardev=mon1,mode=control
Please, refer to QEMU's manpage for more information.
Simple Testing
--------------
To manually test QMP one can connect with telnet and issue commands by hand:
$ telnet localhost 4444
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}, "capabilities": []}}
{ "execute": "qmp_capabilities" }
{"return": {}}
{ "execute": "query-version" }
{"return": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}}
Development Process
-------------------
When changing QMP's interface (by adding new commands, events or modifying
existing ones) it's mandatory to update the relevant documentation, which is
one (or more) of the files listed in the 'Introduction' section*.
Also, it's strongly recommended to send the documentation patch first, before
doing any code change. This is so because:
1. Avoids the code dictating the interface
2. Review can improve your interface. Letting that happen before
you implement it can save you work.
* The qmp-commands.txt file is generated from the qmp-commands.hx one, which
is the file that should be edited.
Homepage
--------
http://wiki.qemu.org/QMP

QMP/qemu-ga-client

@@ -33,7 +33,7 @@
# $ qemu-ga-client fsfreeze freeze
# 2 filesystems frozen
#
# See also: http://wiki.qemu-project.org/Features/QAPI/GuestAgent
# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent
#
import base64
@@ -267,9 +267,7 @@ def main(address, cmd, args):
print('Hint: qemu is not running?')
sys.exit(1)
if cmd == 'fsfreeze' and args[0] == 'freeze':
client.sync(60)
elif cmd != 'ping':
if cmd != 'ping':
client.sync()
globals()['_cmd_' + cmd](client, args)

QMP/qmp-events.txt

@@ -1,4 +1,4 @@
QEMU Machine Protocol Events
QEMU Monitor Protocol Events
============================
BALLOON_CHANGE
@@ -18,28 +18,6 @@ Example:
"data": { "actual": 944766976 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
BLOCK_IMAGE_CORRUPTED
---------------------
Emitted when a disk image is being marked corrupt.
Data:
- "device": Device name (json-string)
- "msg": Informative message (e.g., reason for the corruption) (json-string)
- "offset": If the corruption resulted from an image access, this is the access
offset into the image (json-int)
- "size": If the corruption resulted from an image access, this is the access
size (json-int)
Example:
{ "event": "BLOCK_IMAGE_CORRUPTED",
"data": { "device": "ide0-hd0",
"msg": "Prevented active L1 table overwrite", "offset": 196608,
"size": 65536 },
"timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
BLOCK_IO_ERROR
--------------
@@ -159,7 +137,7 @@ Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
event.
DEVICE_DELETED
--------------
-----------------
Emitted whenever the device removal completion is acknowledged
by the guest.
@@ -194,22 +172,8 @@ Data:
},
"timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
GUEST_PANICKED
--------------
Emitted when guest OS panic is detected.
Data:
- "action": Action that has been taken (json-string, currently always "pause").
Example:
{ "event": "GUEST_PANICKED",
"data": { "action": "pause" } }
NIC_RX_FILTER_CHANGED
---------------------
-----------------
The event is emitted once until the query command is executed,
the first event will always be emitted.
@@ -225,45 +189,6 @@ Data:
"timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
}
QUORUM_FAILURE
--------------
Emitted by the Quorum block driver if it fails to establish a quorum.
Data:
- "reference": device name if defined else node name.
- "sector-num": Number of the first sector of the failed read operation.
- "sector-count": Failed read operation sector count.
Example:
{ "event": "QUORUM_FAILURE",
"data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
QUORUM_REPORT_BAD
-----------------
Emitted to report a corruption of a Quorum file.
Data:
- "error": Error message (json-string, optional)
Only present on failure. This field contains a human-readable
error message. There are no semantics other than that the
block layer reported an error and clients should not try to
interpret the error string.
- "node-name": The graph node name of the block driver state.
- "sector-num": Number of the first sector of the failed read operation.
- "sector-count": Failed read operation sector count.
Example:
{ "event": "QUORUM_REPORT_BAD",
"data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
RESET
-----
@@ -518,7 +443,7 @@ Data: None.
Example:
{ "event": "WAKEUP",
{ "event": "WATCHDOG",
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
WATCHDOG
@@ -539,3 +464,17 @@ Example:
Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
followed respectively by the RESET, SHUTDOWN, or STOP events.
GUEST_PANICKED
--------------
Emitted when guest OS panic is detected.
Data:
- "action": Action that has been taken (json-string, currently always "pause").
Example:
{ "event": "GUEST_PANICKED",
"data": { "action": "pause" } }

QMP/qmp-shell

@@ -31,7 +31,6 @@
# (QEMU)
import qmp
import json
import readline
import sys
import pprint
@@ -92,7 +91,7 @@ class QMPShell(qmp.QEMUMonitorProtocol):
"""
Build a QMP input object from a user provided command-line in the
following format:
< command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
"""
cmdargs = cmdline.split()
@@ -108,33 +107,15 @@ class QMPShell(qmp.QEMUMonitorProtocol):
value = True
elif opt[1] == 'false':
value = False
elif opt[1].startswith('{'):
value = json.loads(opt[1])
else:
value = opt[1]
optpath = opt[0].split('.')
parent = qmpcmd['arguments']
curpath = []
for p in optpath[:-1]:
curpath.append(p)
d = parent.get(p, {})
if type(d) is not dict:
raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
parent[p] = d
parent = d
if optpath[-1] in parent:
if type(parent[optpath[-1]]) is dict:
raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
else:
raise QMPShellError('Cannot set "%s" multiple times' % opt[0])
parent[optpath[-1]] = value
qmpcmd['arguments'][opt[0]] = value
return qmpcmd
def _execute_cmd(self, cmdline):
try:
qmpcmd = self.__build_cmd(cmdline)
except Exception, e:
print 'Error while parsing command line: %s' % e
except:
print 'command format: <command-name> ',
print '[arg-name1=arg1] ... [arg-nameN=argN]'
return True

QMP/qmp-spec.txt

@@ -1,17 +1,21 @@
QEMU Machine Protocol Specification
QEMU Monitor Protocol Specification - Version 0.1
1. Introduction
===============
This document specifies the QEMU Machine Protocol (QMP), a JSON-based protocol
which is available for applications to operate QEMU at the machine-level.
This document specifies the QEMU Monitor Protocol (QMP), a JSON-based protocol
which is available for applications to control QEMU at the machine-level.
To enable QMP support, QEMU has to be run in "control mode". This is done by
starting QEMU with the appropriate command-line options. Please, refer to the
QEMU manual page for more information.
2. Protocol Specification
=========================
This section details the protocol format. For the purpose of this document
"Client" is any application which is using QMP to communicate with QEMU and
"Server" is QEMU itself.
"Client" is any application which is communicating with QEMU in control mode,
and "Server" is QEMU itself.
JSON data structures, when mentioned in this document, are always in the
following format:
@@ -43,14 +47,14 @@ that the connection has been successfully established and that the Server is
ready for capabilities negotiation (for more information refer to section
'4. Capabilities Negotiation').
The greeting message format is:
The format is:
{ "QMP": { "version": json-object, "capabilities": json-array } }
Where,
- The "version" member contains the Server's version information (the format
is the same of the query-version command)
is the same of the 'query-version' command)
- The "capabilities" member specify the availability of features beyond the
baseline specification
@@ -79,7 +83,10 @@ of a command execution: success or error.
2.4.1 success
-------------
The format of a success response is:
The success response is issued when the command execution has finished
without errors.
The format is:
{ "return": json-object, "id": json-value }
@@ -89,12 +96,15 @@ The format of a success response is:
in a per-command basis or an empty json-object if the command does not
return data
- The "id" member contains the transaction identification associated
with the command execution if issued by the Client
with the command execution (if issued by the Client)
2.4.2 error
-----------
The format of an error response is:
The error response is issued when the command execution could not be
completed because of an error condition.
The format is:
{ "error": { "class": json-string, "desc": json-string }, "id": json-value }
@@ -104,7 +114,7 @@ The format of an error response is:
- The "desc" member is a human-readable error message. Clients should
not attempt to parse this message.
- The "id" member contains the transaction identification associated with
the command execution if issued by the Client
the command execution (if issued by the Client)
NOTE: Some errors can occur before the Server is able to read the "id" member,
in these cases the "id" member will not be part of the error response, even
@@ -114,9 +124,9 @@ if provided by the client.
-----------------------
As a result of state changes, the Server may send messages unilaterally
to the Client at any time. They are called "asynchronous events".
to the Client at any time. They are called 'asynchronous events'.
The format of asynchronous events is:
The format is:
{ "event": json-string, "data": json-object,
"timestamp": { "seconds": json-number, "microseconds": json-number } }
@@ -137,37 +147,36 @@ qmp-events.txt file.
===============
This section provides some examples of real QMP usage, in all of them
"C" stands for "Client" and "S" stands for "Server".
'C' stands for 'Client' and 'S' stands for 'Server'.
3.1 Server greeting
-------------------
S: { "QMP": { "version": { "qemu": { "micro": 50, "minor": 6, "major": 1 },
"package": ""}, "capabilities": []}}
S: {"QMP": {"version": {"qemu": "0.12.50", "package": ""}, "capabilities": []}}
3.2 Simple 'stop' execution
---------------------------
C: { "execute": "stop" }
S: { "return": {} }
S: {"return": {}}
3.3 KVM information
-------------------
C: { "execute": "query-kvm", "id": "example" }
S: { "return": { "enabled": true, "present": true }, "id": "example"}
S: {"return": {"enabled": true, "present": true}, "id": "example"}
3.4 Parsing error
------------------
C: { "execute": }
S: { "error": { "class": "GenericError", "desc": "Invalid JSON syntax" } }
S: {"error": {"class": "GenericError", "desc": "Invalid JSON syntax" } }
3.5 Powerdown event
-------------------
S: { "timestamp": { "seconds": 1258551470, "microseconds": 802384 },
"event": "POWERDOWN" }
S: {"timestamp": {"seconds": 1258551470, "microseconds": 802384}, "event":
"POWERDOWN"}
4. Capabilities Negotiation
----------------------------
@@ -175,17 +184,17 @@ S: { "timestamp": { "seconds": 1258551470, "microseconds": 802384 },
When a Client successfully establishes a connection, the Server is in
Capabilities Negotiation mode.
In this mode only the qmp_capabilities command is allowed to run, all
other commands will return the CommandNotFound error. Asynchronous
messages are not delivered either.
In this mode only the 'qmp_capabilities' command is allowed to run, all
other commands will return the CommandNotFound error. Asynchronous messages
are not delivered either.
Clients should use the qmp_capabilities command to enable capabilities
Clients should use the 'qmp_capabilities' command to enable capabilities
advertised in the Server's greeting (section '2.2 Server Greeting') they
support.
When the qmp_capabilities command is issued, and if it does not return an
When the 'qmp_capabilities' command is issued, and if it does not return an
error, the Server enters in Command mode where capabilities changes take
effect, all commands (except qmp_capabilities) are allowed and asynchronous
effect, all commands (except 'qmp_capabilities') are allowed and asynchronous
messages are delivered.
5 Compatibility Considerations
@@ -236,7 +245,7 @@ arguments, errors, asynchronous events, and so forth.
Any new names downstream wishes to add must begin with '__'. To
ensure compatibility with other downstreams, it is strongly
recommended that you prefix your downstream names with '__RFQDN_' where
recommended that you prefix your downstram names with '__RFQDN_' where
RFQDN is a valid, reverse fully qualified domain name which you
control. For example, a qemu-kvm specific monitor command would be:

QMP/qmp.py

@@ -1,5 +1,5 @@
# QEMU Monitor Protocol Python class
#
#
# Copyright (C) 2009, 2010 Red Hat Inc.
#
# Authors:
@@ -171,12 +171,7 @@ class QEMUMonitorProtocol:
pass
self.__sock.setblocking(1)
if not self.__events and wait:
ret = self.__json_read(only_event=True)
if ret == None:
# We are in blocking mode, if don't get anything, something
# went wrong
raise QMPConnectError("Error while reading from socket")
self.__json_read(only_event=True)
return self.__events
def clear_events(self):
@@ -193,9 +188,3 @@ class QEMUMonitorProtocol:
def settimeout(self, timeout):
self.__sock.settimeout(timeout)
def get_sock_fd(self):
return self.__sock.fileno()
def is_scm_available(self):
return self.__sock.family == socket.AF_UNIX
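
For context, a short usage sketch of this class, as a caller such as
qmp-shell might drive it; connect() and cmd() live in unchanged parts
of the file and their exact signatures are assumed here:

    import qmp

    mon = qmp.QEMUMonitorProtocol(('localhost', 4444))
    mon.connect()                      # greeting + capabilities handshake
    print(mon.cmd('query-status'))     # e.g. {'return': {'status': 'running'}}
    for event in mon.get_events(wait=False):  # drain any queued async events
        print(event)
    mon.clear_events()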

README

@@ -1,3 +1,3 @@
Read the documentation in qemu-doc.html or on http://wiki.qemu-project.org
Read the documentation in qemu-doc.html or on http://wiki.qemu.org
- QEMU team

VERSION

@@ -1 +1 @@
1.7.92
1.6.2

aio-posix.c

@@ -23,6 +23,7 @@ struct AioHandler
GPollFD pfd;
IOHandler *io_read;
IOHandler *io_write;
AioFlushHandler *io_flush;
int deleted;
int pollfds_idx;
void *opaque;
@@ -46,6 +47,7 @@ void aio_set_fd_handler(AioContext *ctx,
int fd,
IOHandler *io_read,
IOHandler *io_write,
AioFlushHandler *io_flush,
void *opaque)
{
AioHandler *node;
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
/* Update handler with latest information */
node->io_read = io_read;
node->io_write = io_write;
node->io_flush = io_flush;
node->opaque = opaque;
node->pollfds_idx = -1;
@@ -94,10 +97,12 @@ void aio_set_fd_handler(AioContext *ctx,
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
EventNotifierHandler *io_read)
EventNotifierHandler *io_read,
AioFlushEventNotifierHandler *io_flush)
{
aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
(IOHandler *)io_read, NULL, notifier);
(IOHandler *)io_read, NULL,
(AioFlushHandler *)io_flush, notifier);
}
bool aio_pending(AioContext *ctx)
@@ -142,11 +147,7 @@ static bool aio_dispatch(AioContext *ctx)
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
node->io_read) {
node->io_read(node->opaque);
/* aio_notify() does not count as progress */
if (node->opaque != &ctx->notifier) {
progress = true;
}
progress = true;
}
if (!node->deleted &&
(revents & (G_IO_OUT | G_IO_ERR)) &&
@@ -165,10 +166,6 @@ static bool aio_dispatch(AioContext *ctx)
g_free(tmp);
}
}
/* Run our timers */
progress |= timerlistgroup_run_timers(&ctx->tlg);
return progress;
}
@@ -176,7 +173,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
int ret;
bool progress;
bool busy, progress;
progress = false;
@@ -203,8 +200,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
g_array_set_size(ctx->pollfds, 0);
/* fill pollfds */
busy = false;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
node->pollfds_idx = -1;
/* If there aren't pending AIO operations, don't invoke callbacks.
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
* wait indefinitely.
*/
if (!node->deleted && node->io_flush) {
if (node->io_flush(node->opaque) == 0) {
continue;
}
busy = true;
}
if (!node->deleted && node->pfd.events) {
GPollFD pfd = {
.fd = node->pfd.fd,
@@ -217,10 +226,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
ctx->walking_handlers--;
/* No AIO operations? Get us out of here */
if (!busy) {
return progress;
}
/* wait until next event */
ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
ctx->pollfds->len,
blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
ret = g_poll((GPollFD *)ctx->pollfds->data,
ctx->pollfds->len,
blocking ? -1 : 0);
/* if we have any readable fds, dispatch event */
if (ret > 0) {
@@ -231,12 +245,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
node->pfd.revents = pfd->revents;
}
}
if (aio_dispatch(ctx)) {
progress = true;
}
}
/* Run dispatch even if there were no readable fds to run timers */
if (aio_dispatch(ctx)) {
progress = true;
}
return progress;
assert(progress || busy);
return true;
}

aio-win32.c

@@ -23,6 +23,7 @@
struct AioHandler {
EventNotifier *e;
EventNotifierHandler *io_notify;
AioFlushEventNotifierHandler *io_flush;
GPollFD pfd;
int deleted;
QLIST_ENTRY(AioHandler) node;
@@ -30,7 +31,8 @@ struct AioHandler {
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *e,
EventNotifierHandler *io_notify)
EventNotifierHandler *io_notify,
AioFlushEventNotifierHandler *io_flush)
{
AioHandler *node;
@@ -71,6 +73,7 @@ void aio_set_event_notifier(AioContext *ctx,
}
/* Update handler with latest information */
node->io_notify = io_notify;
node->io_flush = io_flush;
}
aio_notify(ctx);
@@ -93,9 +96,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
bool progress;
bool busy, progress;
int count;
int timeout;
progress = false;
@@ -109,9 +111,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
progress = true;
}
/* Run timers */
progress |= timerlistgroup_run_timers(&ctx->tlg);
/*
* Then dispatch any pending callbacks from the GSource.
*
@@ -127,11 +126,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
if (node->pfd.revents && node->io_notify) {
node->pfd.revents = 0;
node->io_notify(node->e);
/* aio_notify() does not count as progress */
if (node->e != &ctx->notifier) {
progress = true;
}
progress = true;
}
tmp = node;
@@ -152,8 +147,19 @@ bool aio_poll(AioContext *ctx, bool blocking)
ctx->walking_handlers++;
/* fill fd sets */
busy = false;
count = 0;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
/* If there aren't pending AIO operations, don't invoke callbacks.
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
* wait indefinitely.
*/
if (!node->deleted && node->io_flush) {
if (node->io_flush(node->e) == 0) {
continue;
}
busy = true;
}
if (!node->deleted && node->io_notify) {
events[count++] = event_notifier_get_handle(node->e);
}
@@ -161,13 +167,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
ctx->walking_handlers--;
/* No AIO operations? Get us out of here */
if (!busy) {
return progress;
}
/* wait until next event */
while (count > 0) {
int ret;
timeout = blocking ?
qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
int timeout = blocking ? INFINITE : 0;
int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
/* if we have any signaled events, dispatch event */
if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
@@ -188,11 +196,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
node->io_notify) {
node->io_notify(node->e);
/* aio_notify() does not count as progress */
if (node->e != &ctx->notifier) {
progress = true;
}
progress = true;
}
tmp = node;
@@ -210,14 +214,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
events[ret - WAIT_OBJECT_0] = events[--count];
}
if (blocking) {
/* Run the timers a second time. We do this because otherwise aio_wait
* will not note progress - and will stop a drain early - if we have
* a timer that was not ready to run entering g_poll but is ready
* after g_poll. This will only do anything if a timer has expired.
*/
progress |= timerlistgroup_run_timers(&ctx->tlg);
}
return progress;
assert(progress || busy);
return true;
}

arch_init.c

@@ -48,9 +48,7 @@
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"
#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
@@ -122,6 +120,7 @@ static void check_guest_throttling(void);
#define RAM_SAVE_FLAG_XBZRLE 0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
static struct defconfig_file {
const char *filename;
/* Indicates it is an user config file (disabled by -no-user-config) */
@@ -132,7 +131,6 @@ static struct defconfig_file {
{ NULL }, /* end of list */
};
static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
int qemu_read_default_config_files(bool userconfig)
{
@@ -152,9 +150,10 @@ int qemu_read_default_config_files(bool userconfig)
return 0;
}
static inline bool is_zero_range(uint8_t *p, uint64_t size)
static inline bool is_zero_page(uint8_t *p)
{
return buffer_find_nonzero_offset(p, size) == size;
return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
TARGET_PAGE_SIZE;
}
/* struct contains XBZRLE cache and a static page
@@ -164,63 +163,24 @@ static struct {
uint8_t *encoded_buf;
/* buffer for storing page content */
uint8_t *current_buf;
/* Cache for XBZRLE, Protected by lock. */
/* buffer used for XBZRLE decoding */
uint8_t *decoded_buf;
/* Cache for XBZRLE */
PageCache *cache;
QemuMutex lock;
} XBZRLE = {
.encoded_buf = NULL,
.current_buf = NULL,
.decoded_buf = NULL,
.cache = NULL,
};
/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
static void XBZRLE_cache_lock(void)
{
if (migrate_use_xbzrle())
qemu_mutex_lock(&XBZRLE.lock);
}
static void XBZRLE_cache_unlock(void)
{
if (migrate_use_xbzrle())
qemu_mutex_unlock(&XBZRLE.lock);
}
int64_t xbzrle_cache_resize(int64_t new_size)
{
PageCache *new_cache, *cache_to_free;
if (new_size < TARGET_PAGE_SIZE) {
return -1;
}
/* no need to lock, the current thread holds qemu big lock */
if (XBZRLE.cache != NULL) {
/* check XBZRLE.cache again later */
if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
return pow2floor(new_size);
}
new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
TARGET_PAGE_SIZE);
if (!new_cache) {
DPRINTF("Error creating cache\n");
return -1;
}
XBZRLE_cache_lock();
/* the XBZRLE.cache may have be destroyed, check it again */
if (XBZRLE.cache != NULL) {
cache_to_free = XBZRLE.cache;
XBZRLE.cache = new_cache;
} else {
cache_to_free = new_cache;
}
XBZRLE_cache_unlock();
cache_fini(cache_to_free);
return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
TARGET_PAGE_SIZE;
}
return pow2floor(new_size);
}
@@ -310,34 +270,6 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return size;
}
/* This is the last block that we have visited serching for dirty pages
*/
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
* The important thing is that a stale (not-yet-0'd) page be replaced
* by the new data.
* As a bonus, if the page wasn't in the cache it gets added so that
* when a small write is made into the 0'd page it gets XBZRLE sent
*/
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
if (ram_bulk_stage || !migrate_use_xbzrle()) {
return;
}
/* We don't care if this fails to allocate a new cache page
* as long as it updated an old one */
cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
}
#define ENCODING_FLAG_XBZRLE 0x1
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -349,9 +281,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
if (!cache_is_cached(XBZRLE.cache, current_addr)) {
if (!last_stage) {
if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
return -1;
}
cache_insert(XBZRLE.cache, current_addr, current_data);
}
acct_info.xbzrle_cache_miss++;
return -1;
@@ -394,14 +324,25 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
return bytes_sent;
}
/* This is the last block that we have visited serching for dirty pages
*/
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
ram_addr_t start)
{
unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
unsigned long next;
@@ -418,10 +359,11 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
return (next - base) << TARGET_PAGE_BITS;
}
static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
ram_addr_t offset)
{
bool ret;
int nr = addr >> TARGET_PAGE_BITS;
int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
ret = test_and_set_bit(nr, migration_bitmap);
@@ -431,47 +373,12 @@ static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
return ret;
}
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
ram_addr_t addr;
unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
/* start address is aligned at the start of a word? */
if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
int k;
int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
for (k = page; k < page + nr; k++) {
if (src[k]) {
unsigned long new_dirty;
new_dirty = ~migration_bitmap[k];
migration_bitmap[k] |= src[k];
new_dirty &= src[k];
migration_dirty_pages += ctpopl(new_dirty);
src[k] = 0;
}
}
} else {
for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
if (cpu_physical_memory_get_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION)) {
cpu_physical_memory_reset_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION);
migration_bitmap_set_dirty(start + addr);
}
}
}
}
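The word-aligned fast path above merges a whole long's worth of dirty bits at a time and counts only the pages that were not already marked. A self-contained sketch of that accounting (illustrative values; ctpopl() is QEMU's population count, __builtin_popcountl() here):

#include <stdio.h>

int main(void)
{
    unsigned long dest = 0xF0UL; /* pages already marked dirty       */
    unsigned long src  = 0x3CUL; /* pages this sync pass found dirty */

    unsigned long new_dirty = ~dest & src; /* dirty now, not before */
    dest |= src;                           /* merge into the bitmap */

    /* ~0xF0 & 0x3C = 0x0C -> two newly dirtied pages */
    printf("newly dirty pages: %d\n", __builtin_popcountl(new_dirty));
    return 0;
}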
/* Needs iothread lock! */
static void migration_bitmap_sync(void)
{
RAMBlock *block;
ram_addr_t addr;
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
@@ -485,19 +392,25 @@ static void migration_bitmap_sync(void)
}
if (!start_time) {
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
start_time = qemu_get_clock_ms(rt_clock);
}
trace_migration_bitmap_sync_start();
address_space_sync_dirty_bitmap(&address_space_memory);
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->mr->ram_addr, block->length);
for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
if (memory_region_test_and_clear_dirty(block->mr,
addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION)) {
migration_bitmap_set_dirty(block->mr, addr);
}
}
}
trace_migration_bitmap_sync_end(migration_dirty_pages
- num_dirty_pages_init);
num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
end_time = qemu_get_clock_ms(rt_clock);
/* more than 1 second = 1000 milliseconds */
if (end_time > start_time + 1000) {
@@ -565,7 +478,6 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
} else {
int ret;
uint8_t *p;
bool send_async = true;
int cont = (block == last_sent_block) ?
RAM_SAVE_FLAG_CONTINUE : 0;
@@ -576,9 +488,6 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
ret = ram_control_save_page(f, block->offset,
offset, TARGET_PAGE_SIZE, &bytes_sent);
XBZRLE_cache_lock();
current_addr = block->offset + offset;
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (ret != RAM_SAVE_CONTROL_DELAYED) {
if (bytes_sent > 0) {
@@ -587,46 +496,29 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
acct_info.dup_pages++;
}
}
} else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
} else if (is_zero_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, 0);
bytes_sent++;
/* Must let xbzrle know, otherwise a previous (now 0'd) cached
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
offset, cont, last_stage);
if (!last_stage) {
/* We must send exactly what's in the xbzrle cache
* even if the page wasn't xbzrle compressed, so that
* it's right next time.
*/
p = get_cached_data(XBZRLE.cache, current_addr);
/* Can't send this cached data async, since the cache page
* might get updated before it gets to the wire
*/
send_async = false;
}
}
/* XBZRLE overflow or normal page */
if (bytes_sent == -1) {
bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
if (send_async) {
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
bytes_sent += TARGET_PAGE_SIZE;
acct_info.norm_pages++;
}
XBZRLE_cache_unlock();
/* if page is unmodified, continue to the next */
if (bytes_sent > 0) {
last_sent_block = block;
@@ -680,12 +572,6 @@ uint64_t ram_bytes_total(void)
return total;
}
void free_xbzrle_decoded_buf(void)
{
g_free(xbzrle_decoded_buf);
xbzrle_decoded_buf = NULL;
}
static void migration_end(void)
{
if (migration_bitmap) {
@@ -694,17 +580,14 @@ static void migration_end(void)
migration_bitmap = NULL;
}
XBZRLE_cache_lock();
if (XBZRLE.cache) {
cache_fini(XBZRLE.cache);
g_free(XBZRLE.cache);
g_free(XBZRLE.encoded_buf);
g_free(XBZRLE.current_buf);
g_free(XBZRLE.decoded_buf);
XBZRLE.cache = NULL;
XBZRLE.encoded_buf = NULL;
XBZRLE.current_buf = NULL;
}
XBZRLE_cache_unlock();
}
static void ram_migration_cancel(void *opaque)
@@ -735,33 +618,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
dirty_rate_high_cnt = 0;
if (migrate_use_xbzrle()) {
qemu_mutex_lock_iothread();
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
TARGET_PAGE_SIZE,
TARGET_PAGE_SIZE);
if (!XBZRLE.cache) {
qemu_mutex_unlock_iothread();
DPRINTF("Error creating cache\n");
return -1;
}
qemu_mutex_init(&XBZRLE.lock);
qemu_mutex_unlock_iothread();
/* We prefer not to abort if there is no memory */
XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
if (!XBZRLE.encoded_buf) {
DPRINTF("Error allocating encoded_buf\n");
return -1;
}
XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
if (!XBZRLE.current_buf) {
DPRINTF("Error allocating current_buf\n");
g_free(XBZRLE.encoded_buf);
XBZRLE.encoded_buf = NULL;
return -1;
}
XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
acct_clear();
}
@@ -807,7 +672,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
ram_control_before_iterate(f, RAM_CONTROL_ROUND);
t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
t0 = qemu_get_clock_ns(rt_clock);
i = 0;
while ((ret = qemu_file_rate_limit(f)) == 0) {
int bytes_sent;
@@ -826,7 +691,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
iterations
*/
if ((i & 63) == 0) {
uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
if (t1 > MAX_WAIT) {
DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
t1, i);
@@ -844,20 +709,15 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
*/
ram_control_after_iterate(f, RAM_CONTROL_ROUND);
bytes_transferred += total_sent;
/*
* Do not count these 8 bytes into total_sent, so that we can
* return 0 if no page had been dirtied.
*/
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
bytes_transferred += 8;
ret = qemu_file_get_error(f);
if (ret < 0) {
bytes_transferred += total_sent;
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
total_sent += 8;
bytes_transferred += total_sent;
return total_sent;
}
@@ -912,8 +772,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
unsigned int xh_len;
int xh_flags;
if (!xbzrle_decoded_buf) {
xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
if (!XBZRLE.decoded_buf) {
XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
/* extract RLE header */
@@ -930,10 +790,10 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
return -1;
}
/* load data and decode */
qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
/* decode RLE */
ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
TARGET_PAGE_SIZE);
if (ret == -1) {
fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
@@ -983,8 +843,15 @@ static inline void *host_from_stream_offset(QEMUFile *f,
*/
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
if (ch != 0 || !is_zero_range(host, size)) {
if (ch != 0 || !is_zero_page(host)) {
memset(host, ch, size);
#ifndef _WIN32
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
#endif
}
}
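On the POSIX side of this hunk, the DONTNEED hint lets the kernel unback a page that is known to be all zeroes instead of keeping a zero-filled page resident. A hedged sketch of the same idea with the raw madvise() call (assumes an anonymous private mapping):

#include <string.h>
#include <sys/mman.h>

static void drop_zero_page(void *page, size_t pagesize)
{
    memset(page, 0, pagesize);              /* page is logically zero */
    madvise(page, pagesize, MADV_DONTNEED); /* kernel may unback it;
                                               reads fault in zeroes */
}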
@@ -1245,6 +1112,9 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid)
if (ret != 16) {
return -1;
}
#ifdef TARGET_I386
smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16);
#endif
return 0;
}
@@ -1255,18 +1125,20 @@ void do_acpitable_option(const QemuOpts *opts)
acpi_table_add(opts, &err);
if (err) {
error_report("Wrong acpi table provided: %s",
error_get_pretty(err));
fprintf(stderr, "Wrong acpi table provided: %s\n",
error_get_pretty(err));
error_free(err);
exit(1);
}
#endif
}
void do_smbios_option(QemuOpts *opts)
void do_smbios_option(const char *optarg)
{
#ifdef TARGET_I386
smbios_entry_add(opts);
if (smbios_entry_add(optarg) < 0) {
exit(1);
}
#endif
}
@@ -1323,14 +1195,15 @@ static void mig_sleep_cpu(void *opq)
much time in the VM. The migration thread will try to catch up.
Workload will experience a performance drop.
*/
static void mig_throttle_cpu_down(CPUState *cpu, void *data)
{
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}
static void mig_throttle_guest_down(void)
{
CPUState *cpu;
qemu_mutex_lock_iothread();
CPU_FOREACH(cpu) {
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}
qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
qemu_mutex_unlock_iothread();
}
@@ -1344,11 +1217,11 @@ static void check_guest_throttling(void)
}
if (!t0) {
t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
t0 = qemu_get_clock_ns(rt_clock);
return;
}
t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
t1 = qemu_get_clock_ns(rt_clock);
/* If it has been more than 40 ms since the last time the guest
* was throttled then do it again.

async.c (42 changed lines)

@@ -150,10 +150,7 @@ aio_ctx_prepare(GSource *source, gint *timeout)
{
AioContext *ctx = (AioContext *) source;
QEMUBH *bh;
int deadline;
/* We assume there is no timeout already supplied */
*timeout = -1;
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
if (bh->idle) {
@@ -169,14 +166,6 @@ aio_ctx_prepare(GSource *source, gint *timeout)
}
}
deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
if (deadline == 0) {
*timeout = 0;
return true;
} else {
*timeout = qemu_soonest_timeout(*timeout, deadline);
}
return false;
}
@@ -191,7 +180,7 @@ aio_ctx_check(GSource *source)
return true;
}
}
return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
return aio_pending(ctx);
}
static gboolean
@@ -212,12 +201,10 @@ aio_ctx_finalize(GSource *source)
AioContext *ctx = (AioContext *) source;
thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
rfifolock_destroy(&ctx->lock);
qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE);
timerlistgroup_deinit(&ctx->tlg);
}
static GSourceFuncs aio_source_funcs = {
@@ -246,17 +233,6 @@ void aio_notify(AioContext *ctx)
event_notifier_set(&ctx->notifier);
}
static void aio_timerlist_notify(void *opaque)
{
aio_notify(opaque);
}
static void aio_rfifolock_cb(void *opaque)
{
/* Kick owner thread in case they are blocked in aio_poll() */
aio_notify(opaque);
}
AioContext *aio_context_new(void)
{
AioContext *ctx;
@@ -264,12 +240,10 @@ AioContext *aio_context_new(void)
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
event_notifier_test_and_clear);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
event_notifier_test_and_clear, NULL);
return ctx;
}
@@ -283,13 +257,3 @@ void aio_context_unref(AioContext *ctx)
{
g_source_unref(&ctx->source);
}
void aio_context_acquire(AioContext *ctx)
{
rfifolock_lock(&ctx->lock);
}
void aio_context_release(AioContext *ctx)
{
rfifolock_unlock(&ctx->lock);
}
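For context, the acquire/release pair in the hunk above brackets code that touches an AioContext from outside its home thread; the RFifoLock keeps contenders FIFO-ordered, and its callback kicks an owner blocked in aio_poll(). A hedged usage sketch (hypothetical caller):

/* Serialize access to block devices bound to ctx. */
aio_context_acquire(ctx);
/* ... submit or drain I/O on ctx ... */
aio_context_release(ctx);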

audio/audio.c

@@ -95,7 +95,7 @@ static struct {
}
},
.period = { .hertz = 100 },
.period = { .hertz = 250 },
.plive = 0,
.log_to_monitor = 0,
.try_poll_in = 1,
@@ -1124,11 +1124,11 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
qemu_mod_timer (s->ts,
qemu_get_clock_ns(vm_clock) + conf.period.ticks);
}
else {
timer_del (s->ts);
qemu_del_timer (s->ts);
}
}
@@ -1835,7 +1835,7 @@ static void audio_init (void)
QLIST_INIT (&s->cap_head);
atexit (audio_atexit);
s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
s->ts = qemu_new_timer_ns (vm_clock, audio_timer, s);
if (!s->ts) {
hw_error("Could not create audio timer\n");
}

audio/mixeng.c

@@ -348,6 +348,7 @@ void mixeng_clear (struct st_sample *buf, int len)
void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol)
{
#ifdef CONFIG_MIXEMU
if (vol->mute) {
mixeng_clear (buf, len);
return;
@@ -363,4 +364,9 @@ void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol)
#endif
buf += 1;
}
#else
(void) buf;
(void) len;
(void) vol;
#endif
}

audio/mixeng_template.h

@@ -35,7 +35,7 @@
#define IN_T glue (glue (ITYPE, BSIZE), _t)
#ifdef FLOAT_MIXENG
static inline mixeng_real glue (conv_, ET) (IN_T v)
static mixeng_real inline glue (conv_, ET) (IN_T v)
{
IN_T nv = ENDIAN_CONVERT (v);
@@ -54,7 +54,7 @@ static inline mixeng_real glue (conv_, ET) (IN_T v)
#endif
}
static inline IN_T glue (clip_, ET) (mixeng_real v)
static IN_T inline glue (clip_, ET) (mixeng_real v)
{
if (v >= 0.5) {
return IN_MAX;

audio/noaudio.c

@@ -46,7 +46,7 @@ static int no_run_out (HWVoiceOut *hw, int live)
int64_t ticks;
int64_t bytes;
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
now = qemu_get_clock_ns (vm_clock);
ticks = now - no->old_ticks;
bytes = muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
bytes = audio_MIN (bytes, INT_MAX);
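The byte budget above comes from muldiv64(ticks, bytes_per_second, get_ticks_per_sec()), i.e. (a * b) / c evaluated so the intermediate product cannot overflow 64 bits. A sketch of those semantics using a GCC/Clang 128-bit intermediate (QEMU's real helper splits the multiplicand into 32-bit halves instead):

#include <stdint.h>

static uint64_t muldiv64_sketch(uint64_t a, uint32_t b, uint32_t c)
{
    return (uint64_t)(((unsigned __int128)a * b) / c);
}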
@@ -102,7 +102,7 @@ static int no_run_in (HWVoiceIn *hw)
int samples = 0;
if (dead) {
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t now = qemu_get_clock_ns (vm_clock);
int64_t ticks = now - no->old_ticks;
int64_t bytes =
muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());

audio/ossaudio.c

@@ -849,10 +849,6 @@ static int oss_ctl_in (HWVoiceIn *hw, int cmd, ...)
static void *oss_audio_init (void)
{
if (access(conf.devpath_in, R_OK | W_OK) < 0 ||
access(conf.devpath_out, R_OK | W_OK) < 0) {
return NULL;
}
return &conf;
}

audio/paaudio.c

@@ -547,11 +547,11 @@ static int qpa_init_out (HWVoiceOut *hw, struct audsettings *as)
ss.rate = as->freq;
/*
* qemu audio tick runs at 100 Hz (by default), so processing
* data chunks worth 10 ms of sound should be a good fit.
* qemu audio tick runs at 250 Hz (by default), so processing
* data chunks worth 4 ms of sound should be a good fit.
*/
ba.tlength = pa_usec_to_bytes (10 * 1000, &ss);
ba.minreq = pa_usec_to_bytes (5 * 1000, &ss);
ba.tlength = pa_usec_to_bytes (4 * 1000, &ss);
ba.minreq = pa_usec_to_bytes (2 * 1000, &ss);
ba.maxlength = -1;
ba.prebuf = -1;

audio/spiceaudio.c

@@ -25,17 +25,8 @@
#include "audio.h"
#include "audio_int.h"
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
#define LINE_OUT_SAMPLES (480 * 4)
#else
#define LINE_OUT_SAMPLES (256 * 4)
#endif
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
#define LINE_IN_SAMPLES (480 * 4)
#else
#define LINE_IN_SAMPLES (256 * 4)
#endif
#define LINE_IN_SAMPLES 1024
#define LINE_OUT_SAMPLES 1024
typedef struct SpiceRateCtl {
int64_t start_ticks;
@@ -90,7 +81,7 @@ static void spice_audio_fini (void *opaque)
static void rate_start (SpiceRateCtl *rate)
{
memset (rate, 0, sizeof (*rate));
rate->start_ticks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
rate->start_ticks = qemu_get_clock_ns (vm_clock);
}
static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)
@@ -100,7 +91,7 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)
int64_t bytes;
int64_t samples;
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
now = qemu_get_clock_ns (vm_clock);
ticks = now - rate->start_ticks;
bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
samples = (bytes - rate->bytes_sent) >> info->shift;
@@ -120,11 +111,7 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
struct audsettings settings;
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
settings.freq = spice_server_get_best_playback_rate(NULL);
#else
settings.freq = SPICE_INTERFACE_PLAYBACK_FREQ;
#endif
settings.nchannels = SPICE_INTERFACE_PLAYBACK_CHAN;
settings.fmt = AUD_FMT_S16;
settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -135,9 +122,6 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
out->sin.base.sif = &playback_sif.base;
qemu_spice_add_interface (&out->sin.base);
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
spice_server_set_playback_rate(&out->sin, settings.freq);
#endif
return 0;
}
@@ -248,11 +232,7 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
struct audsettings settings;
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
settings.freq = spice_server_get_best_record_rate(NULL);
#else
settings.freq = SPICE_INTERFACE_RECORD_FREQ;
#endif
settings.nchannels = SPICE_INTERFACE_RECORD_CHAN;
settings.fmt = AUD_FMT_S16;
settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -263,9 +243,6 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
in->sin.base.sif = &record_sif.base;
qemu_spice_add_interface (&in->sin.base);
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
spice_server_set_record_rate(&in->sin, settings.freq);
#endif
return 0;
}

audio/wavaudio.c

@@ -52,7 +52,7 @@ static int wav_run_out (HWVoiceOut *hw, int live)
int rpos, decr, samples;
uint8_t *dst;
struct st_sample *src;
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t now = qemu_get_clock_ns (vm_clock);
int64_t ticks = now - wav->old_ticks;
int64_t bytes =
muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());

backends/baum.c

@@ -314,9 +314,9 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
return 0; \
if (*cur++ != ESC) { \
DPRINTF("Broken packet %#2x, tossing\n", req); \
if (timer_pending(baum->cellCount_timer)) { \
timer_del(baum->cellCount_timer); \
baum_cellCount_timer_cb(baum); \
if (qemu_timer_pending(baum->cellCount_timer)) { \
qemu_del_timer(baum->cellCount_timer); \
baum_cellCount_timer_cb(baum); \
} \
return (cur - 2 - buf); \
} \
@@ -334,7 +334,7 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
int i;
/* Allow 100ms to complete the DisplayData packet */
timer_mod(baum->cellCount_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
qemu_mod_timer(baum->cellCount_timer, qemu_get_clock_ns(vm_clock) +
get_ticks_per_sec() / 10);
for (i = 0; i < baum->x * baum->y ; i++) {
EAT(c);
@@ -348,7 +348,7 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
c = '?';
text[i] = c;
}
timer_del(baum->cellCount_timer);
qemu_del_timer(baum->cellCount_timer);
memset(zero, 0, sizeof(zero));
@@ -553,7 +553,7 @@ static void baum_close(struct CharDriverState *chr)
{
BaumDriverState *baum = chr->opaque;
timer_free(baum->cellCount_timer);
qemu_free_timer(baum->cellCount_timer);
if (baum->brlapi) {
brlapi__closeConnection(baum->brlapi);
g_free(baum->brlapi);
@@ -566,10 +566,8 @@ CharDriverState *chr_baum_init(void)
BaumDriverState *baum;
CharDriverState *chr;
brlapi_handle_t *handle;
#if defined(CONFIG_SDL)
#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
#ifdef CONFIG_SDL
SDL_SysWMinfo info;
#endif
#endif
int tty;
@@ -590,21 +588,19 @@ CharDriverState *chr_baum_init(void)
goto fail_handle;
}
baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);
baum->cellCount_timer = qemu_new_timer_ns(vm_clock, baum_cellCount_timer_cb, baum);
if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
brlapi_perror("baum_init: brlapi_getDisplaySize");
goto fail;
}
#if defined(CONFIG_SDL)
#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
#ifdef CONFIG_SDL
memset(&info, 0, sizeof(info));
SDL_VERSION(&info.version);
if (SDL_GetWMInfo(&info))
tty = info.info.x11.wmwindow;
else
#endif
#endif
tty = BRLAPI_TTY_DEFAULT;
@@ -618,7 +614,7 @@ CharDriverState *chr_baum_init(void)
return chr;
fail:
timer_free(baum->cellCount_timer);
qemu_free_timer(baum->cellCount_timer);
brlapi__closeConnection(handle);
fail_handle:
g_free(handle);

backends/rng-egd.c

@@ -169,6 +169,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
if (b->opened) {
error_set(errp, QERR_PERMISSION_DENIED);
} else {
g_free(s->chr_name);
s->chr_name = g_strdup(value);
}
}

backends/rng-random.c

@@ -123,15 +123,15 @@ static void rng_random_init(Object *obj)
NULL);
s->filename = g_strdup("/dev/random");
s->fd = -1;
}
static void rng_random_finalize(Object *obj)
{
RndRandom *s = RNG_RANDOM(obj);
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
if (s->fd != -1) {
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
qemu_close(s->fd);
}

backends/rng.c

@@ -12,7 +12,6 @@
#include "sysemu/rng.h"
#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
void rng_backend_request_entropy(RngBackend *s, size_t size,
EntropyReceiveFunc *receive_entropy,
@@ -41,9 +40,9 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
return s->opened;
}
static void rng_backend_complete(UserCreatable *uc, Error **errp)
void rng_backend_open(RngBackend *s, Error **errp)
{
object_property_set_bool(OBJECT(uc), true, "opened", errp);
object_property_set_bool(OBJECT(s), true, "opened", errp);
}
static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
@@ -77,25 +76,13 @@ static void rng_backend_init(Object *obj)
NULL);
}
static void rng_backend_class_init(ObjectClass *oc, void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
ucc->complete = rng_backend_complete;
}
static const TypeInfo rng_backend_info = {
.name = TYPE_RNG_BACKEND,
.parent = TYPE_OBJECT,
.instance_size = sizeof(RngBackend),
.instance_init = rng_backend_init,
.class_size = sizeof(RngBackendClass),
.class_init = rng_backend_class_init,
.abstract = true,
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
static void register_types(void)

block-migration.c

@@ -58,7 +58,6 @@ typedef struct BlkMigDevState {
/* Protected by block migration lock. */
unsigned long *aio_bitmap;
int64_t completed_sectors;
BdrvDirtyBitmap *dirty_bitmap;
} BlkMigDevState;
typedef struct BlkMigBlock {
@@ -310,21 +309,12 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
/* Called with iothread lock taken. */
static void set_dirty_tracking(void)
static void set_dirty_tracking(int enable)
{
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
}
}
static void unset_dirty_tracking(void)
{
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
}
}
@@ -346,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
drive_get_ref(drive_get_by_blockdev(bs));
bdrv_set_in_use(bs, 1);
bdrv_ref(bs);
block_mig_state.total_sector_sum += sectors;
@@ -442,7 +432,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
} else {
blk_mig_unlock();
}
if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
if (bdrv_get_dirty(bmds->bs, sector)) {
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - sector;
@@ -564,7 +554,7 @@ static int64_t get_remaining_dirty(void)
int64_t dirty = 0;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
dirty += bdrv_get_dirty_count(bmds->bs);
}
return dirty << BDRV_SECTOR_BITS;
@@ -579,13 +569,13 @@ static void blk_mig_cleanup(void)
bdrv_drain_all();
unset_dirty_tracking();
set_dirty_tracking(0);
blk_mig_lock();
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_set_in_use(bmds->bs, 0);
bdrv_unref(bmds->bs);
drive_put_ref(drive_get_by_blockdev(bmds->bs));
g_free(bmds->aio_bitmap);
g_free(bmds);
}
@@ -614,7 +604,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
init_blk_migration(f);
/* start tracking dirty blocks */
set_dirty_tracking();
set_dirty_tracking(1);
qemu_mutex_unlock_iothread();
ret = flush_blks(f);
@@ -790,8 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
}
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
ret = bdrv_write_zeroes(bs, addr, nr_sectors,
BDRV_REQ_MAY_UNMAP);
ret = bdrv_write_zeroes(bs, addr, nr_sectors);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);

block.c (2578 changed lines; diff suppressed because it is too large)

block/Makefile.objs

@@ -1,9 +1,8 @@
block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-$(CONFIG_QUORUM) += quorum.o
block-obj-y += vhdx.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
@@ -11,9 +10,8 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
ifeq ($(CONFIG_POSIX),y)
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-y += nbd.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
@@ -25,15 +23,4 @@ common-obj-y += commit.o
common-obj-y += mirror.o
common-obj-y += backup.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)
iscsi.o-libs := $(LIBISCSI_LIBS)
curl.o-cflags := $(CURL_CFLAGS)
curl.o-libs := $(CURL_LIBS)
rbd.o-cflags := $(RBD_CFLAGS)
rbd.o-libs := $(RBD_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)

block/backup.c

@@ -138,8 +138,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
start * BACKUP_SECTORS_PER_CLUSTER,
n, BDRV_REQ_MAY_UNMAP);
start * BACKUP_SECTORS_PER_CLUSTER, n);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
@@ -181,13 +180,8 @@ static int coroutine_fn backup_before_write_notify(
void *opaque)
{
BdrvTrackedRequest *req = opaque;
int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -208,9 +202,9 @@ static void backup_iostatus_reset(BlockJob *job)
bdrv_iostatus_reset(s->target);
}
static const BlockJobDriver backup_job_driver = {
static const BlockJobType backup_job_type = {
.instance_size = sizeof(BackupBlockJob),
.job_type = BLOCK_JOB_TYPE_BACKUP,
.job_type = "backup",
.set_speed = backup_set_speed,
.iostatus_reset = backup_iostatus_reset,
};
@@ -278,9 +272,9 @@ static void coroutine_fn backup_run(void *opaque)
uint64_t delay_ns = ratelimit_calculate_delay(
&job->limit, job->sectors_read);
job->sectors_read = 0;
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&job->common, rt_clock, delay_ns);
} else {
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
block_job_sleep_ns(&job->common, rt_clock, 0);
}
if (block_job_is_cancelled(&job->common)) {
@@ -295,14 +289,14 @@ static void coroutine_fn backup_run(void *opaque)
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
/* bdrv_co_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
bdrv_co_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
@@ -344,7 +338,7 @@ static void coroutine_fn backup_run(void *opaque)
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
bdrv_unref(target);
bdrv_delete(target);
block_job_completed(&job->common, ret);
}
@@ -376,7 +370,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
return;
}
BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed,
cb, opaque, errp);
if (!job) {
return;

block/blkdebug.c

@@ -168,7 +168,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_REFTABLE_LOAD] = "reftable_load",
[BLKDBG_REFTABLE_GROW] = "reftable_grow",
[BLKDBG_REFTABLE_UPDATE] = "reftable_update",
[BLKDBG_REFBLOCK_LOAD] = "refblock_load",
[BLKDBG_REFBLOCK_UPDATE] = "refblock_update",
@@ -186,14 +185,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_FLUSH_TO_OS] = "flush_to_os",
[BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
[BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head",
[BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head",
[BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail",
[BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail",
[BLKDBG_PWRITEV] = "pwritev",
[BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
[BLKDBG_PWRITEV_DONE] = "pwritev_done",
};
static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -279,33 +270,19 @@ static void remove_rule(BlkdebugRule *rule)
g_free(rule);
}
static int read_config(BDRVBlkdebugState *s, const char *filename,
QDict *options, Error **errp)
static int read_config(BDRVBlkdebugState *s, const char *filename)
{
FILE *f = NULL;
FILE *f;
int ret;
struct add_rule_data d;
Error *local_err = NULL;
if (filename) {
f = fopen(filename, "r");
if (f == NULL) {
error_setg_errno(errp, errno, "Could not read blkdebug config file");
return -errno;
}
ret = qemu_config_parse(f, config_groups, filename);
if (ret < 0) {
error_setg(errp, "Could not parse blkdebug config file");
ret = -EINVAL;
goto fail;
}
f = fopen(filename, "r");
if (f == NULL) {
return -errno;
}
qemu_config_parse_qdict(options, config_groups, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
ret = qemu_config_parse(f, config_groups, filename);
if (ret < 0) {
goto fail;
}
@@ -320,9 +297,7 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
fail:
qemu_opts_reset(&inject_error_opts);
qemu_opts_reset(&set_state_opts);
if (f) {
fclose(f);
}
fclose(f);
return ret;
}
@@ -334,9 +309,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
/* Parse the blkdebug: prefix */
if (!strstart(filename, "blkdebug:", &filename)) {
/* There was no prefix; therefore, all options have to be already
present in the QDict (except for the filename) */
qdict_put(options, "x-image", qstring_from_str(filename));
error_setg(errp, "File name string must start with 'blkdebug:'");
return;
}
@@ -372,68 +345,53 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{
.name = "align",
.type = QEMU_OPT_SIZE,
.help = "Required alignment in bytes",
},
{ /* end of list */ }
},
};
static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *config;
uint64_t align;
const char *filename, *config;
int ret;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto out;
goto fail;
}
/* Read rules from config file or command line options */
/* Read rules from config file */
config = qemu_opt_get(opts, "config");
ret = read_config(s, config, options, errp);
if (ret) {
goto out;
if (config) {
ret = read_config(s, config);
if (ret < 0) {
goto fail;
}
}
/* Set initial state */
s->state = 1;
/* Open the backing file */
assert(bs->file == NULL);
ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
flags | BDRV_O_PROTOCOL, false, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
filename = qemu_opt_get(opts, "x-image");
if (filename == NULL) {
ret = -EINVAL;
goto fail;
}
/* Set request alignment */
align = qemu_opt_get_size(opts, "align", bs->request_alignment);
if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
bs->request_alignment = align;
} else {
error_setg(errp, "Invalid alignment");
ret = -EINVAL;
goto fail_unref;
ret = bdrv_file_open(&bs->file, filename, NULL, flags);
if (ret < 0) {
goto fail;
}
ret = 0;
goto out;
fail_unref:
bdrv_unref(bs->file);
out:
fail:
qemu_opts_del(opts);
return ret;
}
@@ -632,9 +590,9 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugSuspendedReq *r, *next;
BlkdebugSuspendedReq *r;
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
QLIST_FOREACH(r, &s->suspended_reqs, next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
return 0;
@@ -643,31 +601,6 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
return -ENOENT;
}
static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
const char *tag)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugSuspendedReq *r, *r_next;
BlkdebugRule *rule, *next;
int i, ret = -ENOENT;
for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
if (rule->action == ACTION_SUSPEND &&
!strcmp(rule->options.suspend.tag, tag)) {
remove_rule(rule);
ret = 0;
}
}
}
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
ret = 0;
}
}
return ret;
}
static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
@@ -702,8 +635,6 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
.bdrv_debug_remove_breakpoint
= blkdebug_debug_remove_breakpoint,
.bdrv_debug_resume = blkdebug_debug_resume,
.bdrv_debug_is_suspended = blkdebug_debug_is_suspended,
};

block/blkverify.c

@@ -78,9 +78,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
/* Parse the blkverify: prefix */
if (!strstart(filename, "blkverify:", &filename)) {
/* There was no prefix; therefore, all options have to be already
present in the QDict (except for the filename) */
qdict_put(options, "x-image", qstring_from_str(filename));
error_setg(errp, "File name string must start with 'blkverify:'");
return;
}
@@ -118,37 +116,46 @@ static QemuOptsList runtime_opts = {
},
};
static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int blkverify_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBlkverifyState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename, *raw;
int ret;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
/* Open the raw file */
assert(bs->file == NULL);
ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
"raw", flags | BDRV_O_PROTOCOL, false, &local_err);
/* Parse the raw image filename */
raw = qemu_opt_get(opts, "x-raw");
if (raw == NULL) {
ret = -EINVAL;
goto fail;
}
ret = bdrv_file_open(&bs->file, raw, NULL, flags);
if (ret < 0) {
error_propagate(errp, local_err);
goto fail;
}
/* Open the test file */
assert(s->test_file == NULL);
ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
"test", flags, false, &local_err);
filename = qemu_opt_get(opts, "x-image");
if (filename == NULL) {
ret = -EINVAL;
goto fail;
}
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
if (ret < 0) {
error_propagate(errp, local_err);
bdrv_delete(s->test_file);
s->test_file = NULL;
goto fail;
}
@@ -162,7 +169,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
bdrv_unref(s->test_file);
bdrv_delete(s->test_file);
s->test_file = NULL;
}
@@ -173,6 +180,110 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file);
}
/**
* Check that I/O vector contents are identical
*
* @a: I/O vector
* @b: I/O vector
* @ret: Offset to first mismatching byte or -1 if match
*/
static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
{
int i;
ssize_t offset = 0;
assert(a->niov == b->niov);
for (i = 0; i < a->niov; i++) {
size_t len = 0;
uint8_t *p = (uint8_t *)a->iov[i].iov_base;
uint8_t *q = (uint8_t *)b->iov[i].iov_base;
assert(a->iov[i].iov_len == b->iov[i].iov_len);
while (len < a->iov[i].iov_len && *p++ == *q++) {
len++;
}
offset += len;
if (len != a->iov[i].iov_len) {
return offset;
}
}
return -1;
}
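The comparator's contract: offsets are counted across the concatenated vector, and -1 means a perfect match. A hypothetical standalone usage, assuming QEMU's qemu_iovec_init()/qemu_iovec_add() helpers and <assert.h>:

uint8_t a0[4] = {1, 2, 3, 4}, b0[4] = {1, 2, 3, 4};
uint8_t a1[4] = {5, 6, 7, 8}, b1[4] = {5, 6, 7, 9}; /* differ in last byte */
QEMUIOVector a, b;

qemu_iovec_init(&a, 2);
qemu_iovec_add(&a, a0, sizeof(a0));
qemu_iovec_add(&a, a1, sizeof(a1));
qemu_iovec_init(&b, 2);
qemu_iovec_add(&b, b0, sizeof(b0));
qemu_iovec_add(&b, b1, sizeof(b1));

/* First mismatch is byte 7 of the 8-byte concatenated stream. */
assert(blkverify_iovec_compare(&a, &b) == 7);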
typedef struct {
int src_index;
struct iovec *src_iov;
void *dest_base;
} IOVectorSortElem;
static int sortelem_cmp_src_base(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
/* Don't overflow */
if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
return -1;
} else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
return 1;
} else {
return 0;
}
}
static int sortelem_cmp_src_index(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
return elem_a->src_index - elem_b->src_index;
}
/**
* Copy contents of I/O vector
*
* The relative relationships of overlapping iovecs are preserved. This is
* necessary to ensure identical semantics in the cloned I/O vector.
*/
static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
void *buf)
{
IOVectorSortElem sortelems[src->niov];
void *last_end;
int i;
/* Sort source iovecs by base address */
for (i = 0; i < src->niov; i++) {
sortelems[i].src_index = i;
sortelems[i].src_iov = &src->iov[i];
}
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
/* Allocate buffer space taking into account overlapping iovecs */
last_end = NULL;
for (i = 0; i < src->niov; i++) {
struct iovec *cur = sortelems[i].src_iov;
ptrdiff_t rewind = 0;
/* Detect overlap */
if (last_end && last_end > cur->iov_base) {
rewind = last_end - cur->iov_base;
}
sortelems[i].dest_base = buf - rewind;
buf += cur->iov_len - MIN(rewind, cur->iov_len);
last_end = MAX(cur->iov_base + cur->iov_len, last_end);
}
/* Sort by source iovec index and build destination iovec */
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
for (i = 0; i < src->niov; i++) {
qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
}
}
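A worked example of the rewind arithmetic above, with a hypothetical layout in which one 8-byte source buffer is referenced twice with a 4-byte overlap:

/*
 * src->iov[0] = { iov_base = sbuf,     iov_len = 8 }
 * src->iov[1] = { iov_base = sbuf + 4, iov_len = 8 }   (4-byte overlap)
 *
 * Sorted by base: iov[0] gets dest_base = dst + 0; the allocation
 * cursor advances by 8 and last_end becomes sbuf + 8. For iov[1],
 * last_end > iov_base, so rewind = 4 and dest_base = dst + 8 - 4 =
 * dst + 4: the cloned vector overlaps by the same 4 bytes, preserving
 * the aliasing semantics of the original.
 */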
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
@@ -236,7 +347,7 @@ static void blkverify_aio_cb(void *opaque, int ret)
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
if (offset != -1) {
blkverify_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -254,7 +365,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
acb->verify = blkverify_verify_readv;
acb->buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
@@ -288,20 +399,6 @@ static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
return bdrv_aio_flush(s->test_file, cb, opaque);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate)
{
BDRVBlkverifyState *s = bs->opaque;
bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
if (perm) {
return true;
}
return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
}
static BlockDriver bdrv_blkverify = {
.format_name = "blkverify",
.protocol_name = "blkverify",
@@ -315,9 +412,6 @@ static BlockDriver bdrv_blkverify = {
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
static void bdrv_blkverify_init(void)

block/bochs.c

@@ -39,41 +39,56 @@
// not allocated: 0xffffffff
// always little-endian
struct bochs_header {
char magic[32]; /* "Bochs Virtual HD Image" */
char type[16]; /* "Redolog" */
char subtype[16]; /* "Undoable" / "Volatile" / "Growing" */
struct bochs_header_v1 {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
uint32_t version;
uint32_t header; /* size of header */
uint32_t catalog; /* num of entries */
uint32_t bitmap; /* bitmap size */
uint32_t extent; /* extent size */
uint32_t header; // size of header
union {
struct {
uint32_t reserved; /* for ??? */
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 12];
} QEMU_PACKED redolog;
struct {
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 8];
} QEMU_PACKED redolog_v1;
char padding[HEADER_SIZE - 64 - 20];
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 20];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
} extra;
} QEMU_PACKED;
};
// always little-endian
struct bochs_header {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
uint32_t version;
uint32_t header; // size of header
union {
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint32_t reserved; // for ???
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 24];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
} extra;
};
typedef struct BDRVBochsState {
CoMutex lock;
uint32_t *catalog_bitmap;
uint32_t catalog_size;
int catalog_size;
uint32_t data_offset;
int data_offset;
uint32_t bitmap_blocks;
uint32_t extent_blocks;
uint32_t extent_size;
int bitmap_blocks;
int extent_blocks;
int extent_size;
} BDRVBochsState;
static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -93,12 +108,12 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bochs_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBochsState *s = bs->opaque;
uint32_t i;
int i;
struct bochs_header bochs;
struct bochs_header_v1 header_v1;
int ret;
bs->read_only = 1; // no write support yet
@@ -113,24 +128,17 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
strcmp(bochs.subtype, GROWING_TYPE) ||
((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
(le32_to_cpu(bochs.version) != HEADER_V1))) {
error_setg(errp, "Image not in Bochs format");
return -EINVAL;
return -EMEDIUMTYPE;
}
if (le32_to_cpu(bochs.version) == HEADER_V1) {
bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512;
memcpy(&header_v1, &bochs, sizeof(bochs));
bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
} else {
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
}
/* Limit to 1M entries to avoid unbounded allocation. This is what is
* needed for the largest image that bximage can create (~8 TB). */
s->catalog_size = le32_to_cpu(bochs.catalog);
if (s->catalog_size > 0x100000) {
error_setg(errp, "Catalog size is too large");
return -EFBIG;
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
}
s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
@@ -144,24 +152,10 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512;
s->extent_size = le32_to_cpu(bochs.extent);
if (s->extent_size == 0) {
error_setg(errp, "Extent size may not be zero");
return -EINVAL;
} else if (s->extent_size > 0x800000) {
error_setg(errp, "Extent size %" PRIu32 " is too large",
s->extent_size);
return -EINVAL;
}
if (s->catalog_size < bs->total_sectors / s->extent_size) {
error_setg(errp, "Catalog size is too small for this disk size");
ret = -EINVAL;
goto fail;
}
s->extent_size = le32_to_cpu(bochs.extra.redolog.extent);
qemu_co_mutex_init(&s->lock);
return 0;
@@ -174,8 +168,8 @@ fail:
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
uint64_t offset = sector_num * 512;
uint64_t extent_index, extent_offset, bitmap_offset;
int64_t offset = sector_num * 512;
int64_t extent_index, extent_offset, bitmap_offset;
char bitmap_entry;
// seek to sector
@@ -186,9 +180,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
return -1; /* not allocated */
}
bitmap_offset = s->data_offset +
(512 * (uint64_t) s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),

block/cloop.c

@@ -26,9 +26,6 @@
#include "qemu/module.h"
#include <zlib.h>
/* Maximum compressed block size */
#define MAX_BLOCK_SIZE (64 * 1024 * 1024)
typedef struct BDRVCloopState {
CoMutex lock;
uint32_t block_size;
@@ -56,8 +53,7 @@ static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cloop_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCloopState *s = bs->opaque;
uint32_t offsets_size, max_compressed_block_size = 1, i;
@@ -71,26 +67,6 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
s->block_size = be32_to_cpu(s->block_size);
if (s->block_size % 512) {
error_setg(errp, "block_size %u must be a multiple of 512",
s->block_size);
return -EINVAL;
}
if (s->block_size == 0) {
error_setg(errp, "block_size cannot be zero");
return -EINVAL;
}
/* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
* we can accept more. Prevent ridiculous values like 4 GB - 1 since we
* need a buffer this big.
*/
if (s->block_size > MAX_BLOCK_SIZE) {
error_setg(errp, "block_size %u must be %u MB or less",
s->block_size,
MAX_BLOCK_SIZE / (1024 * 1024));
return -EINVAL;
}
ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
@@ -99,23 +75,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
s->n_blocks = be32_to_cpu(s->n_blocks);
/* read offsets */
if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
/* Prevent integer overflow */
error_setg(errp, "n_blocks %u must be %zu or less",
s->n_blocks,
(UINT32_MAX - 1) / sizeof(uint64_t));
return -EINVAL;
}
offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
if (offsets_size > 512 * 1024 * 1024) {
/* Prevent ridiculous offsets_size which causes memory allocation to
* fail or overflows bdrv_pread() size. In practice the 512 MB
* offsets[] limit supports 16 TB images at 256 KB block size.
*/
error_setg(errp, "image requires too many offsets, "
"try increasing block size");
return -EINVAL;
}
offsets_size = s->n_blocks * sizeof(uint64_t);
s->offsets = g_malloc(offsets_size);
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
@@ -123,37 +83,13 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
for (i = 0; i < s->n_blocks + 1; i++) {
uint64_t size;
for(i=0;i<s->n_blocks;i++) {
s->offsets[i] = be64_to_cpu(s->offsets[i]);
if (i == 0) {
continue;
}
if (s->offsets[i] < s->offsets[i - 1]) {
error_setg(errp, "offsets not monotonically increasing at "
"index %u, image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
size = s->offsets[i] - s->offsets[i - 1];
/* Compressed blocks should be smaller than the uncompressed block size
* but maybe compression performed poorly so the compressed block is
* actually bigger. Clamp down on unrealistic values to prevent
* ridiculous s->compressed_block allocation.
*/
if (size > 2 * MAX_BLOCK_SIZE) {
error_setg(errp, "invalid compressed block size at index %u, "
"image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
if (i > 0) {
uint32_t size = s->offsets[i] - s->offsets[i - 1];
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
}
}
}
@@ -243,7 +179,9 @@ static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
static void cloop_close(BlockDriverState *bs)
{
BDRVCloopState *s = bs->opaque;
g_free(s->offsets);
if (s->n_blocks > 0) {
g_free(s->offsets);
}
g_free(s->compressed_block);
g_free(s->uncompressed_block);
inflateEnd(&s->zstream);

block/commit.c

@@ -103,14 +103,14 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
/* Copy if allocated above the base */
ret = bdrv_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
ret = bdrv_co_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
@@ -173,9 +173,9 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static const BlockJobDriver commit_job_driver = {
static const BlockJobType commit_job_type = {
.instance_size = sizeof(CommitBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.job_type = "commit",
.set_speed = commit_set_speed,
};
@@ -198,7 +198,13 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
assert(top != bs);
/* Once we support top == active layer, remove this check */
if (top == bs) {
error_setg(errp,
"Top image as the active layer is currently unsupported");
return;
}
if (top == base) {
error_setg(errp, "Invalid files for merge: top and base are the same");
return;
@@ -232,7 +238,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
}
s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}

block/cow.c

@@ -58,8 +58,7 @@ static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int cow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cow_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCowState *s = bs->opaque;
struct cow_header_v2 cow_header;
@@ -74,8 +73,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
}
if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
error_setg(errp, "Image not in COW format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
@@ -83,7 +81,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
char version[64];
snprintf(version, sizeof(version),
"COW version %d", cow_header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "cow", version);
ret = -ENOTSUP;
goto fail;
@@ -104,45 +102,42 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
/*
* XXX(hch): right now these functions are extremely inefficient.
* We should just read the whole bitmap we'll need in one go instead.
*/
static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
{
int64_t bitnum = start, last = start + nb_sectors;
while (bitnum < last) {
if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
bitmap[bitnum / 8] = 0xFF;
bitnum += 8;
continue;
}
bitmap[bitnum/8] |= (1 << (bitnum % 8));
bitnum++;
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
int ret;
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
bitmap |= (1 << (bitnum % 8));
ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return 0;
}
#define BITS_PER_BITMAP_SECTOR (512 * 8)
/* Cannot use bitmap.c on big-endian machines. */
static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
{
return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
}
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
int ret;
static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
{
int streak_value = value ? 0xFF : 0;
int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
int bitnum = start;
while (bitnum < last) {
if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
bitnum += 8;
continue;
}
if (cow_test_bit(bitnum, bitmap) == value) {
bitnum++;
continue;
}
break;
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return MIN(bitnum, last) - start;
return !!(bitmap & (1 << (bitnum % 8)));
}
/* Return true if first block has been changed (ie. current version is
@@ -151,100 +146,40 @@ static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_s
static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
bool first = true;
int changed = 0, same = 0;
int changed;
do {
int ret;
uint8_t bitmap[BDRV_SECTOR_SIZE];
bitnum &= BITS_PER_BITMAP_SECTOR - 1;
int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
if (first) {
changed = cow_test_bit(bitnum, bitmap);
first = false;
}
same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
bitnum += sector_bits;
nb_sectors -= sector_bits;
offset += BDRV_SECTOR_SIZE;
} while (nb_sectors);
*num_same = same;
return changed;
}
static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
BDRVCowState *s = bs->opaque;
int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
if (ret < 0) {
return ret;
if (nb_sectors == 0) {
*num_same = nb_sectors;
return 0;
}
return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
changed = is_bit_set(bs, sector_num);
if (changed < 0) {
return 0; /* XXX: how to return I/O errors? */
}
for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
if (is_bit_set(bs, sector_num + *num_same) != changed)
break;
}
return changed;
}
static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
bool first = true;
int sector_bits;
int error = 0;
int i;
for ( ; nb_sectors;
bitnum += sector_bits,
nb_sectors -= sector_bits,
offset += BDRV_SECTOR_SIZE) {
int ret, set;
uint8_t bitmap[BDRV_SECTOR_SIZE];
bitnum &= BITS_PER_BITMAP_SECTOR - 1;
sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
/* Skip over any already set bits */
set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
bitnum += set;
sector_bits -= set;
nb_sectors -= set;
if (!sector_bits) {
continue;
}
if (first) {
ret = bdrv_flush(bs->file);
if (ret < 0) {
return ret;
}
first = false;
}
cow_set_bits(bitmap, bitnum, sector_bits);
ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
for (i = 0; i < nb_sectors; i++) {
error = cow_set_bit(bs, sector_num + i);
if (error) {
break;
}
}
return 0;
return error;
}
static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
@@ -254,7 +189,7 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
int ret, n;
while (nb_sectors > 0) {
ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n);
if (ret < 0) {
return ret;
}
@@ -324,14 +259,12 @@ static void cow_close(BlockDriverState *bs)
{
}
static int cow_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int cow_create(const char *filename, QEMUOptionParameter *options)
{
struct cow_header_v2 cow_header;
struct stat st;
int64_t image_sectors = 0;
const char *image_filename = NULL;
Error *local_err = NULL;
int ret;
BlockDriverState *cow_bs;
@@ -345,17 +278,13 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
options++;
}
ret = bdrv_create_file(filename, options, &local_err);
ret = bdrv_create_file(filename, options);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
cow_bs = NULL;
ret = bdrv_open(&cow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -389,7 +318,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
}
exit:
bdrv_unref(cow_bs);
bdrv_delete(cow_bs);
return ret;
}
@@ -419,7 +348,7 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_get_block_status = cow_co_get_block_status,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,
};
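A note on the bitmap arithmetic in the cow hunks above: the allocation bitmap stores one bit per guest sector directly after the v2 header, so both sides of the diff translate a sector number into a file offset the same way; the difference is that is_bit_set()/cow_set_bit() issue one bdrv_pread() or bdrv_pwrite_sync() per sector, while the streak-based variant reads a whole 512-byte bitmap sector (BITS_PER_BITMAP_SECTOR = 4096 bits) at a time. A minimal sketch of the offset mapping, with COW_HEADER_SIZE standing in for sizeof(struct cow_header_v2) (a hypothetical placeholder, not the driver's own constant):

    #include <stdint.h>

    #define COW_HEADER_SIZE 1032  /* placeholder for sizeof(struct cow_header_v2) */

    /* Byte offset in the image file of the bitmap byte covering sector_num. */
    static inline uint64_t cow_bitmap_byte(int64_t sector_num)
    {
        return COW_HEADER_SIZE + sector_num / 8;
    }

    /* Mask selecting that sector's bit within the byte (LSB-first layout,
     * matching the (1 << (bitnum % 8)) expressions in the code above). */
    static inline uint8_t cow_bitmap_mask(int64_t sector_num)
    {
        return (uint8_t)(1u << (sector_num % 8));
    }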


@@ -34,11 +34,6 @@
#define DPRINTF(fmt, ...) do { } while (0)
#endif
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
#define NEED_CURL_TIMER_CALLBACK
#endif
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
CURLPROTO_FTP | CURLPROTO_FTPS | \
CURLPROTO_TFTP)
@@ -82,7 +77,6 @@ typedef struct CURLState
typedef struct BDRVCURLState {
CURLM *multi;
QEMUTimer timer;
size_t len;
CURLState states[CURL_NUM_STATES];
char *url;
@@ -92,23 +86,7 @@ typedef struct BDRVCURLState {
static void curl_clean_state(CURLState *s);
static void curl_multi_do(void *arg);
#ifdef NEED_CURL_TIMER_CALLBACK
static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
{
BDRVCURLState *s = opaque;
DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
if (timeout_ms == -1) {
timer_del(&s->timer);
} else {
int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
timer_mod(&s->timer,
qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
}
return 0;
}
#endif
static int curl_aio_flush(void *opaque);
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
void *s, void *sp)
@@ -116,16 +94,17 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s);
break;
case CURL_POLL_OUT:
qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s);
break;
case CURL_POLL_INOUT:
qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do,
curl_aio_flush, s);
break;
case CURL_POLL_REMOVE:
qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL);
break;
}
@@ -157,11 +136,6 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
if (!s || !s->orig_buf)
goto read_end;
if (s->buf_off >= s->buf_len) {
/* buffer full, read nothing */
return 0;
}
realsize = MIN(realsize, s->buf_len - s->buf_off);
memcpy(s->orig_buf + s->buf_off, ptr, realsize);
s->buf_off += realsize;
@@ -237,10 +211,20 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
static void curl_multi_read(BDRVCURLState *s)
static void curl_multi_do(void *arg)
{
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
int r;
int msgs_in_queue;
if (!s->multi)
return;
do {
r = curl_multi_socket_all(s->multi, &running);
} while(r == CURLM_CALL_MULTI_PERFORM);
/* Try to find done transfers, so we can free the easy
* handle again. */
do {
@@ -284,41 +268,6 @@ static void curl_multi_read(BDRVCURLState *s)
} while(msgs_in_queue);
}
static void curl_multi_do(void *arg)
{
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
int r;
if (!s->multi) {
return;
}
do {
r = curl_multi_socket_all(s->multi, &running);
} while(r == CURLM_CALL_MULTI_PERFORM);
curl_multi_read(s);
}
static void curl_multi_timeout_do(void *arg)
{
#ifdef NEED_CURL_TIMER_CALLBACK
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
if (!s->multi) {
return;
}
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_read(s);
#else
abort();
#endif
}
static CURLState *curl_init_state(BDRVCURLState *s)
{
CURLState *state = NULL;
@@ -448,8 +397,7 @@ static QemuOptsList runtime_opts = {
},
};
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int curl_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
@@ -461,27 +409,30 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
static int inited = 0;
if (flags & BDRV_O_RDWR) {
error_setg(errp, "curl block device does not support writes");
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"curl block device does not support writes");
return -EROFS;
}
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
goto out_noclean;
}
s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
if ((s->readahead_size & 0x1ff) != 0) {
error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
s->readahead_size);
fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
s->readahead_size);
goto out_noclean;
}
file = qemu_opt_get(opts, "url");
if (file == NULL) {
error_setg(errp, "curl block driver requires an 'url' option");
qerror_report(ERROR_CLASS_GENERIC_ERROR, "curl block driver requires "
"an 'url' option");
goto out_noclean;
}
@@ -523,20 +474,12 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
curl_multi_timeout_do, s);
// Now we know the file exists and its size, so let's
// initialize the multi interface!
s->multi = curl_multi_init();
curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
#ifdef NEED_CURL_TIMER_CALLBACK
curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
#endif
curl_multi_do(s);
qemu_opts_del(opts);
@@ -552,6 +495,21 @@ out_noclean:
return -EINVAL;
}
static int curl_aio_flush(void *opaque)
{
BDRVCURLState *s = opaque;
int i, j;
for (i=0; i < CURL_NUM_STATES; i++) {
for(j=0; j < CURL_NUM_ACB; j++) {
if (s->states[i].acb[j]) {
return 1;
}
}
}
return 0;
}
static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
{
// Do we have to implement canceling? Seems to work without...
@@ -631,6 +589,12 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
acb->nb_sectors = nb_sectors;
acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
if (!acb->bh) {
DPRINTF("CURL: qemu_bh_new failed\n");
return NULL;
}
qemu_bh_schedule(acb->bh);
return &acb->common;
}
@@ -655,9 +619,6 @@ static void curl_close(BlockDriverState *bs)
}
if (s->multi)
curl_multi_cleanup(s->multi);
timer_del(&s->timer);
g_free(s->url);
}
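The timer hunks above follow libcurl's multi-socket model: curl tells the application when it next needs to be driven, and the application arms a one-shot timer that fires curl_multi_socket_action() with CURL_SOCKET_TIMEOUT. A minimal sketch of that contract in plain libcurl, with arm_timer() as a hypothetical stand-in for the event loop's timer plumbing:

    #include <curl/curl.h>
    #include <stdio.h>

    /* Hypothetical timer plumbing; a real event loop would arm a
     * one-shot timer here instead of just logging. */
    static void arm_timer(long timeout_ms, void (*fn)(void *), void *opaque)
    {
        printf("arm one-shot timer: %ld ms\n", timeout_ms);
        (void)fn; (void)opaque;
    }

    static void on_timeout(void *opaque)
    {
        CURLM *multi = opaque;
        int running;

        /* Tell curl the timeout it asked for has expired. */
        curl_multi_socket_action(multi, CURL_SOCKET_TIMEOUT, 0, &running);
    }

    /* CURLMOPT_TIMERFUNCTION callback: curl reports when it wants to
     * run next; timeout_ms == -1 means "cancel any pending timer". */
    static int timer_cb(CURLM *multi, long timeout_ms, void *opaque)
    {
        (void)multi;
        if (timeout_ms >= 0) {
            arm_timer(timeout_ms, on_timeout, opaque);
        }
        return 0;
    }

It would be registered once after curl_multi_init() via curl_multi_setopt() with CURLMOPT_TIMERFUNCTION and CURLMOPT_TIMERDATA; in the hunks above the same contract is wired through QEMU's timer API (aio_timer_init()/timer_mod()) on one side of the diff.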


@@ -27,14 +27,6 @@
#include "qemu/module.h"
#include <zlib.h>
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
* or truncating when converting to 32-bit types
*/
DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */
DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
};
typedef struct BDRVDMGState {
CoMutex lock;
/* each chunk contains a certain number of sectors,
@@ -100,44 +92,12 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
return 0;
}
/* Increase max chunk sizes, if necessary. This function is used to calculate
* the buffer sizes needed for compressed/uncompressed chunk I/O.
*/
static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uint32_t *max_compressed_size,
uint32_t *max_sectors_per_chunk)
{
uint32_t compressed_size = 0;
uint32_t uncompressed_sectors = 0;
switch (s->types[chunk]) {
case 0x80000005: /* zlib compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
case 1: /* copy */
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
uncompressed_sectors = s->sectorcounts[chunk];
break;
}
if (compressed_size > *max_compressed_size) {
*max_compressed_size = compressed_size;
}
if (uncompressed_sectors > *max_sectors_per_chunk) {
*max_sectors_per_chunk = uncompressed_sectors;
}
}
static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int dmg_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVDMGState *s = bs->opaque;
uint64_t info_begin, info_end, last_in_offset, last_out_offset;
uint64_t info_begin,info_end,last_in_offset,last_out_offset;
uint32_t count, tmp;
uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
int64_t offset;
int ret;
@@ -199,40 +159,37 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (type == 0x6d697368 && count >= 244) {
size_t new_size;
uint32_t chunk_count;
if (type == 0x6d697368 && count >= 244) {
int new_size, chunk_count;
offset += 4;
offset += 200;
chunk_count = (count - 204) / 40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size / 2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
chunk_count = (count-204)/40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size/2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
ret = read_uint32(bs, offset, &s->types[i]);
if (ret < 0) {
goto fail;
}
offset += 4;
if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
s->types[i] != 2) {
if (s->types[i] == 0xffffffff && i > 0) {
last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
last_out_offset = s->sectors[i - 1] +
s->sectorcounts[i - 1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
offset += 4;
if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
if(s->types[i]==0xffffffff) {
last_in_offset = s->offsets[i-1]+s->lengths[i-1];
last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
ret = read_uint64(bs, offset, &s->sectors[i]);
if (ret < 0) {
@@ -247,14 +204,6 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
offset += 8;
if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
error_report("sector count %" PRIu64 " for chunk %u is "
"larger than max (%u)",
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
ret = -EINVAL;
goto fail;
}
ret = read_uint64(bs, offset, &s->offsets[i]);
if (ret < 0) {
goto fail;
@@ -268,25 +217,19 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
offset += 8;
if (s->lengths[i] > DMG_LENGTHS_MAX) {
error_report("length %" PRIu64 " for chunk %u is larger "
"than max (%u)",
s->lengths[i], i, DMG_LENGTHS_MAX);
ret = -EINVAL;
goto fail;
}
update_max_chunk_size(s, i, &max_compressed_size,
&max_sectors_per_chunk);
}
s->n_chunks += chunk_count;
}
if(s->lengths[i]>max_compressed_size)
max_compressed_size = s->lengths[i];
if(s->sectorcounts[i]>max_sectors_per_chunk)
max_sectors_per_chunk = s->sectorcounts[i];
}
s->n_chunks+=chunk_count;
}
}
/* initialize zlib engine */
s->compressed_chunk = g_malloc(max_compressed_size + 1);
s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
if (inflateInit(&s->zstream) != Z_OK) {
s->compressed_chunk = g_malloc(max_compressed_size+1);
s->uncompressed_chunk = g_malloc(512*max_sectors_per_chunk);
if(inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
}
@@ -308,82 +251,83 @@ fail:
}
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num, uint64_t sector_num)
uint32_t chunk_num,int sector_num)
{
if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num ||
s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
return 0;
} else {
return -1;
}
if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num ||
s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num)
return 0;
else
return -1;
}
static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
{
/* binary search */
uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
while (chunk1 != chunk2) {
chunk3 = (chunk1 + chunk2) / 2;
if (s->sectors[chunk3] > sector_num) {
chunk2 = chunk3;
} else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
return chunk3;
} else {
chunk1 = chunk3;
}
uint32_t chunk1=0,chunk2=s->n_chunks,chunk3;
while(chunk1!=chunk2) {
chunk3 = (chunk1+chunk2)/2;
if(s->sectors[chunk3]>sector_num)
chunk2 = chunk3;
else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num)
return chunk3;
else
chunk1 = chunk3;
}
return s->n_chunks; /* error */
}
static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
static inline int dmg_read_chunk(BlockDriverState *bs, int sector_num)
{
BDRVDMGState *s = bs->opaque;
if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
int ret;
uint32_t chunk = search_chunk(s, sector_num);
if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
int ret;
uint32_t chunk = search_chunk(s,sector_num);
if (chunk >= s->n_chunks) {
return -1;
}
if(chunk>=s->n_chunks)
return -1;
s->current_chunk = s->n_chunks;
switch (s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as a whole can be
* inflated. */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
}
s->current_chunk = s->n_chunks;
switch(s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
int i;
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512 * s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if (ret != Z_OK) {
return -1;
}
ret = inflate(&s->zstream, Z_FINISH);
if (ret != Z_STREAM_END ||
s->zstream.total_out != 512 * s->sectorcounts[chunk]) {
return -1;
}
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
/* we need to buffer, because only the chunk as a whole can be
* inflated. */
i=0;
do {
ret = bdrv_pread(bs->file, s->offsets[chunk] + i,
s->compressed_chunk+i, s->lengths[chunk]-i);
if(ret<0 && errno==EINTR)
ret=0;
i+=ret;
} while(ret>=0 && ret+i<s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512*s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if(ret != Z_OK)
return -1;
ret = inflate(&s->zstream, Z_FINISH);
if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk])
return -1;
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
}
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
if (ret != s->lengths[chunk])
return -1;
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
}
return 0;
}
@@ -394,14 +338,12 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
BDRVDMGState *s = bs->opaque;
int i;
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
if (dmg_read_chunk(bs, sector_num + i) != 0) {
return -1;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
memcpy(buf + i * 512,
s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
for(i=0;i<nb_sectors;i++) {
uint32_t sector_offset_in_chunk;
if(dmg_read_chunk(bs, sector_num+i) != 0)
return -1;
sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
}
return 0;
}
@@ -433,12 +375,12 @@ static void dmg_close(BlockDriverState *bs)
}
static BlockDriver bdrv_dmg = {
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
};
static void bdrv_dmg_init(void)
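The DMG_SECTORCOUNTS_MAX / DMG_LENGTHS_MAX checks in the hunks above exist because compressed_chunk and uncompressed_chunk are sized from fields read out of the image itself; without a cap, a crafted image can demand unreasonable allocations or truncate when the values feed 32-bit size math, as the enum's own comment notes. A generic sketch of that validation idea (illustrative names, not the driver's):

    #include <stdint.h>

    /* Same bound as DMG_LENGTHS_MAX above: 64 MiB. */
    #define CHUNK_LEN_MAX (64 * 1024 * 1024)

    /* Validate an untrusted on-disk length before it sizes a buffer.
     * Returns 0 with a safe 32-bit value, or -1 for hostile input. */
    static int check_chunk_len(uint64_t len, uint32_t *out)
    {
        if (len > CHUNK_LEN_MAX) {
            return -1;            /* refuse unbounded allocations */
        }
        *out = (uint32_t)len;     /* fits: no 32-bit truncation possible */
        return 0;
    }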


@@ -3,26 +3,43 @@
*
* Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
* Pipe handling mechanism in AIO implementation is derived from
* block/rbd.c. Hence,
*
* Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
* Josh Durgin <josh.durgin@dreamhost.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
typedef struct GlusterAIOCB {
BlockDriverAIOCB common;
int64_t size;
int ret;
bool *finished;
QEMUBH *bh;
Coroutine *coroutine;
} GlusterAIOCB;
typedef struct BDRVGlusterState {
struct glfs *glfs;
int fds[2];
struct glfs_fd *fd;
int qemu_aio_count;
int event_reader_pos;
GlusterAIOCB *event_acb;
} BDRVGlusterState;
#define GLUSTER_FD_READ 0
#define GLUSTER_FD_WRITE 1
typedef struct GlusterConf {
char *server;
int port;
@@ -33,13 +50,11 @@ typedef struct GlusterConf {
static void qemu_gluster_gconf_free(GlusterConf *gconf)
{
if (gconf) {
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
}
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
}
static int parse_volume_options(GlusterConf *gconf, char *path)
@@ -80,7 +95,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
* 'server' specifies the server where the volume file specification for
* the given volume resides. This can be either hostname, ipv4 address
* or ipv6 address. ipv6 address needs to be within square brackets [ ].
* If transport type is 'unix', then 'server' field should not be specified.
* If transport type is 'unix', then 'server' field should not be specifed.
* The 'socket' field needs to be populated with the path to unix domain
* socket.
*
@@ -117,7 +132,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
}
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
if (!strcmp(uri->scheme, "gluster")) {
gconf->transport = g_strdup("tcp");
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
gconf->transport = g_strdup("tcp");
@@ -153,7 +168,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
}
gconf->server = g_strdup(qp->p[0].value);
} else {
gconf->server = g_strdup(uri->server ? uri->server : "localhost");
gconf->server = g_strdup(uri->server);
gconf->port = uri->port;
}
@@ -165,8 +180,7 @@ out:
return ret;
}
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
Error **errp)
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
{
struct glfs *glfs = NULL;
int ret;
@@ -174,8 +188,8 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = qemu_gluster_parseuri(gconf, filename);
if (ret < 0) {
error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
error_report("Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
errno = -ret;
goto out;
}
@@ -202,11 +216,9 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = glfs_init(glfs);
if (ret) {
error_setg_errno(errp, errno,
"Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server,
gconf->port, gconf->volname, gconf->image,
gconf->transport);
error_report("Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server, gconf->port,
gconf->volname, gconf->image, gconf->transport);
goto out;
}
return glfs;
@@ -220,32 +232,54 @@ out:
return NULL;
}
static void qemu_gluster_complete_aio(void *opaque)
static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
int ret;
bool *finished = acb->finished;
BlockDriverCompletionFunc *cb = acb->common.cb;
void *opaque = acb->common.opaque;
qemu_bh_delete(acb->bh);
acb->bh = NULL;
qemu_coroutine_enter(acb->coroutine, NULL);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
{
GlusterAIOCB *acb = (GlusterAIOCB *)arg;
if (!ret || ret == acb->size) {
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = ret; /* Read/Write failed */
if (!acb->ret || acb->ret == acb->size) {
ret = 0; /* Success */
} else if (acb->ret < 0) {
ret = acb->ret; /* Read/Write failed */
} else {
acb->ret = -EIO; /* Partial read/write - fail it */
ret = -EIO; /* Partial read/write - fail it */
}
acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
qemu_bh_schedule(acb->bh);
s->qemu_aio_count--;
qemu_aio_release(acb);
cb(opaque, ret);
if (finished) {
*finished = true;
}
}
static void qemu_gluster_aio_event_reader(void *opaque)
{
BDRVGlusterState *s = opaque;
ssize_t ret;
do {
char *p = (char *)&s->event_acb;
ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
sizeof(s->event_acb) - s->event_reader_pos);
if (ret > 0) {
s->event_reader_pos += ret;
if (s->event_reader_pos == sizeof(s->event_acb)) {
s->event_reader_pos = 0;
qemu_gluster_complete_aio(s->event_acb, s);
}
}
} while (ret < 0 && errno == EINTR);
}
static int qemu_gluster_aio_flush_cb(void *opaque)
{
BDRVGlusterState *s = opaque;
return (s->qemu_aio_count > 0);
}
/* TODO Convert to fine grained options */
@@ -262,57 +296,60 @@ static QemuOptsList runtime_opts = {
},
};
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
*open_flags |= O_BINARY;
if (bdrv_flags & BDRV_O_RDWR) {
*open_flags |= O_RDWR;
} else {
*open_flags |= O_RDONLY;
}
if ((bdrv_flags & BDRV_O_NOCACHE)) {
*open_flags |= O_DIRECT;
}
}
static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
int bdrv_flags, Error **errp)
int bdrv_flags)
{
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int open_flags = O_BINARY;
int ret = 0;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto out;
}
filename = qemu_opt_get(opts, "filename");
s->glfs = qemu_gluster_init(gconf, filename, errp);
s->glfs = qemu_gluster_init(gconf, filename);
if (!s->glfs) {
ret = -errno;
goto out;
}
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
if (bdrv_flags & BDRV_O_RDWR) {
open_flags |= O_RDWR;
} else {
open_flags |= O_RDONLY;
}
if ((bdrv_flags & BDRV_O_NOCACHE)) {
open_flags |= O_DIRECT;
}
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
if (!s->fd) {
ret = -errno;
goto out;
}
ret = qemu_pipe(s->fds);
if (ret < 0) {
ret = -errno;
goto out;
}
fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
out:
qemu_opts_del(opts);
qemu_gluster_gconf_free(gconf);
@@ -328,180 +365,24 @@ out:
return ret;
}
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
BDRVGlusterReopenState *reop_s;
GlusterConf *gconf = NULL;
int open_flags = 0;
assert(state != NULL);
assert(state->bs != NULL);
state->opaque = g_malloc0(sizeof(BDRVGlusterReopenState));
reop_s = state->opaque;
qemu_gluster_parse_flags(state->flags, &open_flags);
gconf = g_malloc0(sizeof(GlusterConf));
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
}
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
goto exit;
}
exit:
/* state->opaque will be freed in either the _abort or _commit */
qemu_gluster_gconf_free(gconf);
return ret;
}
static void qemu_gluster_reopen_commit(BDRVReopenState *state)
{
BDRVGlusterReopenState *reop_s = state->opaque;
BDRVGlusterState *s = state->bs->opaque;
/* close the old */
if (s->fd) {
glfs_close(s->fd);
}
if (s->glfs) {
glfs_fini(s->glfs);
}
/* use the newly opened image / connection */
s->fd = reop_s->fd;
s->glfs = reop_s->glfs;
g_free(state->opaque);
state->opaque = NULL;
return;
}
static void qemu_gluster_reopen_abort(BDRVReopenState *state)
{
BDRVGlusterReopenState *reop_s = state->opaque;
if (reop_s == NULL) {
return;
}
if (reop_s->fd) {
glfs_close(reop_s->fd);
}
if (reop_s->glfs) {
glfs_fini(reop_s->glfs);
}
g_free(state->opaque);
state->opaque = NULL;
return;
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
BDRVGlusterState *s = bs->opaque;
off_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
}
static inline bool gluster_supports_zerofill(void)
{
return 1;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
#else
static inline bool gluster_supports_zerofill(void)
{
return 0;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return 0;
}
#endif
static int qemu_gluster_create(const char *filename,
QEMUOptionParameter *options, Error **errp)
QEMUOptionParameter *options)
{
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
glfs = qemu_gluster_init(gconf, filename, errp);
glfs = qemu_gluster_init(gconf, filename);
if (!glfs) {
ret = -EINVAL;
ret = -errno;
goto out;
}
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
total_size = options->value.n / BDRV_SECTOR_SIZE;
} else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
if (!options->value.s || !strcmp(options->value.s, "off")) {
prealloc = 0;
} else if (!strcmp(options->value.s, "full") &&
gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
" or GlusterFS doesn't support zerofill API",
options->value.s);
ret = -EINVAL;
goto out;
}
}
options++;
}
@@ -511,15 +392,9 @@ static int qemu_gluster_create(const char *filename,
if (!fd) {
ret = -errno;
} else {
if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
if (prealloc && qemu_gluster_zerofill(fd, 0,
total_size * BDRV_SECTOR_SIZE)) {
ret = -errno;
}
} else {
if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
ret = -errno;
}
if (glfs_close(fd) != 0) {
ret = -errno;
}
@@ -532,18 +407,72 @@ out:
return ret;
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
{
GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
bool finished = false;
acb->finished = &finished;
while (!finished) {
qemu_aio_wait();
}
}
static const AIOCBInfo gluster_aiocb_info = {
.aiocb_size = sizeof(GlusterAIOCB),
.cancel = qemu_gluster_aio_cancel,
};
static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
{
GlusterAIOCB *acb = (GlusterAIOCB *)arg;
BlockDriverState *bs = acb->common.bs;
BDRVGlusterState *s = bs->opaque;
int retval;
acb->ret = ret;
retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
if (retval != sizeof(acb)) {
/*
* Gluster AIO callback thread failed to notify the waiting
* QEMU thread about IO completion.
*
* Complete this IO request and make the disk inaccessible for
* subsequent reads and writes.
*/
error_report("Gluster failed to notify QEMU about IO completion");
qemu_mutex_lock_iothread(); /* We are in gluster thread context */
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_release(acb);
s->qemu_aio_count--;
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
NULL);
bs->drv = NULL; /* Make the disk inaccessible */
qemu_mutex_unlock_iothread();
}
}
static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int write)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
size_t size;
off_t offset;
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
s->qemu_aio_count++;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
acb->finished = NULL;
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -554,16 +483,14 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
}
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
return &acb->common;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -579,68 +506,75 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
return 0;
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
acb->finished = NULL;
s->qemu_aio_count++;
ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
return &acb->common;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
void *opaque)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
size_t size;
off_t offset;
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
acb->finished = NULL;
s->qemu_aio_count++;
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
return &acb->common;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
#endif
@@ -675,6 +609,10 @@ static void qemu_gluster_close(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
if (s->fd) {
glfs_close(s->fd);
s->fd = NULL;
@@ -694,11 +632,6 @@ static QEMUOptionParameter qemu_gluster_create_options[] = {
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{ NULL }
};
@@ -706,25 +639,18 @@ static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@@ -733,25 +659,18 @@ static BlockDriver bdrv_gluster_tcp = {
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@@ -760,25 +679,18 @@ static BlockDriver bdrv_gluster_unix = {
.format_name = "gluster",
.protocol_name = "gluster+unix",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@@ -787,25 +699,18 @@ static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
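One side of the gluster diff above completes AIO through a self-pipe: glfs_*_async() callbacks run on a GlusterFS thread, so gluster_finish_aiocb() writes the completed GlusterAIOCB pointer into a pipe whose read end the QEMU main loop watches, and the request is actually finished on the QEMU side in qemu_gluster_aio_event_reader(). A generic sketch of that handoff pattern (illustrative names; the driver itself uses qemu_write_full() to retry short writes):

    #include <unistd.h>
    #include <stdio.h>

    /* Self-pipe handoff between a library callback thread and the event
     * loop thread: fds[1] is the write end, fds[0] is watched for reads. */
    struct handoff { int fds[2]; };

    /* Called from the library's completion thread. */
    static void completion_cb(void *acb, struct handoff *h)
    {
        /* A pointer-sized write is atomic on a pipe (POSIX guarantees
         * atomicity up to PIPE_BUF, which is at least 512 bytes). */
        if (write(h->fds[1], &acb, sizeof(acb)) != sizeof(acb)) {
            perror("notify event loop");
        }
    }

    /* Called by the event loop when fds[0] becomes readable. */
    static void *event_reader(struct handoff *h)
    {
        void *acb = NULL;
        if (read(h->fds[0], &acb, sizeof(acb)) == sizeof(acb)) {
            return acb;   /* complete the request on the event-loop thread */
        }
        return NULL;
    }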

File diff suppressed because it is too large Load Diff

View File

@@ -39,6 +39,7 @@ struct qemu_laiocb {
struct qemu_laio_state {
io_context_t ctx;
EventNotifier e;
int count;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -54,6 +55,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
{
int ret;
s->count--;
ret = laiocb->ret;
if (ret != -ECANCELED) {
if (ret == laiocb->nbytes) {
@@ -98,6 +101,13 @@ static void qemu_laio_completion_cb(EventNotifier *e)
}
}
static int qemu_laio_flush_cb(EventNotifier *e)
{
struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
return (s->count > 0) ? 1 : 0;
}
static void laio_cancel(BlockDriverAIOCB *blockacb)
{
struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -167,11 +177,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
goto out_free_aiocb;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
s->count++;
if (io_submit(s->ctx, 1, &iocbs) < 0)
goto out_free_aiocb;
goto out_dec_count;
return &laiocb->common;
out_dec_count:
s->count--;
out_free_aiocb:
qemu_aio_release(laiocb);
return NULL;
@@ -190,7 +203,8 @@ void *laio_init(void)
goto out_close_efd;
}
qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
qemu_laio_flush_cb);
return s;
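The count field threaded through the linux-aio hunks above is plain in-flight accounting: the flush callback tells the main loop whether it must keep polling for completions, which is what qemu_laio_flush_cb() reports from s->count. A sketch of the same pattern with illustrative names:

    /* In-flight accounting for an AIO backend (illustrative, not QEMU's). */
    struct aio_state {
        int count;              /* requests submitted but not yet completed */
    };

    static void track_submitted(struct aio_state *s)
    {
        s->count++;             /* bump before io_submit() */
    }

    static void track_failed_submit(struct aio_state *s)
    {
        s->count--;             /* roll back if io_submit() rejected it */
    }

    static void track_completed(struct aio_state *s)
    {
        s->count--;             /* first thing in the completion path */
    }

    static int flush_cb(struct aio_state *s)
    {
        return s->count > 0;    /* nonzero: poll again; zero: idle */
    }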

View File

@@ -31,8 +31,7 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *target;
BlockDriverState *base;
bool is_none_mode;
MirrorSyncMode mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
@@ -40,7 +39,6 @@ typedef struct MirrorBlockJob {
int64_t granularity;
size_t buf_size;
unsigned long *cow_bitmap;
BdrvDirtyBitmap *dirty_bitmap;
HBitmapIter hbi;
uint8_t *buf;
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
@@ -96,16 +94,8 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
qemu_iovec_destroy(&op->qiov);
g_slice_free(MirrorOp, op);
/* Enter coroutine when it is not sleeping. The coroutine sleeps to
* rate-limit itself. The coroutine will eventually resume since there is
* a sleep timeout so don't wake it early.
*/
if (s->common.busy) {
qemu_coroutine_enter(s->common.co, NULL);
}
qemu_coroutine_enter(s->common.co, NULL);
}
static void mirror_write_complete(void *opaque, int ret)
@@ -146,20 +136,18 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_write_complete, op);
}
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
BlockDriverState *source = s->common.bs;
int nb_sectors, sectors_per_chunk, nb_chunks;
int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
uint64_t delay_ns;
MirrorOp *op;
s->sector_num = hbitmap_iter_next(&s->hbi);
if (s->sector_num < 0) {
bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
bdrv_dirty_iter_init(source, &s->hbi);
s->sector_num = hbitmap_iter_next(&s->hbi);
trace_mirror_restart_iter(s,
bdrv_get_dirty_count(source, s->dirty_bitmap));
trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
assert(s->sector_num >= 0);
}
@@ -195,7 +183,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
do {
int added_sectors, added_chunks;
if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
if (!bdrv_get_dirty(source, next_sector) ||
test_bit(next_chunk, s->in_flight_bitmap)) {
assert(nb_sectors > 0);
break;
@@ -239,12 +227,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks += added_chunks;
next_sector += added_sectors;
next_chunk += added_chunks;
if (!s->synced && s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
} else {
delay_ns = 0;
}
} while (delay_ns == 0 && next_sector < end);
} while (next_sector < end);
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_slice_new(MirrorOp);
@@ -266,8 +249,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Advance the HBitmapIter in parallel, so that we do not examine
* the same sector twice.
*/
if (next_sector > hbitmap_next_sector
&& bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
}
@@ -281,7 +263,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
trace_mirror_one_iteration(s, sector_num, nb_sectors);
bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
mirror_read_complete, op);
return delay_ns;
}
static void mirror_free_init(MirrorBlockJob *s)
@@ -351,13 +332,14 @@ static void coroutine_fn mirror_run(void *opaque)
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);
if (!s->is_none_mode) {
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
BlockDriverState *base;
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
if (ret < 0) {
goto immediate_exit;
@@ -373,10 +355,10 @@ static void coroutine_fn mirror_run(void *opaque)
}
}
bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
bdrv_dirty_iter_init(bs, &s->hbi);
last_pause_ns = qemu_get_clock_ns(rt_clock);
for (;;) {
uint64_t delay_ns = 0;
uint64_t delay_ns;
int64_t cnt;
bool should_complete;
@@ -385,14 +367,14 @@ static void coroutine_fn mirror_run(void *opaque)
goto immediate_exit;
}
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that qemu_aio_flush() returns.
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
@@ -400,10 +382,8 @@ static void coroutine_fn mirror_run(void *opaque)
qemu_coroutine_yield();
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
if (delay_ns == 0) {
continue;
}
mirror_iteration(s);
continue;
}
}
@@ -429,7 +409,7 @@ static void coroutine_fn mirror_run(void *opaque)
should_complete = s->should_complete ||
block_job_is_cancelled(&s->common);
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
}
}
@@ -444,21 +424,28 @@ static void coroutine_fn mirror_run(void *opaque)
*/
trace_mirror_before_drain(s, cnt);
bdrv_drain_all();
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
}
ret = 0;
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
trace_mirror_before_sleep(s, cnt, s->synced);
if (!s->synced) {
/* Publish progress */
s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
} else {
delay_ns = 0;
}
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
} else if (!should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
} else if (cnt == 0) {
/* The two disks are in sync. Exit and report successful
* completion.
@@ -467,7 +454,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
last_pause_ns = qemu_get_clock_ns(rt_clock);
}
immediate_exit:
@@ -484,22 +471,16 @@ immediate_exit:
qemu_vfree(s->buf);
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_set_dirty_tracking(bs, 0);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
}
bdrv_swap(s->target, s->common.bs);
if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
/* drop the bs loop chain formed by the swap: break the loop then
* trigger the unref from the top one */
BlockDriverState *p = s->base->backing_hd;
s->base->backing_hd = NULL;
bdrv_unref(p);
}
}
bdrv_unref(s->target);
bdrv_close(s->target);
bdrv_delete(s->target);
block_job_completed(&s->common, ret);
}
@@ -524,12 +505,14 @@ static void mirror_iostatus_reset(BlockJob *job)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
Error *local_err = NULL;
int ret;
ret = bdrv_open_backing_file(s->target, NULL, &local_err);
ret = bdrv_open_backing_file(s->target, NULL);
if (ret < 0) {
error_propagate(errp, local_err);
char backing_filename[PATH_MAX];
bdrv_get_full_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
error_setg_file_open(errp, -ret, backing_filename);
return;
}
if (!s->synced) {
@@ -541,32 +524,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
block_job_resume(job);
}
static const BlockJobDriver mirror_job_driver = {
static const BlockJobType mirror_job_type = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_MIRROR,
.job_type = "mirror",
.set_speed = mirror_set_speed,
.iostatus_reset= mirror_iostatus_reset,
.complete = mirror_complete,
};
static const BlockJobDriver commit_active_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.set_speed = mirror_set_speed,
.iostatus_reset = mirror_iostatus_reset,
.complete = mirror_complete,
};
static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity,
int64_t buf_size,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base)
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
MirrorBlockJob *s;
@@ -591,8 +562,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
return;
}
s = block_job_create(driver, bs, speed, cb, opaque, errp);
s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}
@@ -600,12 +570,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
s->target = target;
s->is_none_mode = is_none_mode;
s->base = base;
s->mode = mode;
s->granularity = granularity;
s->buf_size = MAX(buf_size, granularity);
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
bdrv_set_dirty_tracking(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
@@ -613,80 +582,3 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
}
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
mirror_start_job(bs, target, speed, granularity, buf_size,
on_source_error, on_target_error, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
int64_t speed,
BlockdevOnError on_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
int64_t length, base_length;
int orig_base_flags;
int ret;
Error *local_err = NULL;
orig_base_flags = bdrv_get_flags(base);
if (bdrv_reopen(base, bs->open_flags, errp)) {
return;
}
length = bdrv_getlength(bs);
if (length < 0) {
error_setg_errno(errp, -length,
"Unable to determine length of %s", bs->filename);
goto error_restore_flags;
}
base_length = bdrv_getlength(base);
if (base_length < 0) {
error_setg_errno(errp, -base_length,
"Unable to determine length of %s", base->filename);
goto error_restore_flags;
}
if (length > base_length) {
ret = bdrv_truncate(base, length);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Top image %s is larger than base image %s, and "
"resize of base image failed",
bs->filename, base->filename);
goto error_restore_flags;
}
}
bdrv_ref(base);
mirror_start_job(bs, base, speed, 0, 0,
on_error, on_error, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto error_restore_flags;
}
return;
error_restore_flags:
/* ignore error and errp for bdrv_reopen, because we want to propagate
* the original error */
bdrv_reopen(base, orig_base_flags, NULL);
return;
}
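The delay_ns plumbing in the mirror hunks above is a time-slice rate limiter: each iteration accounts the sectors it copied, and once the slice's quota is spent the coroutine sleeps out the remainder via block_job_sleep_ns(). A generic sketch of the accounting (illustrative names, not QEMU's RateLimit API):

    #include <stdint.h>

    struct rate {
        uint64_t dispatched;       /* work accounted in the current slice */
        uint64_t quota_per_slice;  /* budget per SLICE_TIME */
    };

    /* Account n units of work; return how long the caller should sleep. */
    static uint64_t rate_delay_ns(struct rate *r, uint64_t n, uint64_t slice_ns)
    {
        r->dispatched += n;
        if (r->dispatched < r->quota_per_slice) {
            return 0;              /* under budget: keep going */
        }
        r->dispatched = 0;         /* open a new slice */
        return slice_ns;           /* caller sleeps out the slice */
    }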


@@ -1,388 +0,0 @@
/*
* QEMU Block driver for NBD
*
* Copyright (C) 2008 Bull S.A.S.
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
* Some parts:
* Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "nbd-client.h"
#include "qemu/sockets.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
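/* Worked example of the XOR mapping above: with key k = (uint64_t)(intptr_t)bs,
 * HANDLE_TO_INDEX(bs, INDEX_TO_HANDLE(bs, i)) == (i ^ k) ^ k == i, so each
 * in-flight slot i in [0, MAX_NBD_REQUESTS) gets a handle that is unique per
 * BlockDriverState and trivially invertible; a reply whose handle decodes to
 * an out-of-range index is rejected in nbd_reply_ready() below. */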
static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
{
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
}
}
}
static void nbd_teardown_connection(NbdClientSession *client)
{
/* finish any pending coroutines */
shutdown(client->sock, 2);
nbd_recv_coroutines_enter_all(client);
qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
closesocket(client->sock);
client->sock = -1;
}
static void nbd_reply_ready(void *opaque)
{
NbdClientSession *s = opaque;
uint64_t i;
int ret;
if (s->reply.handle == 0) {
/* No reply is currently in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
ret = nbd_receive_reply(s->sock, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
}
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
return;
}
fail:
nbd_teardown_connection(s);
}
static void nbd_restart_write(void *opaque)
{
NbdClientSession *s = opaque;
qemu_coroutine_enter(s->send_coroutine, NULL);
}
static int nbd_co_send_request(NbdClientSession *s,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
}
rc = nbd_send_request(s->sock, request);
if (rc >= 0) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
rc = -EIO;
}
}
if (!s->is_unix) {
socket_set_cork(s->sock, 0);
}
} else {
rc = nbd_send_request(s->sock, request);
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
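Note the corking around the two-part send above: on TCP transports, setting TCP_CORK before writing the request header and the payload lets the kernel coalesce them into as few segments as possible, and popping the cork afterwards flushes the batch; the calls are skipped for Unix sockets, where the option does not apply.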
static void nbd_co_receive_reply(NbdClientSession *s,
struct nbd_request *request, struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
reply->error = EIO;
}
}
/* Tell the read handler to read another header. */
s->reply.handle = 0;
}
}
static void nbd_coroutine_start(NbdClientSession *s,
struct nbd_request *request)
{
int i;
/* Poor man's semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
qemu_co_mutex_lock(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i] == NULL) {
s->recv_coroutine[i] = qemu_coroutine_self();
break;
}
}
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
}
static void nbd_coroutine_end(NbdClientSession *s,
struct nbd_request *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_mutex_unlock(&s->free_sema);
}
}
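Together, nbd_coroutine_start() and nbd_coroutine_end() cap concurrency at MAX_NBD_REQUESTS: once in_flight reaches MAX_NBD_REQUESTS - 1, the next starter must acquire free_sema (and may park on it), and the finisher that drops the count back down from MAX_NBD_REQUESTS releases the mutex, waking one waiter.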
static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
struct nbd_request request = { .type = NBD_CMD_READ };
struct nbd_reply reply;
ssize_t ret;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, qiov, offset);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_reply reply;
ssize_t ret;
if (!bdrv_enable_write_cache(client->bs) &&
(client->nbdflags & NBD_FLAG_SEND_FUA)) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, qiov, offset);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
/* qemu-nbd has a limit of slightly less than 1M per request. Try to
* remain aligned to 4K. */
#define NBD_MAX_SECTORS 2040
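The constant works out as follows: 2040 sectors x 512 bytes = 1,044,480 bytes (1020 KiB), which stays below the roughly 1 MiB per-request limit, and because 2040 is a multiple of 8 sectors, every chunk boundary remains 4 KiB aligned.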
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(client, sector_num,
NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset);
}
int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(client, sector_num,
NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset);
}
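Both wrappers above carve an oversized request into NBD_MAX_SECTORS chunks plus a tail. A standalone sketch (made-up 5000-sector request, not QEMU code) traces the arithmetic the loop performs:

    #include <stdio.h>

    #define NBD_MAX_SECTORS 2040

    int main(void)
    {
        long sector = 0, remaining = 5000, offset = 0;  /* hypothetical request */
        while (remaining > NBD_MAX_SECTORS) {
            printf("chunk: sector %ld len %d buf-offset %ld\n",
                   sector, NBD_MAX_SECTORS, offset);
            offset += NBD_MAX_SECTORS * 512L;
            sector += NBD_MAX_SECTORS;
            remaining -= NBD_MAX_SECTORS;
        }
        printf("tail:  sector %ld len %ld buf-offset %ld\n",
               sector, remaining, offset);
        return 0;
    }

This prints two 2040-sector chunks followed by a 920-sector tail, matching what the read and write paths issue on the wire.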
int nbd_client_session_co_flush(NbdClientSession *client)
{
struct nbd_request request = { .type = NBD_CMD_FLUSH };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
return 0;
}
if (client->nbdflags & NBD_FLAG_SEND_FUA) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = 0;
request.len = 0;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
int nb_sectors)
{
struct nbd_request request = { .type = NBD_CMD_TRIM };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
void nbd_client_session_close(NbdClientSession *client)
{
struct nbd_request request = {
.type = NBD_CMD_DISC,
.from = 0,
.len = 0
};
if (!client->bs) {
return;
}
if (client->sock == -1) {
return;
}
nbd_send_request(client->sock, &request);
nbd_teardown_connection(client);
client->bs = NULL;
}
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
int sock, const char *export)
{
int ret;
/* NBD handshake */
logout("session init %s\n", export);
qemu_set_block(sock);
ret = nbd_receive_negotiate(sock, export,
&client->nbdflags, &client->size,
&client->blocksize);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
closesocket(sock);
return ret;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
client->bs = bs;
client->sock = sock;
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
logout("Established connection with NBD server\n");
return 0;
}


@@ -1,50 +0,0 @@
#ifndef NBD_CLIENT_H
#define NBD_CLIENT_H
#include "qemu-common.h"
#include "block/nbd.h"
#include "block/block_int.h"
/* #define DEBUG_NBD */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
int sock;
uint32_t nbdflags;
off_t size;
size_t blocksize;
CoMutex send_mutex;
CoMutex free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
bool is_unix;
BlockDriverState *bs;
} NbdClientSession;
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
int sock, const char *export_name);
void nbd_client_session_close(NbdClientSession *client);
int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
int nb_sectors);
int nbd_client_session_co_flush(NbdClientSession *client);
int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
#endif /* NBD_CLIENT_H */
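The header above is the entire public surface of the split-out client. A sketch of the intended call sequence (compiles only inside the QEMU tree; the connected socket, BlockDriverState and a qiov describing 8 * 512 bytes are assumed to come from the caller, and the read must run in coroutine context):

    #include "nbd-client.h"

    /* Hypothetical consumer, not part of the patch series. The session
     * struct is normally embedded in BDRVNBDState and zeroed by the
     * block layer; a designated initializer stands in for that here. */
    static int use_session(BlockDriverState *bs, int sock, QEMUIOVector *qiov)
    {
        NbdClientSession session = { 0 };
        int ret = nbd_client_session_init(&session, bs, sock, "export0");
        if (ret < 0) {
            return ret;                         /* negotiation failed */
        }
        ret = nbd_client_session_co_readv(&session, 0, 8, qiov);
        nbd_client_session_close(&session);     /* sends NBD_CMD_DISC */
        return ret;
    }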


@@ -26,7 +26,8 @@
* THE SOFTWARE.
*/
#include "block/nbd-client.h"
#include "qemu-common.h"
#include "block/nbd.h"
#include "qemu/uri.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -39,9 +40,37 @@
#define EN_OPTSTR ":exportname="
/* #define DEBUG_NBD */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif
#define MAX_NBD_REQUESTS 16
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
typedef struct BDRVNBDState {
NbdClientSession client;
int sock;
uint32_t nbdflags;
off_t size;
size_t blocksize;
CoMutex send_mutex;
CoMutex free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
bool is_unix;
QemuOpts *socket_opts;
char *export_name; /* An NBD server may export several devices */
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -188,49 +217,204 @@ out:
g_free(file);
}
static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
Error **errp)
static int nbd_config(BDRVNBDState *s, QDict *options)
{
Error *local_err = NULL;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
if (qdict_haskey(options, "path")) {
if (qdict_haskey(options, "host")) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "path and host may not "
"be used at the same time.");
return -EINVAL;
}
return;
s->is_unix = true;
} else if (qdict_haskey(options, "host")) {
s->is_unix = false;
} else {
return -EINVAL;
}
s->client.is_unix = qdict_haskey(options, "path");
s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
&error_abort);
s->socket_opts = qemu_opts_create_nofail(&socket_optslist);
qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
return -EINVAL;
}
if (!qemu_opt_get(s->socket_opts, "port")) {
qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
}
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
s->export_name = g_strdup(qdict_get_try_str(options, "export"));
if (s->export_name) {
qdict_del(options, "export");
}
return 0;
}
static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
{
int i;
/* Poor man's semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
qemu_co_mutex_lock(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i] == NULL) {
s->recv_coroutine[i] = qemu_coroutine_self();
break;
}
}
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
}
static int nbd_have_request(void *opaque)
{
BDRVNBDState *s = opaque;
return s->in_flight > 0;
}
static void nbd_reply_ready(void *opaque)
{
BDRVNBDState *s = opaque;
uint64_t i;
int ret;
if (s->reply.handle == 0) {
/* No reply is currently in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
ret = nbd_receive_reply(s->sock, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
}
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
return;
}
fail:
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
}
}
}
static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
static void nbd_restart_write(void *opaque)
{
BDRVNBDState *s = opaque;
qemu_coroutine_enter(s->send_coroutine, NULL);
}
static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
nbd_have_request, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
}
rc = nbd_send_request(s->sock, request);
if (rc >= 0) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
rc = -EIO;
}
}
if (!s->is_unix) {
socket_set_cork(s->sock, 0);
}
} else {
rc = nbd_send_request(s->sock, request);
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
nbd_have_request, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
reply->error = EIO;
}
}
/* Tell the read handler to read another header. */
s->reply.handle = 0;
}
}
static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_mutex_unlock(&s->free_sema);
}
}
static int nbd_establish_connection(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
int sock;
int ret;
off_t size;
size_t blocksize;
if (s->client.is_unix) {
sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
if (s->is_unix) {
sock = unix_socket_outgoing(qemu_opt_get(s->socket_opts, "path"));
} else {
sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
sock = tcp_socket_outgoing_opts(s->socket_opts);
if (sock >= 0) {
socket_set_nodelay(sock);
}
@@ -242,85 +426,226 @@ static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
return -errno;
}
return sock;
/* NBD handshake */
ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
&blocksize);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
closesocket(sock);
return ret;
}
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
nbd_have_request, s);
s->sock = sock;
s->size = size;
s->blocksize = blocksize;
logout("Established connection with NBD server\n");
return 0;
}
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static void nbd_teardown_connection(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
int result, sock;
Error *local_err = NULL;
struct nbd_request request;
request.type = NBD_CMD_DISC;
request.from = 0;
request.len = 0;
nbd_send_request(s->sock, &request);
qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
closesocket(s->sock);
}
static int nbd_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVNBDState *s = bs->opaque;
int result;
qemu_co_mutex_init(&s->send_mutex);
qemu_co_mutex_init(&s->free_sema);
/* Pop the config into our state object. Exit if invalid. */
nbd_config(s, options, &export, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
result = nbd_config(s, options);
if (result != 0) {
return result;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
sock = nbd_establish_connection(bs, errp);
if (sock < 0) {
return sock;
}
result = nbd_establish_connection(bs);
/* NBD handshake */
result = nbd_client_session_init(&s->client, bs, sock, export);
g_free(export);
return result;
}
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
request.type = NBD_CMD_READ;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, qiov, offset);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
request.type = NBD_CMD_WRITE;
if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, qiov, offset);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
/* qemu-nbd has a limit of slightly less than 1M per request. Try to
* remain aligned to 4K. */
#define NBD_MAX_SECTORS 2040
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVNBDState *s = bs->opaque;
return nbd_client_session_co_readv(&s->client, sector_num,
nb_sectors, qiov);
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
}
static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVNBDState *s = bs->opaque;
return nbd_client_session_co_writev(&s->client, sector_num,
nb_sectors, qiov);
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
}
static int nbd_co_flush(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
return nbd_client_session_co_flush(&s->client);
if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
return 0;
}
request.type = NBD_CMD_FLUSH;
if (s->nbdflags & NBD_FLAG_SEND_FUA) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = 0;
request.len = 0;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
return nbd_client_session_co_discard(&s->client, sector_num,
nb_sectors);
if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.type = NBD_CMD_TRIM;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
g_free(s->export_name);
qemu_opts_del(s->socket_opts);
nbd_client_session_close(&s->client);
nbd_teardown_connection(bs);
}
static int64_t nbd_getlength(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
return s->client.size;
return s->size;
}
static BlockDriver bdrv_nbd = {


@@ -1,442 +0,0 @@
/*
* QEMU Block driver for native access to files on NFS shares
*
* Copyright (c) 2014 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "config-host.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/uri.h"
#include "sysemu/sysemu.h"
#include <nfsc/libnfs.h>
typedef struct NFSClient {
struct nfs_context *context;
struct nfsfh *fh;
int events;
bool has_zero_init;
} NFSClient;
typedef struct NFSRPC {
int ret;
int complete;
QEMUIOVector *iov;
struct stat *st;
Coroutine *co;
QEMUBH *bh;
} NFSRPC;
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
qemu_aio_set_fd_handler(nfs_get_fd(client->context),
(ev & POLLIN) ? nfs_process_read : NULL,
(ev & POLLOUT) ? nfs_process_write : NULL,
client);
}
client->events = ev;
}
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
nfs_service(client->context, POLLIN);
nfs_set_events(client);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
}
static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
{
*task = (NFSRPC) {
.co = qemu_coroutine_self(),
};
}
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
qemu_bh_delete(task->bh);
qemu_coroutine_enter(task->co, NULL);
}
static void
nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
{
NFSRPC *task = private_data;
task->complete = 1;
task->ret = ret;
if (task->ret > 0 && task->iov) {
if (task->ret <= task->iov->size) {
qemu_iovec_from_buf(task->iov, 0, data, task->ret);
} else {
task->ret = -EIO;
}
}
if (task->ret == 0 && task->st) {
memcpy(task->st, data, sizeof(struct stat));
}
if (task->ret < 0) {
error_report("NFS Error: %s", nfs_get_error(nfs));
}
if (task->co) {
task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
qemu_bh_schedule(task->bh);
}
}
static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
{
NFSClient *client = bs->opaque;
NFSRPC task;
nfs_co_init_task(client, &task);
task.iov = iov;
if (nfs_pread_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
nfs_co_generic_cb, &task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
if (task.ret < 0) {
return task.ret;
}
/* zero pad short reads */
if (task.ret < iov->size) {
qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
}
return 0;
}
static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
{
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
nfs_co_init_task(client, &task);
buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
buf, nfs_co_generic_cb, &task) != 0) {
g_free(buf);
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
g_free(buf);
if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
return task.ret < 0 ? task.ret : -EIO;
}
return 0;
}
static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
NFSRPC task;
nfs_co_init_task(client, &task);
if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
return task.ret;
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "nfs",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the NFS file",
},
{ /* end of list */ }
},
};
static void nfs_client_close(NFSClient *client)
{
if (client->context) {
if (client->fh) {
nfs_close(client->context, client->fh);
}
qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
}
static void nfs_file_close(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
nfs_client_close(client);
}
static int64_t nfs_client_open(NFSClient *client, const char *filename,
int flags, Error **errp)
{
int ret = -EINVAL, i;
struct stat st;
URI *uri;
QueryParams *qp = NULL;
char *file = NULL, *strp = NULL;
uri = uri_parse(filename);
if (!uri) {
error_setg(errp, "Invalid URL specified");
goto fail;
}
strp = strrchr(uri->path, '/');
if (strp == NULL) {
error_setg(errp, "Invalid URL specified");
goto fail;
}
file = g_strdup(strp);
*strp = 0;
client->context = nfs_init_context();
if (client->context == NULL) {
error_setg(errp, "Failed to init NFS context");
goto fail;
}
qp = query_params_parse(uri->query);
for (i = 0; i < qp->n; i++) {
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto fail;
}
if (!strncmp(qp->p[i].name, "uid", 3)) {
nfs_set_uid(client->context, atoi(qp->p[i].value));
} else if (!strncmp(qp->p[i].name, "gid", 3)) {
nfs_set_gid(client->context, atoi(qp->p[i].value));
} else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
goto fail;
}
}
ret = nfs_mount(client->context, uri->server, uri->path);
if (ret < 0) {
error_setg(errp, "Failed to mount nfs share: %s",
nfs_get_error(client->context));
goto fail;
}
if (flags & O_CREAT) {
ret = nfs_creat(client->context, file, 0600, &client->fh);
if (ret < 0) {
error_setg(errp, "Failed to create file: %s",
nfs_get_error(client->context));
goto fail;
}
} else {
ret = nfs_open(client->context, file, flags, &client->fh);
if (ret < 0) {
error_setg(errp, "Failed to open file : %s",
nfs_get_error(client->context));
goto fail;
}
}
ret = nfs_fstat(client->context, client->fh, &st);
if (ret < 0) {
error_setg(errp, "Failed to fstat file: %s",
nfs_get_error(client->context));
goto fail;
}
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
client->has_zero_init = S_ISREG(st.st_mode);
goto out;
fail:
nfs_client_close(client);
out:
if (qp) {
query_params_free(qp);
}
uri_free(uri);
g_free(file);
return ret;
}
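nfs_client_open() derives everything from one URL: server and export path from the URI itself, plus the optional uid, gid and tcp-syncnt query parameters handled above. A hypothetical invocation (host and path made up) would look like:

    qemu-system-x86_64 \
        -drive file="nfs://10.0.0.1/export/guest.img?uid=1000&gid=100&tcp-syncnt=8"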
static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp) {
NFSClient *client = bs->opaque;
int64_t ret;
QemuOpts *opts;
Error *local_err = NULL;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
return -EINVAL;
}
ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
errp);
if (ret < 0) {
return ret;
}
bs->total_sectors = ret;
return 0;
}
static int nfs_file_create(const char *url, QEMUOptionParameter *options,
Error **errp)
{
int ret = 0;
int64_t total_size = 0;
NFSClient *client = g_malloc0(sizeof(NFSClient));
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, "size")) {
total_size = options->value.n;
}
options++;
}
ret = nfs_client_open(client, url, O_CREAT, errp);
if (ret < 0) {
goto out;
}
ret = nfs_ftruncate(client->context, client->fh, total_size);
nfs_client_close(client);
out:
g_free(client);
return ret;
}
static int nfs_has_zero_init(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
return client->has_zero_init;
}
static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
NFSRPC task = {0};
struct stat st;
task.st = &st;
if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_aio_wait();
}
return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
}
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
{
NFSClient *client = bs->opaque;
return nfs_ftruncate(client->context, client->fh, offset);
}
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
.instance_size = sizeof(NFSClient),
.bdrv_needs_filename = true,
.bdrv_has_zero_init = nfs_has_zero_init,
.bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
.bdrv_truncate = nfs_file_truncate,
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
.bdrv_create = nfs_file_create,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
.bdrv_co_flush_to_disk = nfs_co_flush,
};
static void nfs_block_init(void)
{
bdrv_register(&bdrv_nfs);
}
block_init(nfs_block_init);
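Because nfs_file_create() opens the target with O_CREAT and truncates it to the requested size, creating an image over the same URL scheme should also work, e.g. (hypothetical share):

    qemu-img create nfs://10.0.0.1/export/guest.img 10G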


@@ -49,9 +49,9 @@ typedef struct BDRVParallelsState {
CoMutex lock;
uint32_t *catalog_bitmap;
unsigned int catalog_size;
int catalog_size;
unsigned int tracks;
int tracks;
} BDRVParallelsState;
static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -68,8 +68,7 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam
return 0;
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int parallels_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVParallelsState *s = bs->opaque;
int i;
@@ -85,26 +84,15 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
(le32_to_cpu(ph.version) != HEADER_VERSION)) {
error_setg(errp, "Image not in Parallels format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
bs->total_sectors = le32_to_cpu(ph.nb_sectors);
s->tracks = le32_to_cpu(ph.tracks);
if (s->tracks == 0) {
error_setg(errp, "Invalid image: Zero sectors per track");
ret = -EINVAL;
goto fail;
}
s->catalog_size = le32_to_cpu(ph.catalog_entries);
if (s->catalog_size > INT_MAX / 4) {
error_setg(errp, "Catalog too large");
ret = -EFBIG;
goto fail;
}
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);


@@ -25,63 +25,6 @@
#include "block/qapi.h"
#include "block/block_int.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
{
BlockDeviceInfo *info = g_malloc0(sizeof(*info));
info->file = g_strdup(bs->filename);
info->ro = bs->read_only;
info->drv = g_strdup(bs->drv->format_name);
info->encrypted = bs->encrypted;
info->encryption_key_missing = bdrv_key_required(bs);
if (bs->node_name[0]) {
info->has_node_name = true;
info->node_name = g_strdup(bs->node_name);
}
if (bs->backing_file[0]) {
info->has_backing_file = true;
info->backing_file = g_strdup(bs->backing_file);
}
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
ThrottleConfig cfg;
throttle_get_config(&bs->throttle_state, &cfg);
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
info->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
info->has_bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->has_bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max;
info->bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max;
info->has_bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max;
info->bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max;
info->has_iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
info->iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
}
return info;
}
/*
* Returns 0 on success, with *p_list either set to describe snapshot
@@ -191,9 +134,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->dirty_flag = bdi.is_dirty;
info->has_dirty_flag = true;
}
info->format_specific = bdrv_get_specific_info(bs);
info->has_format_specific = info->format_specific != NULL;
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
@@ -258,20 +198,50 @@ void bdrv_query_info(BlockDriverState *bs,
info->io_status = bs->iostatus;
}
if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
if (bs->dirty_bitmap) {
info->has_dirty = true;
info->dirty = g_malloc0(sizeof(*info->dirty));
info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
info->dirty->granularity =
((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
}
if (bs->drv) {
info->has_inserted = true;
info->inserted = bdrv_block_device_info(bs);
info->inserted = g_malloc0(sizeof(*info->inserted));
info->inserted->file = g_strdup(bs->filename);
info->inserted->ro = bs->read_only;
info->inserted->drv = g_strdup(bs->drv->format_name);
info->inserted->encrypted = bs->encrypted;
info->inserted->encryption_key_missing = bdrv_key_required(bs);
if (bs->backing_file[0]) {
info->inserted->has_backing_file = true;
info->inserted->backing_file = g_strdup(bs->backing_file);
}
info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
info->inserted->bps =
bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
info->inserted->bps_rd =
bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
info->inserted->bps_wr =
bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
info->inserted->iops =
bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
info->inserted->iops_rd =
bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
info->inserted->iops_wr =
bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
}
bs0 = bs;
p_image_info = &info->inserted->image;
while (1) {
bdrv_query_image_info(bs0, p_image_info, &local_err);
if (local_err) {
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto err;
}
@@ -319,11 +289,6 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs)
s->parent = bdrv_query_stats(bs->file);
}
if (bs->backing_hd) {
s->has_backing = true;
s->backing = bdrv_query_stats(bs->backing_hd);
}
return s;
}
@@ -336,7 +301,7 @@ BlockInfoList *qmp_query_block(Error **errp)
while ((bs = bdrv_next(bs))) {
BlockInfoList *info = g_malloc0(sizeof(*info));
bdrv_query_info(bs, &info->value, &local_err);
if (local_err) {
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto err;
}
@@ -432,119 +397,6 @@ void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
}
}
static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
QDict *dict);
static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
QList *list);
static void dump_qobject(fprintf_function func_fprintf, void *f,
int comp_indent, QObject *obj)
{
switch (qobject_type(obj)) {
case QTYPE_QINT: {
QInt *value = qobject_to_qint(obj);
func_fprintf(f, "%" PRId64, qint_get_int(value));
break;
}
case QTYPE_QSTRING: {
QString *value = qobject_to_qstring(obj);
func_fprintf(f, "%s", qstring_get_str(value));
break;
}
case QTYPE_QDICT: {
QDict *value = qobject_to_qdict(obj);
dump_qdict(func_fprintf, f, comp_indent, value);
break;
}
case QTYPE_QLIST: {
QList *value = qobject_to_qlist(obj);
dump_qlist(func_fprintf, f, comp_indent, value);
break;
}
case QTYPE_QFLOAT: {
QFloat *value = qobject_to_qfloat(obj);
func_fprintf(f, "%g", qfloat_get_double(value));
break;
}
case QTYPE_QBOOL: {
QBool *value = qobject_to_qbool(obj);
func_fprintf(f, "%s", qbool_get_int(value) ? "true" : "false");
break;
}
case QTYPE_QERROR: {
QString *value = qerror_human((QError *)obj);
func_fprintf(f, "%s", qstring_get_str(value));
break;
}
case QTYPE_NONE:
break;
case QTYPE_MAX:
default:
abort();
}
}
static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
QList *list)
{
const QListEntry *entry;
int i = 0;
for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
qtype_code type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
func_fprintf(f, format, indentation * 4, "", i);
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
}
}
static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
QDict *dict)
{
const QDictEntry *entry;
for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
qtype_code type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
char key[strlen(entry->key) + 1];
int i;
/* replace dashes with spaces in key (variable) names */
for (i = 0; entry->key[i]; i++) {
key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
}
key[i] = 0;
func_fprintf(f, format, indentation * 4, "", key);
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
}
}
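These dump_* helpers render nested QObjects with four spaces per indentation level, key dashes replaced by spaces, and list entries prefixed by their index. A hypothetical dict {"cluster-size": 65536, "snapshot-ids": [3, 7]} dumped at indentation 1 would print as:

    cluster size: 65536
    snapshot ids:
        [0]: 3
        [1]: 7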
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
Error *local_err = NULL;
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
&local_err);
obj = qmp_output_get_qobject(ov);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qmp_output_visitor_cleanup(ov);
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
ImageInfo *info)
{
@@ -615,9 +467,4 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
func_fprintf(f, "\n");
}
}
if (info->has_format_specific) {
func_fprintf(f, "Format specific information:\n");
bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
}
}


@@ -92,8 +92,7 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVQcowState *s = bs->opaque;
int len, i, shift, ret;
@@ -113,26 +112,23 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
be64_to_cpus(&header.l1_table_offset);
if (header.magic != QCOW_MAGIC) {
error_setg(errp, "Image not in qcow format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
if (header.version != QCOW_VERSION) {
char version[64];
snprintf(version, sizeof(version), "QCOW version %d", header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "qcow", version);
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "qcow", version);
ret = -ENOTSUP;
goto fail;
}
if (header.size <= 1 || header.cluster_bits < 9) {
error_setg(errp, "invalid value in qcow header");
ret = -EINVAL;
goto fail;
}
if (header.crypt_method > QCOW_CRYPT_AES) {
error_setg(errp, "invalid encryption method in qcow header");
ret = -EINVAL;
goto fail;
}
@@ -399,7 +395,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
@@ -414,14 +410,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
if (!cluster_offset) {
return 0;
}
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
return (cluster_offset != 0);
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@@ -662,8 +651,7 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
static int qcow_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int qcow_create(const char *filename, QEMUOptionParameter *options)
{
int header_size, backing_filename_len, l1_size, shift, i;
QCowHeader header;
@@ -671,7 +659,6 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
int64_t total_size = 0;
const char *backing_file = NULL;
int flags = 0;
Error *local_err = NULL;
int ret;
BlockDriverState *qcow_bs;
@@ -687,17 +674,13 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
options++;
}
ret = bdrv_create_file(filename, options, &local_err);
ret = bdrv_create_file(filename, options);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
qcow_bs = NULL;
ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -723,7 +706,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
backing_file = NULL;
}
header.cluster_bits = 9; /* 512 byte cluster to avoid copying
unmodified sectors */
unmodifyed sectors */
header.l2_bits = 12; /* 32 KB L2 tables */
} else {
header.cluster_bits = 12; /* 4 KB clusters */
@@ -768,7 +751,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
g_free(tmp);
ret = 0;
exit:
bdrv_unref(qcow_bs);
bdrv_delete(qcow_bs);
return ret;
}
@@ -913,7 +896,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_get_block_status = qcow_co_get_block_status,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,


@@ -114,21 +114,6 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return ret;
}
if (c == s->refcount_block_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
c->entries[i].offset, s->cluster_size);
} else if (c == s->l2_table_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
c->entries[i].offset, s->cluster_size);
} else {
ret = qcow2_pre_write_overlap_check(bs, 0,
c->entries[i].offset, s->cluster_size);
}
if (ret < 0) {
return ret;
}
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
@@ -200,24 +185,6 @@ void qcow2_cache_depends_on_flush(Qcow2Cache *c)
c->depends_on_flush = true;
}
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
{
int ret, i;
ret = qcow2_cache_flush(bs, c);
if (ret < 0) {
return ret;
}
for (i = 0; i < c->size; i++) {
assert(c->entries[i].ref == 0);
c->entries[i].offset = 0;
c->entries[i].cache_hits = 0;
}
return 0;
}
static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
{
int i;


@@ -35,7 +35,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BDRVQcowState *s = bs->opaque;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t old_l1_table_offset, old_l1_size;
int64_t new_l1_table_offset, new_l1_size;
uint8_t data[12];
@@ -55,7 +54,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
}
}
if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
if (new_l1_size > INT_MAX) {
return -EFBIG;
}
@@ -81,14 +80,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
goto fail;
}
/* the L1 position has not yet been updated, so these clusters must
* indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
new_l1_size2);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
@@ -101,19 +92,17 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
if (ret < 0) {
goto fail;
}
g_free(s->l1_table);
old_l1_table_offset = s->l1_table_offset;
qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
old_l1_size = s->l1_size;
s->l1_size = new_l1_size;
qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
return 0;
fail:
g_free(new_l1_table);
@@ -148,7 +137,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
* and we really don't want bdrv_pread to perform a read-modify-write)
*/
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
static int write_l1_entry(BlockDriverState *bs, int l1_index)
{
BDRVQcowState *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR];
@@ -160,12 +149,6 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
if (ret < 0) {
return ret;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
@@ -190,7 +173,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
BDRVQcowState *s = bs->opaque;
uint64_t old_l2_offset;
uint64_t *l2_table = NULL;
uint64_t *l2_table;
int64_t l2_offset;
int ret;
@@ -202,8 +185,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
if (l2_offset < 0) {
ret = l2_offset;
goto fail;
return l2_offset;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
@@ -216,7 +198,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
trace_qcow2_l2_allocate_get_empty(bs, l1_index);
ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
if (ret < 0) {
goto fail;
return ret;
}
l2_table = *table;
@@ -257,7 +239,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* update the L1 entry */
trace_qcow2_l2_allocate_write_l1(bs, l1_index);
s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
ret = qcow2_write_l1_entry(bs, l1_index);
ret = write_l1_entry(bs, l1_index);
if (ret < 0) {
goto fail;
}
@@ -268,14 +250,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
fail:
trace_qcow2_l2_allocate_done(bs, l1_index, ret);
if (l2_table != NULL) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
}
qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
s->l1_table[l1_index] = old_l2_offset;
if (l2_offset > 0) {
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
QCOW2_DISCARD_ALWAYS);
}
return ret;
}
@@ -287,7 +263,7 @@ fail:
* cluster which may require a different handling)
*/
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
uint64_t *l2_table, uint64_t stop_flags)
uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
{
int i;
uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
@@ -299,14 +275,14 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
for (i = 0; i < nb_clusters; i++) {
for (i = start; i < start + nb_clusters; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
if (offset + (uint64_t) i * cluster_size != l2_entry) {
break;
}
}
return i;
return (i - start);
}
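In both variants of count_contiguous_clusters(), an L2 entry counts as contiguous when, after masking off the flag bits, it equals the first entry's offset plus i times the cluster size. A standalone sketch (made-up offsets, flag masking omitted, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size = 65536;
        uint64_t l2[4] = { 0x10000, 0x20000, 0x30000, 0x90000 };  /* hypothetical */
        int i;
        for (i = 0; i < 4; i++) {
            if (l2[0] + (uint64_t)i * cluster_size != l2[i]) {
                break;  /* first non-contiguous entry stops the run */
            }
        }
        printf("%d contiguous clusters\n", i);  /* prints 3 */
        return 0;
    }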
static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
@@ -359,6 +335,15 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
struct iovec iov;
int n, ret;
/*
* If this is the last cluster and it is only partially used, we must only
* copy until the end of the image, or bdrv_check_request will fail for the
* bdrv_read/write calls below.
*/
if (start_sect + n_end > bs->total_sectors) {
n_end = bs->total_sectors - start_sect;
}
n = n_end - n_start;
if (n <= 0) {
return 0;
@@ -371,10 +356,6 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
if (!bs->drv) {
return -ENOMEDIUM;
}
/* Call .bdrv_co_readv() directly instead of using the public block-layer
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
@@ -390,12 +371,6 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
&s->aes_encrypt_key);
}
ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
if (ret < 0) {
goto out;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {
@@ -491,11 +466,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
break;
case QCOW2_CLUSTER_ZERO:
if (s->qcow_version < 3) {
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
return -EIO;
}
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], QCOW_OFLAG_ZERO);
&l2_table[l2_index], 0,
QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_UNALLOCATED:
@@ -506,7 +481,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
case QCOW2_CLUSTER_NORMAL:
/* how many allocated clusters ? */
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], QCOW_OFLAG_ZERO);
&l2_table[l2_index], 0,
QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
*cluster_offset &= L2E_OFFSET_MASK;
break;
default:
@@ -722,7 +698,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
}
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
assert(l2_index + m->nb_clusters <= s->l2_size);
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
* each write allocates separate cluster and writes data concurrently.
@@ -936,7 +911,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
/* We keep all QCOW_OFLAG_COPIED clusters */
keep_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index],
&l2_table[l2_index], 0,
QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
@@ -1178,7 +1153,7 @@ fail:
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m)
int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
uint64_t start, remaining;
@@ -1186,13 +1161,15 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
uint64_t cur_bytes;
int ret;
trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
n_start, n_end);
assert((offset & ~BDRV_SECTOR_MASK) == 0);
assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
offset = start_of_cluster(s, offset);
again:
start = offset;
remaining = *num << BDRV_SECTOR_BITS;
start = offset + (n_start << BDRV_SECTOR_BITS);
remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
@@ -1278,7 +1255,7 @@ again:
}
}
*num -= remaining >> BDRV_SECTOR_BITS;
*num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
assert(*num > 0);
assert(*host_offset != 0);
@@ -1343,7 +1320,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
* clusters.
*/
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
unsigned int nb_clusters, enum qcow2_discard_type type)
unsigned int nb_clusters)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table;
@@ -1363,34 +1340,16 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
uint64_t old_offset;
old_offset = be64_to_cpu(l2_table[l2_index + i]);
/*
* Make sure that a discarded area reads back as zeroes for v3 images
* (we cannot do it for v2 without actually writing a zero-filled
* buffer). We can skip the operation if the cluster is already marked
* as zero, or if it's unallocated and we don't have a backing file.
*
* TODO We might want to use bdrv_get_block_status(bs) here, but we're
* holding s->lock, so that doesn't work today.
*/
if (old_offset & QCOW_OFLAG_ZERO) {
continue;
}
if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
if ((old_offset & L2E_OFFSET_MASK) == 0) {
continue;
}
/* First remove L2 entries */
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
if (s->qcow_version >= 3) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
} else {
l2_table[l2_index + i] = cpu_to_be64(0);
}
l2_table[l2_index + i] = cpu_to_be64(0);
/* Then decrease the refcount */
qcow2_free_any_clusters(bs, old_offset, 1, type);
qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
@@ -1402,7 +1361,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
}
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type)
int nb_sectors)
{
BDRVQcowState *s = bs->opaque;
uint64_t end_offset;
@@ -1413,7 +1372,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
/* Round start up and end down */
offset = align_offset(offset, s->cluster_size);
end_offset = start_of_cluster(s, end_offset);
end_offset &= ~(s->cluster_size - 1);
if (offset > end_offset) {
return 0;
@@ -1425,7 +1384,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
/* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) {
ret = discard_single_l2(bs, offset, nb_clusters, type);
ret = discard_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
goto fail;
}
@@ -1520,255 +1479,3 @@ fail:
return ret;
}
/*
* Expands all zero clusters in a specific L1 table (or deallocates them, for
* non-backed non-pre-allocated zero clusters).
*
* expanded_clusters is a bitmap where every bit corresponds to one cluster in
* the image file; a bit gets set if the corresponding cluster has been used for
* zero expansion (i.e., has been filled with zeroes and is referenced from an
* L2 table). nb_clusters contains the total cluster count of the image file,
* i.e., the number of bits in expanded_clusters.
*/
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
int l1_size, uint8_t **expanded_clusters,
uint64_t *nb_clusters)
{
BDRVQcowState *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
uint64_t *l2_table = NULL;
int ret;
int i, j;
if (!is_active_l1) {
/* inactive L2 tables require a buffer into which they can be loaded
* from disk */
l2_table = qemu_blockalign(bs, s->cluster_size);
}
for (i = 0; i < l1_size; i++) {
uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
bool l2_dirty = false;
if (!l2_offset) {
/* unallocated */
continue;
}
if (is_active_l1) {
/* get active L2 tables from cache */
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
(void **)&l2_table);
} else {
/* load inactive L2 tables from disk */
ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
goto fail;
}
for (j = 0; j < s->l2_size; j++) {
uint64_t l2_entry = be64_to_cpu(l2_table[j]);
int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
int cluster_type = qcow2_get_cluster_type(l2_entry);
bool preallocated = offset != 0;
if (cluster_type == QCOW2_CLUSTER_NORMAL) {
cluster_index = offset >> s->cluster_bits;
assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
if ((*expanded_clusters)[cluster_index / 8] &
(1 << (cluster_index % 8))) {
/* Probably a shared L2 table; this cluster was a zero
* cluster which has been expanded, so its refcount
* most likely requires an update. */
ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
goto fail;
}
/* Since we just increased the refcount, the COPIED flag may
* no longer be set. */
l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
l2_dirty = true;
}
continue;
}
else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
continue;
}
if (!preallocated) {
if (!bs->backing_hd) {
/* not backed; therefore we can simply deallocate the
* cluster */
l2_table[j] = 0;
l2_dirty = true;
continue;
}
offset = qcow2_alloc_clusters(bs, s->cluster_size);
if (offset < 0) {
ret = offset;
goto fail;
}
}
ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
QCOW2_DISCARD_ALWAYS);
}
goto fail;
}
ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
s->cluster_sectors, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
QCOW2_DISCARD_ALWAYS);
}
goto fail;
}
l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
l2_dirty = true;
cluster_index = offset >> s->cluster_bits;
if (cluster_index >= *nb_clusters) {
uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
uint64_t new_bitmap_size;
/* The offset may lie beyond the old end of the underlying image
* file; this is only possible for growable files */
assert(bs->file->growable);
*nb_clusters = size_to_clusters(s, bs->file->total_sectors *
BDRV_SECTOR_SIZE);
new_bitmap_size = (*nb_clusters + 7) / 8;
*expanded_clusters = g_realloc(*expanded_clusters,
new_bitmap_size);
/* clear the newly allocated space */
memset(&(*expanded_clusters)[old_bitmap_size], 0,
new_bitmap_size - old_bitmap_size);
}
assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
(*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
}
if (is_active_l1) {
if (l2_dirty) {
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
qcow2_cache_depends_on_flush(s->l2_table_cache);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
if (ret < 0) {
l2_table = NULL;
goto fail;
}
} else {
if (l2_dirty) {
ret = qcow2_pre_write_overlap_check(bs,
QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
s->cluster_size);
if (ret < 0) {
goto fail;
}
ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
}
}
}
ret = 0;
fail:
if (l2_table) {
if (!is_active_l1) {
qemu_vfree(l2_table);
} else {
if (ret < 0) {
qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
} else {
ret = qcow2_cache_put(bs, s->l2_table_cache,
(void **)&l2_table);
}
}
}
return ret;
}
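The expanded_clusters bitmap documented above the function is a plain byte array with one bit per host cluster. A minimal standalone sketch of the bit operations the loop open-codes (the helper names here are hypothetical):
#include <glib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool cluster_bit_test(const uint8_t *bm, uint64_t i)
{
    return bm[i / 8] & (1 << (i % 8));
}

static void cluster_bit_set(uint8_t *bm, uint64_t i)
{
    bm[i / 8] |= 1 << (i % 8);
}

/* Grow the bitmap when the image file grows; the new tail is zeroed,
 * matching the g_realloc() + memset() sequence in the loop above. */
static uint8_t *cluster_bitmap_grow(uint8_t *bm, uint64_t old_bits,
                                    uint64_t new_bits)
{
    uint64_t old_size = (old_bits + 7) / 8;
    uint64_t new_size = (new_bits + 7) / 8;

    bm = g_realloc(bm, new_size);
    memset(bm + old_size, 0, new_size - old_size);
    return bm;
}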
/*
* For backed images, expands all zero clusters on the image. For non-backed
* images, deallocates all non-pre-allocated zero clusters (and claims the
* allocation for pre-allocated ones). This is important for downgrading to a
* qcow2 version which doesn't yet support metadata zero clusters.
*/
int qcow2_expand_zero_clusters(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table = NULL;
uint64_t nb_clusters;
uint8_t *expanded_clusters;
int ret;
int i, j;
nb_clusters = size_to_clusters(s, bs->file->total_sectors *
BDRV_SECTOR_SIZE);
expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&expanded_clusters, &nb_clusters);
if (ret < 0) {
goto fail;
}
/* Inactive L1 tables may point to active L2 tables - therefore it is
* necessary to flush the L2 table cache before trying to access the L2
* tables pointed to by inactive L1 entries (else we might try to expand
* zero clusters that have already been expanded); furthermore, it is also
* necessary to empty the L2 table cache, since it may contain tables which
* are now going to be modified directly on disk, bypassing the cache.
* qcow2_cache_empty() does both for us. */
ret = qcow2_cache_empty(bs, s->l2_table_cache);
if (ret < 0) {
goto fail;
}
for (i = 0; i < s->nb_snapshots; i++) {
int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
if (ret < 0) {
goto fail;
}
for (j = 0; j < s->snapshots[i].l1_size; j++) {
be64_to_cpus(&l1_table[j]);
}
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
&expanded_clusters, &nb_clusters);
if (ret < 0) {
goto fail;
}
}
ret = 0;
fail:
g_free(expanded_clusters);
g_free(l1_table);
return ret;
}

File diff suppressed because it is too large


@@ -26,6 +26,31 @@
#include "block/block_int.h"
#include "block/qcow2.h"
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
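On disk, each snapshot table entry is the fixed header above, the extra data, then the variable-length ID string and name, with the next entry aligned to 8 bytes. A sketch of the per-snapshot size computation that qcow2_write_snapshots() performs (assumes the two packed structs above are in scope; align_offset is restated here for self-containment and matches the qcow2.h helper for power-of-two n):
#include <stdint.h>
#include <string.h>

/* same rounding helper as in qcow2.h, restated for self-containment */
static int64_t align_offset(int64_t offset, int n)
{
    return (offset + n - 1) & ~(n - 1);
}

/* bytes consumed by one on-disk snapshot table entry */
static int64_t snapshot_entry_size(const char *id_str, const char *name)
{
    int64_t size = sizeof(QCowSnapshotHeader) + sizeof(QCowSnapshotExtraData)
                   + strlen(id_str) + strlen(name);

    return align_offset(size, 8);   /* the next entry is 8-byte aligned */
}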
void qcow2_free_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -116,14 +141,8 @@ int qcow2_read_snapshots(BlockDriverState *bs)
}
offset += name_size;
sn->name[name_size] = '\0';
if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset - s->snapshots_offset <= INT_MAX);
s->snapshots_size = offset - s->snapshots_offset;
return 0;
@@ -144,7 +163,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
uint32_t nb_snapshots;
uint64_t snapshots_offset;
} QEMU_PACKED header_data;
int64_t offset, snapshots_offset = 0;
int64_t offset, snapshots_offset;
int ret;
/* compute the size of the snapshots */
@@ -156,36 +175,20 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
offset += sizeof(extra);
offset += strlen(sn->id_str);
offset += strlen(sn->name);
if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset <= INT_MAX);
snapshots_size = offset;
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
offset = snapshots_offset;
if (offset < 0) {
ret = offset;
goto fail;
return offset;
}
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
return ret;
}
/* The snapshot list position has not yet been updated, so these clusters
* must indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size);
if (ret < 0) {
goto fail;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -208,7 +211,6 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
id_str_size = strlen(sn->id_str);
name_size = strlen(sn->name);
assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
@@ -267,10 +269,6 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
return 0;
fail:
if (snapshots_offset > 0) {
qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
QCOW2_DISCARD_ALWAYS);
}
return ret;
}
@@ -279,8 +277,7 @@ static void find_new_snapshot_id(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
int i;
unsigned long id, id_max = 0;
int i, id, id_max = 0;
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -288,50 +285,34 @@ static void find_new_snapshot_id(BlockDriverState *bs,
if (id > id_max)
id_max = id;
}
snprintf(id_str, id_str_size, "%lu", id_max + 1);
snprintf(id_str, id_str_size, "%d", id_max + 1);
}
static int find_snapshot_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name)
static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
{
BDRVQcowState *s = bs->opaque;
int i;
if (id && name) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id) &&
!strcmp(s->snapshots[i].name, name)) {
return i;
}
}
} else if (id) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id)) {
return i;
}
}
} else if (name) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].name, name)) {
return i;
}
}
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id_str))
return i;
}
return -1;
}
static int find_snapshot_by_id_or_name(BlockDriverState *bs,
const char *id_or_name)
static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
{
int ret;
BDRVQcowState *s = bs->opaque;
int i, ret;
ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
if (ret >= 0) {
ret = find_snapshot_by_id(bs, name);
if (ret >= 0)
return ret;
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].name, name))
return i;
}
return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
return -1;
}
/* if no id is provided, a new one is constructed */
@@ -345,10 +326,6 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
return -EFBIG;
}
memset(sn, 0, sizeof(*sn));
/* Generate an ID if it wasn't passed */
@@ -357,7 +334,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
/* Check that the ID is unique */
if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
return -EEXIST;
}
@@ -386,12 +363,6 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@@ -425,19 +396,11 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (ret < 0) {
g_free(s->snapshots);
s->snapshots = old_snapshot_list;
s->nb_snapshots--;
goto fail;
}
g_free(old_snapshot_list);
/* The VM state isn't needed any more in the active L1 table; in fact, it
* hurts by causing expensive COW for the next snapshot. */
qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
align_offset(sn->vm_state_size, s->cluster_size)
>> BDRV_SECTOR_BITS,
QCOW2_DISCARD_NEVER);
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -512,12 +475,6 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
s->l1_table_offset, cur_l1_bytes);
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
@@ -574,19 +531,15 @@ fail:
return ret;
}
int qcow2_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0) {
error_setg(errp, "Can't find the snapshot");
return -ENOENT;
}
sn = s->snapshots[snapshot_index];
@@ -598,8 +551,6 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
s->nb_snapshots--;
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to remove snapshot from snapshot list");
return ret;
}
@@ -617,7 +568,6 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
sn.l1_size, -1);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
return ret;
}
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
@@ -626,8 +576,6 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to update snapshot status in disk");
return ret;
}
@@ -669,10 +617,7 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
return s->nb_snapshots;
}
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
{
int i, snapshot_index;
BDRVQcowState *s = bs->opaque;
@@ -684,25 +629,18 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
assert(bs->read_only);
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
if (snapshot_index < 0) {
error_setg(errp,
"Can't find snapshot");
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
if (sn->l1_size > QCOW_MAX_L1_SIZE) {
error_setg(errp, "Snapshot L1 table too large");
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
new_l1_bytes = s->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
g_free(new_l1_table);
return ret;
}

File diff suppressed because it is too large


@@ -38,26 +38,13 @@
#define QCOW_CRYPT_AES 1
#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536
/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_REFTABLE_SIZE 0x800000
/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_L1_SIZE 0x2000000
/* Allow for an average of 1k per snapshot table entry; this should be plenty
* of space for snapshot names and IDs */
#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
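The size figures in these comments follow directly from the table geometry: a 32 MB L1 table holds 4 M eight-byte entries, and each L2 table maps (cluster_size / 8) * cluster_size bytes of data. A small program to check the arithmetic (assumes power-of-two cluster sizes):
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define QCOW_MAX_L1_SIZE 0x2000000   /* 32 MB, as above */

int main(void)
{
    int cluster_bits[] = { 9, 16, 21 };   /* 512 B, 64 KB, 2 MB clusters */

    for (int i = 0; i < 3; i++) {
        uint64_t cluster_size = 1ULL << cluster_bits[i];
        uint64_t l1_entries   = QCOW_MAX_L1_SIZE / 8;          /* 4 M entries */
        uint64_t bytes_per_l2 = (cluster_size / 8) * cluster_size;

        /* prints 128 GB, 2 PB and 2 EB respectively */
        printf("cluster %8" PRIu64 " -> max image %" PRIu64 " bytes\n",
               cluster_size, l1_entries * bytes_per_l2);
    }
    return 0;
}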
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1ULL << 63)
#define QCOW_OFLAG_COPIED (1LL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1ULL << 0)
#define QCOW_OFLAG_ZERO (1LL << 0)
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
@@ -76,15 +63,6 @@
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
#define QCOW2_OPT_OVERLAP "overlap-check"
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table"
#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block"
#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
typedef struct QCowHeader {
uint32_t magic;
@@ -108,33 +86,7 @@ typedef struct QCowHeader {
uint32_t refcount_order;
uint32_t header_length;
} QEMU_PACKED QCowHeader;
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
} QCowHeader;
typedef struct QCowSnapshot {
uint64_t l1_table_offset;
@@ -167,12 +119,9 @@ enum {
/* Incompatible feature bits */
enum {
QCOW2_INCOMPAT_DIRTY_BITNR = 0,
QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
| QCOW2_INCOMPAT_CORRUPT,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
};
/* Compatible feature bits */
@@ -230,8 +179,8 @@ typedef struct BDRVQcowState {
uint64_t *refcount_table;
uint64_t refcount_table_offset;
uint32_t refcount_table_size;
uint64_t free_cluster_index;
uint64_t free_byte_offset;
int64_t free_cluster_index;
int64_t free_byte_offset;
CoMutex lock;
@@ -241,18 +190,15 @@ typedef struct BDRVQcowState {
AES_KEY aes_decrypt_key;
uint64_t snapshots_offset;
int snapshots_size;
unsigned int nb_snapshots;
int nb_snapshots;
QCowSnapshot *snapshots;
int flags;
int qcow_version;
bool use_lazy_refcounts;
int refcount_order;
bool discard_passthrough[QCOW2_DISCARD_MAX];
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
uint64_t incompatible_features;
uint64_t compatible_features;
uint64_t autoclear_features;
@@ -340,50 +286,11 @@ enum {
QCOW2_CLUSTER_ZERO
};
typedef enum QCow2MetadataOverlap {
QCOW2_OL_MAIN_HEADER_BITNR = 0,
QCOW2_OL_ACTIVE_L1_BITNR = 1,
QCOW2_OL_ACTIVE_L2_BITNR = 2,
QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
QCOW2_OL_INACTIVE_L1_BITNR = 6,
QCOW2_OL_INACTIVE_L2_BITNR = 7,
QCOW2_OL_MAX_BITNR = 8,
QCOW2_OL_NONE = 0,
QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
/* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
* reads. */
QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
} QCow2MetadataOverlap;
/* Perform all overlap checks which can be done in constant time */
#define QCOW2_OL_CONSTANT \
(QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \
QCOW2_OL_SNAPSHOT_TABLE)
/* Perform all overlap checks which don't require disk access */
#define QCOW2_OL_CACHED \
(QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \
QCOW2_OL_INACTIVE_L1)
/* Perform all overlap checks */
#define QCOW2_OL_ALL \
(QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
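These masks let a caller pick a cost/coverage tradeoff for metadata overlap checking; as the NOTE says, only the inactive-L2 check costs bdrv reads. A hedged sketch of a caller that skips exactly that check (overlap_check_fast is a hypothetical wrapper; the ign argument of qcow2_pre_write_overlap_check() names the checks to skip, as its callers elsewhere in this diff suggest):
/* Check a pending write against all metadata types except inactive L2
 * tables, whose check (per the NOTE above) costs bdrv reads. */
static int overlap_check_fast(BlockDriverState *bs, int64_t offset,
                              int64_t size)
{
    return qcow2_pre_write_overlap_check(bs, QCOW2_OL_INACTIVE_L2,
                                         offset, size);
}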
#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
{
@@ -417,16 +324,6 @@ static inline int64_t align_offset(int64_t offset, int n)
return offset;
}
static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
}
static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
{
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
static inline int qcow2_get_cluster_type(uint64_t l2_entry)
{
if (l2_entry & QCOW_OFLAG_COMPRESSED) {
@@ -464,18 +361,13 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
int addend, enum qcow2_discard_type type);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
@@ -493,15 +385,9 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
void qcow2_process_discards(BlockDriverState *bs, int ret);
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
@@ -512,30 +398,22 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m);
int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type);
int nb_sectors);
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
int qcow2_expand_zero_clusters(BlockDriverState *bs);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
int qcow2_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp);
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp);
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
void qcow2_free_snapshots(BlockDriverState *bs);
int qcow2_read_snapshots(BlockDriverState *bs);
@@ -550,8 +428,6 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,


@@ -353,10 +353,10 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
{
trace_qed_start_need_check_timer(s);
/* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for
/* Use vm_clock so we don't alter the image file while suspended for
* migration.
*/
timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
}
@@ -364,7 +364,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
static void qed_cancel_need_check_timer(BDRVQEDState *s)
{
trace_qed_cancel_need_check_timer(s);
timer_del(s->need_check_timer);
qemu_del_timer(s->need_check_timer);
}
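This and the neighbouring QED hunks swap the timer API: one side uses the legacy vm_clock interface, the other the QEMU_CLOCK_VIRTUAL one. As a quick reference, the correspondence implied by these hunks (every pair appears verbatim in the diff):
/* legacy API (vm_clock)                   new API (QEMU_CLOCK_VIRTUAL)
 *
 * qemu_get_clock_ns(vm_clock)         ->  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)
 * qemu_new_timer_ns(vm_clock, cb, p)  ->  timer_new_ns(QEMU_CLOCK_VIRTUAL, cb, p)
 * qemu_mod_timer(t, expire_ns)        ->  timer_mod(t, expire_ns)
 * qemu_del_timer(t)                   ->  timer_del(t)
 * qemu_free_timer(t)                  ->  timer_free(t)
 */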
static void bdrv_qed_rebind(BlockDriverState *bs)
@@ -373,8 +373,7 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
s->bs = bs;
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -391,15 +390,14 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
qed_header_le_to_cpu(&le_header, &s->header);
if (s->header.magic != QED_MAGIC) {
error_setg(errp, "Image not in QED format");
return -EINVAL;
return -EMEDIUMTYPE;
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
char buf[64];
snprintf(buf, sizeof(buf), "%" PRIx64,
s->header.features & ~QED_FEATURE_MASK);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "QED", buf);
return -ENOTSUP;
}
@@ -496,7 +494,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
}
s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
s->need_check_timer = qemu_new_timer_ns(vm_clock,
qed_need_check_timer_cb, s);
out:
@@ -507,15 +505,6 @@ out:
return ret;
}
static int bdrv_qed_refresh_limits(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
return 0;
}
/* We have nothing to do for QED reopen, stubs just return
* success */
static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -529,7 +518,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
BDRVQEDState *s = bs->opaque;
qed_cancel_need_check_timer(s);
timer_free(s->need_check_timer);
qemu_free_timer(s->need_check_timer);
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
@@ -546,8 +535,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
static int qed_create(const char *filename, uint32_t cluster_size,
uint64_t image_size, uint32_t table_size,
const char *backing_file, const char *backing_fmt,
Error **errp)
const char *backing_file, const char *backing_fmt)
{
QEDHeader header = {
.magic = QED_MAGIC,
@@ -562,22 +550,16 @@ static int qed_create(const char *filename, uint32_t cluster_size,
QEDHeader le_header;
uint8_t *l1_table = NULL;
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
BlockDriverState *bs;
BlockDriverState *bs = NULL;
ret = bdrv_create_file(filename, NULL, &local_err);
ret = bdrv_create_file(filename, NULL);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
&local_err);
ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -617,12 +599,11 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
bdrv_unref(bs);
bdrv_delete(bs);
return ret;
}
static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
{
uint64_t image_size = 0;
uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
@@ -667,70 +648,54 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
}
return qed_create(filename, cluster_size, image_size, table_size,
backing_file, backing_fmt, errp);
backing_file, backing_fmt);
}
typedef struct {
BlockDriverState *bs;
Coroutine *co;
uint64_t pos;
int64_t status;
int is_allocated;
int *pnum;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDIsAllocatedCB *cb = opaque;
BDRVQEDState *s = cb->bs->opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
switch (ret) {
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
break;
case QED_CLUSTER_L2:
case QED_CLUSTER_L1:
cb->status = 0;
break;
default:
assert(ret < 0);
cb->status = ret;
break;
}
cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
}
}
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
QEDIsAllocatedCB cb = {
.bs = bs,
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.is_allocated = -1,
.pnum = pnum,
};
QEDRequest request = { .l2_table = NULL };
qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
/* Now sleep if the callback wasn't invoked immediately */
while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
while (cb.is_allocated == -1) {
cb.co = qemu_coroutine_self();
qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
return cb.status;
return cb.is_allocated;
}
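The function above shows a common QEMU idiom for driving a callback-based API from a coroutine: start the asynchronous operation, and if the callback has not fired yet, record the current coroutine and yield until the callback re-enters it. A stripped-down sketch of that pattern, assuming QEMU's coroutine primitives as used above (all other names here are hypothetical):
#include <stdint.h>

#define WAIT_SENTINEL INT64_MIN   /* a value the callback can never produce */

typedef struct {
    Coroutine *co;      /* NULL while the callback may still run inline */
    int64_t status;     /* WAIT_SENTINEL until the callback fills it in */
} WaitState;

static void completion_cb(void *opaque, int64_t result)
{
    WaitState *w = opaque;

    w->status = result;
    if (w->co) {
        /* the requester already yielded; re-enter its coroutine */
        qemu_coroutine_enter(w->co, NULL);
    }
}

static int64_t coroutine_fn wait_for_result(WaitState *w)
{
    /* if the callback ran synchronously, status is already set */
    while (w->status == WAIT_SENTINEL) {
        w->co = qemu_coroutine_self();
        qemu_coroutine_yield();
    }
    return w->status;
}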
static int bdrv_qed_make_empty(BlockDriverState *bs)
{
return -ENOTSUP;
}
static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
@@ -1403,8 +1368,7 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BdrvRequestFlags flags)
int nb_sectors)
{
BlockDriverAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
@@ -1481,8 +1445,6 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
bdi->unallocated_blocks_are_zero = true;
bdi->can_write_zeroes_with_unmap = true;
return 0;
}
@@ -1558,31 +1520,13 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
return ret;
}
static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
Error *local_err = NULL;
int ret;
bdrv_qed_close(bs);
bdrv_invalidate_cache(bs->file, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
error_setg(errp, "Could not reopen qed layer: %s",
error_get_pretty(local_err));
error_free(local_err);
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qed layer");
return;
}
bdrv_qed_open(bs, NULL, bs->open_flags);
}
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
@@ -1631,14 +1575,14 @@ static BlockDriver bdrv_qed = {
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_refresh_limits = bdrv_qed_refresh_limits,
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,


@@ -100,7 +100,7 @@ typedef struct {
/* if (features & QED_F_BACKING_FILE) */
uint32_t backing_filename_offset; /* in bytes from start of header */
uint32_t backing_filename_size; /* in bytes */
} QEMU_PACKED QEDHeader;
} QEDHeader;
typedef struct {
uint64_t offsets[0]; /* in bytes */


@@ -1,877 +0,0 @@
/*
* Quorum Block filter
*
* Copyright (C) 2012-2014 Nodalink, EURL.
*
* Author:
* Benoît Canet <benoit.canet@irqsave.net>
*
* Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
* and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>
#include "block/block_int.h"
#include "qapi/qmp/qjson.h"
#define HASH_LENGTH 32
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
char h[HASH_LENGTH]; /* SHA-256 hash */
int64_t l; /* simpler 64 bits hash */
} QuorumVoteValue;
/* A vote item */
typedef struct QuorumVoteItem {
int index;
QLIST_ENTRY(QuorumVoteItem) next;
} QuorumVoteItem;
/* This structure is a vote version: the set of votes sharing the same vote
* value. The individual votes are tracked in the items field, and their
* count is vote_count.
*/
typedef struct QuorumVoteVersion {
QuorumVoteValue value;
int index;
int vote_count;
QLIST_HEAD(, QuorumVoteItem) items;
QLIST_ENTRY(QuorumVoteVersion) next;
} QuorumVoteVersion;
/* this structure holds a group of vote versions together */
typedef struct QuorumVotes {
QLIST_HEAD(, QuorumVoteVersion) vote_list;
bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
} QuorumVotes;
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
BlockDriverState **bs; /* children BlockDriverStates */
int num_children; /* children count */
int threshold; /* if fewer than threshold children returned the
* same read result, a quorum error occurs.
*/
bool is_blkverify; /* true if the driver is in blkverify mode
* Writes are mirrored on two children devices.
* On reads the two children devices' contents are
* compared and if a difference is spotted its
* location is printed and the code aborts.
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
/* Quorum creates one instance of the following structure per operation it
* performs on its children.
* So for each read/write operation coming from the upper layer there will be
* num_children QuorumChildRequest instances.
*/
typedef struct QuorumChildRequest {
BlockDriverAIOCB *aiocb;
QEMUIOVector qiov;
uint8_t *buf;
int ret;
QuorumAIOCB *parent;
} QuorumChildRequest;
/* Quorum uses the following structure to track the progress of each read/write
* operation received from the upper layer.
* It holds pointers to the QuorumChildRequest instances used to perform the
* operation on each child and to track overall progress.
*/
struct QuorumAIOCB {
BlockDriverAIOCB common;
/* Request metadata */
uint64_t sector_num;
int nb_sectors;
QEMUIOVector *qiov; /* calling IOV */
QuorumChildRequest *qcrs; /* individual child requests */
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
QuorumVotes votes;
bool is_read;
int vote_ret;
};
static void quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
BDRVQuorumState *s = acb->common.bs->opaque;
int i;
/* cancel all callbacks */
for (i = 0; i < s->num_children; i++) {
bdrv_aio_cancel(acb->qcrs[i].aiocb);
}
g_free(acb->qcrs);
qemu_aio_release(acb);
}
static AIOCBInfo quorum_aiocb_info = {
.aiocb_size = sizeof(QuorumAIOCB),
.cancel = quorum_aio_cancel,
};
static void quorum_aio_finalize(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
int i, ret = 0;
if (acb->vote_ret) {
ret = acb->vote_ret;
}
acb->common.cb(acb->common.opaque, ret);
if (acb->is_read) {
for (i = 0; i < s->num_children; i++) {
qemu_vfree(acb->qcrs[i].buf);
qemu_iovec_destroy(&acb->qcrs[i].qiov);
}
}
g_free(acb->qcrs);
qemu_aio_release(acb);
}
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
{
return !memcmp(a->h, b->h, HASH_LENGTH);
}
static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
{
return a->l == b->l;
}
static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t sector_num,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
int i;
acb->common.bs->opaque = s;
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
acb->qiov = qiov;
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
acb->vote_ret = 0;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = NULL;
acb->qcrs[i].ret = 0;
acb->qcrs[i].parent = acb;
}
return acb;
}
static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
{
QObject *data;
assert(node_name);
data = qobject_from_jsonf("{ 'node-name': %s"
", 'sector-num': %" PRId64
", 'sectors-count': %d }",
node_name, acb->sector_num, acb->nb_sectors);
if (ret < 0) {
QDict *dict = qobject_to_qdict(data);
qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
}
monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
qobject_decref(data);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
QObject *data;
const char *reference = acb->common.bs->device_name[0] ?
acb->common.bs->device_name :
acb->common.bs->node_name;
data = qobject_from_jsonf("{ 'reference': %s"
", 'sector-num': %" PRId64
", 'sectors-count': %d }",
reference, acb->sector_num, acb->nb_sectors);
monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
qobject_decref(data);
}
static int quorum_vote_error(QuorumAIOCB *acb);
static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
if (acb->success_count < s->threshold) {
acb->vote_ret = quorum_vote_error(acb);
quorum_report_failure(acb);
return true;
}
return false;
}
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
sacb->ret = ret;
acb->count++;
if (ret == 0) {
acb->success_count++;
} else {
quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
}
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
return;
}
/* Do the vote on read */
if (acb->is_read) {
quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
quorum_aio_finalize(acb);
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
QuorumAIOCB *acb,
QuorumVoteValue *value)
{
QuorumVoteVersion *version;
QuorumVoteItem *item;
QLIST_FOREACH(version, &acb->votes.vote_list, next) {
if (acb->votes.compare(&version->value, value)) {
continue;
}
QLIST_FOREACH(item, &version->items, next) {
quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
}
}
}
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
assert(dest->niov == source->niov);
assert(dest->size == source->size);
for (i = 0; i < source->niov; i++) {
assert(dest->iov[i].iov_len == source->iov[i].iov_len);
memcpy(dest->iov[i].iov_base,
source->iov[i].iov_base,
source->iov[i].iov_len);
}
}
static void quorum_count_vote(QuorumVotes *votes,
QuorumVoteValue *value,
int index)
{
QuorumVoteVersion *v = NULL, *version = NULL;
QuorumVoteItem *item;
/* look if we have something with this hash */
QLIST_FOREACH(v, &votes->vote_list, next) {
if (votes->compare(&v->value, value)) {
version = v;
break;
}
}
/* This version is not yet in the list; add it */
if (!version) {
version = g_new0(QuorumVoteVersion, 1);
QLIST_INIT(&version->items);
memcpy(&version->value, value, sizeof(version->value));
version->index = index;
version->vote_count = 0;
QLIST_INSERT_HEAD(&votes->vote_list, version, next);
}
version->vote_count++;
item = g_new0(QuorumVoteItem, 1);
item->index = index;
QLIST_INSERT_HEAD(&version->items, item, next);
}
static void quorum_free_vote_list(QuorumVotes *votes)
{
QuorumVoteVersion *version, *next_version;
QuorumVoteItem *item, *next_item;
QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
QLIST_REMOVE(version, next);
QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
QLIST_REMOVE(item, next);
g_free(item);
}
g_free(version);
}
}
static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
{
int j, ret;
gnutls_hash_hd_t dig;
QEMUIOVector *qiov = &acb->qcrs[i].qiov;
ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
if (ret < 0) {
return ret;
}
for (j = 0; j < qiov->niov; j++) {
ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
if (ret < 0) {
break;
}
}
gnutls_hash_deinit(dig, (void *) hash);
return ret;
}
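quorum_compute_hash() above streams every iovec element into one incremental SHA-256. For a single flat buffer, the same three gnutls calls reduce to the following sketch (sha256_buffer is a hypothetical helper; HASH_LENGTH is the 32-byte digest size defined above):
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>
#include <stddef.h>

static int sha256_buffer(const void *buf, size_t len, char hash[HASH_LENGTH])
{
    gnutls_hash_hd_t dig;
    int ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);

    if (ret < 0) {
        return ret;
    }
    ret = gnutls_hash(dig, buf, len);
    gnutls_hash_deinit(dig, hash);   /* finalizes the digest and frees dig */
    return ret;
}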
static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
{
int max = 0;
QuorumVoteVersion *candidate, *winner = NULL;
QLIST_FOREACH(candidate, &votes->vote_list, next) {
if (candidate->vote_count > max) {
max = candidate->vote_count;
winner = candidate;
}
}
return winner;
}
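Taken together, the helpers above implement the voting scheme: each child contributes a value, equal values pool into one version, and the version with the highest vote_count wins. A usage sketch with 64-bit values, mirroring quorum_vote_error() (vote_demo is a hypothetical function; assumes errno.h for EIO):
#include <errno.h>

static int64_t vote_demo(void)
{
    QuorumVotes votes;
    QuorumVoteValue v;
    QuorumVoteVersion *winner;
    int64_t result;

    QLIST_INIT(&votes.vote_list);
    votes.compare = quorum_64bits_compare;

    /* three children report: two agree on 0, one fails with -EIO */
    v.l = 0;    quorum_count_vote(&votes, &v, 0);
    v.l = -EIO; quorum_count_vote(&votes, &v, 1);
    v.l = 0;    quorum_count_vote(&votes, &v, 2);

    winner = quorum_get_vote_winner(&votes);
    result = winner->value.l;   /* 0: the majority verdict wins */

    quorum_free_vote_list(&votes);
    return result;
}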
/* qemu_iovec_compare is handy for blkverify mode because it returns the first
* differing byte location. However, it is hand-coded to compare vectors one
* byte at a time, so it does not benefit from the libc SIMD optimizations.
* quorum_iovec_compare is written for speed and should be used in the
* non-blkverify mode of quorum.
*/
static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
{
int i;
int result;
assert(a->niov == b->niov);
for (i = 0; i < a->niov; i++) {
assert(a->iov[i].iov_len == b->iov[i].iov_len);
result = memcmp(a->iov[i].iov_base,
b->iov[i].iov_base,
a->iov[i].iov_len);
if (result) {
return false;
}
}
return true;
}
static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
acb->sector_num, acb->nb_sectors);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
exit(1);
}
static bool quorum_compare(QuorumAIOCB *acb,
QEMUIOVector *a,
QEMUIOVector *b)
{
BDRVQuorumState *s = acb->common.bs->opaque;
ssize_t offset;
/* This driver will replace blkverify in this particular case */
if (s->is_blkverify) {
offset = qemu_iovec_compare(a, b);
if (offset != -1) {
quorum_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num +
(uint64_t)(offset / BDRV_SECTOR_SIZE));
}
return true;
}
return quorum_iovec_compare(a, b);
}
/* Do a vote to get the error code */
static int quorum_vote_error(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
int i, ret = 0;
bool error = false;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
ret = acb->qcrs[i].ret;
if (ret) {
error = true;
result_value.l = ret;
quorum_count_vote(&error_votes, &result_value, i);
}
}
if (error) {
winner = quorum_get_vote_winner(&error_votes);
ret = winner->value.l;
}
quorum_free_vote_list(&error_votes);
return ret;
}
static void quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
return;
}
/* get the index of the first successful read */
for (i = 0; i < s->num_children; i++) {
if (!acb->qcrs[i].ret) {
break;
}
}
assert(i < s->num_children);
/* compare this read with all other successful reads stopping at quorum
* failure
*/
for (j = i + 1; j < s->num_children; j++) {
if (acb->qcrs[j].ret) {
continue;
}
quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov);
if (!quorum) {
break;
}
}
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
return;
}
/* compute hashes for each successful read, also store indexes */
for (i = 0; i < s->num_children; i++) {
if (acb->qcrs[i].ret) {
continue;
}
ret = quorum_compute_hash(acb, i, &hash);
/* if ever the hash computation failed */
if (ret < 0) {
acb->vote_ret = ret;
goto free_exit;
}
quorum_count_vote(&acb->votes, &hash, i);
}
/* vote to select the most represented version */
winner = quorum_get_vote_winner(&acb->votes);
/* if the winner count is smaller than threshold the read fails */
if (winner->vote_count < s->threshold) {
quorum_report_failure(acb);
acb->vote_ret = -EIO;
goto free_exit;
}
/* we have a winner: copy it */
quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov);
/* some versions are bad; report them */
quorum_report_bad_versions(s, acb, &winner->value);
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
nb_sectors, cb, opaque);
int i;
acb->is_read = true;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
}
static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
cb, opaque);
int i;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
return &acb->common;
}
static int64_t quorum_getlength(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int64_t result;
int i;
/* check that all files have the same length */
result = bdrv_getlength(s->bs[0]);
if (result < 0) {
return result;
}
for (i = 1; i < s->num_children; i++) {
int64_t value = bdrv_getlength(s->bs[i]);
if (value < 0) {
return value;
}
if (value != result) {
return -EIO;
}
}
return result;
}
static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
{
BDRVQuorumState *s = bs->opaque;
Error *local_err = NULL;
int i;
for (i = 0; i < s->num_children; i++) {
bdrv_invalidate_cache(s->bs[i], &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
}
static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
int i;
int result = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->bs[i]);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
}
winner = quorum_get_vote_winner(&error_votes);
result = winner->value.l;
quorum_free_vote_list(&error_votes);
return result;
}
static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
candidate);
if (perm) {
return true;
}
}
return false;
}
static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
{
if (threshold < 1) {
error_set(errp, QERR_INVALID_PARAMETER_VALUE,
"vote-threshold", "value >= 1");
return -ERANGE;
}
if (threshold > num_children) {
error_setg(errp, "threshold may not exceed children count");
return -ERANGE;
}
return 0;
}
static QemuOptsList quorum_runtime_opts = {
.name = "quorum",
.head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
.desc = {
{
.name = QUORUM_OPT_VOTE_THRESHOLD,
.type = QEMU_OPT_NUMBER,
.help = "The number of vote needed for reaching quorum",
},
{
.name = QUORUM_OPT_BLKVERIFY,
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
{ /* end of list */ }
},
};
static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQuorumState *s = bs->opaque;
Error *local_err = NULL;
QemuOpts *opts;
bool *opened;
QDict *sub = NULL;
QList *list = NULL;
const QListEntry *lentry;
int i;
int ret = 0;
qdict_flatten(options);
qdict_extract_subqdict(options, &sub, "children.");
qdict_array_split(sub, &list);
if (qdict_size(sub)) {
error_setg(&local_err, "Invalid option children.%s",
qdict_first(sub)->key);
ret = -EINVAL;
goto exit;
}
/* count how many different children are present */
s->num_children = qlist_size(list);
if (s->num_children < 2) {
error_setg(&local_err,
"Number of provided children must be greater than 1");
ret = -EINVAL;
goto exit;
}
opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
ret = -EINVAL;
goto exit;
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
/* and validate it against s->num_children */
ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
if (ret < 0) {
goto exit;
}
/* is the driver in blkverify mode */
if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
s->num_children == 2 && s->threshold == 2) {
s->is_blkverify = true;
} else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
fprintf(stderr, "blkverify mode is set by setting blkverify=on "
"and using two files with vote_threshold=2\n");
}
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
for (i = 0, lentry = qlist_first(list); lentry;
lentry = qlist_next(lentry), i++) {
QDict *d;
QString *string;
switch (qobject_type(lentry->value)) {
/* List of options */
case QTYPE_QDICT:
d = qobject_to_qdict(lentry->value);
QINCREF(d);
ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL,
&local_err);
break;
/* QMP reference */
case QTYPE_QSTRING:
string = qobject_to_qstring(lentry->value);
ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
flags, NULL, &local_err);
break;
default:
error_setg(&local_err, "Specification of child block device %i "
"is invalid", i);
ret = -EINVAL;
}
if (ret < 0) {
goto close_exit;
}
opened[i] = true;
}
g_free(opened);
goto exit;
close_exit:
/* cleanup on error */
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
continue;
}
bdrv_unref(s->bs[i]);
}
g_free(s->bs);
g_free(opened);
exit:
/* propagate error */
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
}
QDECREF(list);
QDECREF(sub);
return ret;
}
static void quorum_close(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
bdrv_unref(s->bs[i]);
}
g_free(s->bs);
}
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
.bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_getlength = quorum_getlength,
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
.bdrv_invalidate_cache = quorum_invalidate_cache,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
static void bdrv_quorum_init(void)
{
bdrv_register(&bdrv_quorum);
}
block_init(bdrv_quorum_init);


@@ -21,10 +21,9 @@
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008
#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_WRITE_ZEROES 0x0020
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
QEMU_AIO_DISCARD)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000


@@ -127,8 +127,6 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
size_t buf_align;
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -141,11 +139,9 @@ typedef struct BDRVRawState {
void *aio_ctx;
#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
bool is_xfs : 1;
#endif
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
bool has_discard : 1;
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -215,76 +211,6 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
char *buf;
unsigned int sector_size;
/* For /dev/sg devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
if (bs->sg || !(s->open_flags & O_DIRECT)) {
bs->request_alignment = 1;
s->buf_align = 1;
return;
}
/* Try a few ioctls to get the right size */
bs->request_alignment = 0;
s->buf_align = 0;
#ifdef BLKSSZGET
if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DKIOCGETBLOCKSIZE
if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DIOCGSECTORSIZE
if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
bs->request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
}
}
#endif
/* If we could not get the sizes so far, we can only guess them */
if (!s->buf_align) {
size_t align;
buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
s->buf_align = align;
break;
}
}
qemu_vfree(buf);
}
if (!bs->request_alignment) {
size_t align;
buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
if (pread(s->fd, buf, align, 0) >= 0) {
bs->request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
}
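raw_probe_alignment() above queries the kernel first and only falls back to trial pread() probing when no ioctl answers. In isolation, the core BLKSSZGET probe looks roughly like this (Linux-only sketch; the device path is hypothetical and error handling is trimmed):
#include <fcntl.h>
#include <linux/fs.h>      /* BLKSSZGET */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    unsigned int sector_size = 0;
    int fd = open("/dev/sda", O_RDONLY);   /* hypothetical device path */

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, BLKSSZGET, &sector_size) == 0) {
        /* the logical block size is the minimum O_DIRECT alignment */
        printf("sector size: %u bytes\n", sector_size);
    }
    close(fd);
    return 0;
}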
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -336,17 +262,6 @@ error:
}
#endif
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -361,19 +276,19 @@ static QemuOptsList raw_runtime_opts = {
};
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags, Error **errp)
int bdrv_flags, int open_flags)
{
BDRVRawState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int fd, ret;
struct stat st;
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&raw_runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
@@ -382,7 +297,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
ret = raw_normalize_devicepath(&filename);
if (ret != 0) {
error_setg_errno(errp, -ret, "Could not normalize device path");
goto fail;
}
@@ -404,43 +318,14 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
qemu_close(fd);
ret = -errno;
error_setg_errno(errp, -ret, "Could not set AIO state");
goto fail;
}
#endif
s->has_discard = true;
s->has_write_zeroes = true;
if (fstat(s->fd, &st) < 0) {
error_setg_errno(errp, errno, "Could not stat file");
goto fail;
}
if (S_ISREG(st.st_mode)) {
s->discard_zeroes = true;
}
if (S_ISBLK(st.st_mode)) {
#ifdef BLKDISCARDZEROES
unsigned int arg;
if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
s->discard_zeroes = true;
}
#endif
#ifdef __linux__
/* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
* not rely on the contents of discarded blocks unless using O_DIRECT.
* Same for BLKZEROOUT.
*/
if (!(bs->open_flags & BDRV_O_NOCACHE)) {
s->discard_zeroes = false;
s->has_write_zeroes = false;
}
#endif
}
s->has_discard = 1;
#ifdef CONFIG_XFS
if (platform_test_xfs_fd(s->fd)) {
s->is_xfs = true;
s->is_xfs = 1;
}
#endif
@@ -450,19 +335,12 @@ fail:
return ret;
}
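/* Aside: how discard_zeroes is decided in the open path above, as a
 * hedged sketch.  Regular files read back zeroes after hole punching,
 * so the flag is set unconditionally; block devices must advertise it
 * via the BLKDISCARDZEROES ioctl; and without O_DIRECT both flags are
 * cleared again because the page cache may keep stale data (observed
 * on Linux 3.10).  The ioctl below is the real Linux interface; error
 * handling is trimmed for brevity. */
#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/fs.h>                   /* BLKDISCARDZEROES */

static bool blkdev_discard_zeroes_sketch(int fd)
{
    unsigned int arg = 0;
    return ioctl(fd, BLKDISCARDZEROES, &arg) == 0 && arg != 0;
}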
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_FILE;
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
return raw_open_common(bs, options, flags, 0);
}
static int raw_reopen_prepare(BDRVReopenState *state,
@@ -487,7 +365,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
* won't override aio_ctx if aio_ctx is non-NULL */
if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
error_setg(errp, "Could not set AIO state");
return -1;
}
#endif
@@ -539,13 +416,13 @@ static int raw_reopen_prepare(BDRVReopenState *state,
assert(!(raw_s->open_flags & O_CREAT));
raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
if (raw_s->fd == -1) {
error_setg_errno(errp, errno, "Could not reopen file");
ret = -1;
}
}
return ret;
}
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -581,15 +458,23 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
static int raw_refresh_limits(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
raw_probe_alignment(bs);
bs->bl.opt_mem_alignment = s->buf_align;
return 0;
}
/* XXX: use host sector size if necessary with:
#ifdef DIOCGSECTORSIZE
{
unsigned int sectorsize = 512;
if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
sectorsize > bufsize)
bufsize = sectorsize;
}
#endif
#ifdef CONFIG_COCOA
uint32_t blockSize = 512;
if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
bufsize = blockSize;
}
#endif
*/
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -780,23 +665,6 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
}
#ifdef CONFIG_XFS
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = offset;
fl.l_len = bytes;
if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
return -errno;
}
return 0;
}
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
@@ -815,49 +683,13 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
}
#endif
static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
if (s->has_write_zeroes == 0) {
return -ENOTSUP;
}
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKZEROOUT
do {
uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
} else {
#ifdef CONFIG_XFS
if (s->is_xfs) {
return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
}
#endif
}
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_write_zeroes = false;
ret = -ENOTSUP;
}
return ret;
}
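/* Aside: the errno cascade above latches has_write_zeroes off after the
 * first ENODEV/ENOSYS/EOPNOTSUPP/ENOTTY, so later requests return
 * -ENOTSUP immediately and the generic block layer can fall back to
 * writing explicit zero buffers.  Hedged caller-side sketch; the
 * fallback helper name is hypothetical: */
static ssize_t write_zeroes_with_fallback(RawPosixAIOData *aiocb)
{
    ssize_t ret = handle_aiocb_write_zeroes(aiocb);
    if (ret == -ENOTSUP) {
        /* no efficient zeroing available: emulate by writing zeroed
         * buffers (hypothetical stand-in for the generic layer) */
        ret = write_zeroes_by_hand(aiocb);
    }
    return ret;
}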
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
if (!s->has_discard) {
return -ENOTSUP;
if (s->has_discard == 0) {
return 0;
}
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
@@ -892,8 +724,8 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_discard = false;
ret = -ENOTSUP;
s->has_discard = 0;
ret = 0;
}
return ret;
}
@@ -935,9 +767,6 @@ static int aio_worker(void *arg)
case QEMU_AIO_DISCARD:
ret = handle_aiocb_discard(aiocb);
break;
case QEMU_AIO_WRITE_ZEROES:
ret = handle_aiocb_write_zeroes(aiocb);
break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -948,29 +777,6 @@ static int aio_worker(void *arg)
return ret;
}
static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int type)
{
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
acb->aio_fildes = fd;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit_co(sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1234,15 +1040,12 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int result = 0;
int64_t total_size = 0;
strstart(filename, "file:", &filename);
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -1255,15 +1058,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
0644);
if (fd < 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not create file");
} else {
if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
if (qemu_close(fd) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not close the new file");
}
}
return result;
@@ -1284,12 +1084,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
off_t start, data, hole;
int64_t ret;
int ret;
ret = fd_open(bs);
if (ret < 0) {
@@ -1297,7 +1097,6 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
start = sector_num * BDRV_SECTOR_SIZE;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
#ifdef CONFIG_FIEMAP
@@ -1315,7 +1114,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
*pnum = nb_sectors;
return ret;
return 1;
}
if (f.fm.fm_mapped_extents == 0) {
@@ -1328,9 +1127,6 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
} else {
data = f.fe.fe_logical;
hole = f.fe.fe_logical + f.fe.fe_length;
if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
ret |= BDRV_BLOCK_ZERO;
}
}
#elif defined SEEK_HOLE && defined SEEK_DATA
@@ -1345,7 +1141,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
/* Most likely EINVAL. Assume everything is allocated. */
*pnum = nb_sectors;
return ret;
return 1;
}
if (hole > start) {
@@ -1358,21 +1154,19 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
}
#else
data = 0;
hole = start + nb_sectors * BDRV_SECTOR_SIZE;
*pnum = nb_sectors;
return 1;
#endif
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */
*pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
return 1;
} else {
/* On a hole, compute sectors to the beginning of the next extent. */
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
ret &= ~BDRV_BLOCK_DATA;
ret |= BDRV_BLOCK_ZERO;
return 0;
}
return ret;
}
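/* Aside: the return value packs status flags into the low bits and the
 * host byte offset into the high bits; offsets are sector aligned, so
 * the two never collide.  A hedged decoding sketch using the real
 * BDRV_BLOCK_* flag names: */
int64_t st = raw_co_get_block_status(bs, sector_num, nb_sectors, &pnum);
if (st >= 0) {
    bool has_data   = st & BDRV_BLOCK_DATA;     /* sectors hold data */
    bool reads_zero = st & BDRV_BLOCK_ZERO;     /* sectors read as zero */
    if (st & BDRV_BLOCK_OFFSET_VALID) {
        /* mask off the low flag bits to recover the host offset */
        int64_t host_off = st & BDRV_BLOCK_OFFSET_MASK;
    }
}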
static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
@@ -1385,31 +1179,6 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD);
}
static int coroutine_fn raw_co_write_zeroes(
BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRawState *s = bs->opaque;
bdi->unallocated_blocks_are_zero = s->discard_zeroes;
bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
return 0;
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1423,9 +1192,7 @@ static BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe = NULL, /* no probe for protocols */
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
@@ -1433,18 +1200,15 @@ static BlockDriver bdrv_file = {
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1561,20 +1325,9 @@ static int check_hdev_writable(BDRVRawState *s)
return 0;
}
static void hdev_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_device:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
const char *filename = qdict_get_str(options, "filename");
@@ -1618,11 +1371,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
ret = raw_open_common(bs, options, flags, 0, &local_err);
ret = raw_open_common(bs, options, flags, 0);
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
@@ -1630,7 +1380,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
ret = check_hdev_writable(s);
if (ret < 0) {
raw_close(bs);
error_setg_errno(errp, -ret, "The device is not writable");
return ret;
}
}
@@ -1749,45 +1498,12 @@ static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
rc = fd_open(bs);
if (rc < 0) {
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
}
static int hdev_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int hdev_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int ret = 0;
struct stat stat_buf;
int64_t total_size = 0;
bool has_prefix;
/* This function is used by all three protocol block drivers and therefore
* any of these three prefixes may be given.
* The return value has to be stored somewhere, otherwise this is an error
* due to -Werror=unused-value. */
has_prefix =
strstart(filename, "host_device:", &filename) ||
strstart(filename, "host_cdrom:" , &filename) ||
strstart(filename, "host_floppy:", &filename);
(void)has_prefix;
/* Read out options */
while (options && options->name) {
@@ -1798,23 +1514,15 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options,
}
fd = qemu_open(filename, O_WRONLY | O_BINARY);
if (fd < 0) {
ret = -errno;
error_setg_errno(errp, -ret, "Could not open device");
return ret;
}
if (fd < 0)
return -errno;
if (fstat(fd, &stat_buf) < 0) {
if (fstat(fd, &stat_buf) < 0)
ret = -errno;
error_setg_errno(errp, -ret, "Could not stat device");
} else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
error_setg(errp,
"The given file is neither a block nor a character device");
else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
ret = -ENODEV;
} else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
error_setg(errp, "Device is too small");
else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
ret = -ENOSPC;
}
qemu_close(fd);
return ret;
@@ -1824,9 +1532,7 @@ static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = hdev_probe_device,
.bdrv_parse_filename = hdev_parse_filename,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
@@ -1834,17 +1540,14 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_co_write_zeroes = hdev_co_write_zeroes,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1856,32 +1559,17 @@ static BlockDriver bdrv_host_device = {
};
#ifdef __linux__
static void floppy_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_floppy:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int floppy_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
if (ret) {
if (local_err) {
error_propagate(errp, local_err);
}
ret = raw_open_common(bs, options, flags, O_NONBLOCK);
if (ret)
return ret;
}
/* close fd so that we can reopen it as needed */
qemu_close(s->fd);
@@ -1968,9 +1656,7 @@ static BlockDriver bdrv_host_floppy = {
.format_name = "host_floppy",
.protocol_name = "host_floppy",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = floppy_probe_device,
.bdrv_parse_filename = floppy_parse_filename,
.bdrv_file_open = floppy_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
@@ -1982,11 +1668,9 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1995,35 +1679,15 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_media_changed = floppy_media_changed,
.bdrv_eject = floppy_eject,
};
#endif
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_cdrom:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
#endif
#ifdef __linux__
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
return raw_open_common(bs, options, flags, O_NONBLOCK);
}
static int cdrom_probe_device(const char *filename)
@@ -2093,9 +1757,7 @@ static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
.protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = cdrom_probe_device,
.bdrv_parse_filename = cdrom_parse_filename,
.bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
@@ -2107,11 +1769,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -2127,22 +1787,16 @@ static BlockDriver bdrv_host_cdrom = {
#endif /* __linux__ */
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_CD;
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret) {
if (local_err) {
error_propagate(errp, local_err);
}
ret = raw_open_common(bs, options, flags, 0);
if (ret)
return ret;
}
/* make sure the door isn't locked at this time */
ioctl(s->fd, CDIOCALLOW);
@@ -2224,9 +1878,7 @@ static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
.protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = cdrom_probe_device,
.bdrv_parse_filename = cdrom_parse_filename,
.bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
@@ -2238,11 +1890,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,


@@ -85,7 +85,6 @@ static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
ret_count = 0;
}
if (ret_count != len) {
offset += ret_count;
break;
}
offset += len;
@@ -202,35 +201,6 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
DWORD sectorsPerCluster, freeClusters, totalClusters, count;
DISK_GEOMETRY_EX dg;
BOOL status;
if (s->type == FTYPE_CD) {
bs->request_alignment = 2048;
return;
}
if (s->type == FTYPE_HARDDISK) {
status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
NULL, 0, &dg, sizeof(dg), &count, NULL);
if (status != 0) {
bs->request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* try GetDiskFreeSpace too */
}
if (s->drive_path[0]) {
GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
bs->request_alignment = dg.Geometry.BytesPerSector;
}
}
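/* Aside: on Win32 the sector size comes from the drive geometry for
 * physical disks (IOCTL_DISK_GET_DRIVE_GEOMETRY_EX), is fixed at 2048
 * for CD-ROMs, and falls back to GetDiskFreeSpace() for filesystems.
 * Minimal sketch of the fallback call; the API is real, the root path
 * is an example: */
DWORD spc, bps, freec, totalc;
if (GetDiskFreeSpace("C:\\", &spc, &bps, &freec, &totalc)) {
    /* bps now holds the volume's bytes per sector, e.g. 512 or 4096 */
}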
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -251,17 +221,6 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
}
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -275,8 +234,7 @@ static QemuOptsList raw_runtime_opts = {
},
};
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags;
@@ -288,10 +246,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
s->type = FTYPE_FILE;
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&raw_runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
@@ -303,23 +262,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
aio = win32_aio_init();
if (aio == NULL) {
error_setg(errp, "Could not initialize AIO");
ret = -EINVAL;
goto fail;
}
}
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
} else if (filename[0] == '\\' && filename[1] == '\\') {
s->drive_path[0] = 0;
} else {
/* Relative path. */
char buf[MAX_PATH];
GetCurrentDirectory(MAX_PATH, buf);
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
}
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -338,13 +285,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
ret = win32_aio_attach(aio, s->hfile);
if (ret < 0) {
CloseHandle(s->hfile);
error_setg_errno(errp, -ret, "Could not enable AIO");
goto fail;
}
s->aio = aio;
}
raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
@@ -475,14 +420,11 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return st.st_size;
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int64_t total_size = 0;
strstart(filename, "file:", &filename);
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -493,10 +435,8 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (fd < 0) {
error_setg_errno(errp, errno, "Could not create file");
if (fd < 0)
return -EIO;
}
set_sparse(fd);
ftruncate(fd, total_size * 512);
qemu_close(fd);
@@ -516,8 +456,6 @@ static BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
@@ -593,17 +531,7 @@ static int hdev_probe_device(const char *filename)
return 0;
}
static void hdev_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The prefix is optional, just as for "file". */
strstart(filename, "host_device:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags, create_flags;
@@ -614,11 +542,11 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
const char *filename;
QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
&error_abort);
QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto done;
}
@@ -627,7 +555,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
if (strstart(filename, "/dev/cdrom", NULL)) {
if (find_cdrom(device_name, sizeof(device_name)) < 0) {
error_setg(errp, "Could not open CD-ROM drive");
ret = -ENOENT;
goto done;
}
@@ -656,9 +583,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
if (err == ERROR_ACCESS_DENIED) {
ret = -EACCES;
} else {
ret = -EINVAL;
ret = -1;
}
error_setg_errno(errp, -ret, "Could not open device");
goto done;
}
@@ -671,8 +597,6 @@ static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = hdev_parse_filename,
.bdrv_probe_device = hdev_probe_device,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
@@ -681,9 +605,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
};


@@ -1,17 +1,13 @@
/* BlockDriver implementation for "raw"
/*
* Block driver for RAW format
*
* Copyright (C) 2010, 2013, Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
* Author:
* Laszlo Ersek <lersek@redhat.com>
* Copyright (c) 2006 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
@@ -19,27 +15,27 @@
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/option.h"
#include "qemu/module.h"
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ 0 }
};
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
bs->sg = bs->file->sg;
return 0;
}
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
/* We have nothing to do for raw reopen, stubs just return
* success */
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
return 0;
}
@@ -58,26 +54,22 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
}
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
static void raw_close(BlockDriverState *bs)
{
}
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
*pnum = nb_sectors;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
int64_t sector_num,
int nb_sectors)
{
return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
@@ -85,22 +77,22 @@ static int64_t raw_getlength(BlockDriverState *bs)
return bdrv_getlength(bs->file);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
return bdrv_get_info(bs->file, bdi);
}
static int raw_refresh_limits(BlockDriverState *bs)
{
bs->bl = bs->file->bl;
return 0;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file, offset);
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
return 1; /* everything can be opened as raw image */
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
}
static int raw_is_inserted(BlockDriverState *bs)
{
return bdrv_is_inserted(bs->file);
@@ -123,79 +115,73 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked)
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
return bdrv_ioctl(bs->file, req, buf);
return bdrv_ioctl(bs->file, req, buf);
}
static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb,
void *opaque)
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
}
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
return bdrv_create_file(filename, options);
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ NULL }
};
static int raw_has_zero_init(BlockDriverState *bs)
{
return bdrv_has_zero_init(bs->file);
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
Error *local_err = NULL;
int ret;
ret = bdrv_create_file(filename, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->sg;
return 0;
}
static void raw_close(BlockDriverState *bs)
{
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
/* smallest possible positive score so that raw is used if and only if no
* other block driver works
*/
return 1;
return bdrv_get_info(bs->file, bdi);
}
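/* Aside: probe scores are compared across drivers and the highest one
 * wins, so returning the smallest positive score makes raw the format
 * of last resort.  Hedged sketch of such a selection loop; this is an
 * assumption about how the block layer's format detection works, with
 * illustrative names: */
BlockDriver *d, *best = NULL;
int best_score = 0;
QLIST_FOREACH(d, &bdrv_drivers, list) {
    if (d->bdrv_probe) {
        int score = d->bdrv_probe(buf, buf_size, filename);
        if (score > best_score) {   /* raw's 1 only wins by default */
            best_score = score;
            best = d;
        }
    }
}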
static BlockDriver bdrv_raw = {
.format_name = "raw",
.bdrv_probe = &raw_probe,
.bdrv_reopen_prepare = &raw_reopen_prepare,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev = &raw_co_writev,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_get_info = &raw_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
.bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_options = &raw_create_options[0],
.bdrv_has_zero_init = &raw_has_zero_init
.format_name = "raw",
/* It's really 0, but we need to make g_malloc() happy */
.instance_size = 1,
.bdrv_open = raw_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,
.bdrv_media_changed = raw_media_changed,
.bdrv_eject = raw_eject,
.bdrv_lock_medium = raw_lock_medium,
.bdrv_ioctl = raw_ioctl,
.bdrv_aio_ioctl = raw_aio_ioctl,
.bdrv_create = raw_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = raw_has_zero_init,
};
static void bdrv_raw_init(void)


@@ -95,13 +95,19 @@ typedef struct RADOSCB {
#define RBD_FD_WRITE 1
typedef struct BDRVRBDState {
int fds[2];
rados_t cluster;
rados_ioctx_t io_ctx;
rbd_image_t image;
char name[RBD_MAX_IMAGE_NAME_SIZE];
int qemu_aio_count;
char *snap;
int event_reader_pos;
RADOSCB *event_rcb;
} BDRVRBDState;
static void rbd_aio_bh_cb(void *opaque);
static int qemu_rbd_next_tok(char *dst, int dst_len,
char *src, char delim,
const char *name,
@@ -282,8 +288,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
return ret;
}
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options)
{
int64_t bytes = 0;
int64_t objsize;
@@ -364,8 +369,9 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
}
/*
* This aio completion is being called from rbd_finish_bh() and runs in qemu
* BH context.
* This aio completion is being called from qemu_rbd_aio_event_reader()
* and runs in qemu context. It schedules a bh, but just in case the aio
* was not cancelled before.
*/
static void qemu_rbd_complete_aio(RADOSCB *rcb)
{
@@ -395,19 +401,44 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
acb->ret = r;
}
}
/* Note that acb->bh can be NULL in case where the aio was cancelled */
acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
qemu_bh_schedule(acb->bh);
g_free(rcb);
}
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
acb->status = 0;
/*
* aio fd read handler. It runs in the qemu context and calls the
* completion handling of completed rados aio operations.
*/
static void qemu_rbd_aio_event_reader(void *opaque)
{
BDRVRBDState *s = opaque;
if (!acb->cancelled) {
qemu_aio_release(acb);
}
ssize_t ret;
do {
char *p = (char *)&s->event_rcb;
/* now read the rcb pointer that was sent from a non qemu thread */
ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
sizeof(s->event_rcb) - s->event_reader_pos);
if (ret > 0) {
s->event_reader_pos += ret;
if (s->event_reader_pos == sizeof(s->event_rcb)) {
s->event_reader_pos = 0;
qemu_rbd_complete_aio(s->event_rcb);
s->qemu_aio_count--;
}
}
} while (ret < 0 && errno == EINTR);
}
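/* Aside: librbd completion callbacks fire on a rados thread, so the
 * driver ships the RADOSCB pointer through a pipe and the handler above
 * reassembles it across short reads (event_reader_pos counts how many
 * bytes of the pointer have arrived so far).  Standalone sketch of the
 * same reassembly idea, with hypothetical variable names; needs
 * <unistd.h> and <errno.h>: */
void *ptr;
size_t pos = 0;
ssize_t n;
do {
    n = read(pipe_rd_fd, (char *)&ptr + pos, sizeof(ptr) - pos);
    if (n > 0) {
        pos += n;               /* may take several reads to complete */
    }
} while ((n > 0 && pos < sizeof(ptr)) || (n < 0 && errno == EINTR));
/* pos == sizeof(ptr) means a full pointer arrived; on EAGAIN, keep the
 * saved pos and resume when the fd becomes readable again */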
static int qemu_rbd_aio_flush_cb(void *opaque)
{
BDRVRBDState *s = opaque;
return (s->qemu_aio_count > 0);
}
/* TODO Convert to fine grained options */
@@ -424,8 +455,7 @@ static QemuOptsList runtime_opts = {
},
};
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRBDState *s = bs->opaque;
char pool[RBD_MAX_POOL_NAME_SIZE];
@@ -438,9 +468,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
const char *filename;
int r;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
qemu_opts_del(opts);
@@ -515,9 +545,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = (s->snap != NULL);
s->event_reader_pos = 0;
r = qemu_pipe(s->fds);
if (r < 0) {
error_report("error opening eventfd");
goto failed;
}
fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
NULL, qemu_rbd_aio_flush_cb, s);
qemu_opts_del(opts);
return 0;
failed:
rbd_close(s->image);
failed_open:
rados_ioctx_destroy(s->io_ctx);
failed_shutdown:
@@ -532,6 +576,10 @@ static void qemu_rbd_close(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
close(s->fds[0]);
close(s->fds[1]);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL);
rbd_close(s->image);
rados_ioctx_destroy(s->io_ctx);
g_free(s->snap);
@@ -559,11 +607,34 @@ static const AIOCBInfo rbd_aiocb_info = {
.cancel = qemu_rbd_aio_cancel,
};
static void rbd_finish_bh(void *opaque)
static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
{
RADOSCB *rcb = opaque;
qemu_bh_delete(rcb->acb->bh);
qemu_rbd_complete_aio(rcb);
int ret = 0;
while (1) {
fd_set wfd;
int fd = s->fds[RBD_FD_WRITE];
/* send the op pointer to the qemu thread that is responsible
for the aio/op completion. Must do it in a qemu thread context */
ret = write(fd, (void *)&rcb, sizeof(rcb));
if (ret >= 0) {
break;
}
if (errno == EINTR) {
continue;
}
if (errno != EAGAIN) {
break;
}
FD_ZERO(&wfd);
FD_SET(fd, &wfd);
do {
ret = select(fd + 1, NULL, &wfd, NULL, NULL);
} while (ret < 0 && errno == EINTR);
}
return ret;
}
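/* Aside: write() on the non-blocking pipe can fail with EAGAIN when the
 * pipe is full, so the loop above parks in select() until the fd turns
 * writable and then retries; no completion is ever dropped.  The same
 * select() idiom in isolation (needs <sys/select.h> and <errno.h>): */
static int wait_until_writable(int fd)
{
    fd_set wfd;
    int r;
    FD_ZERO(&wfd);
    FD_SET(fd, &wfd);
    do {
        r = select(fd + 1, NULL, &wfd, NULL, NULL);
    } while (r < 0 && errno == EINTR);  /* retry interrupted waits */
    return r;                           /* 1 once fd is writable */
}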
/*
@@ -571,18 +642,40 @@ static void rbd_finish_bh(void *opaque)
*
* Note: this function is being called from a non qemu thread so
* we need to be careful about what we do here. Generally we only
* schedule a BH, and do the rest of the io completion handling
* from rbd_finish_bh() which runs in a qemu context.
* write to the block notification pipe, and do the rest of the
* io completion handling from qemu_rbd_aio_event_reader() which
* runs in a qemu context.
*/
static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
{
RBDAIOCB *acb = rcb->acb;
int ret;
rcb->ret = rbd_aio_get_return_value(c);
rbd_aio_release(c);
ret = qemu_rbd_send_pipe(rcb->s, rcb);
if (ret < 0) {
error_report("failed writing to acb->s->fds");
g_free(rcb);
}
}
acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
qemu_bh_schedule(acb->bh);
/* Callback when all queued rbd_aio requests are complete */
static void rbd_aio_bh_cb(void *opaque)
{
RBDAIOCB *acb = opaque;
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_bh_delete(acb->bh);
acb->bh = NULL;
acb->status = 0;
if (!acb->cancelled) {
qemu_aio_release(acb);
}
}
static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -648,6 +741,8 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
off = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
s->qemu_aio_count++; /* All the RADOSCB */
rcb = g_malloc(sizeof(RADOSCB));
rcb->done = 0;
rcb->acb = acb;
@@ -684,6 +779,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
failed:
g_free(rcb);
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
@@ -807,31 +903,12 @@ static int qemu_rbd_snap_create(BlockDriverState *bs,
}
static int qemu_rbd_snap_remove(BlockDriverState *bs,
const char *snapshot_id,
const char *snapshot_name,
Error **errp)
const char *snapshot_name)
{
BDRVRBDState *s = bs->opaque;
int r;
if (!snapshot_name) {
error_setg(errp, "rbd need a valid snapshot name");
return -EINVAL;
}
/* If snapshot_id is specified, it must be equal to name, see
qemu_rbd_snap_list() */
if (snapshot_id && strcmp(snapshot_id, snapshot_name)) {
error_setg(errp,
"rbd do not support snapshot id, it should be NULL or "
"equal to snapshot name");
return -EINVAL;
}
r = rbd_snap_remove(s->image, snapshot_name);
if (r < 0) {
error_setg_errno(errp, -r, "Failed to remove the snapshot");
}
return r;
}
@@ -917,7 +994,6 @@ static QEMUOptionParameter qemu_rbd_create_options[] = {
static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,

File diff suppressed because it is too large.


@@ -25,24 +25,6 @@
#include "block/snapshot.h"
#include "block/block_int.h"
QemuOptsList internal_snapshot_opts = {
.name = "snapshot",
.head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head),
.desc = {
{
.name = SNAPSHOT_OPT_ID,
.type = QEMU_OPT_STRING,
.help = "snapshot id"
},{
.name = SNAPSHOT_OPT_NAME,
.type = QEMU_OPT_STRING,
.help = "snapshot name"
},{
/* end of list */
}
},
};
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name)
{
@@ -66,79 +48,6 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
return ret;
}
/**
* Look up an internal snapshot by @id and @name.
* @bs: block device to search
* @id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @sn_info: location to store information on the snapshot found
* @errp: location to store error, will be set only for exception
*
* This function will traverse snapshot list in @bs to search the matching
* one, @id and @name are the matching condition:
* If both @id and @name are specified, find the first one with id @id and
* name @name.
* If only @id is specified, find the first one with id @id.
* If only @name is specified, find the first one with name @name.
* if none is specified, abort().
*
* Returns: true when a snapshot is found and @sn_info will be filled, false
* when error or not found. If all operation succeed but no matching one is
* found, @errp will NOT be set.
*/
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name,
QEMUSnapshotInfo *sn_info,
Error **errp)
{
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i;
bool ret = false;
assert(id || name);
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
error_setg_errno(errp, -nb_sns, "Failed to get a snapshot list");
return false;
} else if (nb_sns == 0) {
return false;
}
if (id && name) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, id) && !strcmp(sn->name, name)) {
*sn_info = *sn;
ret = true;
break;
}
}
} else if (id) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, id)) {
*sn_info = *sn;
ret = true;
break;
}
}
} else if (name) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->name, name)) {
*sn_info = *sn;
ret = true;
break;
}
}
}
g_free(sn_tab);
return ret;
}
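/* Aside: a hedged usage sketch for the lookup documented above (the
 * snapshot name is an example).  Note the deliberate asymmetry: a clean
 * "not found" returns false with *errp unset, while a listing failure
 * returns false and sets it. */
QEMUSnapshotInfo sn;
Error *err = NULL;
if (bdrv_snapshot_find_by_id_and_name(bs, NULL, "nightly", &sn, &err)) {
    /* found: sn.id_str and sn.name are both filled in */
} else if (err) {
    /* real failure while listing snapshots */
} else {
    /* no matching snapshot; err was intentionally left NULL */
}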
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
@@ -188,9 +97,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
if (bs->file) {
drv->bdrv_close(bs);
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
if (open_ret < 0) {
bdrv_unref(bs->file);
bdrv_delete(bs->file);
bs->drv = NULL;
return open_ret;
}
@@ -200,73 +109,21 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
return -ENOTSUP;
}
/**
* Delete an internal snapshot by @snapshot_id and @name.
* @bs: block device used in the operation
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
*
* If both @snapshot_id and @name are specified, delete the first one with
* id @snapshot_id and name @name.
* If only @snapshot_id is specified, delete the first one with id
* @snapshot_id.
* If only @name is specified, delete the first one with name @name.
* if none is specified, return -EINVAL.
*
* Returns: 0 on success, -errno on failure. If @bs is not inserted, return
* -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs
* does not support internal snapshot deletion, return -ENOTSUP. If @bs does
* not support parameter @snapshot_id or @name, or one of them is not correctly
* specified, return -EINVAL. If @bs can't find one matching @id and @name,
* return -ENOENT. If @errp != NULL, it will always be filled with error
* message on failure.
*/
int bdrv_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BlockDriver *drv = bs->drv;
if (!drv) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
return drv->bdrv_snapshot_delete(bs, snapshot_id);
}
if (bs->file) {
return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
return bdrv_snapshot_delete(bs->file, snapshot_id);
}
error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
drv->format_name, bdrv_get_device_name(bs),
"internal snapshot deletion");
return -ENOTSUP;
}
void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
const char *id_or_name,
Error **errp)
{
int ret;
Error *local_err = NULL;
ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
local_err = NULL;
ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
}
if (ret < 0) {
error_propagate(errp, local_err);
}
}
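/* Aside: usage sketch for the helper above, which tries the string as
 * an ID first and retries it as a name on -ENOENT or -EINVAL --
 * convenient when the user supplies a single identifier.  "snap0" is an
 * example; error reporting follows the qerror_report_err() idiom seen
 * elsewhere in this series: */
Error *err = NULL;
bdrv_snapshot_delete_by_id_or_name(bs, "snap0", &err);
if (err) {
    qerror_report_err(err);
    error_free(err);
}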
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info)
{
@@ -283,71 +140,18 @@ int bdrv_snapshot_list(BlockDriverState *bs,
return -ENOTSUP;
}
/**
* Temporarily load an internal snapshot by @snapshot_id and @name.
* @bs: block device used in the operation
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
*
* If both @snapshot_id and @name are specified, load the first one with
* id @snapshot_id and name @name.
* If only @snapshot_id is specified, load the first one with id
* @snapshot_id.
* If only @name is specified, load the first one with name @name.
* if none is specified, return -EINVAL.
*
* Returns: 0 on success, -errno on fail. If @bs is not inserted, return
* -ENOMEDIUM. If @bs is not readonly, return -EINVAL. If @bs did not support
* internal snapshot, return -ENOTSUP. If qemu can't find a matching @id and
* @name, return -ENOENT. If @errp != NULL, it will always be filled on
* failure.
*/
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
const char *snapshot_name)
{
BlockDriver *drv = bs->drv;
if (!drv) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
if (!bs->read_only) {
error_setg(errp, "Device is not readonly");
return -EINVAL;
}
if (drv->bdrv_snapshot_load_tmp) {
return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
drv->format_name, bdrv_get_device_name(bs),
"temporarily load internal snapshot");
return -ENOTSUP;
}
int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
const char *id_or_name,
Error **errp)
{
int ret;
Error *local_err = NULL;
ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
local_err = NULL;
ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
}
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}


@@ -608,8 +608,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
return ret;
}
static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
Error **errp)
static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags)
{
BDRVSSHState *s = bs->opaque;
int ret;
@@ -651,8 +650,7 @@ static QEMUOptionParameter ssh_create_options[] = {
{ NULL }
};
static int ssh_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int ssh_create(const char *filename, QEMUOptionParameter *options)
{
int r, ret;
Error *local_err = NULL;
@@ -742,6 +740,14 @@ static void restart_coroutine(void *opaque)
qemu_coroutine_enter(co, NULL);
}
/* Always true because when we have called set_fd_handler there is
* always a request being processed.
*/
static int return_true(void *opaque)
{
return 1;
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s)
{
int r;
@@ -760,13 +766,13 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
rd_handler, wr_handler);
qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, return_true, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
{
DPRINTF("s->sock=%d", s->sock);
qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the


@@ -57,11 +57,6 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
BlockDriverState *intermediate;
intermediate = top->backing_hd;
/* Must assign before bdrv_delete() to prevent traversing dangling pointer
* while we delete backing image instances.
*/
top->backing_hd = base;
while (intermediate) {
BlockDriverState *unused;
@@ -73,10 +68,9 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
unused = intermediate;
intermediate = intermediate->backing_hd;
unused->backing_hd = NULL;
bdrv_unref(unused);
bdrv_delete(unused);
}
bdrv_refresh_limits(top);
top->backing_hd = base;
}
static void coroutine_fn stream_run(void *opaque)
@@ -90,11 +84,6 @@ static void coroutine_fn stream_run(void *opaque)
int n = 0;
void *buf;
if (!bs->backing_hd) {
block_job_completed(&s->common, 0);
return;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
block_job_completed(&s->common, s->common.len);
@@ -121,22 +110,21 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
copy = false;
ret = bdrv_is_allocated(bs, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
ret = bdrv_co_is_allocated(bs, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
if (ret == 1) {
/* Allocated in the top, no need to copy. */
copy = false;
} else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
ret = bdrv_is_allocated_above(bs->backing_hd, base,
sector_num, n, &n);
ret = bdrv_co_is_allocated_above(bs->backing_hd, base,
sector_num, n, &n);
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
@@ -146,7 +134,7 @@ wait:
copy = (ret == 1);
}
trace_stream_one_iteration(s, sector_num, n, ret);
if (copy) {
if (ret >= 0 && copy) {
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
@@ -210,9 +198,9 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static const BlockJobDriver stream_job_driver = {
static const BlockJobType stream_job_type = {
.instance_size = sizeof(StreamBlockJob),
.job_type = BLOCK_JOB_TYPE_STREAM,
.job_type = "stream",
.set_speed = stream_set_speed,
};
@@ -231,7 +219,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}


@@ -31,7 +31,7 @@
* Allocation of blocks could be optimized (less writes to block map and
* header).
*
* Read and write of adjacent blocks could be done in one operation
* Read and write of adjacents blocks could be done in one operation
* (current code uses one operation per block (1 MiB).
*
* The code is not thread safe (missing locks for changes in header and
@@ -120,11 +120,6 @@ typedef unsigned char uuid_t[16];
#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
/* max blocks in image is (0xffffffff / 4) */
#define VDI_BLOCKS_IN_IMAGE_MAX 0x3fffffff
#define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
(uint64_t)DEFAULT_CLUSTER_SIZE)
#if !defined(CONFIG_UUID)
static inline void uuid_generate(uuid_t out)
{
@@ -170,7 +165,7 @@ typedef struct {
uuid_t uuid_link;
uuid_t uuid_parent;
uint64_t unused2[7];
} QEMU_PACKED VdiHeader;
} VdiHeader;
typedef struct {
/* The block map entries are little endian (even in memory). */
@@ -336,7 +331,6 @@ static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
logout("\n");
bdi->cluster_size = s->block_size;
bdi->vm_state_offset = 0;
bdi->unallocated_blocks_are_zero = true;
return 0;
}
@@ -370,8 +364,7 @@ static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
return result;
}
static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vdi_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVdiState *s = bs->opaque;
VdiHeader header;
@@ -390,14 +383,6 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
vdi_header_print(&header);
#endif
if (header.disk_size > VDI_DISK_SIZE_MAX) {
error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")",
header.disk_size, VDI_DISK_SIZE_MAX);
ret = -ENOTSUP;
goto fail;
}
if (header.disk_size % SECTOR_SIZE != 0) {
/* 'VBoxManage convertfromraw' can create images with odd disk sizes.
We accept them but round the disk size to the next multiple of
@@ -408,56 +393,43 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
}
if (header.signature != VDI_SIGNATURE) {
error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature);
ret = -EINVAL;
logout("bad vdi signature %08x\n", header.signature);
ret = -EMEDIUMTYPE;
goto fail;
} else if (header.version != VDI_VERSION_1_1) {
error_setg(errp, "unsupported VDI image (version %u.%u)",
header.version >> 16, header.version & 0xffff);
logout("unsupported version %u.%u\n",
header.version >> 16, header.version & 0xffff);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_bmap % SECTOR_SIZE != 0) {
/* We only support block maps which start on a sector boundary. */
error_setg(errp, "unsupported VDI image (unaligned block map offset "
"0x%x)", header.offset_bmap);
logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_data % SECTOR_SIZE != 0) {
/* We only support data blocks which start on a sector boundary. */
error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)",
header.offset_data);
logout("unsupported data offset 0x%x B\n", header.offset_data);
ret = -ENOTSUP;
goto fail;
} else if (header.sector_size != SECTOR_SIZE) {
error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
header.sector_size, SECTOR_SIZE);
logout("unsupported sector size %u B\n", header.sector_size);
ret = -ENOTSUP;
goto fail;
} else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
error_setg(errp, "unsupported VDI image (block size %u is not %u)",
header.block_size, DEFAULT_CLUSTER_SIZE);
} else if (header.block_size != 1 * MiB) {
logout("unsupported block size %u B\n", header.block_size);
ret = -ENOTSUP;
goto fail;
} else if (header.disk_size >
(uint64_t)header.blocks_in_image * header.block_size) {
error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", "
"image bitmap has room for %" PRIu64 ")",
header.disk_size,
(uint64_t)header.blocks_in_image * header.block_size);
logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_link)) {
error_setg(errp, "unsupported VDI image (non-NULL link UUID)");
logout("link uuid != 0, unsupported\n");
ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_parent)) {
error_setg(errp, "unsupported VDI image (non-NULL parent UUID)");
ret = -ENOTSUP;
goto fail;
} else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) {
error_setg(errp, "unsupported VDI image "
"(too many blocks %u, max is %u)",
header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX);
logout("parent uuid != 0, unsupported\n");
ret = -ENOTSUP;
goto fail;
}
@@ -498,7 +470,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
return 0;
}
static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
@@ -507,23 +479,12 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
size_t sector_in_block = sector_num % s->block_sectors;
int n_sectors = s->block_sectors - sector_in_block;
uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
uint64_t offset;
int result;
logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
*pnum = n_sectors;
result = VDI_IS_ALLOCATED(bmap_entry);
if (!result) {
return 0;
}
offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
sector_in_block * SECTOR_SIZE;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
return VDI_IS_ALLOCATED(bmap_entry);
}
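
Editorial aside (not part of the diff): the offset computation on the get_block_status side can be sketched standalone; the function below is illustrative only and mirrors the arithmetic above.

#include <stdint.h>

#define SECTOR_SIZE 512

/* Translate a guest sector into a host-file byte offset, given the VDI
 * block-map entry for its block (as in vdi_co_get_block_status above). */
static uint64_t vdi_file_offset_demo(uint64_t offset_data, uint32_t bmap_entry,
                                     uint32_t block_size,
                                     uint32_t sector_in_block)
{
    return offset_data                              /* start of data area */
         + (uint64_t)bmap_entry * block_size        /* start of the block */
         + (uint64_t)sector_in_block * SECTOR_SIZE; /* offset inside it   */
}

Because offset_data and the block size are sector-aligned (the open path rejects anything else), the low bits of the returned offset are zero and remain free for the BDRV_BLOCK_DATA / BDRV_BLOCK_OFFSET_VALID flags OR'd in above.
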
static int vdi_co_read(BlockDriverState *bs,
@@ -672,8 +633,7 @@ static int vdi_co_write(BlockDriverState *bs,
return ret;
}
static int vdi_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int vdi_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int result = 0;
@@ -708,20 +668,11 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
options++;
}
if (bytes > VDI_DISK_SIZE_MAX) {
result = -ENOTSUP;
error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
fd = qemu_open(filename,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
0644);
if (fd < 0) {
result = -errno;
goto exit;
return -errno;
}
/* We need enough blocks to store the given disk size,
@@ -782,7 +733,6 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
result = -errno;
}
exit:
return result;
}
@@ -830,7 +780,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_read = vdi_co_read,


@@ -1,216 +0,0 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=34750
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/vhdx.h"
#include <uuid/uuid.h>
/*
* All the VHDX formats on disk are little endian - the following
* are helper import/export functions to correctly convert
* endianness from disk read to native cpu format, and back again.
*/
/* VHDX File Header */
void vhdx_header_le_import(VHDXHeader *h)
{
assert(h != NULL);
le32_to_cpus(&h->signature);
le32_to_cpus(&h->checksum);
le64_to_cpus(&h->sequence_number);
leguid_to_cpus(&h->file_write_guid);
leguid_to_cpus(&h->data_write_guid);
leguid_to_cpus(&h->log_guid);
le16_to_cpus(&h->log_version);
le16_to_cpus(&h->version);
le32_to_cpus(&h->log_length);
le64_to_cpus(&h->log_offset);
}
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
{
assert(orig_h != NULL);
assert(new_h != NULL);
new_h->signature = cpu_to_le32(orig_h->signature);
new_h->checksum = cpu_to_le32(orig_h->checksum);
new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
new_h->file_write_guid = orig_h->file_write_guid;
new_h->data_write_guid = orig_h->data_write_guid;
new_h->log_guid = orig_h->log_guid;
cpu_to_leguids(&new_h->file_write_guid);
cpu_to_leguids(&new_h->data_write_guid);
cpu_to_leguids(&new_h->log_guid);
new_h->log_version = cpu_to_le16(orig_h->log_version);
new_h->version = cpu_to_le16(orig_h->version);
new_h->log_length = cpu_to_le32(orig_h->log_length);
new_h->log_offset = cpu_to_le64(orig_h->log_offset);
}
/* VHDX Log Headers */
void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
{
assert(d != NULL);
le32_to_cpus(&d->signature);
le32_to_cpus(&d->trailing_bytes);
le64_to_cpus(&d->leading_bytes);
le64_to_cpus(&d->file_offset);
le64_to_cpus(&d->sequence_number);
}
void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
{
assert(d != NULL);
cpu_to_le32s(&d->signature);
cpu_to_le32s(&d->trailing_bytes);
cpu_to_le64s(&d->leading_bytes);
cpu_to_le64s(&d->file_offset);
cpu_to_le64s(&d->sequence_number);
}
void vhdx_log_data_le_export(VHDXLogDataSector *d)
{
assert(d != NULL);
cpu_to_le32s(&d->data_signature);
cpu_to_le32s(&d->sequence_high);
cpu_to_le32s(&d->sequence_low);
}
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
{
assert(hdr != NULL);
le32_to_cpus(&hdr->signature);
le32_to_cpus(&hdr->checksum);
le32_to_cpus(&hdr->entry_length);
le32_to_cpus(&hdr->tail);
le64_to_cpus(&hdr->sequence_number);
le32_to_cpus(&hdr->descriptor_count);
leguid_to_cpus(&hdr->log_guid);
le64_to_cpus(&hdr->flushed_file_offset);
le64_to_cpus(&hdr->last_file_offset);
}
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le32s(&hdr->signature);
cpu_to_le32s(&hdr->checksum);
cpu_to_le32s(&hdr->entry_length);
cpu_to_le32s(&hdr->tail);
cpu_to_le64s(&hdr->sequence_number);
cpu_to_le32s(&hdr->descriptor_count);
cpu_to_leguids(&hdr->log_guid);
cpu_to_le64s(&hdr->flushed_file_offset);
cpu_to_le64s(&hdr->last_file_offset);
}
/* Region table entries */
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr)
{
assert(hdr != NULL);
le32_to_cpus(&hdr->signature);
le32_to_cpus(&hdr->checksum);
le32_to_cpus(&hdr->entry_count);
}
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le32s(&hdr->signature);
cpu_to_le32s(&hdr->checksum);
cpu_to_le32s(&hdr->entry_count);
}
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e)
{
assert(e != NULL);
leguid_to_cpus(&e->guid);
le64_to_cpus(&e->file_offset);
le32_to_cpus(&e->length);
le32_to_cpus(&e->data_bits);
}
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e)
{
assert(e != NULL);
cpu_to_leguids(&e->guid);
cpu_to_le64s(&e->file_offset);
cpu_to_le32s(&e->length);
cpu_to_le32s(&e->data_bits);
}
/* Metadata headers & table */
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr)
{
assert(hdr != NULL);
le64_to_cpus(&hdr->signature);
le16_to_cpus(&hdr->entry_count);
}
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le64s(&hdr->signature);
cpu_to_le16s(&hdr->entry_count);
}
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e)
{
assert(e != NULL);
leguid_to_cpus(&e->item_id);
le32_to_cpus(&e->offset);
le32_to_cpus(&e->length);
le32_to_cpus(&e->data_bits);
}
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e)
{
assert(e != NULL);
cpu_to_leguids(&e->item_id);
cpu_to_le32s(&e->offset);
cpu_to_le32s(&e->length);
cpu_to_le32s(&e->data_bits);
}
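
Editorial aside (not part of the diff): all of these helpers convert in place, so the read path (import after loading from disk) and the write path (export just before storing) are symmetric. A self-contained stand-in for the pattern; QEMU's real le32_to_cpus() and friends live in qemu/bswap.h:

#include <stdint.h>

static uint32_t bswap32_demo(uint32_t v)
{
    return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
           ((v << 8) & 0x00ff0000u) | (v << 24);
}

/* In-place import: disk (little-endian) -> CPU order. On a little-endian
 * host this is a no-op; on a big-endian host it byteswaps. */
static void le32_to_cpus_demo(uint32_t *v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    *v = bswap32_demo(*v);
#else
    (void)v;
    (void)bswap32_demo;
#endif
}

The export direction (cpu_to_le32s() and so on) is the same swap run in reverse order relative to the disk I/O.
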

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -6,9 +6,9 @@
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* This is based on the "VHDX Format Specification v0.95", published 4/12/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=34750
* https://www.microsoft.com/en-us/download/details.aspx?id=29681
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
@@ -18,11 +18,6 @@
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
#define KiB (1 * 1024)
#define MiB (KiB * 1024)
#define GiB (MiB * 1024)
#define TiB ((uint64_t) GiB * 1024)
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
@@ -35,15 +30,14 @@
* 0.........64KB...........128KB........192KB..........256KB................1MB
*/
#define VHDX_HEADER_BLOCK_SIZE (64 * 1024)
#define VHDX_HEADER_BLOCK_SIZE (64*1024)
#define VHDX_FILE_ID_OFFSET 0
#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1)
#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2)
#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3)
#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4)
#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1)
#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2)
#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3)
#define VHDX_HEADER_SECTION_END (1 * MiB)
/*
* A note on the use of MS-GUID fields. For more details on the GUID,
* please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
@@ -61,11 +55,10 @@
/* These structures are ones that are defined in the VHDX specification
* document */
#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
uint64_t signature; /* "vhdxfile" in ASCII */
uint16_t creator[256]; /* optional; utf-16 string to identify
the vhdx file creator. Diagnostic
the vhdx file creator. Diagnotistic
only */
} VHDXFileIdentifier;
@@ -74,7 +67,7 @@ typedef struct VHDXFileIdentifier {
* Microsoft is not just 16 bytes though - it is a structure that is defined,
* so we need to follow it here so that endianness does not trip us up */
typedef struct QEMU_PACKED MSGUID {
typedef struct MSGUID {
uint32_t data1;
uint16_t data2;
uint16_t data3;
@@ -84,15 +77,14 @@ typedef struct QEMU_PACKED MSGUID {
#define guid_eq(a, b) \
(memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
#define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk
is only 582 bytes, for purposes of crc
the header is the first 4KB of the 64KB
block */
#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk
is only 582 bytes, for purposes of crc
the header is the first 4KB of the 64KB
block */
/* The full header is 4KB, although the actual header data is much smaller.
* But for the checksum calculation, it is over the entire 4KB structure,
* not just the defined portion of it */
#define VHDX_HEADER_SIGNATURE 0x64616568
typedef struct QEMU_PACKED VHDXHeader {
uint32_t signature; /* "head" in ASCII */
uint32_t checksum; /* CRC-32C hash of the whole header */
@@ -100,7 +92,7 @@ typedef struct QEMU_PACKED VHDXHeader {
VHDX file has 2 of these headers,
and only the header with the highest
sequence number is valid */
MSGUID file_write_guid; /* 128 bit unique identifier. Must be
MSGUID file_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
file */
@@ -122,9 +114,9 @@ typedef struct QEMU_PACKED VHDXHeader {
there is no valid log. If non-zero,
log entries with this guid are
valid. */
uint16_t log_version; /* version of the log format. Must be
set to zero */
uint16_t version; /* version of the vhdx file. Currently,
uint16_t log_version; /* version of the log format. Mustn't be
zero, unless log_guid is also zero */
uint16_t version; /* version of th evhdx file. Currently,
only supported version is "1" */
uint32_t log_length; /* length of the log. Must be multiple
of 1MB */
@@ -133,7 +125,6 @@ typedef struct QEMU_PACKED VHDXHeader {
} VHDXHeader;
/* Header for the region table block */
#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
uint32_t signature; /* "regi" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
@@ -160,10 +151,7 @@ typedef struct QEMU_PACKED VHDXRegionTableEntry {
/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_MIN_SIZE (1024 * 1024)
#define VHDX_LOG_SECTOR_SIZE 4096
#define VHDX_LOG_HDR_SIZE 64
#define VHDX_LOG_SIGNATURE 0x65676f6c
typedef struct QEMU_PACKED VHDXLogEntryHeader {
uint32_t signature; /* "loge" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
@@ -186,8 +174,7 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader {
} VHDXLogEntryHeader;
#define VHDX_LOG_DESC_SIZE 32
#define VHDX_LOG_DESC_SIGNATURE 0x63736564
#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
typedef struct QEMU_PACKED VHDXLogDescriptor {
uint32_t signature; /* "zero" or "desc" in ASCII */
union {
@@ -207,7 +194,6 @@ typedef struct QEMU_PACKED VHDXLogDescriptor {
vhdx_log_entry_header */
} VHDXLogDescriptor;
#define VHDX_LOG_DATA_SIGNATURE 0x61746164
typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t data_signature; /* "data" in ASCII */
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
@@ -226,19 +212,19 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
#define PAYLOAD_BLOCK_UNMAPPED 5
#define PAYLOAD_BLOCK_FULLY_PRESENT 6
#define PAYLOAD_BLOCK_FULL_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
#define SB_BLOCK_NOT_PRESENT 0
#define SB_BLOCK_PRESENT 6
/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23)
#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23)
/* upper 44 bits are the file offset in 1MB units lower 3 bits are the state
other bits are reserved */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000ULL /* upper 44 bits */
#define VHDX_BAT_FILE_OFF_BITS (64-44)
typedef uint64_t VHDXBatEntry;
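
Editorial aside (not part of the diff): decoding a BAT entry per the masks above — the low three bits carry the payload state, and the masked upper 44 bits are directly the block's byte offset in the file (1 MiB granularity, since the mask starts at bit 20):

#include <stdint.h>

#define BAT_STATE_MASK_DEMO    0x07ULL
#define BAT_FILE_OFF_MASK_DEMO 0xFFFFFFFFFFF00000ULL

static int bat_state_demo(uint64_t entry)
{
    return (int)(entry & BAT_STATE_MASK_DEMO);   /* e.g. PAYLOAD_BLOCK_* */
}

static uint64_t bat_file_offset_demo(uint64_t entry)
{
    return entry & BAT_FILE_OFF_MASK_DEMO;       /* byte offset in file */
}
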
/* ---- METADATA REGION STRUCTURES ---- */
@@ -247,7 +233,6 @@ typedef uint64_t VHDXBatEntry;
#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
(VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL /* "metadata" in ASCII */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
uint64_t signature; /* "metadata" in ASCII */
uint16_t reserved;
@@ -267,8 +252,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry {
metadata region */
/* note: if length = 0, so is offset */
uint32_t length; /* length of metadata. <= 1MB. */
uint32_t data_bits; /* least-significant 3 bits are flags,
the rest are reserved (see above) */
uint32_t data_bits; /* least-significant 3 bits are flags, the
rest are reserved (see above) */
uint32_t reserved2;
} VHDXMetadataTableEntry;
@@ -277,16 +262,13 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry {
If set indicates a fixed
size VHDX file */
#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
#define VHDX_BLOCK_SIZE_MIN (1 * MiB)
#define VHDX_BLOCK_SIZE_MAX (256 * MiB)
typedef struct QEMU_PACKED VHDXFileParameters {
uint32_t block_size; /* size of each payload block, always
power of 2, <= 256MB and >= 1MB. */
uint32_t data_bits; /* least-significant 2 bits are flags,
the rest are reserved (see above) */
uint32_t data_bits; /* least-significant 2 bits are flags, the rest
are reserved (see above) */
} VHDXFileParameters;
#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB)
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
Must be multiple of the sector size,
@@ -294,7 +276,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskSize {
} VHDXVirtualDiskSize;
typedef struct QEMU_PACKED VHDXPage83Data {
MSGUID page_83_data; /* unique id for scsi devices that
MSGUID page_83_data[16]; /* unique id for scsi devices that
support page 0x83 */
} VHDXPage83Data;
@@ -309,7 +291,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
} VHDXVirtualDiskPhysicalSectorSize;
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
MSGUID locator_type; /* type of the parent virtual disk. */
MSGUID locator_type[16]; /* type of the parent virtual disk. */
uint16_t reserved;
uint16_t key_value_count; /* number of key/value pairs for this
locator */
@@ -326,125 +308,18 @@ typedef struct QEMU_PACKED VHDXParentLocatorEntry {
/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
typedef struct VHDXMetadataEntries {
VHDXMetadataTableEntry file_parameters_entry;
VHDXMetadataTableEntry virtual_disk_size_entry;
VHDXMetadataTableEntry page83_data_entry;
VHDXMetadataTableEntry logical_sector_size_entry;
VHDXMetadataTableEntry phys_sector_size_entry;
VHDXMetadataTableEntry parent_locator_entry;
uint16_t present;
} VHDXMetadataEntries;
typedef struct VHDXLogEntries {
uint64_t offset;
uint64_t length;
uint32_t write;
uint32_t read;
VHDXLogEntryHeader *hdr;
void *desc_buffer;
uint64_t sequence;
uint32_t tail;
} VHDXLogEntries;
typedef struct VHDXRegionEntry {
uint64_t start;
uint64_t end;
QLIST_ENTRY(VHDXRegionEntry) entries;
} VHDXRegionEntry;
typedef struct BDRVVHDXState {
CoMutex lock;
int curr_header;
VHDXHeader *headers[2];
VHDXRegionTableHeader rt;
VHDXRegionTableEntry bat_rt; /* region table for the BAT */
VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
VHDXMetadataTableHeader metadata_hdr;
VHDXMetadataEntries metadata_entries;
VHDXFileParameters params;
uint32_t block_size;
uint32_t block_size_bits;
uint32_t sectors_per_block;
uint32_t sectors_per_block_bits;
uint64_t virtual_disk_size;
uint32_t logical_sector_size;
uint32_t physical_sector_size;
uint64_t chunk_ratio;
uint32_t chunk_ratio_bits;
uint32_t logical_sector_size_bits;
uint32_t bat_entries;
VHDXBatEntry *bat;
uint64_t bat_offset;
bool first_visible_write;
MSGUID session_guid;
VHDXLogEntries log;
VHDXParentLocatorHeader parent_header;
VHDXParentLocatorEntry *parent_entries;
Error *migration_blocker;
bool log_replayed_on_open;
QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
} BDRVVHDXState;
void vhdx_guid_generate(MSGUID *guid);
int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
MSGUID *log_guid);
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset);
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
Error **errp);
int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
void *data, uint32_t length, uint64_t offset);
static inline void leguid_to_cpus(MSGUID *guid)
static void leguid_to_cpus(MSGUID *guid)
{
le32_to_cpus(&guid->data1);
le16_to_cpus(&guid->data2);
le16_to_cpus(&guid->data3);
}
static inline void cpu_to_leguids(MSGUID *guid)
{
cpu_to_le32s(&guid->data1);
cpu_to_le16s(&guid->data2);
cpu_to_le16s(&guid->data3);
}
void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
#endif
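
Editorial aside (not part of the diff): a hedged usage sketch of the checksum helpers declared above. The CRC-32C covers the whole 4 KiB header block with the checksum field zeroed during the computation; the field offset of 4 is inferred here from the VHDXHeader layout (signature first, checksum second) and should be treated as an assumption of this sketch.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Declared in block/vhdx.h (see above); implementation not shown here. */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);

static bool header_copy_valid_demo(uint8_t *buf4k)
{
    /* 4096 == VHDX_HEADER_SIZE; 4 == byte offset of the checksum field */
    return vhdx_checksum_is_valid(buf4k, 4096, 4);
}

Per the struct comments above, a VHDX file carries two such header copies, and the valid one is whichever passes the checksum with the higher sequence_number.
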

File diff suppressed because it is too large


@@ -45,10 +45,8 @@ enum vhd_type {
// Seconds since Jan 1, 2000 0:00:00 (UTC)
#define VHD_TIMESTAMP_BASE 946684800
#define VHD_MAX_SECTORS (65535LL * 255 * 255)
// always big-endian
typedef struct vhd_footer {
struct vhd_footer {
char creator[8]; // "conectix"
uint32_t features;
uint32_t version;
@@ -81,9 +79,9 @@ typedef struct vhd_footer {
uint8_t uuid[16];
uint8_t in_saved_state;
} QEMU_PACKED VHDFooter;
};
typedef struct vhd_dyndisk_header {
struct vhd_dyndisk_header {
char magic[8]; // "cxsparse"
// Offset of next header structure, 0xFFFFFFFF if none
@@ -113,7 +111,7 @@ typedef struct vhd_dyndisk_header {
uint32_t reserved;
uint64_t data_offset;
} parent_locator[8];
} QEMU_PACKED VHDDynDiskHeader;
};
typedef struct BDRVVPCState {
CoMutex lock;
@@ -157,16 +155,14 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vpc_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVPCState *s = bs->opaque;
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
struct vhd_footer* footer;
struct vhd_dyndisk_header* dyndisk_header;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
int disk_type = VHD_DYNAMIC;
int ret;
@@ -175,7 +171,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
footer = (VHDFooter *) s->footer_buf;
footer = (struct vhd_footer*) s->footer_buf;
if (strncmp(footer->creator, "conectix", 8)) {
int64_t offset = bdrv_getlength(bs->file);
if (offset < 0) {
@@ -193,8 +189,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (strncmp(footer->creator, "conectix", 8)) {
error_setg(errp, "invalid VPC image");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
disk_type = VHD_FIXED;
@@ -215,17 +210,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
/* images created with disk2vhd report a far higher virtual size
* than expected with the cyls * heads * sectors_per_cyl formula.
* use the footer->size instead if the image was created with
* disk2vhd.
*/
if (!strncmp(footer->creator_app, "d2v", 4)) {
bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
}
/* Allow a maximum disk size of approximately 2 TB */
if (bs->total_sectors >= VHD_MAX_SECTORS) {
if (bs->total_sectors >= 65535LL * 255 * 255) {
ret = -EFBIG;
goto fail;
}
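
Editorial aside (not part of the diff): the 65535LL * 255 * 255 cap (named VHD_MAX_SECTORS on one side of this diff) comes from CHS addressing — 65535 cylinders x 255 heads x 255 sectors per track — and at 512 bytes per sector lands at roughly 2 TB, matching the comment above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t max_sectors = 65535ULL * 255 * 255;   /* 4,261,413,375 */
    printf("%llu sectors = %llu bytes (~2 TB)\n",
           (unsigned long long)max_sectors,
           (unsigned long long)(max_sectors * 512));
    return 0;
}
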
@@ -237,7 +223,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
dyndisk_header = (VHDDynDiskHeader *) buf;
dyndisk_header = (struct vhd_dyndisk_header *) buf;
if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
ret = -EINVAL;
@@ -245,31 +231,10 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
s->block_size = be32_to_cpu(dyndisk_header->block_size);
if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
ret = -EINVAL;
goto fail;
}
s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
ret = -EINVAL;
goto fail;
}
if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
ret = -EINVAL;
goto fail;
}
s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
s->pagetable = g_malloc(s->max_table_entries * 4);
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
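
Editorial aside (not part of the diff): the bitmap_size line above uses the usual align-up idiom — (x + 511) & ~511 rounds x up to the next multiple of 512. One bitmap byte covers eight 512-byte sectors, so for the usual 2 MiB VHD block the bitmap is exactly 512 bytes and the rounding is a no-op:

#include <assert.h>
#include <stdint.h>

static uint32_t align_up_512_demo(uint32_t x)
{
    return (x + 511) & ~511u;
}

int main(void)
{
    assert(align_up_512_demo(513) == 1024);
    /* 2 MiB block: 2097152 / (8 * 512) = 512 bytes of bitmap, aligned */
    assert(align_up_512_demo((2 * 1024 * 1024) / (8 * 512)) == 512);
    return 0;
}
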
@@ -294,13 +259,6 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
}
if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
error_setg(errp, "block-vpc: free_data_block_offset points after "
"the end of file. The image has been truncated.");
ret = -EINVAL;
goto fail;
}
s->last_bitmap_offset = (int64_t) -1;
#ifdef CACHE
@@ -322,7 +280,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail:
qemu_vfree(s->pagetable);
g_free(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif
@@ -480,19 +438,6 @@ fail:
return -1;
}
static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (cpu_to_be32(footer->type) != VHD_FIXED) {
bdi->cluster_size = s->block_size;
}
bdi->unallocated_blocks_are_zero = true;
return 0;
}
static int vpc_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
@@ -500,7 +445,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
int ret;
int64_t offset;
int64_t sectors, sectors_per_block;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_read(bs->file, sector_num, buf, nb_sectors);
@@ -549,7 +494,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
int64_t offset;
int64_t sectors, sectors_per_block;
int ret;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_write(bs->file, sector_num, buf, nb_sectors);
@@ -651,8 +596,8 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
(VHDDynDiskHeader *) buf;
struct vhd_dyndisk_header* dyndisk_header =
(struct vhd_dyndisk_header*) buf;
size_t block_size, num_bat_entries;
int i;
int ret = -EIO;
@@ -738,11 +683,10 @@ static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
return ret;
}
static int vpc_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int vpc_create(const char *filename, QEMUOptionParameter *options)
{
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
struct vhd_footer *footer = (struct vhd_footer *) buf;
QEMUOptionParameter *disk_type_param;
int fd, i;
uint16_t cyls = 0;
@@ -845,7 +789,7 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options,
static int vpc_has_zero_init(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_has_zero_init(bs->file);
@@ -857,7 +801,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
static void vpc_close(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
qemu_vfree(s->pagetable);
g_free(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif
@@ -895,8 +839,6 @@ static BlockDriver bdrv_vpc = {
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
.bdrv_get_info = vpc_get_info,
.create_options = vpc_create_options,
.bdrv_has_zero_init = vpc_has_zero_init,
};


@@ -266,7 +266,8 @@ typedef struct mbr_t {
} QEMU_PACKED mbr_t;
typedef struct direntry_t {
uint8_t name[8 + 3];
uint8_t name[8];
uint8_t extension[3];
uint8_t attributes;
uint8_t reserved[2];
uint16_t ctime;
@@ -517,9 +518,11 @@ static inline uint8_t fat_chksum(const direntry_t* entry)
uint8_t chksum=0;
int i;
for (i = 0; i < ARRAY_SIZE(entry->name); i++) {
chksum = (((chksum & 0xfe) >> 1) |
((chksum & 0x01) ? 0x80 : 0)) + entry->name[i];
for(i=0;i<11;i++) {
unsigned char c;
c = (i < 8) ? entry->name[i] : entry->extension[i-8];
chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
}
return chksum;
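
Editorial aside (not part of the diff): both sides compute the standard VFAT short-name checksum — rotate right one bit, add the next byte — over the 11 bytes of the space-padded 8.3 name; the diff only changes whether those bytes live in one name[11] array or in name[8] plus extension[3]. A self-contained version:

#include <stdint.h>
#include <stdio.h>

/* VFAT 8.3 checksum, as stored in long-name directory entries. */
static uint8_t fat_chksum_demo(const uint8_t name[11])
{
    uint8_t chksum = 0;
    for (int i = 0; i < 11; i++) {
        /* rotate right by one, then add the next name byte */
        chksum = (uint8_t)((chksum >> 1) | (chksum << 7)) + name[i];
    }
    return chksum;
}

int main(void)
{
    const uint8_t name[11] = "FOO     TXT";   /* "FOO.TXT", space padded */
    printf("checksum: 0x%02x\n", fat_chksum_demo(name));
    return 0;
}
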
@@ -614,7 +617,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
if(is_dot) {
entry=array_get_next(&(s->directory));
memset(entry->name, 0x20, sizeof(entry->name));
memset(entry->name,0x20,11);
memcpy(entry->name,filename,strlen(filename));
return entry;
}
@@ -629,14 +632,12 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
i = 8;
entry=array_get_next(&(s->directory));
memset(entry->name, 0x20, sizeof(entry->name));
memset(entry->name,0x20,11);
memcpy(entry->name, filename, i);
if (j > 0) {
for (i = 0; i < 3 && filename[j + 1 + i]; i++) {
entry->name[8 + i] = filename[j + 1 + i];
}
}
if(j > 0)
for (i = 0; i < 3 && filename[j+1+i]; i++)
entry->extension[i] = filename[j+1+i];
/* upcase & remove unwanted characters */
for(i=10;i>=0;i--) {
@@ -860,7 +861,8 @@ static int init_directories(BDRVVVFATState* s,
{
direntry_t* entry=array_get_next(&(s->directory));
entry->attributes=0x28; /* archive | volume label */
memcpy(entry->name, "QEMU VVFAT ", sizeof(entry->name));
memcpy(entry->name,"QEMU VVF",8);
memcpy(entry->extension,"AT ",3);
}
/* Now build FAT, and write back information into directory */
@@ -1063,8 +1065,7 @@ static void vvfat_parse_filename(const char *filename, QDict *options,
qdict_put(options, "rw", qbool_from_int(rw));
}
static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vvfat_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVVFATState *s = bs->opaque;
int cyls, heads, secs;
@@ -1083,17 +1084,19 @@ DLOG(if (stderr == NULL) {
setbuf(stderr, NULL);
})
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
dirname = qemu_opt_get(opts, "dir");
if (!dirname) {
error_setg(errp, "vvfat block driver requires a 'dir' option");
qerror_report(ERROR_CLASS_GENERIC_ERROR, "vvfat block driver requires "
"a 'dir' option");
ret = -EINVAL;
goto fail;
}
@@ -1119,7 +1122,6 @@ DLOG(if (stderr == NULL) {
if (!s->fat_type) {
s->fat_type = 16;
}
s->first_sectors_number = 0x40;
cyls = s->fat_type == 12 ? 64 : 1024;
heads = 16;
secs = 63;
@@ -1134,7 +1136,8 @@ DLOG(if (stderr == NULL) {
case 12:
break;
default:
error_setg(errp, "Valid FAT types are only 12, 16 and 32");
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Valid FAT types are only "
"12, 16 and 32");
ret = -EINVAL;
goto fail;
}
@@ -1147,6 +1150,7 @@ DLOG(if (stderr == NULL) {
s->current_cluster=0xffffffff;
s->first_sectors_number=0x40;
/* read only is the default for safety */
bs->read_only = 1;
s->qcow = s->write_target = NULL;
@@ -1586,20 +1590,17 @@ static int parse_short_name(BDRVVVFATState* s,
lfn->name[i] = direntry->name[i];
}
for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) {
}
for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--);
if (j >= 0) {
lfn->name[i++] = '.';
lfn->name[i + j + 1] = '\0';
for (;j >= 0; j--) {
uint8_t c = direntry->name[8 + j];
if (c <= ' ' || c > 0x7f) {
return -2;
} else if (s->downcase_short_names) {
lfn->name[i + j] = qemu_tolower(c);
} else {
lfn->name[i + j] = c;
}
if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f)
return -2;
else if (s->downcase_short_names)
lfn->name[i + j] = qemu_tolower(direntry->extension[j]);
else
lfn->name[i + j] = direntry->extension[j];
}
} else
lfn->name[i + j + 1] = '\0';
@@ -2873,17 +2874,16 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int* n)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
if (*n > nb_sectors) {
*n = nb_sectors;
} else if (*n < 0) {
return 0;
}
return BDRV_BLOCK_DATA;
if (*n > nb_sectors)
*n = nb_sectors;
else if (*n < 0)
return 0;
return 1;
}
static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
@@ -2894,7 +2894,7 @@ static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
bdrv_unref(s->qcow);
bdrv_delete(s->qcow);
g_free(s->qcow_filename);
}
@@ -2908,7 +2908,6 @@ static int enable_write_target(BDRVVVFATState *s)
{
BlockDriver *bdrv_qcow;
QEMUOptionParameter *options;
Error *local_err = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
s->used_clusters = calloc(size, 1);
@@ -2926,20 +2925,17 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
ret = bdrv_create(bdrv_qcow, s->qcow_filename, options, &local_err);
ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
if (ret < 0) {
qerror_report_err(local_err);
error_free(local_err);
goto err;
}
s->qcow = NULL;
ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow,
&local_err);
s->qcow = bdrv_new("");
ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
qerror_report_err(local_err);
error_free(local_err);
bdrv_delete(s->qcow);
goto err;
}
@@ -2947,7 +2943,7 @@ static int enable_write_target(BDRVVVFATState *s)
unlink(s->qcow_filename);
#endif
s->bs->backing_hd = bdrv_new("");
s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
s->bs->backing_hd->drv = &vvfat_write_target;
s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
*(void**)s->bs->backing_hd->opaque = s;
@@ -2988,7 +2984,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_co_get_block_status = vvfat_co_get_block_status,
.bdrv_co_is_allocated = vvfat_co_is_allocated,
};
static void bdrv_vvfat_init(void)


@@ -105,6 +105,13 @@ static void win32_aio_completion_cb(EventNotifier *e)
}
}
static int win32_aio_flush_cb(EventNotifier *e)
{
QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
return (s->count > 0) ? 1 : 0;
}
static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
{
QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
@@ -194,7 +201,8 @@ QEMUWin32AIOState *win32_aio_init(void)
goto out_close_efd;
}
qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
win32_aio_flush_cb);
return s;


@@ -69,6 +69,12 @@ static void nbd_close_notifier(Notifier *n, void *data)
g_free(cn);
}
static void nbd_server_put_ref(NBDExport *exp)
{
BlockDriverState *bs = nbd_export_get_blockdev(exp);
drive_put_ref(drive_get_by_blockdev(bs));
}
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
@@ -99,9 +105,11 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
writable = false;
}
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
nbd_server_put_ref);
nbd_export_set_name(exp, device);
drive_get_ref(drive_get_by_blockdev(bs));
n = g_malloc0(sizeof(NBDCloseNotifier));
n->n.notify = nbd_close_notifier;

blockdev.c (1474 lines changed): file diff suppressed because it is too large


@@ -35,7 +35,7 @@
#include "qmp-commands.h"
#include "qemu/timer.h"
void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
int64_t speed, BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
@@ -45,11 +45,10 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
return NULL;
}
bdrv_ref(bs);
bdrv_set_in_use(bs, 1);
job = g_malloc0(driver->instance_size);
job->driver = driver;
job = g_malloc0(job_type->instance_size);
job->job_type = job_type;
job->bs = bs;
job->cb = cb;
job->opaque = opaque;
@@ -61,7 +60,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
Error *local_err = NULL;
block_job_set_speed(job, speed, &local_err);
if (local_err) {
if (error_is_set(&local_err)) {
bs->job = NULL;
g_free(job);
bdrv_set_in_use(bs, 0);
@@ -87,12 +86,12 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
Error *local_err = NULL;
if (!job->driver->set_speed) {
if (!job->job_type->set_speed) {
error_set(errp, QERR_NOT_SUPPORTED);
return;
}
job->driver->set_speed(job, speed, &local_err);
if (local_err) {
job->job_type->set_speed(job, speed, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
return;
}
@@ -102,12 +101,12 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
void block_job_complete(BlockJob *job, Error **errp)
{
if (job->paused || job->cancelled || !job->driver->complete) {
if (job->paused || job->cancelled || !job->job_type->complete) {
error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
return;
}
job->driver->complete(job, errp);
job->job_type->complete(job, errp);
}
void block_job_pause(BlockJob *job)
@@ -143,8 +142,8 @@ bool block_job_is_cancelled(BlockJob *job)
void block_job_iostatus_reset(BlockJob *job)
{
job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
if (job->driver->iostatus_reset) {
job->driver->iostatus_reset(job);
if (job->job_type->iostatus_reset) {
job->job_type->iostatus_reset(job);
}
}
@@ -188,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
}
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
{
assert(job->busy);
@@ -201,7 +200,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
if (block_job_is_paused(job)) {
qemu_coroutine_yield();
} else {
co_sleep_ns(type, ns);
co_sleep_ns(clock, ns);
}
job->busy = true;
}
@@ -209,7 +208,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
BlockJobInfo *block_job_query(BlockJob *job)
{
BlockJobInfo *info = g_new0(BlockJobInfo, 1);
info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
info->type = g_strdup(job->job_type->job_type);
info->device = g_strdup(bdrv_get_device_name(job->bs));
info->len = job->len;
info->busy = job->busy;
@@ -236,7 +235,7 @@ QObject *qobject_from_block_job(BlockJob *job)
"'len': %" PRId64 ","
"'offset': %" PRId64 ","
"'speed': %" PRId64 " }",
BlockJobType_lookup[job->driver->job_type],
job->job_type->job_type,
bdrv_get_device_name(job->bs),
job->len,
job->offset,
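
Editorial aside (not part of the diff): the rename running through this file (BlockJobDriver on one side, BlockJobType on the other) is mechanical — a job definition supplies an instance_size plus optional hooks, and block_job_create() allocates the embedding struct from it. A self-contained miniature of that pattern; the names below are stand-ins, not QEMU's:

#include <stdint.h>
#include <stdlib.h>

typedef struct MiniJob MiniJob;

typedef struct MiniJobDriver {
    size_t instance_size;                      /* sizeof the embedding job */
    void (*set_speed)(MiniJob *job, int64_t speed);
} MiniJobDriver;

struct MiniJob {
    const MiniJobDriver *driver;               /* "job_type" on the 1.6 side */
};

/* Mirrors the g_malloc0(driver->instance_size) in block_job_create():
 * allocate the job zeroed and stash the driver pointer in its header. */
static void *mini_job_create(const MiniJobDriver *driver)
{
    MiniJob *job = calloc(1, driver->instance_size);
    if (job) {
        job->driver = driver;
    }
    return job;
}
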


@@ -1000,7 +1000,7 @@ int main(int argc, char **argv)
memset(ts, 0, sizeof(TaskState));
init_task_state(ts);
ts->info = info;
cpu->opaque = ts;
env->opaque = ts;
#if defined(TARGET_I386)
cpu_x86_set_cpl(env, 3);


@@ -323,9 +323,9 @@ abi_long copy_from_user(void *hptr, abi_ulong gaddr, size_t len);
abi_long copy_to_user(abi_ulong gaddr, void *hptr, size_t len);
/* Functions for accessing guest memory. The tget and tput functions
read/write single values, byteswapping as necessary. The lock_user function
read/write single values, byteswapping as necessary. The lock_user
gets a pointer to a contiguous area of guest memory, but does not perform
any byteswapping. lock_user may return either a pointer to the guest
and byteswapping. lock_user may return either a pointer to the guest
memory, or a temporary buffer. */
/* Lock an area of guest memory into the host. If copy is true then the
@@ -381,7 +381,7 @@ static inline void *lock_user_string(abi_ulong guest_addr)
return lock_user(VERIFY_READ, guest_addr, (long)(len + 1), 1);
}
/* Helper macros for locking/unlocking a target struct. */
/* Helper macros for locking/ulocking a target struct. */
#define lock_user_struct(type, host_ptr, guest_addr, copy) \
(host_ptr = lock_user(type, guest_addr, sizeof(*host_ptr), copy))
#define unlock_user_struct(host_ptr, guest_addr, copy) \
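
Editorial aside (not part of the diff): a hedged usage sketch of these macros. It follows the idiom used throughout the *-user syscall code, but it is not compilable on its own (it needs qemu.h), and the target struct and helper here are hypothetical:

/* Copy a guest struct in, modify it, copy it back. lock_user_struct()
 * yields 0/NULL on an invalid guest address; the last argument of
 * unlock_user_struct() selects whether the data is copied back. */
struct target_pair {
    abi_long a;
    abi_long b;
};

static abi_long do_demo(abi_ulong guest_addr)
{
    struct target_pair *p;

    if (!lock_user_struct(VERIFY_WRITE, p, guest_addr, 1)) {
        return -TARGET_EFAULT;
    }
    p->b = p->a;                 /* byteswapping elided for brevity */
    unlock_user_struct(p, guest_addr, 1);
    return 0;
}
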

configure (vendored, 1373 lines changed): file diff suppressed because it is too large


@@ -23,22 +23,29 @@
#include "qemu/atomic.h"
#include "sysemu/qtest.h"
void cpu_loop_exit(CPUState *cpu)
bool qemu_cpu_has_work(CPUState *cpu)
{
return cpu_has_work(cpu);
}
void cpu_loop_exit(CPUArchState *env)
{
CPUState *cpu = ENV_GET_CPU(env);
cpu->current_tb = NULL;
siglongjmp(cpu->jmp_env, 1);
siglongjmp(env->jmp_env, 1);
}
/* exit the current TB from a signal handler. The host registers are
restored in a state compatible with the CPU emulator
*/
#if defined(CONFIG_SOFTMMU)
void cpu_resume_from_signal(CPUState *cpu, void *puc)
void cpu_resume_from_signal(CPUArchState *env, void *puc)
{
/* XXX: restore cpu registers saved in host registers */
cpu->exception_index = -1;
siglongjmp(cpu->jmp_env, 1);
env->exception_index = -1;
siglongjmp(env->jmp_env, 1);
}
#endif
@@ -46,25 +53,7 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
{
CPUArchState *env = cpu->env_ptr;
uintptr_t next_tb;
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
#if defined(TARGET_I386)
log_cpu_state(cpu, CPU_DUMP_CCOP);
#elif defined(TARGET_M68K)
/* ??? Should not modify env state for dumping. */
cpu_m68k_flush_flags(env, env->cc_op);
env->cc_op = CC_OP_FLAGS;
env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4);
log_cpu_state(cpu, 0);
#else
log_cpu_state(cpu, 0);
#endif
}
#endif /* DEBUG_DISAS */
next_tb = tcg_qemu_tb_exec(env, tb_ptr);
tcg_target_ulong next_tb = tcg_qemu_tb_exec(env, tb_ptr);
if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
@@ -101,7 +90,7 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
if (max_cycles > CF_COUNT_MASK)
max_cycles = CF_COUNT_MASK;
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles);
cpu->current_tb = tb;
/* execute the generated code */
@@ -116,7 +105,6 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
target_ulong cs_base,
uint64_t flags)
{
CPUState *cpu = ENV_GET_CPU(env);
TranslationBlock *tb, **ptb1;
unsigned int h;
tb_page_addr_t phys_pc, phys_page1;
@@ -154,7 +142,7 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
}
not_found:
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
tb = tb_gen_code(env, pc, cs_base, flags, 0);
found:
/* Move the last found TB to the head of the list */
@@ -164,13 +152,12 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
}
/* we add the TB in the virtual pc hash table */
cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
return tb;
}
static inline TranslationBlock *tb_find_fast(CPUArchState *env)
{
CPUState *cpu = ENV_GET_CPU(env);
TranslationBlock *tb;
target_ulong cs_base, pc;
int flags;
@@ -179,7 +166,7 @@ static inline TranslationBlock *tb_find_fast(CPUArchState *env)
always be the same before a given translated block
is executed. */
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
tb = tb_find_slow(env, pc, cs_base, flags);
@@ -196,11 +183,10 @@ void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
static void cpu_handle_debug_exception(CPUArchState *env)
{
CPUState *cpu = ENV_GET_CPU(env);
CPUWatchpoint *wp;
if (!cpu->watchpoint_hit) {
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (!env->watchpoint_hit) {
QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
wp->flags &= ~BP_WATCHPOINT_HIT;
}
}
@@ -219,16 +205,11 @@ int cpu_exec(CPUArchState *env)
#if !(defined(CONFIG_USER_ONLY) && \
(defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
CPUClass *cc = CPU_GET_CLASS(cpu);
#endif
#ifdef TARGET_I386
X86CPU *x86_cpu = X86_CPU(cpu);
#endif
int ret, interrupt_request;
TranslationBlock *tb;
uint8_t *tc_ptr;
uintptr_t next_tb;
/* This must be volatile so it is not trashed by longjmp() */
volatile bool have_tb_lock = false;
tcg_target_ulong next_tb;
if (cpu->halted) {
if (!cpu_has_work(cpu)) {
@@ -281,16 +262,16 @@ int cpu_exec(CPUArchState *env)
#else
#error unsupported target CPU
#endif
cpu->exception_index = -1;
env->exception_index = -1;
/* prepare setjmp context for exception handling */
for(;;) {
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
if (sigsetjmp(env->jmp_env, 0) == 0) {
/* if an exception is pending, we execute it here */
if (cpu->exception_index >= 0) {
if (cpu->exception_index >= EXCP_INTERRUPT) {
if (env->exception_index >= 0) {
if (env->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */
ret = cpu->exception_index;
ret = env->exception_index;
if (ret == EXCP_DEBUG) {
cpu_handle_debug_exception(env);
}
@@ -303,11 +284,11 @@ int cpu_exec(CPUArchState *env)
#if defined(TARGET_I386)
cc->do_interrupt(cpu);
#endif
ret = cpu->exception_index;
ret = env->exception_index;
break;
#else
cc->do_interrupt(cpu);
cpu->exception_index = -1;
env->exception_index = -1;
#endif
}
}
@@ -322,8 +303,8 @@ int cpu_exec(CPUArchState *env)
}
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
cpu_loop_exit(cpu);
env->exception_index = EXCP_DEBUG;
cpu_loop_exit(env);
}
#if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
@@ -331,32 +312,32 @@ int cpu_exec(CPUArchState *env)
if (interrupt_request & CPU_INTERRUPT_HALT) {
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
cpu_loop_exit(cpu);
env->exception_index = EXCP_HLT;
cpu_loop_exit(env);
}
#endif
#if defined(TARGET_I386)
#if !defined(CONFIG_USER_ONLY)
if (interrupt_request & CPU_INTERRUPT_POLL) {
cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
apic_poll_irq(x86_cpu->apic_state);
apic_poll_irq(env->apic_state);
}
#endif
if (interrupt_request & CPU_INTERRUPT_INIT) {
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
cpu_loop_exit(cpu);
do_cpu_init(x86_env_get_cpu(env));
env->exception_index = EXCP_HALTED;
cpu_loop_exit(env);
} else if (interrupt_request & CPU_INTERRUPT_SIPI) {
do_cpu_sipi(x86_cpu);
do_cpu_sipi(x86_env_get_cpu(env));
} else if (env->hflags2 & HF2_GIF_MASK) {
if ((interrupt_request & CPU_INTERRUPT_SMI) &&
!(env->hflags & HF_SMM_MASK)) {
cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
0);
cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
do_smm_enter(x86_cpu);
do_smm_enter(x86_env_get_cpu(env));
next_tb = 0;
} else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
!(env->hflags2 & HF2_NMI_MASK)) {
@@ -393,10 +374,7 @@ int cpu_exec(CPUArchState *env)
/* FIXME: this should respect TPR */
cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR,
0);
intno = ldl_phys(cpu->as,
env->vm_vmcb
+ offsetof(struct vmcb,
control.int_vector));
intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
do_interrupt_x86_hardirq(env, intno, 1);
cpu->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
@@ -418,7 +396,7 @@ int cpu_exec(CPUArchState *env)
#elif defined(TARGET_LM32)
if ((interrupt_request & CPU_INTERRUPT_HARD)
&& (env->ie & IE_IE)) {
cpu->exception_index = EXCP_IRQ;
env->exception_index = EXCP_IRQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -427,7 +405,7 @@ int cpu_exec(CPUArchState *env)
&& (env->sregs[SR_MSR] & MSR_IE)
&& !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))
&& !(env->iflags & (D_FLAG | IMM_FLAG))) {
cpu->exception_index = EXCP_IRQ;
env->exception_index = EXCP_IRQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -435,7 +413,7 @@ int cpu_exec(CPUArchState *env)
if ((interrupt_request & CPU_INTERRUPT_HARD) &&
cpu_mips_hw_interrupts_pending(env)) {
/* Raise it */
cpu->exception_index = EXCP_EXT_INTERRUPT;
env->exception_index = EXCP_EXT_INTERRUPT;
env->error_code = 0;
cc->do_interrupt(cpu);
next_tb = 0;
@@ -452,7 +430,7 @@ int cpu_exec(CPUArchState *env)
idx = EXCP_TICK;
}
if (idx >= 0) {
cpu->exception_index = idx;
env->exception_index = idx;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -467,7 +445,7 @@ int cpu_exec(CPUArchState *env)
if (((type == TT_EXTINT) &&
cpu_pil_allowed(env, pil)) ||
type != TT_EXTINT) {
cpu->exception_index = env->interrupt_index;
env->exception_index = env->interrupt_index;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -475,8 +453,8 @@ int cpu_exec(CPUArchState *env)
}
#elif defined(TARGET_ARM)
if (interrupt_request & CPU_INTERRUPT_FIQ
&& !(env->daif & PSTATE_F)) {
cpu->exception_index = EXCP_FIQ;
&& !(env->uncached_cpsr & CPSR_F)) {
env->exception_index = EXCP_FIQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -491,15 +469,15 @@ int cpu_exec(CPUArchState *env)
pc contains a magic address. */
if (interrupt_request & CPU_INTERRUPT_HARD
&& ((IS_M(env) && env->regs[15] < 0xfffffff0)
|| !(env->daif & PSTATE_I))) {
cpu->exception_index = EXCP_IRQ;
|| !(env->uncached_cpsr & CPSR_I))) {
env->exception_index = EXCP_IRQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
#elif defined(TARGET_UNICORE32)
if (interrupt_request & CPU_INTERRUPT_HARD
&& !(env->uncached_asr & ASR_I)) {
cpu->exception_index = UC32_EXCP_INTR;
env->exception_index = UC32_EXCP_INTR;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -534,7 +512,7 @@ int cpu_exec(CPUArchState *env)
}
}
if (idx >= 0) {
cpu->exception_index = idx;
env->exception_index = idx;
env->error_code = 0;
cc->do_interrupt(cpu);
next_tb = 0;
@@ -544,7 +522,7 @@ int cpu_exec(CPUArchState *env)
if (interrupt_request & CPU_INTERRUPT_HARD
&& (env->pregs[PR_CCS] & I_FLAG)
&& !env->locked_irq) {
cpu->exception_index = EXCP_IRQ;
env->exception_index = EXCP_IRQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -556,7 +534,7 @@ int cpu_exec(CPUArchState *env)
m_flag_archval = M_FLAG_V32;
}
if ((env->pregs[PR_CCS] & m_flag_archval)) {
cpu->exception_index = EXCP_NMI;
env->exception_index = EXCP_NMI;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -570,7 +548,7 @@ int cpu_exec(CPUArchState *env)
hardware doesn't rely on this, so we
provide/save the vector when the interrupt is
first signalled. */
cpu->exception_index = env->pending_vector;
env->exception_index = env->pending_vector;
do_interrupt_m68k_hardirq(env);
next_tb = 0;
}
@@ -582,7 +560,7 @@ int cpu_exec(CPUArchState *env)
}
#elif defined(TARGET_XTENSA)
if (interrupt_request & CPU_INTERRUPT_HARD) {
cpu->exception_index = EXC_IRQ;
env->exception_index = EXC_IRQ;
cc->do_interrupt(cpu);
next_tb = 0;
}
@@ -598,11 +576,26 @@ int cpu_exec(CPUArchState *env)
}
if (unlikely(cpu->exit_request)) {
cpu->exit_request = 0;
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
env->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(env);
}
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
/* restore flags in standard format */
#if defined(TARGET_I386)
log_cpu_state(cpu, CPU_DUMP_CCOP);
#elif defined(TARGET_M68K)
cpu_m68k_flush_flags(env, env->cc_op);
env->cc_op = CC_OP_FLAGS;
env->sr = (env->sr & 0xffe0)
| env->cc_dest | (env->cc_x << 4);
log_cpu_state(cpu, 0);
#else
log_cpu_state(cpu, 0);
#endif
}
#endif /* DEBUG_DISAS */
spin_lock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock = true;
tb = tb_find_fast(env);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
doing it in tb_find_slow */
@@ -624,7 +617,6 @@ int cpu_exec(CPUArchState *env)
tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK, tb);
}
have_tb_lock = false;
spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
/* cpu_interrupt might be called while translating the
@@ -654,25 +646,25 @@ int cpu_exec(CPUArchState *env)
/* Instruction counter expired. */
int insns_left;
tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
insns_left = cpu->icount_decr.u32;
if (cpu->icount_extra && insns_left >= 0) {
insns_left = env->icount_decr.u32;
if (env->icount_extra && insns_left >= 0) {
/* Refill decrementer and continue execution. */
cpu->icount_extra += insns_left;
if (cpu->icount_extra > 0xffff) {
env->icount_extra += insns_left;
if (env->icount_extra > 0xffff) {
insns_left = 0xffff;
} else {
insns_left = cpu->icount_extra;
insns_left = env->icount_extra;
}
cpu->icount_extra -= insns_left;
cpu->icount_decr.u16.low = insns_left;
env->icount_extra -= insns_left;
env->icount_decr.u16.low = insns_left;
} else {
if (insns_left > 0) {
/* Execute remaining instructions. */
cpu_exec_nocache(env, insns_left, tb);
}
cpu->exception_index = EXCP_INTERRUPT;
env->exception_index = EXCP_INTERRUPT;
next_tb = 0;
cpu_loop_exit(cpu);
cpu_loop_exit(env);
}
break;
}
@@ -689,17 +681,6 @@ int cpu_exec(CPUArchState *env)
* local variables as longjmp is marked 'noreturn'. */
cpu = current_cpu;
env = cpu->env_ptr;
#if !(defined(CONFIG_USER_ONLY) && \
(defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
cc = CPU_GET_CLASS(cpu);
#endif
#ifdef TARGET_I386
x86_cpu = X86_CPU(cpu);
#endif
if (have_tb_lock) {
spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock = false;
}
}
} /* for(;;) */

cpus.c (430 lines changed)

@@ -37,7 +37,6 @@
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#ifndef _WIN32
#include "qemu/compatfd.h"
@@ -63,20 +62,15 @@
static CPUState *next_cpu;
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
}
static bool cpu_thread_is_idle(CPUState *cpu)
{
if (cpu->stop || cpu->queued_work_first) {
return false;
}
if (cpu_is_stopped(cpu)) {
if (cpu->stopped || !runstate_is_running()) {
return true;
}
if (!cpu->halted || cpu_has_work(cpu) ||
if (!cpu->halted || qemu_cpu_has_work(cpu) ||
kvm_halt_in_kernel()) {
return false;
}
@@ -87,7 +81,7 @@ static bool all_cpu_threads_idle(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
@@ -98,32 +92,21 @@ static bool all_cpu_threads_idle(void)
/***********************************************************/
/* guest cycle counter */
/* Protected by TimersState seqlock */
/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static int64_t vm_clock_warp_start;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
/* Only written by TCG thread */
static int64_t qemu_icount;
/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;
typedef struct TimersState {
/* Protected by BQL. */
int64_t cpu_ticks_prev;
int64_t cpu_ticks_offset;
/* cpu_clock_offset can be read out of BQL, so protect it with
* this lock.
*/
QemuSeqLock vm_clock_seqlock;
int64_t cpu_clock_offset;
int32_t cpu_ticks_enabled;
int64_t dummy;
@@ -132,115 +115,74 @@ typedef struct TimersState {
static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
int64_t cpu_get_icount(void)
{
int64_t icount;
CPUState *cpu = current_cpu;
icount = qemu_icount;
if (cpu) {
if (!cpu_can_do_io(cpu)) {
CPUArchState *env = cpu->env_ptr;
if (!can_do_io(env)) {
fprintf(stderr, "Bad clock read\n");
}
icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
icount -= (env->icount_decr.u16.low + env->icount_extra);
}
return qemu_icount_bias + (icount << icount_time_shift);
}
int64_t cpu_get_icount(void)
{
int64_t icount;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
icount = cpu_get_icount_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return icount;
}
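
Editorial aside (not part of the diff): the do/while above is the classic seqlock read side — retry whenever a writer was active during the read or completed in the middle of it. A single-threaded miniature of both halves; QEMU's real implementation, with the required memory barriers, is in the qemu/seqlock.h header this file now includes:

#include <assert.h>
#include <stdint.h>

typedef struct { unsigned seq; int64_t value; } MiniSeqLock;

static unsigned mini_read_begin(const MiniSeqLock *sl)
{
    return sl->seq;
}

static int mini_read_retry(const MiniSeqLock *sl, unsigned start)
{
    /* odd sequence: write in progress; changed sequence: writer raced us */
    return (start & 1) || sl->seq != start;
}

static void mini_write(MiniSeqLock *sl, int64_t v)
{
    sl->seq++;                  /* becomes odd: write in progress */
    sl->value = v;
    sl->seq++;                  /* even again: stable */
}

int main(void)
{
    MiniSeqLock sl = { 0, 0 };
    unsigned start;
    int64_t v;

    mini_write(&sl, 42);
    do {
        start = mini_read_begin(&sl);
        v = sl.value;
    } while (mini_read_retry(&sl, start));
    assert(v == 42);
    return 0;
}
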
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
int64_t ticks;
if (use_icount) {
return cpu_get_icount();
}
ticks = timers_state.cpu_ticks_offset;
if (timers_state.cpu_ticks_enabled) {
ticks += cpu_get_real_ticks();
if (!timers_state.cpu_ticks_enabled) {
return timers_state.cpu_ticks_offset;
} else {
int64_t ticks;
ticks = cpu_get_real_ticks();
if (timers_state.cpu_ticks_prev > ticks) {
/* Note: non-increasing ticks may happen if the host uses
software suspend */
timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
}
timers_state.cpu_ticks_prev = ticks;
return ticks + timers_state.cpu_ticks_offset;
}
if (timers_state.cpu_ticks_prev > ticks) {
/* Note: non-increasing ticks may happen if the host uses
software suspend */
timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
ticks = timers_state.cpu_ticks_prev;
}
timers_state.cpu_ticks_prev = ticks;
return ticks;
}
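
The cpu_ticks_prev bookkeeping above guards against the host cycle counter appearing to run backwards (for example across software suspend): any regression is folded into the offset, so callers always see a non-decreasing value. An illustrative sketch of just that guard:

/* demo_monotonic.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

static int64_t prev, offset;

static int64_t monotonic_ticks(int64_t raw)
{
    if (prev > raw) {
        /* Host counter jumped backwards: absorb it into the offset. */
        offset += prev - raw;
    }
    prev = raw;
    return raw + offset;
}

int main(void)
{
    printf("%lld\n", (long long)monotonic_ticks(100)); /* 100 */
    printf("%lld\n", (long long)monotonic_ticks(40));  /* still 100 */
    printf("%lld\n", (long long)monotonic_ticks(50));  /* 110 */
    return 0;
}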
static int64_t cpu_get_clock_locked(void)
{
int64_t ticks;
ticks = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
ticks += get_clock();
}
return ticks;
}
/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
int64_t ti;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
ti = cpu_get_clock_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return ti;
if (!timers_state.cpu_ticks_enabled) {
return timers_state.cpu_clock_offset;
} else {
ti = get_clock();
return ti + timers_state.cpu_clock_offset;
}
}
/* enable cpu_get_ticks()
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
/* enable cpu_get_ticks() */
void cpu_enable_ticks(void)
{
/* The thing really protected by the seqlock here is cpu_clock_offset. */
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
* cpu_get_ticks() after that.
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
cpu_get_ticks() after that. */
void cpu_disable_ticks(void)
{
/* The thing really protected by the seqlock here is cpu_clock_offset. */
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_real_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_offset = cpu_get_ticks();
timers_state.cpu_clock_offset = cpu_get_clock();
timers_state.cpu_ticks_enabled = 0;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
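
cpu_enable_ticks() and cpu_disable_ticks() implement stopwatch bookkeeping: enabling subtracts the current counter reading from the offset and disabling folds it back in, so while the clock is stopped every read returns the same frozen value. A sketch of the idea with illustrative names:

/* demo_stopwatch.c: illustrative only */
#include <stdbool.h>
#include <stdint.h>

static int64_t offset;
static bool enabled;

static int64_t read_ticks(int64_t now)
{
    return enabled ? now + offset : offset;
}

static void enable_ticks(int64_t now)
{
    if (!enabled) {
        offset -= now;      /* resume counting from the frozen value */
        enabled = true;
    }
}

static void disable_ticks(int64_t now)
{
    if (enabled) {
        offset += now;      /* freeze the current reading */
        enabled = false;
    }
}

int main(void)
{
    enable_ticks(100);            /* offset = -100 */
    disable_ticks(160);           /* offset = 60: 60 ticks elapsed */
    return (int)read_ticks(999);  /* still 60; the clock is frozen */
}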
/* Correlation between real and virtual time is always going to be
@@ -254,19 +196,13 @@ static void icount_adjust(void)
int64_t cur_time;
int64_t cur_icount;
int64_t delta;
/* Protected by TimersState mutex. */
static int64_t last_delta;
/* If the VM is not running, then do nothing. */
if (!runstate_is_running()) {
return;
}
seqlock_write_lock(&timers_state.vm_clock_seqlock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
cur_time = cpu_get_clock();
cur_icount = qemu_get_clock_ns(vm_clock);
delta = cur_icount - cur_time;
/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
if (delta > 0
@@ -283,21 +219,19 @@ static void icount_adjust(void)
}
last_delta = delta;
qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
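
Both sides of this hunk use the same mapping from executed instructions to virtual time, virtual_ns = qemu_icount_bias + (qemu_icount << icount_time_shift); icount_adjust() recomputes the bias so virtual time tracks real time. A worked example with made-up values:

/* demo_icount_bias.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int shift = 10;            /* icount_time_shift: ~1 MIPS */
    int64_t bias = 0;          /* qemu_icount_bias */
    int64_t icount = 1000000;  /* instructions executed so far */

    int64_t virt_ns = bias + (icount << shift);
    printf("virtual time: %lld ns\n", (long long)virt_ns); /* 1024000000 */

    /* icount_adjust(): choose bias so virtual time equals real time. */
    int64_t real_ns = 1100000000;
    bias = real_ns - (icount << shift);
    printf("new bias: %lld ns\n", (long long)bias);         /* 76000000 */
    return 0;
}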
static void icount_adjust_rt(void *opaque)
{
timer_mod(icount_rt_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
qemu_mod_timer(icount_rt_timer,
qemu_get_clock_ms(rt_clock) + 1000);
icount_adjust();
}
static void icount_adjust_vm(void *opaque)
{
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
get_ticks_per_sec() / 10);
qemu_mod_timer(icount_vm_timer,
qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
icount_adjust();
}
@@ -308,59 +242,48 @@ static int64_t qemu_icount_round(int64_t count)
static void icount_warp_rt(void *opaque)
{
/* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
* changes from -1 to another value, so the race here is okay.
*/
if (atomic_read(&vm_clock_warp_start) == -1) {
if (vm_clock_warp_start == -1) {
return;
}
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
int64_t warp_delta;
warp_delta = clock - vm_clock_warp_start;
if (use_icount == 2) {
int64_t clock = qemu_get_clock_ns(rt_clock);
int64_t warp_delta = clock - vm_clock_warp_start;
if (use_icount == 1) {
qemu_icount_bias += warp_delta;
} else {
/*
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
* In adaptive mode, do not let the vm_clock run too
* far ahead of real time.
*/
int64_t cur_time = cpu_get_clock_locked();
int64_t cur_icount = cpu_get_icount_locked();
int64_t cur_time = cpu_get_clock();
int64_t cur_icount = qemu_get_clock_ns(vm_clock);
int64_t delta = cur_time - cur_icount;
warp_delta = MIN(warp_delta, delta);
qemu_icount_bias += MIN(warp_delta, delta);
}
if (qemu_clock_expired(vm_clock)) {
qemu_notify_event();
}
qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
void qtest_clock_warp(int64_t dest)
{
int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t clock = qemu_get_clock_ns(vm_clock);
assert(qtest_enabled());
while (clock < dest) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t deadline = qemu_clock_deadline(vm_clock);
int64_t warp = MIN(dest - clock, deadline);
seqlock_write_lock(&timers_state.vm_clock_seqlock);
qemu_icount_bias += warp;
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
qemu_run_timers(vm_clock);
clock = qemu_get_clock_ns(vm_clock);
}
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
qemu_notify_event();
}
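
The loop in qtest_clock_warp() advances the virtual clock to dest in steps bounded by the next timer deadline, running any due timers after each step. A standalone sketch of just the stepping arithmetic (made-up values, no real timers):

/* demo_qtest_warp.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    int64_t clock = 0, dest = 1000;
    int64_t deadline = 300;   /* next timer is always 300 ns away here */

    while (clock < dest) {
        int64_t warp = MIN(dest - clock, deadline);
        clock += warp;   /* the real code adds warp to qemu_icount_bias */
        printf("clock=%lld\n", (long long)clock); /* 300, 600, 900, 1000 */
        /* the real code now runs all timers due at 'clock' */
    }
    return 0;
}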
void qemu_clock_warp(QEMUClockType type)
void qemu_clock_warp(QEMUClock *clock)
{
int64_t clock;
int64_t deadline;
/*
@@ -368,20 +291,20 @@ void qemu_clock_warp(QEMUClockType type)
* applicable to other clocks. But a clock argument removes the
* need for if statements all over the place.
*/
if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
if (clock != vm_clock || !use_icount) {
return;
}
/*
* If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
* This ensures that the deadline for the timer is computed correctly below.
* If the CPUs have been sleeping, advance the vm_clock timer now. This
* ensures that the deadline for the timer is computed correctly below.
* This also makes sure that the insn counter is synchronized before the
* CPU starts running, in case the CPU is woken by an event other than
* the earliest QEMU_CLOCK_VIRTUAL timer.
* the earliest vm_clock timer.
*/
icount_warp_rt(NULL);
timer_del(icount_warp_timer);
if (!all_cpu_threads_idle()) {
if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
qemu_del_timer(icount_warp_timer);
return;
}
@@ -390,39 +313,28 @@ void qemu_clock_warp(QEMUClockType type)
return;
}
/* We want to use the earliest deadline from ALL vm_clocks */
clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline < 0) {
return;
}
vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
deadline = qemu_clock_deadline(vm_clock);
if (deadline > 0) {
/*
* Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
* Ensure the vm_clock proceeds even when the virtual CPU goes to
* sleep. Otherwise, the CPU might be waiting for a future timer
* interrupt to wake it up, but the interrupt never comes because
* the vCPU isn't running any insns and thus doesn't advance the
* QEMU_CLOCK_VIRTUAL.
* vm_clock.
*
* An extreme solution for this problem would be to never let VCPUs
* sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
* timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
* event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
* after some "real" time (related to the time left until the next
* event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
* This keeps the warps from being visible externally; for example,
* you will not be sending network packets continuously instead of
* every 100ms.
* sleep in icount mode if there is a pending vm_clock timer; rather
* time could just advance to the next vm_clock event. Instead, we
* do stop VCPUs and only advance vm_clock after some "real" time
* (related to the time left until the next event) has passed. The
* rt_clock timer will do this. This keeps the warps from being too
* visible externally---for example, you will not be sending network
* packets continuously instead of every 100ms.
*/
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
} else {
qemu_notify_event();
}
}
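
In adaptive icount mode the warp applied by icount_warp_rt() is clamped so the virtual clock may catch up to real time but never overtake it. A standalone sketch of the clamp with made-up values:

/* demo_warp_clamp.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    int64_t warp_delta = 80;   /* real ns elapsed since the warp started */
    int64_t cur_time = 1000;   /* real time */
    int64_t cur_icount = 950;  /* virtual time */

    /* The virtual clock is 50 ns behind, so only 50 of the 80 ns apply. */
    int64_t applied = MIN(warp_delta, cur_time - cur_icount);
    printf("apply %lld of %lld ns\n",
           (long long)applied, (long long)warp_delta);
    return 0;
}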
@@ -441,14 +353,12 @@ static const VMStateDescription vmstate_timers = {
void configure_icount(const char *option)
{
seqlock_init(&timers_state.vm_clock_seqlock, NULL);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
if (!option) {
return;
}
icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
icount_warp_rt, NULL);
icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
if (strcmp(option, "auto") != 0) {
icount_time_shift = strtol(option, NULL, 0);
use_icount = 1;
@@ -466,15 +376,12 @@ void configure_icount(const char *option)
the virtual time trigger catches emulated time passing too fast.
Realtime triggers occur even when idle, so use them less frequently
than VM triggers. */
icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
icount_adjust_rt, NULL);
timer_mod(icount_rt_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
icount_adjust_vm, NULL);
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
get_ticks_per_sec() / 10);
icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
qemu_mod_timer(icount_rt_timer,
qemu_get_clock_ms(rt_clock) + 1000);
icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
qemu_mod_timer(icount_vm_timer,
qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
}
/***********************************************************/
@@ -487,7 +394,7 @@ void hw_error(const char *fmt, ...)
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
}
@@ -499,7 +406,7 @@ void cpu_synchronize_all_states(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_state(cpu);
}
}
@@ -508,7 +415,7 @@ void cpu_synchronize_all_post_reset(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_post_reset(cpu);
}
}
@@ -517,11 +424,16 @@ void cpu_synchronize_all_post_init(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_post_init(cpu);
}
}
bool cpu_is_stopped(CPUState *cpu)
{
return !runstate_is_running() || cpu->stopped;
}
static int do_vm_stop(RunState state)
{
int ret = 0;
@@ -545,7 +457,7 @@ static bool cpu_can_run(CPUState *cpu)
if (cpu->stop) {
return false;
}
if (cpu_is_stopped(cpu)) {
if (cpu->stopped || !runstate_is_running()) {
return false;
}
return true;
@@ -823,7 +735,7 @@ static void qemu_tcg_wait_io_event(void)
while (all_cpu_threads_idle()) {
/* Start accounting real time to the virtual clock if the CPUs
are idle. */
qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
qemu_clock_warp(vm_clock);
qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
}
@@ -831,7 +743,7 @@ static void qemu_tcg_wait_io_event(void)
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
qemu_wait_io_event_common(cpu);
}
}
@@ -925,6 +837,12 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
static void tcg_exec_all(void);
static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@@ -933,31 +851,23 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
qemu_mutex_lock(&qemu_global_mutex);
CPU_FOREACH(cpu) {
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
}
qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
qemu_cond_signal(&qemu_cpu_cond);
/* wait for initial kick-off after machine start */
while (QTAILQ_FIRST(&cpus)->stopped) {
while (first_cpu->stopped) {
qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
/* process any pending work */
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
qemu_wait_io_event_common(cpu);
}
}
while (1) {
tcg_exec_all();
if (use_icount) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
qemu_notify_event();
}
qemu_tcg_wait_io_event();
}
@@ -1059,12 +969,13 @@ void qemu_mutex_unlock_iothread(void)
static int all_vcpus_paused(void)
{
CPUState *cpu;
CPUState *cpu = first_cpu;
CPU_FOREACH(cpu) {
while (cpu) {
if (!cpu->stopped) {
return 0;
}
cpu = cpu->next_cpu;
}
return 1;
@@ -1072,20 +983,23 @@ static int all_vcpus_paused(void)
void pause_all_vcpus(void)
{
CPUState *cpu;
CPUState *cpu = first_cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
qemu_clock_enable(vm_clock, false);
while (cpu) {
cpu->stop = true;
qemu_cpu_kick(cpu);
cpu = cpu->next_cpu;
}
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
CPU_FOREACH(cpu) {
cpu = first_cpu;
while (cpu) {
cpu->stop = false;
cpu->stopped = true;
cpu = cpu->next_cpu;
}
return;
}
@@ -1093,8 +1007,10 @@ void pause_all_vcpus(void)
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
cpu = first_cpu;
while (cpu) {
qemu_cpu_kick(cpu);
cpu = cpu->next_cpu;
}
}
}
@@ -1108,33 +1024,25 @@ void cpu_resume(CPUState *cpu)
void resume_all_vcpus(void)
{
CPUState *cpu;
CPUState *cpu = first_cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
CPU_FOREACH(cpu) {
qemu_clock_enable(vm_clock, true);
while (cpu) {
cpu_resume(cpu);
cpu = cpu->next_cpu;
}
}
/* Size of the temporary buffer used to form a vCPU thread name */
#define VCPU_THREAD_NAME_SIZE 16
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
tcg_cpu_address_space_init(cpu, cpu->as);
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
tcg_halt_cond = cpu->halt_cond;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
@@ -1150,15 +1058,11 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
@@ -1166,14 +1070,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu)
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
@@ -1235,7 +1135,6 @@ int vm_stop_force_state(RunState state)
static int tcg_cpu_exec(CPUArchState *env)
{
CPUState *cpu = ENV_GET_CPU(env);
int ret;
#ifdef CONFIG_PROFILER
int64_t ti;
@@ -1246,28 +1145,16 @@ static int tcg_cpu_exec(CPUArchState *env)
#endif
if (use_icount) {
int64_t count;
int64_t deadline;
int decr;
qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
cpu->icount_decr.u16.low = 0;
cpu->icount_extra = 0;
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
/* Maintain prior (possibly buggy) behaviour where if no deadline
* was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
* INT32_MAX nanoseconds ahead, we still use INT32_MAX
* nanoseconds.
*/
if ((deadline < 0) || (deadline > INT32_MAX)) {
deadline = INT32_MAX;
}
count = qemu_icount_round(deadline);
qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
env->icount_decr.u16.low = 0;
env->icount_extra = 0;
count = qemu_icount_round(qemu_clock_deadline(vm_clock));
qemu_icount += count;
decr = (count > 0xffff) ? 0xffff : count;
count -= decr;
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
env->icount_decr.u16.low = decr;
env->icount_extra = count;
}
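
The decr/count split above exists because the TCG core decrements only the 16-bit icount_decr.u16.low field; any larger instruction budget spills into icount_extra and is refilled later. A worked example of the split:

/* demo_icount_split.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t count = 0x12345;   /* instructions until the next deadline */
    int decr = (count > 0xffff) ? 0xffff : (int)count;
    int64_t extra = count - decr;

    printf("low=%#x extra=%#llx\n",
           (unsigned)decr, (unsigned long long)extra);
    /* prints: low=0xffff extra=0x2346 */
    return 0;
}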
ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
@@ -1276,9 +1163,10 @@ static int tcg_cpu_exec(CPUArchState *env)
if (use_icount) {
/* Fold pending instructions back into the
instruction counter, and clear the interrupt flag. */
qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
cpu->icount_decr.u32 = 0;
cpu->icount_extra = 0;
qemu_icount -= (env->icount_decr.u16.low
+ env->icount_extra);
env->icount_decr.u32 = 0;
env->icount_extra = 0;
}
return ret;
}
@@ -1287,17 +1175,17 @@ static void tcg_exec_all(void)
{
int r;
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
/* Account partial waits to the vm_clock. */
qemu_clock_warp(vm_clock);
if (next_cpu == NULL) {
next_cpu = first_cpu;
}
for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
CPUState *cpu = next_cpu;
CPUArchState *env = cpu->env_ptr;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
qemu_clock_enable(vm_clock,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
@@ -1318,7 +1206,7 @@ void set_numa_modes(void)
CPUState *cpu;
int i;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
for (i = 0; i < nb_numa_nodes; i++) {
if (test_bit(cpu->cpu_index, node_cpumask[i])) {
cpu->numa_node = i;
@@ -1340,7 +1228,7 @@ CpuInfoList *qmp_query_cpus(Error **errp)
CpuInfoList *head = NULL, *cur_item = NULL;
CPUState *cpu;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
CpuInfoList *info;
#if defined(TARGET_I386)
X86CPU *x86_cpu = X86_CPU(cpu);
@@ -1421,10 +1309,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
l = sizeof(buf);
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
goto exit;
}
cpu_memory_rw_debug(cpu, addr, buf, l, 0);
if (fwrite(buf, 1, l, f) != l) {
error_set(errp, QERR_IO_ERROR);
goto exit;
@@ -1472,27 +1357,14 @@ void qmp_inject_nmi(Error **errp)
#if defined(TARGET_I386)
CPUState *cs;
CPU_FOREACH(cs) {
for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
if (!cpu->apic_state) {
if (!env->apic_state) {
cpu_interrupt(cs, CPU_INTERRUPT_NMI);
} else {
apic_deliver_nmi(cpu->apic_state);
}
}
#elif defined(TARGET_S390X)
CPUState *cs;
S390CPU *cpu;
CPU_FOREACH(cs) {
cpu = S390_CPU(cs);
if (cpu->env.cpu_num == monitor_get_cpu_index()) {
if (s390_cpu_restart(S390_CPU(cs)) == -1) {
error_set(errp, QERR_UNSUPPORTED);
return;
}
break;
apic_deliver_nmi(env->apic_state);
}
}
#else

cputlb.c

@@ -26,7 +26,6 @@
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
//#define DEBUG_TLB
//#define DEBUG_TLB_CHECK
@@ -34,6 +33,13 @@
/* statistics */
int tlb_flush_count;
static const CPUTLBEntry s_cputlb_empty_entry = {
.addr_read = -1,
.addr_write = -1,
.addr_code = -1,
.addend = -1,
};
/* NOTE:
* If flush_global is true (the usual case), flush all tlb entries.
* If flush_global is false, flush (at least) all tlb entries not
@@ -46,9 +52,10 @@ int tlb_flush_count;
* entries from the TLB at any time, so flushing more entries than
* required is only an efficiency issue, not a correctness issue.
*/
void tlb_flush(CPUState *cpu, int flush_global)
void tlb_flush(CPUArchState *env, int flush_global)
{
CPUArchState *env = cpu->env_ptr;
CPUState *cpu = ENV_GET_CPU(env);
int i;
#if defined(DEBUG_TLB)
printf("tlb_flush:\n");
@@ -57,8 +64,15 @@ void tlb_flush(CPUState *cpu, int flush_global)
links while we are modifying them */
cpu->current_tb = NULL;
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
for (i = 0; i < CPU_TLB_SIZE; i++) {
int mmu_idx;
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
}
}
memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
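
Both variants of tlb_flush() invalidate entries by filling them with -1 (a memset on one side, s_cputlb_empty_entry on the other): a page-aligned lookup address can never compare equal to an all-ones field. A simplified sketch of why (the real hit test also masks the entry with TLB_INVALID_MASK):

/* demo_tlb_invalid.c: illustrative only, 32-bit fields, 4 KiB pages */
#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK 0xfffff000u

int main(void)
{
    uint32_t entry = (uint32_t)-1;   /* invalidated entry: all ones */
    uint32_t addr = 0x00401000;      /* page-aligned lookup address */

    /* The low bits of 'entry' are set, so this can never be true. */
    printf("hit=%d\n", (addr & PAGE_MASK) == entry);
    return 0;
}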
@@ -73,13 +87,13 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
(TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
addr == (tlb_entry->addr_code &
(TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
memset(tlb_entry, -1, sizeof(*tlb_entry));
*tlb_entry = s_cputlb_empty_entry;
}
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
void tlb_flush_page(CPUArchState *env, target_ulong addr)
{
CPUArchState *env = cpu->env_ptr;
CPUState *cpu = ENV_GET_CPU(env);
int i;
int mmu_idx;
@@ -93,7 +107,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
#endif
tlb_flush(cpu, 1);
tlb_flush(env, 1);
return;
}
/* must reset current TB so that interrupts cannot modify the
@@ -106,23 +120,24 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
}
tb_flush_jmp_cache(cpu, addr);
tb_flush_jmp_cache(env, addr);
}
/* update the TLBs so that writes to code in the virtual page 'addr'
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_CODE);
cpu_physical_memory_reset_dirty(ram_addr,
ram_addr + TARGET_PAGE_SIZE,
CODE_DIRTY_FLAG);
}
/* update the TLB so that writes in physical page 'phys_addr' are no longer
tested for self-modifying code */
void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr,
void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
target_ulong vaddr)
{
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
}
static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
@@ -154,12 +169,27 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
return ram_addr;
}
static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
{
ram_addr_t ram_addr;
void *p;
if (tlb_is_dirty_ram(tlb_entry)) {
p = (void *)(uintptr_t)((tlb_entry->addr_write & TARGET_PAGE_MASK)
+ tlb_entry->addend);
ram_addr = qemu_ram_addr_from_host_nofail(p);
if (!cpu_physical_memory_is_dirty(ram_addr)) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
}
}
void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
{
CPUState *cpu;
CPUArchState *env;
CPU_FOREACH(cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
int mmu_idx;
env = cpu->env_ptr;
@@ -221,11 +251,10 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
/* Add a new TLB entry. At most one entry for a given virtual address
is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
supplied size is only used by tlb_flush_page. */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
void tlb_set_page(CPUArchState *env, target_ulong vaddr,
hwaddr paddr, int prot,
int mmu_idx, target_ulong size)
{
CPUArchState *env = cpu->env_ptr;
MemoryRegionSection *section;
unsigned int index;
target_ulong address;
@@ -240,7 +269,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
}
sz = size;
section = address_space_translate_for_iotlb(cpu->as, paddr,
section = address_space_translate_for_iotlb(&address_space_memory, paddr,
&xlat, &sz);
assert(sz >= TARGET_PAGE_SIZE);
@@ -261,7 +290,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
}
code_address = address;
iotlb = memory_region_section_get_iotlb(cpu, section, vaddr, paddr, xlat,
iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, xlat,
prot, &address);
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
@@ -285,8 +314,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(section->mr->ram_addr
+ xlat)) {
&& !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
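
tlb_set_page() stores each mapping in a direct-mapped table: the slot is the virtual page number masked by the table size, as in the index computation shown above. A standalone sketch with illustrative sizes:

/* demo_tlb_index.c: illustrative only */
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12
#define CPU_TLB_SIZE     256   /* must be a power of two */

int main(void)
{
    uint64_t vaddr = 0x7f1234567000ull;
    unsigned index = (unsigned)((vaddr >> TARGET_PAGE_BITS)
                                & (CPU_TLB_SIZE - 1));
    printf("index=%u\n", index);   /* 0x567 & 0xff = 0x67 = 103 */
    return 0;
}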
@@ -306,7 +334,6 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
int mmu_idx, page_index, pd;
void *p;
MemoryRegion *mr;
CPUState *cpu = ENV_GET_CPU(env1);
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env1);
@@ -315,14 +342,15 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
cpu_ldub_code(env1, addr);
}
pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
mr = iotlb_to_region(cpu->as, pd);
mr = iotlb_to_region(pd);
if (memory_region_is_unassigned(mr)) {
CPUState *cpu = ENV_GET_CPU(env1);
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->do_unassigned_access) {
cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
} else {
cpu_abort(cpu, "Trying to execute code outside RAM or ROM at 0x"
cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
TARGET_FMT_lx "\n", addr);
}
}

default-configs/aarch64-linux-user.mak

@@ -1,3 +0,0 @@
# Default configuration for aarch64-linux-user
CONFIG_GDBSTUB_XML=y

default-configs/aarch64-softmmu.mak

@@ -1,6 +0,0 @@
# Default configuration for aarch64-softmmu
# We support all the 32 bit boards so need all their config
include arm-softmmu.mak
# Currently no 64-bit specific config requirements

Some files were not shown because too many files have changed in this diff.