Compare commits


625 Commits

Author SHA1 Message Date
Greg Kurz
c648e640f4 9pfs: Fully restart unreclaim loop (CVE-2021-20181)
Git-commit: 89fbea8737
References: bsc#1182137

Depending on the client activity, the server can be asked to open a huge
number of file descriptors and eventually hit RLIMIT_NOFILE. This is
currently mitigated using a reclaim logic: the server closes the file
descriptors of idle fids, based on the assumption that it will be able
to re-open them later. This assumption doesn't hold of course if the
client requests the file to be unlinked. In this case, we loop on the
entire fid list and mark all related fids as unreclaimable (the reclaim
logic will just ignore them) and, of course, we open or re-open their
file descriptors if needed since we're about to unlink the file.

This is the purpose of v9fs_mark_fids_unreclaim(). Since the actual
opening of a file can cause the coroutine to yield, another client
request could possibly add a new fid that we may want to mark as
non-reclaimable as well. The loop is thus restarted if the re-open
request was actually transmitted to the backend. This is achieved
by keeping a reference on the first fid (head) before traversing
the list.

This is wrong in several ways:
- a potential clunk request from the client could tear the first
  fid down and cause the reference to be stale. This leads to a
  use-after-free error that can be detected with ASAN, using a
  custom 9p client
- fids are added at the head of the list: restarting from the
  previous head will always miss fids added by some other
  potential request

All these problems could be avoided if fids were being added at the
end of the list. This can be achieved with a QSIMPLEQ, but this is
probably too much change for a bug fix. For now let's keep it
simple and just restart the loop from the current head.
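
As an illustration of the restart pattern described above, here is a minimal
sketch with made-up types (struct fid, reopen_fid() and
mark_fids_unreclaimable() are hypothetical stand-ins, not the actual 9pfs
code): after an operation that may yield, traversal restarts from the current
list head rather than from a saved pointer that may be stale or miss newly
prepended fids.

    #include <stdbool.h>
    #include <stddef.h>

    struct fid {
        struct fid *next;
        bool reclaimable;
        bool needs_reopen;
    };

    /* Stand-in for the re-open that can yield to other requests. */
    static bool reopen_fid(struct fid *f)
    {
        f->needs_reopen = false;
        return true;   /* "we yielded and did real work" */
    }

    static void mark_fids_unreclaimable(struct fid **head)
    {
    again:
        for (struct fid *f = *head; f != NULL; f = f->next) {
            f->reclaimable = false;
            if (f->needs_reopen && reopen_fid(f)) {
                /* The list may have changed while we yielded: restart
                 * from the *current* head, not a stale reference. */
                goto again;
            }
        }
    }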

Fixes: CVE-2021-20181
Buglink: https://bugs.launchpad.net/qemu/+bug/1911666
Reported-by: Zero Day Initiative <zdi-disclosures@trendmicro.com>
Reviewed-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Message-Id: <161064025265.1838153.15185571283519390907.stgit@bahia.lan>
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-04-06 16:51:18 -06:00
Gerd Hoffmann
07a1830167 usb: fix setup_len init (CVE-2020-14364)
Git-commit: b946434f26
References: bsc#1175441, CVE-2020-14364

Store calculated setup_len in a local variable, verify it, and only
write it to the struct (USBDevice->setup_len) in case it passed the
sanity checks.

This prevents other code (do_token_{in,out} functions specifically)
from working with invalid USBDevice->setup_len values and overrunning
the USBDevice->setup_buf[] buffer.
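
The pattern being described can be sketched like this (types and sizes are
simplified placeholders, not QEMU's USBDevice definition): validate into a
local variable first, and only commit to the device state once the value is
known to be within bounds.

    #include <stdint.h>
    #include <string.h>

    enum { SETUP_BUF_SIZE = 4096 };   /* placeholder size */

    typedef struct {
        int     setup_len;
        uint8_t setup_buf[SETUP_BUF_SIZE];
    } FakeUSBDevice;

    static int usb_handle_setup(FakeUSBDevice *dev, const uint8_t *data, int wlength)
    {
        int setup_len = wlength;               /* work on a local copy */

        if (setup_len < 0 || setup_len > SETUP_BUF_SIZE) {
            return -1;                         /* reject before touching dev */
        }
        dev->setup_len = setup_len;            /* commit only after the check */
        memcpy(dev->setup_buf, data, (size_t)setup_len);
        return 0;
    }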

Fixes: CVE-2020-14364
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Tested-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20200825053636.29648-1-kraxel@redhat.com
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-04-06 16:51:18 -06:00
Prasad J Pandit
c9ae86d5f9 usb: check RNDIS message length
Git-commit: 64c9bc181f
References: bsc#1175441, CVE-2020-14364

When processing remote NDIS control message packets, the USB Net
device emulator uses a fixed-length (4096 byte) data buffer. The incoming
packet length could exceed this limit. Add a check to avoid it.
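
In the same spirit, a minimal bound check for a fixed-size control buffer
could look like this (RNDIS_BUF_SIZE and the helper are illustrative, not the
actual usb-net code):

    #include <stdint.h>
    #include <string.h>

    enum { RNDIS_BUF_SIZE = 4096 };

    static int rndis_queue_control_msg(uint8_t *buf, const uint8_t *msg,
                                       uint32_t msg_len)
    {
        if (msg_len > RNDIS_BUF_SIZE) {
            return -1;                /* drop oversized control messages */
        }
        memcpy(buf, msg, msg_len);
        return 0;
    }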

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1455648821-17340-2-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-04-06 16:51:18 -06:00
Prasad J Pandit
166edbe8ee slirp: check pkt_len before reading protocol header
Git-commit: 2e1dcbc0c2af64fcb17009eaf2ceedd81be2b27f
References: bsc#1179467

While processing ARP/NCSI packets in 'arp_input' or 'ncsi_input'
routines, ensure that pkt_len is large enough to accommodate the
respective protocol headers, lest it should do an OOB access.
Add check to avoid it.

CVE-2020-29129 CVE-2020-29130
  QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets
 -> https://www.openwall.com/lists/oss-security/2020/11/27/1
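
A sketch of the kind of length check described above, assuming a conventional
Ethernet + ARP layout (the struct and constants here are illustrative, not
slirp's definitions):

    #include <stddef.h>
    #include <stdint.h>

    enum { ETH_HDR_LEN = 14 };

    struct arp_hdr {
        uint16_t ar_hrd, ar_pro;
        uint8_t  ar_hln, ar_pln;
        uint16_t ar_op;
        uint8_t  ar_sha[6], ar_sip[4], ar_tha[6], ar_tip[4];
    };

    static int arp_input_checked(const uint8_t *pkt, size_t pkt_len)
    {
        /* Refuse to parse unless the frame holds Ethernet + ARP headers. */
        if (pkt_len < ETH_HDR_LEN + sizeof(struct arp_hdr)) {
            return -1;
        }
        /* ...safe to read the ARP header at pkt + ETH_HDR_LEN... */
        (void)pkt;
        return 0;
    }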

Reported-by: Qiuhao Li <Qiuhao.Li@outlook.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20201126135706.273950-1-ppandit@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
[BR: no NCSI code, so only ARP patched]
2021-04-06 16:50:56 -06:00
Jason Wang
2fc63d89a9 e1000: fail early for evil descriptor
Git-commit: 3de46e6fc4
References: bsc#1182577, CVE-2021-20257

During process_tx_desc(), the driver can try to chain a data descriptor with
a legacy descriptor, which will lead to an underflow in the following
calculation of bytes in process_tx_desc():

            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

This will lead to an infinite loop. So check and fail early if tp->size is
greater than or equal to msh.
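
Put as a small stand-alone helper (field names taken from the text above, the
helper itself is hypothetical), the idea is to bail out before the clamp can
underflow:

    #include <stdbool.h>
    #include <stdint.h>

    /* Fail early when the accumulated size already reaches the message
     * size, so the clamp below can never compute msh - tp_size < 0. */
    static bool clamp_tx_bytes(uint32_t tp_size, uint32_t msh, uint32_t *bytes)
    {
        if (tp_size >= msh) {
            return false;             /* evil descriptor: abort the packet */
        }
        if (tp_size + *bytes > msh) {
            *bytes = msh - tp_size;   /* guaranteed non-negative now */
        }
        return true;
    }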

Reported-by: Alexander Bulekov <alxndr@bu.edu>
Reported-by: Cheolwoo Myung <cwmyung@snu.ac.kr>
Reported-by: Ruhr-University Bochum <bugs-syssec@rub.de>
Cc: Prasad J Pandit <ppandit@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Paolo Bonzini
eb30f2e105 ide: atapi: assert that the buffer pointer is in range
Git-commit: 8132122889
References: bsc#1181108, CVE-2020-29443

A case was reported where s->io_buffer_index can be out of range.
The report skimped on the details but it seems to be triggered
by s->lba == -1 on the READ/READ CD paths (e.g. by sending an
ATAPI command with LBA = 0xFFFFFFFF).  For now paper over it
with assertions.  The first one ensures that there is no overflow
when incrementing s->io_buffer_index, the second checks for the
buffer overrun.

Note that the buffer overrun is only a read, so I am not sure
if the assertion failure is actually less harmful than the overrun.
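
The general shape of the two assertions, with illustrative names rather than
the real IDEState fields, is roughly:

    #include <assert.h>
    #include <stdint.h>

    /* First assert: the increment cannot wrap around.
     * Second assert: the index stays within the I/O buffer. */
    static void advance_io_buffer_index(uint32_t *index, uint32_t step,
                                        uint32_t buf_len)
    {
        assert(*index <= UINT32_MAX - step);
        *index += step;
        assert(*index <= buf_len);
    }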

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20201201120926.56559-1-pbonzini@redhat.com
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Prasad J Pandit
8d9b761437 hw: usb: hcd-ohci: check for processed TD before retire
Git-commit: 1be90ebecc
References: bsc#1176684, CVE-2020-25625

While servicing OHCI transfer descriptors (TD), ohci_service_iso_td
retires a TD if it has passed its time frame. It does not check if
the TD was already processed once and holds an error code in TD_CC.
It may happen if the TD list has a loop. Add check to avoid an
infinite loop condition.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
Message-id: 20200915182259.68522-3-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Prasad J Pandit
c9149f3d0c hw: usb: hcd-ohci: check len and frame_number variables
Git-commit: 1328fe0c32
References: bsc#1176682, CVE-2020-25624

While servicing the OHCI transfer descriptors (TD), the OHCI host
controller derives variables 'start_addr', 'end_addr', 'len'
etc. from values supplied by the host controller driver.
The host controller driver may supply values such that using the
above variables leads to out-of-bounds access issues.
Add checks to avoid them.

AddressSanitizer: stack-buffer-overflow on address 0x7ffd53af76a0
  READ of size 2 at 0x7ffd53af76a0 thread T0
  #0 ohci_service_iso_td ../hw/usb/hcd-ohci.c:734
  #1 ohci_service_ed_list ../hw/usb/hcd-ohci.c:1180
  #2 ohci_process_lists ../hw/usb/hcd-ohci.c:1214
  #3 ohci_frame_boundary ../hw/usb/hcd-ohci.c:1257
  #4 timerlist_run_timers ../util/qemu-timer.c:572
  #5 qemu_clock_run_timers ../util/qemu-timer.c:586
  #6 qemu_clock_run_all_timers ../util/qemu-timer.c:672
  #7 main_loop_wait ../util/main-loop.c:527
  #8 qemu_main_loop ../softmmu/vl.c:1676
  #9 main ../softmmu/main.c:50

Reported-by: Gaoning Pan <pgn@zju.edu.cn>
Reported-by: Yongkang Jia <j_kangel@163.com>
Reported-by: Yi Ren <yunye.ry@alibaba-inc.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20200915182259.68522-2-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Li Qiang
0d4f2ccbd1 hw: ehci: check return value of 'usb_packet_map'
Git-commit: 2fdb42d840
References: bsc#1178934, CVE-2020-25723

If 'usb_packet_map' fails, we should stop processing the USB
request.

Signed-off-by: Li Qiang <liq3ea@163.com>
Message-Id: <20200812161727.29412-1-liq3ea@163.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Li Qiang
c6ab88d778 hw: xhci: check return value of 'usb_packet_map'
Git-commit: 21bc31524e
References: bsc#1176673, CVE-2020-25084

Currently we don't check the return value of 'usb_packet_map';
this can cause a use-after-free (UAF) issue. This is LP#1891341.
Following is the reproducer provided in:
-->https://bugs.launchpad.net/qemu/+bug/1891341

cat << EOF | ./i386-softmmu/qemu-system-i386 -device nec-usb-xhci \
-trace usb\* -device usb-audio -device usb-storage,drive=mydrive \
-drive id=mydrive,file=null-co://,size=2M,format=raw,if=none \
-nodefaults -nographic -qtest stdio
outl 0xcf8 0x80001016
outl 0xcfc 0x3c009f0d
outl 0xcf8 0x80001004
outl 0xcfc 0xc77695e
writel 0x9f0d000000000040 0xffff3655
writeq 0x9f0d000000002000 0xff2f9e0000000000
write 0x1d 0x1 0x27
write 0x2d 0x1 0x2e
write 0x17232 0x1 0x03
write 0x17254 0x1 0x06
write 0x17278 0x1 0x34
write 0x3d 0x1 0x27
write 0x40 0x1 0x2e
write 0x41 0x1 0x72
write 0x42 0x1 0x01
write 0x4d 0x1 0x2e
write 0x4f 0x1 0x01
writeq 0x9f0d000000002000 0x5c051a0100000000
write 0x34001d 0x1 0x13
write 0x340026 0x1 0x30
write 0x340028 0x1 0x08
write 0x34002c 0x1 0xfe
write 0x34002d 0x1 0x08
write 0x340037 0x1 0x5e
write 0x34003a 0x1 0x05
write 0x34003d 0x1 0x05
write 0x34004d 0x1 0x13
writeq 0x9f0d000000002000 0xff00010100400009
EOF

This patch fixes this.

Buglink: https://bugs.launchpad.net/qemu/+bug/1891341
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Li Qiang <liq3ea@163.com>
Message-id: 20200812153139.15146-1-liq3ea@163.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Prasad J Pandit
46d6d8fca2 megasas: use unsigned type for reply_queue_head and check index
Git-commit: f50ab86a26
References: bsc#1172383, CVE-2020-13362

A guest user may set 'reply_queue_head' field of MegasasState to
a negative value. Later in 'megasas_lookup_frame' it is used to
index into s->frames[] array. Use unsigned type to avoid OOB
access issue.

Also check that 'index' value stays within s->frames[] bounds
through the while() loop in 'megasas_lookup_frame' to avoid OOB
access.
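
A compact sketch of both points (unsigned index type plus a bounded walk),
using hypothetical names and a fixed MAX_FRAMES in place of the real
MegasasState layout:

    #include <stdint.h>

    enum { MAX_FRAMES = 2048 };   /* illustrative bound */

    static int lookup_frame(const uint64_t *frames_pa, uint32_t reply_queue_head,
                            uint64_t frame_pa)
    {
        uint32_t index = reply_queue_head % MAX_FRAMES;  /* unsigned, in range */

        for (unsigned count = 0; count < MAX_FRAMES; count++) {
            if (frames_pa[index] == frame_pa) {
                return (int)index;
            }
            index = (index + 1) % MAX_FRAMES;            /* never leaves bounds */
        }
        return -1;
    }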

Reported-by: Ren Ding <rding@gatech.edu>
Reported-by: Hanqing Zhao <hanqing@gatech.edu>
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Acked-by: Alexander Bulekov <alxndr@bu.edu>
Message-Id: <20200513192540.1583887-2-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
Bruce Rogers
ea1d325869 sm501: add qemu/log.h include
References: bsc#1172385, CVE-2020-12829

This is in place of commit 4a1f253adb, which added the log.h
reference but made other changes we're not interested in.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
BALATON Zoltan
4389a1bcdc sm501: Replace hand written implementation with pixman where possible
Git-commit: b15a22bbcb
References: bsc#1172385, CVE-2020-12829

Besides being faster, this should also prevent malicious guests from
abusing the 2D engine to overwrite data or cause a crash.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Message-id: 58666389b6cae256e4e972a32c05cf8aa51bffc0.1590089984.git.balaton@eik.bme.hu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
(cherry picked from commit 6bed160dd4e4f68a85e032b08d40d6bd5449274f)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
BALATON Zoltan
29697e5a02 sm501: Clean up local variables in sm501_2d_operation
Git-commit: 3d0b096298
References: bsc#1172385, CVE-2020-12829

Make variables local to the block they are used in to make it clearer
which operation they are needed for.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: ae59f8138afe7f6a5a4a82539d0f61496a906b06.1590089984.git.balaton@eik.bme.hu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
(cherry picked from commit e8a152c862db6fdd316345d9cd932e4602e9b29e)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
BALATON Zoltan
b75f462fc3 sm501: Use BIT(x) macro to shorten constant
Git-commit: 2824809b7f
References: bsc#1172385, CVE-2020-12829

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 124bf5de8d7cf503b32b377d0445029a76bfbd49.1590089984.git.balaton@eik.bme.hu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
(cherry picked from commit 6f7231dc2939aca80adc5080464696e7d5f0ca45)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:27:09 -06:00
BALATON Zoltan
ad47df9169 sm501: Shorten long variable names in sm501_2d_operation
Git-commit: 6f8183b5dc
References: bsc#1172385, CVE-2020-12829

This increases readability and cleans up some confusing naming.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Message-id: b9b67b94c46e945252a73c77dfd117132c63c4fb.1590089984.git.balaton@eik.bme.hu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
BALATON Zoltan
4c3d3ab515 sm501: Convert printf + abort to qemu_log_mask
Git-commit: e29da77e5f
References: bsc#1172385, CVE-2020-12829

Some places already use qemu_log_mask() to log unimplemented features
or errors but some others have printf() then abort(). Convert these to
qemu_log_mask() and avoid aborting, to prevent guests from easily causing
a denial of service.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 305af87f59d81e92f2aaff09eb8a3603b8baa322.1590089984.git.balaton@eik.bme.hu
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
Marcus Comstedt
57d5a72a94 sm501: Adjust endianness of pixel value in rectangle fill
Git-commit: f3a60058c9
References: bsc#1172385, CVE-2020-12829

The value from twoD_foreground (which is in host endian format) must
be converted to the endianness of the framebuffer (currently always
little endian) before it can be used to perform the fill operation.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
BALATON Zoltan
63e5c6df37 sm501: Set updated region dirty after 2D operation
Git-commit: eb76243c9d
References: bsc#1172385, CVE-2020-12829

Set the changed memory region dirty after performing a 2D operation to
ensure that the screen is updated properly.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
BALATON Zoltan
dba0fc2f7c sm501: Fix support for non-zero frame buffer start address
Git-commit: 33159dd7ce
References: bsc#1172385, CVE-2020-12829

Display updates and drawing hardware cursor did not work when frame
buffer address was non-zero. Fix this by taking the frame buffer
address into account in these cases. This fixes screen dragging on
AmigaOS. Based on patch by Sebastian Bauer.

Signed-off-by: Sebastian Bauer <mail@sebastianbauer.info>
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
Sebastian Bauer
abe3c0057a sm501: Log unimplemented raster operation modes
Git-commit: 06cb926aaa
References: bsc#1172385, CVE-2020-12829

The sm501 currently implements only a very limited set of raster operation
modes. After this change, unknown raster operation modes are logged so
these can be easily spotted.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
Sebastian Bauer
619cb98abb sm501: Implement negated destination raster operation mode
Git-commit: debc7e7dad
References: bsc#1172385, CVE-2020-12829

Add support for the negated destination operation mode. This is used e.g.
by AmigaOS for the INVERSEVID drawing mode. With this change, the cursor
in the shell and non-immediate window adjustment are working now.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
Sebastian Bauer
a2beb809ea sm501: Use values from the pitch register for 2D operations
Git-commit: 54b2a4339c
References: bsc#1172385, CVE-2020-12829

Before, crt_h_total was used for src_width and dst_width. This is a
property of the current display setting and not relevant for the 2D
operation, which can also be done off-screen. The pitch register's purpose
is to describe the line pitch relevant to the 2D operation.

Signed-off-by: Sebastian Bauer <mail@sebastianbauer.info>
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
BALATON Zoltan
0b87f89c10 sm501: Add some more unimplemented registers
Git-commit: 5690d9ecef
References: bsc#1172385, CVE-2020-12829

These are not really implemented (they just return zero or default values),
but add them so that guests accessing them can run.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:26:22 -06:00
BALATON Zoltan
9016d3b697 sm501: Add support for panel layer
Git-commit: 1ae5e6eb42
References: bsc#1172385, CVE-2020-12829

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
Message-id: 2029a276362c0c3a14c78acb56baa9466848dd51.1492787889.git.balaton@eik.bme.hu
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:25:04 -06:00
BALATON Zoltan
57c52ffa89 sm501: Fix hardware cursor
Git-commit: 6a2a5aae02
References: bsc#1172478, CVE-2020-13765

Rework HWC handling to simplify it and fix cursor not updating on
screen as needed. Previously cursor was not updated because checking
for changes in a line overrode the update flag set for the cursor but
fixing this is not enough because the cursor should also be updated if
its shape or location changes. Introduce hwc_invalidate() function to
handle that similar to other display controller models.

Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
Message-id: 6970a5e9868b7246656c1d02038dc5d5fa369507.1492787889.git.balaton@eik.bme.hu
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-29 14:23:45 -06:00
Thomas Huth
4e018f4995 hw/core/loader: Fix possible crash in rom_copy()
Git-commit: e423455c4f
References: bsc#1172478, CVE-2020-13765

Both, "rom->addr" and "addr" are derived from the binary image
that can be loaded with the "-kernel" paramer. The code in
rom_copy() then calculates:

    d = dest + (rom->addr - addr);

and uses "d" as destination in a memcpy() some lines later. Now with
bad kernel images, it is possible that rom->addr is smaller than addr,
thus "rom->addr - addr" gets negative and the memcpy() then tries to
copy contents from the image to a bad memory location. This could
maybe be used to inject code from a kernel image into the QEMU binary,
so we better fix it with an additional sanity check here.

Cc: qemu-stable@nongnu.org
Reported-by: Guangming Liu
Buglink: https://bugs.launchpad.net/qemu/+bug/1844635
Message-Id: <20190925130331.27825-1-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
6c02b007ca es1370: check total frame count against current frame
Git-commit: 369ff955a8
References: bsc#1172384, CVE-2020-13361

A guest user may set the channel frame count via es1370_write()
such that, in es1370_transfer_audio(), the total frame count
'size' is less than the number of frames that are processed,
'cnt'.

    int cnt = d->frame_cnt >> 16;
    int size = d->frame_cnt & 0xffff;

If size < cnt, this results in incorrect calculations leading
to OOB access issue(s). Add a check to avoid it.
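
Concretely, the guard can be expressed as a tiny helper around the two derived
values quoted above (only frame_cnt is taken from the message; the helper
itself is illustrative):

    #include <stdint.h>

    static int es1370_frame_count_ok(uint32_t frame_cnt)
    {
        int cnt  = frame_cnt >> 16;      /* frames already processed */
        int size = frame_cnt & 0xffff;   /* total frame count */

        return size >= cnt;              /* 0 => skip transfer, no OOB math */
    }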

Reported-by: Ren Ding <rding@gatech.edu>
Reported-by: Hanqing Zhao <hanqing@gatech.edu>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20200514200608.1744203-1-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
ab7cb4d57d scsi: lsi: exit infinite loop while executing script (CVE-2019-12068)
When executing script in lsi_execute_script(), the LSI scsi adapter
emulator advances 's->dsp' index to read next opcode. This can lead
to an infinite loop if the next opcode is empty. Move the existing
loop exit after 10k iterations so that it covers no-op opcodes as
well.
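
Reduced to a hedged sketch (the callback and constant are placeholders, not
the lsi53c895a code), the structure is: count every processed instruction,
including no-ops, against one fixed budget.

    #include <stdbool.h>

    enum { LSI_MAX_INSN = 10000 };   /* per-run instruction budget */

    static void execute_script_bounded(bool (*fetch_and_run)(void *), void *opaque)
    {
        for (int insn = 0; insn < LSI_MAX_INSN; insn++) {
            if (!fetch_and_run(opaque)) {
                return;              /* script finished normally */
            }
        }
        /* Budget exhausted: treat as a guest error and stop executing. */
    }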

Reported-by: Bugs SysSec <bugs-syssec@rub.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit de594e4765)
[BR: BSC#1146873 CVE-2019-12068 - minor trace related tweak applied]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Sven Schnelle
883d8ce467 lsi: use enum type for s->waiting
This makes the code easier to read - no functional change.

Signed-off-by: Sven Schnelle <svens@stackframe.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20190305195519.24303-3-svens@stackframe.org>
(cherry picked from commit f08ec2b82a)
[BR: BSC#1146873 CVE-2019-12068 - small tweak made related to trace code]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Marc-André Lureau
6a278e26be Fix use-after-free in ip_reass() (CVE-2020-1983)
Git-commit: 2faae0f778f818fadc873308f983289df697eb93
References: bsc#1170940, CVE-2020-1983

The q pointer is updated when the mbuf data is moved from m_dat to
m_ext.

m_ext buffer may also be realloc()'ed and moved during m_cat():
q should also be updated in this case.

Reported-by: Aviv Sasson <asasson@paloaltonetworks.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(cherry picked from commit 9bd6c5913271eabcb7768a58197ed3301fe19f2d)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Marc-André Lureau
8a4f553333 tcp_emu: fix unsafe snprintf() usages
Git-commit: 68ccb8021a838066f0951d4b2817eb6b6f10a843
References: bsc#1163018, CVE-2020-8608

Various calls to snprintf() assume that snprintf() returns "only" the
number of bytes written (excluding terminating NUL).

https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04

"Upon successful completion, the snprintf() function shall return the
number of bytes that would be written to s had n been sufficiently
large excluding the terminating null byte."

Before patch ce131029, if there isn't enough room in "m_data" for the
"DCC ..." message, we overflow "m_data".

After the patch, if there isn't enough room for the same, we don't
overflow "m_data", but we set "m_len" out-of-bounds. The next time an
access is bounded by "m_len", we'll have a buffer overflow then.

Use slirp_fmt*() to fix potential OOB memory access.

Reported-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com>
[Since this was ported from a now quite different libslirp, quite a bit changed]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
1298490017 slirp: use correct size while emulating commands
Git-commit: 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80
References: bsc#1161066, CVE-2020-7039, bsc#1163018, CVE-2020-8608

While emulating services in tcp_emu(), it uses 'mbuf' size
'm->m_size' to write commands via snprintf(3). Use M_FREEROOM(m)
size to avoid possible OOB access.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Message-Id: <20200109094228.79764-3-ppandit@redhat.com>
[Since this was ported from a now quite different libslirp, quite a bit changed]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
d98c149d9a slirp: use correct size while emulating IRC commands
Git-commit: ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9
References: bsc#1161066, CVE-2020-7039, bsc#1163018, CVE-2020-8608

While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size
'm->m_size' to write DCC commands via snprintf(3). This may
lead to OOB write access, because 'bptr' points somewhere in
the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m)
size to avoid OOB access.

Reported-by: Vishnu Dev TJ <vishnudevtj@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Message-Id: <20200109094228.79764-2-ppandit@redhat.com>
[Since this was ported from a now quite different libslirp, quite a bit changed]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Samuel Thibault
7070672a7f tcp_emu: Fix oob access
Git-commit: 2655fffed7a9e765bcb4701dd876e9dab975f289
References: bsc#1161066, CVE-2020-7039

The main loop only checks for one available byte, while we sometimes
need two bytes.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Marc-André Lureau
0298cd5e7a util: add slirp_fmt() helpers
Git-commit: 30648c03b27fb8d9611b723184216cd3174b6775
References: bsc#1163018, CVE-2020-8608

Various calls to snprintf() in libslirp assume that snprintf() returns
"only" the number of bytes written (excluding terminating NUL).

https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04

"Upon successful completion, the snprintf() function shall return the
number of bytes that would be written to s had n been sufficiently
large excluding the terminating null byte."

Introduce slirp_fmt() that handles several pathological cases the
way libslirp usually expect:

- treat error as fatal (instead of silently returning -1)

- fmt0() will always \0 end

- return the number of bytes actually written (instead of what would
have been written, which would usually result in OOB later), including
the ending \0 for fmt0()

- warn if truncation happened (instead of ignoring)

Other less common cases can still be handled with strcpy/snprintf() etc.
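
A rough sketch of what such a helper does, per the list above (this is an
illustration of the behavior, not the upstream implementation):

    #include <stdarg.h>
    #include <stdio.h>

    static int slirp_fmt_sketch(char *buf, size_t size, const char *fmt, ...)
    {
        va_list ap;
        int rv;

        if (size == 0) {
            return 0;
        }
        va_start(ap, fmt);
        rv = vsnprintf(buf, size, fmt, ap);
        va_end(ap);

        if (rv < 0) {
            /* the real helper treats this as fatal; just report it here */
            return -1;
        }
        if ((size_t)rv >= size) {
            fprintf(stderr, "slirp_fmt_sketch: output truncated\n");
            rv = (int)(size - 1);    /* bytes actually written, excl. NUL */
        }
        return rv;
    }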

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com>
[Since this was ported from a now quite different libslirp, quite a bit changed]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Samuel Thibault
fcd085d819 Fix heap overflow in ip_reass on big packet input
When the first fragment does not fit in the preallocated buffer, q will
already be pointing to the ext buffer, so we mustn't try to update it.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(cherry picked from commit 126c04acbabd7ad32c2b018fe10dfac2a3bc1210)
[LY: CVE-2019-14378 BSC#1143794]
Signed-off-by: Liang Yan <lyan@suse.com>
2021-03-18 17:15:18 -06:00
Marc-André Lureau
5a45fde238 slirp: don't manipulate so_rcv in tcp_emu()
For some reason, EMU_IDENT is not like other "emulated" protocols and
tries to reconstitute the original buffer, if it came in multiple
packets. Unfortunately, it does so wrongly, as it doesn't respect the
sbuf circular buffer appending rules, nor does it maintain some of the
invariants (rptr is incremented without bounds, etc): this leads to
further memory corruption revealed by ASAN or various malloc
errors. Furthermore, the so_rcv buffer is regularly flushed, so there
is no guarantee that buffer reconstruction will do what is expected.

Instead, do what the function comment says: "XXX Assumes the whole
command came in one packet", and don't touch so_rcv.

Related to: https://bugzilla.redhat.com/show_bug.cgi?id=1664205

Cc: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
2021-03-18 17:15:18 -06:00
Marc-André Lureau
dde2698649 slirp: ensure there is enough space in mbuf to null-terminate
Prevents from buffer overflows.
Related to: https://bugzilla.redhat.com/show_bug.cgi?id=1664205

Cc: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[BFR: In support of BSC#1123156]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Samuel Thibault
4e2a617d9c slirp: fix big/little endian conversion in ident protocol
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>

---
Based-on: <1551476756-25749-1-git-send-email-will@wbowling.info>

(cherry picked from commit 1fd71067da)
[BFR: In support of BSC#1123156]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Michael Roth
9ecdbbdc79 slirp: ip_reass: Fix use after free
Using ip_deq after m_free might read pointers from a reused allocation.

This would be difficult to exploit, but it is still related to
CVE-2019-14378, which generates fragmented IP packets that would trigger this
issue and at least produce a DoS.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(from libslirp.git commit c59279437eda91841b9d26079c70b8a540d41204)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
[BR: BSC#1149811 CVE-2019-15890]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Liang Yan
02dbadd58d qemu-bridge-helper: restrict interface name
The interface names in qemu-bridge-helper are defined to be
of size IFNAMSIZ (= 16), including the terminating null ('\0') byte.
The same is applied to interface names read from the 'bridge.conf'
file to form ACL rules. If the user-supplied '--br=bridge' name
is not restricted to the same length, it could lead to an ACL bypass
issue. Restrict bridge names to IFNAMSIZ, including the null byte.
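
A minimal check along these lines (the helper name is made up; IFNAMSIZ comes
from <net/if.h> on Linux):

    #include <net/if.h>
    #include <stdio.h>
    #include <string.h>

    static int check_bridge_name(const char *name)
    {
        if (strlen(name) >= IFNAMSIZ) {
            fprintf(stderr, "bridge name '%s' too long (max %d chars)\n",
                    name, IFNAMSIZ - 1);
            return -1;               /* reject: would bypass the ACL match */
        }
        return 0;
    }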

Reported-by: Riccardo Schirone <rschiron@redhat.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[LY: BSC#1140402 CVE-2019-13164]
Signed-off-by: Liang Yan <lyan@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
40727784a3 qxl: check release info object
When releasing spice resources in the release_resource() routine,
if the release info object 'ext.info' is null, it leads to a null
pointer dereference. Add a check to avoid it.

Reported-by: Bugs SysSec <bugs-syssec@rub.de>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20190425063534.32747-1-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit d52680fc93)
[LY: BSC#1135902 CVE-2019-12155]
Signed-off-by: Liang Yan <lyan@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
d3f21be02a target/i386: define md-clear bit
md-clear is a new CPUID bit which is set when microcode provides the
mechanism to flush various exploitable CPU buffers by invoking
the VERW instruction.  Add the new feature.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[BR: BSC#1111331 CVE-2018-12126 CVE-2018-12127 CVE-2018-12130
CVE-2019-11091]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Maydell
83ae211f18 device_tree.c: Don't use load_image()
The load_image() function is deprecated, as it does not let the
caller specify how large the buffer to read the file into is.
Instead use load_image_size().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20181130151712.2312-9-peter.maydell@linaro.org
(cherry picked from commit da885fe1ee)
[LM: BSC#1130675 CVE-2018-20815]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:18 -06:00
Benjamin Herrenschmidt
76374bbc62 loader: Add load_image_size() to replace load_image()
A subsequent patch to ppc/spapr needs to load the RTAS blob into
qemu memory rather than target memory (so it can later be copied
into the right spot at machine reset time).

I would use load_image() but it is marked deprecated because it
doesn't take a buffer size as argument, so let's add load_image_size()
that does.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[aik: fixed errors from checkpatch.pl]
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ea87616d6c)
[LM: BSC#1130675 CVE-2018-20815]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:18 -06:00
William Bowling
ff394a03dc slirp: check sscanf result when emulating ident
When emulating ident in tcp_emu, if the strchr checks passed but the
sscanf check failed, two uninitialized variables would be copied and
sent in the reply, so move this code inside the if(sscanf()) clause.

Signed-off-by: William Bowling <will@wbowling.info>
Cc: qemu-stable@nongnu.org
Cc: secalert@redhat.com
Message-Id: <1551476756-25749-1-git-send-email-will@wbowling.info>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
(cherry picked from commit d3222975c7)
[LM: BSC#1129622 CVE-2019-9824 To pass our checkpatch check, I changed
the patch to use spaces, not tabs, as in the initially proposed]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:18 -06:00
Greg Kurz
574e860fa2 9p: take write lock on fid path updates (CVE-2018-19364)
Recent commit 5b76ef50f6 fixed a race where v9fs_co_open2() could
possibly overwrite a fid path with v9fs_path_copy() while it is being
accessed by some other thread, ie, use-after-free that can be detected
by ASAN with a custom 9p client.

It turns out that the same can happen at several locations where
v9fs_path_copy() is used to set the fid path. The fix is again to
take the write lock.

Fixes CVE-2018-19364.

Cc: P J P <ppandit@redhat.com>
Reported-by: zhibin hu <noirfate@gmail.com>
Reviewed-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 5b3c77aa58)
[BR: BSC#1116717 CVE-2018-19364]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Greg Kurz
53ff9e57bd 9p: write lock path in v9fs_co_open2()
The assumption that the fid cannot be used by any other operation is
wrong. At least, nothing prevents a misbehaving client from creating a
file with a given fid and passing this fid to some other operation
at the same time (i.e., without waiting for the response to the creation
request). The call to v9fs_path_copy() performed by the worker thread
after the file was created can race with any access to the fid path
performed by some other thread. This causes use-after-free issues that
can be detected by ASAN with a custom 9p client.

Unlike other operations that only read the fid path, v9fs_co_open2()
does modify it. It should hence take the write lock.

Cc: P J P <ppandit@redhat.com>
Reported-by: zhibin hu <noirfate@gmail.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 5b76ef50f6)
[BR: BSC#1116717 CVE-2018-19364]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Greg Kurz
7d88df8b5f 9p: fix QEMU crash when renaming files
When using the 9P2000.u version of the protocol, the following shell
command line in the guest can cause QEMU to crash:

    while true; do rm -rf aa; mkdir -p a/b & touch a/b/c & mv a aa; done

With 9P2000.u, file renaming is handled by the WSTAT command. The
v9fs_wstat() function calls v9fs_complete_rename(), which calls
v9fs_fix_path() for every fid whose path is affected by the change.
The involved calls to v9fs_path_copy() may race with any other access
to the fid path performed by some worker thread, causing a crash like
shown below:

Thread 12 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555a25da2 in local_open_nofollow (fs_ctx=0x555557d958b8, path=0x0,
 flags=65536, mode=0) at hw/9pfs/9p-local.c:59
59          while (*path && fd != -1) {
(gdb) bt
 path=0x0, flags=65536, mode=0) at hw/9pfs/9p-local.c:59
 path=0x0) at hw/9pfs/9p-local.c:92
 fs_path=0x555556b56858, stbuf=0x7fff84830ef0) at hw/9pfs/9p-local.c:185
 path=0x555556b56858, stbuf=0x7fff84830ef0) at hw/9pfs/cofile.c:53
 at hw/9pfs/9p.c:1083
 at util/coroutine-ucontext.c:116
(gdb)

The fix is to take the path write lock when calling v9fs_complete_rename(),
like in v9fs_rename().

Impact:  DoS triggered by unprivileged guest users.

Fixes: CVE-2018-19489
Cc: P J P <ppandit@redhat.com>
Reported-by: zhibin hu <noirfate@gmail.com>
Reviewed-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 1d20398694)
[BR: BSC#1117275]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
d8bc1b7f8f slirp: check data length while emulating ident function
While emulating the identification protocol, tcp_emu() does not check
the available space in the 'sc_rcv->sb_data' buffer. This could lead to
a heap buffer overflow issue. Add a check to avoid it.

Reported-by: Kira <864786842@qq.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(cherry picked from commit a7104eda7d)
[BR: BSC#1123156 CVE-2019-6778,  modify patch to use spaces instead of tabs]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Marcelo Tosatti
46ceab3424 kvm: i386: fix LAPIC TSC deadline timer save/restore
The configuration of the timer represented by MSR_IA32_TSCDEADLINE depends on:

- APIC LVT Timer register.
- TSC value.

Change the order to respect the dependency.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7477cd3897)
[LM: BSC#1109544]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
94303aed73 lsi53c895a: check message length value is valid
While writing a message in 'lsi_do_msgin', the message length value
in 'msg_len' could be invalid due to an invalid migration stream.
Add an assertion to avoid an out-of-bounds access, and reject
the incoming migration data if it contains an invalid message
length.

Discovered by Deja vu Security. Reported by Oracle.

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20181026194314.18663-1-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e58ccf0396)
[BR: BSC#1114422 CVE-2018-18849]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Larry Dewey
7637b1ae35 chardev: Converting public IO impls to size_t
Due to an integer overflow issue found in ccid_card_vscard_read
(CVE-2018-18438), memory was previously susceptible to corruption. To
mitigate this issue, a number of components, including the
ccid-card-passthrough device, now utilize size_t in place of int to
prevent the overflow from occurring when values larger than the
available space are passed to the corresponding functions.

While CCID smart card readers were the primary target of the
vulnerability, a number of other subsystems have also been modified in the
same manner to prevent them from being exploited in the same way; specifically
functions which are modeled after the IOCanReadHandler and IOReadHandler
typedefs.

Subsystems affected include, but are not limited to:
CCID Smart Cards
Spice consoles
Monitors
Network Sockets
Network Tap Devices
Serial IO
Emulated Shell Serial IO
USB Redirection and Passthrough
Consoles
SLiRP IO
Mouse Emulation
Xen Consoles
SLCP IO
Network Block Devices

(cherry picked from commit 4792016a6119a4d5768bd8f989ae90b52dfef789)
[LD: BSC#1112185 CVE-2018-18438]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Jason Wang
703f8d205c net: ignore packet size greater than INT_MAX
There should not be a reason for passing a packet size greater than
INT_MAX. It's usually a hint of a bug somewhere, so ignore packet sizes
greater than INT_MAX in qemu_deliver_packet_iov().
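
The guard boils down to something like this (the helper is a simplified
stand-in for the delivery path, not QEMU's actual function):

    #include <limits.h>
    #include <stddef.h>

    static size_t deliver_packet_checked(const void *buf, size_t size)
    {
        if (size > INT_MAX) {
            return 0;     /* almost certainly a bug upstream: drop the packet */
        }
        /* ...hand buf/size on to the receiver... */
        (void)buf;
        return size;
    }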

CC: qemu-stable@nongnu.org
Reported-by: Daniel Shapira <daniel@twistlock.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 1592a99470)
[LD: BSC#1111013 CVE-2018-17963]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Jason Wang
798e7abec5 pcnet: fix possible buffer overflow
In pcnet_receive(), we try to assign size_ to size, which converts from
size_t to int. This causes trouble when size_ is greater than
INT_MAX: it leads to a negative value in size, which can then pass
the size < MIN_BUF_SIZE check and may lead to out-of-bounds access
for both buf and buf1.

Fix this by converting the type of size to size_t.

CC: qemu-stable@nongnu.org
Reported-by: Daniel Shapira <daniel@twistlock.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit b1d80d12c5)
[LD: BSC#1111010 CVE-2018-17962]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Jason Wang
7851133be4 rtl8139: fix possible out of bound access
In rtl8139_do_receive(), we try to assign size_ to size, which converts
from size_t to int. This causes trouble when size_ is greater than
INT_MAX: it leads to a negative value in size, which can then pass
the size < MIN_BUF_SIZE check and may lead to out-of-bounds access
for both buf and buf1.

Fix this by converting the type of size to size_t.

CC: qemu-stable@nongnu.org
Reported-by: Daniel Shapira <daniel@twistlock.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 1a326646fe)
[LD: BSC#1111006 CVE-2018-17958]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Jason Wang
0b1b67cce8 ne2000: fix possible out of bound access in ne2000_receive
In ne2000_receive(), we try to assign size_ to size, which converts
from size_t to int. This causes trouble when size_ is greater than
INT_MAX: it leads to a negative value in size, which can then pass
the size < MIN_BUF_SIZE check and may lead to out-of-bounds access
for both buf and buf1.

Fix this by converting the type of size to size_t.

CC: qemu-stable@nongnu.org
Reported-by: Daniel Shapira <daniel@twistlock.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit fdc89e90fa)
[LD: BSC#1110910 CVE-2018-10839]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Larry Dewey
747650ce86 seccomp: secure all threads with seccomp manually.
Just before starting a child thread, a call to seccomp_start() has been
added to ensure that each thread is sandboxed properly, and that
seccomp is successfully applied to all threads when the user specifies
that sandboxing should be used either via /etc/libvirt/qemu.conf or via
the command line by passing `--seccomp on` to qemu.

[LD: BSC#1106222 CVE-2018-15746]
Signed-off-by: Larry Dewey <ldewey@suse.com>
2021-03-18 17:15:18 -06:00
Konrad Rzeszutek Wilk
3c6c6bc6ae i386: define the AMD 'virt-ssbd' CPUID feature bit (CVE-2018-3639)
AMD Zen exposes the Intel equivalent of Speculative Store Bypass Disable
via the 0x80000008_EBX[25] CPUID feature bit.

This needs to be exposed to guest OS to allow them to protect
against CVE-2018-3639.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20180521215424.13520-3-berrange@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit 403503b162)
[BR: BSC#1092885 CVE-2018-3639]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Konrad Rzeszutek Wilk
ed89784bcb i386: Define the Virt SSBD MSR and handling of it (CVE-2018-3639)
"Some AMD processors only support a non-architectural means of enabling
speculative store bypass disable (SSBD).  To allow a simplified view of
this to a guest, an architectural definition has been created through a new
CPUID bit, 0x80000008_EBX[25], and a new MSR, 0xc001011f.  With this, a
hypervisor can virtualize the existence of this definition and provide an
architectural method for using SSBD to a guest.

Add the new CPUID feature, the new MSR and update the existing SSBD
support to use this MSR when present." (from x86/speculation: Add virtualized
speculative store bypass disable support in Linux).

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20180521215424.13520-4-berrange@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit cfeea0c021)
[BR: BSC#1092885 CVE-2018-3639]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
3abda5c691 qga: check bytes count read by guest-file-read
While reading file content via the 'guest-file-read' command, the
'qmp_guest_file_read' routine allocates a buffer of count+1
bytes. This could overflow for large values of 'count'.
Add a check to avoid it.
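
The check amounts to bounding 'count' before the count+1 allocation, roughly
like this (the cap value and helper are illustrative, not the exact qga fix):

    #include <stdint.h>
    #include <stdlib.h>

    enum { READ_COUNT_MAX = 48 * 1024 * 1024 };   /* illustrative cap */

    static char *alloc_read_buffer(uint64_t count)
    {
        if (count > READ_COUNT_MAX) {
            return NULL;                    /* reject oversized read requests */
        }
        return malloc((size_t)count + 1);   /* +1 cannot overflow now */
    }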

Reported-by: Fakhri Zulkifli <mohdfakhrizulkifli@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Cc: qemu-stable@nongnu.org
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit 141b197408)
[FL: BSC#1098735 CVE-2018-12617 modify as only qga/commands-posix.c
has related code, but not qga/commands-win32.c]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
912ee8e297 slirp: correct size computation while concatenating mbuf
While reassembling incoming fragmented datagrams, the 'm_cat' routine
extends the 'mbuf' buffer if it has insufficient room. It computes
a wrong buffer size, which leads to overwriting the adjacent heap buffer
area. Correct this size computation in m_cat.

Reported-by: ZDI Disclosures <zdi-disclosures@trendmicro.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(cherry picked from commit 864036e251)
[FL: BSC#1096223 CVE-2018-11806]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Michael Tokarev
1c2f33e221 slirp: remove mbuf(m_hdr,m_dat) indirection
Git-commit: 0e44486cdc

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Daniel P. Berrangé
321aff8e90 i386: define the 'ssbd' CPUID feature bit (CVE-2018-3639)
New microcode introduces the "Speculative Store Bypass Disable"
CPUID feature bit. This needs to be exposed to guest OS to allow
them to protect against CVE-2018-3639.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Message-Id: <20180521215424.13520-2-berrange@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit d19d1f9659)
[BR: BSC#1092885 CVE-2018-3639]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Bruce Rogers
993e993e25 migration: warn about inconsistent spec_ctrl state
As an attempt to help the user do the right thing, warn if we
detect spec_ctrl data in the migration stream but where the
defined cpu doesn't have the feature. This would indicate the
migration is from the quick and dirty qemu produced in January
2018 to handle Spectre v2. That qemu version exposed the IBRS
cpu feature to all vcpu types, which helped in the short term
but wasn't a well designed approach.
Warn the user that the now migrated guest needs to be restarted
as soon as possible, using the spec_ctrl cpu feature flag or a
*-IBRS vcpu model specified as appropriate.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Eduardo Habkost
9f8e1dc03d i386: Add new -IBRS versions of Intel CPU models
The new IA32_SPEC_CTRL MSR was introduced by a recent Intel
microcode update and can be used by OSes to mitigate
CVE-2017-5715.  Unfortunately we can't change the existing CPU
models without breaking existing setups, so users need to
explicitly update their VM configuration to use the new *-IBRS
CPU model if they want to expose IBRS to guests.

The new CPU models are simple copies of the existing CPU models,
with just CPUID_7_0_EDX_SPEC_CTRL added and model_id updated.

Cc: Jiri Denemark <jdenemar@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20180109154519.25634-6-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit ac96c41354)
[BR: BSC#1068032 CVE-2017-5715 CPU models are reduced as appropriate
to match this QEMU version, and features specified according to current
code conventions]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Eduardo Habkost
56bb250e19 i386: Add FEAT_8000_0008_EBX CPUID feature word
Add the new feature word and the "ibpb" feature flag.

Based on a patch by Paolo Bonzini.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20180109154519.25634-5-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit 1b3420e1c4)
[BR: BSC#1068032 CVE-2017-5715 change to match current code's feat_name
type. Also adapted to old style feature management.]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Eduardo Habkost
65970c2360 i386: Add spec-ctrl CPUID bit
Add the feature name and a CPUID_7_0_EDX_SPEC_CTRL macro.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20180109154519.25634-4-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit a2381f0934)
[BR: BSC#1068032 CVE-2017-5715 modify to match current code's feat_name
type]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Luwei Kang
25b35a80b2 x86: add infrastructure for 7_0_EDX features
The spec can be found in the Intel Software Developer's Manual or in
the Instruction Set Extensions Programming Reference.

Signed-off-by: Piotr Luc <piotr.luc@intel.com>
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Message-Id: <1477902446-5932-1-git-send-email-he.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 95ea69fb46)
[BR: BSC#1068032 CVE-2017-5715 - orig patch modified to not provide new
feature, just infrastructure, and change to match current code's feat_name
type. Adjust to old style feature tracking as well.]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Bruce Rogers
f7aa8ece96 x86: cpu: increase model_id array size from 48 to 49
This is perhaps the easiest way to handle the longer model names
introduced with the -IBRS models. This is in lieu of backporting
commit 807e9869b8 and the other commits
that would require.
[BR: BSC#1068032 CVE-2017-5715]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
21b668cafc multiboot: check mh_load_end_addr address field
While loading a kernel via a multiboot-v1 image, (flags & 0x00010000)
indicates that the multiboot header contains valid addresses for loading
the kernel image. In that case, the end of data segment address
'mh_load_end_addr' should be less than the bss segment end address
'mh_bss_end_addr'. Add a check to validate that.
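
Expressed as a stand-alone predicate (the flag value is quoted from the text
above; the helper itself is hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    enum { MULTIBOOT_ADDRS_VALID = 0x00010000 };

    static bool multiboot_load_addrs_sane(uint32_t flags,
                                          uint32_t mh_load_end_addr,
                                          uint32_t mh_bss_end_addr)
    {
        if (!(flags & MULTIBOOT_ADDRS_VALID)) {
            return true;                     /* address fields not used */
        }
        return mh_load_end_addr <= mh_bss_end_addr;
    }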

Reported-by: CERT CC <cert.cc@orange.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[LY: BSC#1083291 CVE-2018-7550]
Signed-off-by: Liang Yan <lyan@suse.com>
2021-03-18 17:15:18 -06:00
linzhecheng
8ec6aa8515 vga: check the validation of memory addr when draw text
Start a VM with qemu-kvm -enable-kvm -vnc :66 -smp 1 -m 1024 -hda
redhat_5.11.qcow2 -device pcnet -vga cirrus,
then use a VNC client to connect to the VM; executing the code below in the
guest OS will lead to a QEMU crash:

#include <stdlib.h>
#include <time.h>
#include <sys/io.h>   /* iopl(), outb(); Linux-specific, needs root */

int main(void)
{
    int a, b;

    iopl(3);                          /* allow direct port I/O */
    srand(time(NULL));
    while (1) {
        a = rand() % 0x100;           /* random value */
        b = 0x3c0 + (rand() % 0x20);  /* random VGA I/O port */
        outb(a, b);
    }
    return 0;
}

The above code writes the VGA registers randomly.
We can write VGA CRT controller register index 0x0C or 0x0D
(which is the start address register) to modify the
display memory address of the upper left pixel
or character of the screen. The address may be out of the
range of VGA RAM. So we should validate the memory address
when reading or writing it to avoid a segfault.

Signed-off-by: linzhecheng <linzhecheng@huawei.com>
Message-id: 20180111132724.13744-1-linzhecheng@huawei.com
Fixes: CVE-2018-5683
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 191f59dc17)
[LY: BSC#1076114 CVE-2018-5683]
Signed-off-by: Liang Yan <lyan@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
644bced347 i386: Add support for SPEC_CTRL MSR
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-Id: <20180109154519.25634-3-ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
(cherry picked from commit a33a2cfe2f)
[BR: BSC#1068032 CVE-2017-5715]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
9f419e4ccc vnc: fix overflow in vnc_update_stats
Commit "bea60dd ui/vnc: fix potential memory corruption issues" is
incomplete.  vnc_update_stats must calculate width and height the same
way vnc_refresh_server_surface does it, to make sure we don't use width
and height values larger than the qemu vnc server can handle.

Commit "e22492d ui/vnc: disable adaptive update calculations if not
needed" masks the issue in the default configuration.  It triggers only
in case the "lossy" option is set to "on" (default is "off").

Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 1485248428-575-1-git-send-email-kraxel@redhat.com
(cherry picked from commit eebe0b7905)
[BR: BSC#1026612 CVE-2017-2633 (this fix fixes first fix)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
a7a5aa7a17 vnc: fix memory corruption (CVE-2015-5225)
The _cmp_bytes variable added by commit "bea60dd ui/vnc: fix potential
memory corruption issues" can become negative.  Result is (possibly
exploitable) memory corruption.  Reason for that is it uses the stride
instead of bytes per scanline to apply limits.

For the server surface this is actually fine.  vnc creates that itself,
there is never any padding and thus scanline length always equals stride.

For the guest surface, scanline length and stride are typically identical
too, but it doesn't have to be that way.  So add and use a new variable
(guest_ll) for the guest scanline length.  Also rename min_stride to
line_bytes to make it clearer what it actually is.  Finally sprinkle
in an assert() to make sure we never use a negative _cmp_bytes again.

Reported-by: 范祚至(库特) <zuozhi.fzz@alibaba-inc.com>
Reviewed-by: P J P <ppandit@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit eb8934b041)
[BR: BSC#1026612 CVE-2017-2633 (this fix fixes first fix)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
dff189aaac ui/vnc: fix potential memory corruption issues
this patch makes the VNC server work correctly if the
server surface and the guest surface have different sizes.

Basically the server surface is adjusted to not exceed VNC_MAX_WIDTH
x VNC_MAX_HEIGHT and additionally the width is rounded up to multiple of
VNC_DIRTY_PIXELS_PER_BIT.

If we have a resolution whose width is not divisible by VNC_DIRTY_PIXELS_PER_BIT
we now get a small black bar on the right of the screen.

If the surface is too big to fit the limits only the upper left area is shown.

On top of that this fixes 2 memory corruption issues:

The first was actually discovered during playing
around with a Windows 7 vServer. During resolution
change in Windows 7 it happens sometimes that Windows
changes to an intermediate resolution where
server_stride % cmp_bytes != 0 (in vnc_refresh_server_surface).
This happens only if width % VNC_DIRTY_PIXELS_PER_BIT != 0.

The second is a theoretical issue, but may be exploitable
by the guest. If for some reason the guest surface size is bigger
than VNC_MAX_WIDTH x VNC_MAX_HEIGHT we end up in severe corruption since
this limit is nowhere enforced.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit bea60dd767)
[BR: BSC#1026612 CVE-2017-2633]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
464c0013e9 ui/vnc: fix vmware VGA incompatibilities
this fixes invalid rectangle updates observed after commit 12b316d
with the vmware VGA driver. The issues occurred because the server
and client surface updates seem to be out of sync at some points
and the max width of the surface is not divisible by
VNC_DIRTY_BITS_PER_PIXEL (16).

Reported-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 2f487a3d40)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
9da0965b8a ui/vnc: disable adaptive update calculations if not needed
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit e22492d332)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
f2c90ca3e2 ui/vnc: optimize setting in vnc_dpy_update()
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 919372251c)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
3cd587ccd3 ui/vnc: optimize dirty bitmap tracking
vnc_update_client currently scans the dirty bitmap of each client
bit by bit, which is a very costly operation if only a few bits are dirty.
vnc_refresh_server_surface does almost the same.
This patch optimizes both by utilizing the heavily optimized
function find_next_bit to find the offset of the next dirty
bit in the dirty bitmaps.

The following artificial test (just the bitmap operation part) running
vnc_update_client 65536 times on a 2560x2048 surface illustrates the
performance difference:

All bits clean - vnc_update_client_new: 0.07 secs
 vnc_update_client_old: 10.98 secs

All bits dirty - vnc_update_client_new: 11.26 secs
 vnc_update_client_old: 20.19 secs

Few bits dirty - vnc_update_client_new: 0.08 secs
 vnc_update_client_old: 10.98 secs

The case for all bits dirty is still rather slow; this
is due to the implementation of find_and_clear_dirty_height.
This will be addressed in a separate patch.
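
A rough stand-alone illustration of the idea (find_next_set() below is a toy
stand-in for the optimized find_next_bit(); the bitmap layout is simplified):

    #include <stdio.h>
    #include <stdint.h>

    #define BITMAP_BITS 256

    /* Return the index of the next set bit at or after 'from', or 'size' if none. */
    static unsigned find_next_set(const uint64_t *map, unsigned size, unsigned from)
    {
        unsigned i = from;
        while (i < size) {
            uint64_t word = map[i / 64] >> (i % 64);
            if (word) {
                return i + __builtin_ctzll(word);
            }
            i = (i / 64 + 1) * 64;   /* skip the rest of this all-zero word */
        }
        return size;
    }

    int main(void)
    {
        uint64_t dirty[BITMAP_BITS / 64] = { 0 };
        dirty[1] |= 1ULL << 3;       /* mark bit 67 dirty */

        /* Visit only the dirty bits instead of testing all 256 of them. */
        for (unsigned bit = find_next_set(dirty, BITMAP_BITS, 0);
             bit < BITMAP_BITS;
             bit = find_next_set(dirty, BITMAP_BITS, bit + 1)) {
            printf("dirty bit %u\n", bit);
        }
        return 0;
    }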

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 12b316d4c1)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
7c1dc413e2 ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT
this allows for setting VNC_DIRTY_PIXELS_PER_BIT to different
values than 16 if desired.

Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 6cd859aa8a)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Peter Lieven
ecd581fa02 ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit b4c85ddcec)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
7597e2b200 console: zap displaystate from dcl callbacks
Now that nobody depends on DisplayState in DisplayChangeListener
callbacks any more we can remove the parameter from all callbacks.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit bc2ed9704f)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
6eca8fd580 cocoa: stop using DisplayState
Rework DisplayStateListener callbacks to not use the DisplayState
any more.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 5e00d3ac475fb4c9afa17612a908e933fe142f00)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
bbc77dc48b sdl: stop using DisplayState
Rework DisplayStateListener callbacks to not use the DisplayState
any more.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 8db9bae94e)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
6144b773e4 vnc: stop using DisplayState
Rework DisplayStateListener callbacks to not use the DisplayState
any more.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit d39fa6d86d)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
eddcf049a9 console: add surface_*() getters
Add convenience wrappers to query DisplaySurface properties.
Similar to ds_get_*, but operating on the DisplaySurface,
not the DisplayState.

With this patch in place UI frontends can stop using DisplayState
in the rendering code paths; they can simply operate on the
DisplaySurface passed in via the dpy_gfx_switch callback.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 626e3b34e3)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
f8fa2a87a0 console: rework DisplaySurface handling [dcl/ui side]
Replace the dpy_gfx_resize and dpy_gfx_setdata DisplayChangeListener
callbacks with a dpy_gfx_switch callback which notifies the ui code
when the framebuffer backing storage changes.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c12aeb860c)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
8122dc8f36 console: rework DisplaySurface handling [vga emu side]
Decouple DisplaySurface allocation & deallocation from DisplayState.
Replace dpy_gfx_resize + dpy_gfx_setdata with a dpy_gfx_replace_surface
function.

This handles the graphic hardware emulation.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit da229ef3b3)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
8ef722bb31 sdl: drop dead code
DisplayAllocator removal (commit
187cd1d9f3) made this a nop.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 468dfd6de2)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
2dc158f27c console: kill DisplayState->opaque
It's broken by design.  There can be multiple DisplayChangeListener
instances, so they simply can't store state in the (single) DisplayState
struct.  Try 'qemu -display gtk -vnc :0', watch it crash & burn.

With DisplayChangeListenerOps having a more sane interface now we can
simply use the DisplayChangeListener pointer to get access to our
private data instead.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 21ef45d712)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
9dcfc66aca console: fix displaychangelisteners interface
Split callbacks into separate Ops struct.  Pass DisplayChangeListener
pointer as first argument to all callbacks.  Uninline a bunch of
display functions and move them from console.h to console.c

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 7c20b4a374)
[BR: BSC#1026612 CVE-2017-2633 (infrastructure patch)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
a6979f3c18 9pfs: use g_malloc0 to allocate space for xattr
The 9p back-end first queries the size of an extended attribute,
allocates space for it via g_malloc() and then retrieves its
value into the allocated buffer. A race between querying the attribute
size and retrieving its value could lead to disclosure of memory bytes.
Use g_malloc0() to avoid it.
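
A sketch of the pattern outside of the 9p code (fetch_xattr() is a made-up
helper): if the attribute shrinks between the size query and the read, the
unwritten tail of a plain g_malloc() buffer keeps stale heap bytes, whereas
a g_malloc0() buffer is zero-filled.

    #include <glib.h>
    #include <sys/xattr.h>

    /* Returns a g_malloc0()'d buffer holding the attribute value, or NULL. */
    static char *fetch_xattr(const char *path, const char *name, ssize_t *lenp)
    {
        ssize_t size = lgetxattr(path, name, NULL, 0);    /* query the size */
        if (size <= 0) {
            return NULL;    /* missing or empty attribute: not handled here */
        }
        char *buf = g_malloc0(size);                      /* zero-filled */
        ssize_t got = lgetxattr(path, name, buf, size);   /* may return < size */
        if (got < 0) {
            g_free(buf);
            return NULL;
        }
        *lenp = got;
        return buf;    /* any unwritten tail bytes are zero, not heap data */
    }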

Reported-by: Tuomas Tynkkynen <tuomas.tynkkynen@iki.fi>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 7bd9275630)
[BR: BSC#1062069 CVE-2017-15038]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
300115cc21 cirrus: fix oob access in mode4and5 write functions
Move the dst calculation into the loop, so we apply the mask on each
iteration and will not overflow vga memory.

Cc: Prasad J Pandit <pjp@fedoraproject.org>
Reported-by: Niu Guoxiang <niuguoxiang@huawei.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20171011084314.21752-1-kraxel@redhat.com
[BR: BSC#1063122 CVE-2017-15289]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
46ca74f304 vga: stop passing pointers to vga_draw_line* functions
Instead pass around the address (aka offset into vga memory).
Add vga_read_* helper functions which apply vbe_size_mask to
the address, to make sure the address stays within the valid
range, similar to the cirrus blitter fixes (commits ffaf857778
and 026aeffcb4).

Impact:  DoS for privileged guest users.  qemu crashes with
a segfault, when hitting the guard page after vga memory
allocation, while reading vga memory for display updates.
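
A minimal sketch of the masking approach (sizes and names below are
illustrative, not the ones used by the device model):

    #include <stdint.h>

    #define VGA_VRAM_SIZE (1u << 20)            /* example size, a power of two */
    #define VBE_SIZE_MASK (VGA_VRAM_SIZE - 1)

    static uint8_t vram[VGA_VRAM_SIZE];

    /* Read helpers take an offset into VGA memory instead of a raw pointer
     * and mask it on every access, so a bad address wraps around instead of
     * running off the end of the allocation. */
    static uint8_t vga_read_byte(uint32_t addr)
    {
        return vram[addr & VBE_SIZE_MASK];
    }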

Fixes: CVE-2017-13672
Cc: P J P <ppandit@redhat.com>
Reported-by: David Buchanan <d@vidbuchanan.co.uk>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170828122906.18993-1-kraxel@redhat.com
(cherry picked from commit 3d90c62548)
[FL: BSC#1056334 CVE-2017-13672, add macro to fix multiple #include]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
45abd51274 exec: use qemu_ram_ptr_length to access guest ram
When accessing a guest's ram block during a DMA operation, use
'qemu_ram_ptr_length' to get the ram block pointer. It ensures
that a DMA operation of the given length is possible and avoids
any OOB memory access situations.

Reported-by: Alex <broscutamaker@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20170712123840.29328-1-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 04bf2526ce)
[FL: BSC#1048902 CVE-2017-11334]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
e92d5f4846 slirp: check len against dhcp options array end
While parsing the dhcp options string in 'dhcp_decode', if an option's
length 'len' appears towards the end of the 'bp_vend' array, the ensuing
read could lead to an OOB memory access issue. Add a check to avoid it.

This is CVE-2017-11434.
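
An illustrative sketch of the bounds check on a generic tag/length/value
buffer (this is not the slirp parser; the option layout is simplified):

    #include <stddef.h>
    #include <stdint.h>

    static int parse_options(const uint8_t *buf, size_t buf_len)
    {
        size_t i = 0;

        while (i + 2 <= buf_len) {           /* need at least tag + len bytes */
            uint8_t tag = buf[i];
            uint8_t len = buf[i + 1];

            if (tag == 0xff) {               /* end-of-options marker */
                break;
            }
            if (len > buf_len - (i + 2)) {   /* value would run past the end */
                return -1;
            }
            /* ... consume the value bytes buf[i + 2 .. i + 2 + len) here ... */
            i += 2 + len;
        }
        return 0;
    }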

Reported-by: Reno Robert <renorobert@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
(cherry picked from commit 413d463f43)
[FL: BSC#1049381 CVE-2017-11434]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Prasad J Pandit
044969b5e8 multiboot: validate multiboot header address values
While loading a kernel via a multiboot-v1 image, (flags & 0x00010000)
indicates that the multiboot header contains valid addresses to load
the kernel image. These addresses are used to compute the kernel
size and kernel text offset in the OS image. Validate these
address values to avoid an OOB access issue.

This is CVE-2017-14167.
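
A sketch of the kind of sanity checks implied above (the struct and helper
are illustrative, not the actual multiboot loader code):

    #include <stdint.h>
    #include <stdbool.h>

    struct mb_header {
        uint32_t header_addr;     /* guest-supplied */
        uint32_t load_addr;       /* guest-supplied */
        uint32_t load_end_addr;   /* guest-supplied, 0 means "use file size" */
    };

    static bool mb_addresses_valid(const struct mb_header *h)
    {
        if (h->load_addr > h->header_addr) {
            return false;    /* kernel text offset computation would wrap */
        }
        if (h->load_end_addr && h->load_end_addr < h->load_addr) {
            return false;    /* kernel size would be negative */
        }
        return true;
    }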

Reported-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20170907063256.7418-1-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ed4f86e8b6)
[FL: BSC#1057585 CVE-2017-14167]
Signed-off-by: Fei Li <fli@suse.com>
2021-03-18 17:15:18 -06:00
Gerd Hoffmann
4645de669c usb-redir: fix stack overflow in usbredir_log_data
Don't reinvent a broken wheel, just use the hexdump function we have.

Impact: low, broken code doesn't run unless you have debug logging
enabled.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170509110128.27261-1-kraxel@redhat.com
(cherry picked from commit bd4a683505)
[BR: BSC#1047674 CVE-2017-10806]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Max Reitz
5888061431 qemu-nbd: Ignore SIGPIPE
qemu proper has done so for 13 years
(8a7ddc38a6), qemu-img and qemu-io have
done so for four years (526eda14a6).
Ignoring this signal is especially important in qemu-nbd because
otherwise a client can easily take down the qemu-nbd server by dropping
the connection when the server wants to send something, for example:

$ qemu-nbd -x foo -f raw -t null-co:// &
[1] 12726
$ qemu-io -c quit nbd://localhost/bar
can't open device nbd://localhost/bar: No export with name 'bar' available
[1]  + 12726 broken pipe  qemu-nbd -x foo -f raw -t null-co://

In this case, the client sends an NBD_OPT_ABORT and closes the
connection (because it is not required to wait for a reply), but the
server replies with an NBD_REP_ACK (because it is required to reply).
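
A minimal sketch of the idea for any long-running server, not qemu-nbd
specifically: ignore SIGPIPE once at startup, so a write to a connection the
peer already dropped fails with EPIPE instead of killing the process.

    #include <signal.h>
    #include <stdio.h>

    int main(void)
    {
        if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
            perror("signal");
            return 1;
        }
        /* ... serve requests; writes on a dead socket now return -1 with
         * errno == EPIPE rather than terminating the server ... */
        return 0;
    }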

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20170611123714.31292-1-mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 041e32b8d9)
[BR: BSC#1046636 CVE-2017-10664]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
50c105187c megasas: always store SCSIRequest* into MegasasCmd
This ensures that the request is unref'ed properly, and avoids a
segmentation fault in the new qtest testcase that is added.

Reported-by: Zhangyanyu <zyy4013@stu.ouc.edu.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[BR: BSC#1043296 CVE-2017-9503, dropped testcase from patch]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:18 -06:00
Paolo Bonzini
0182b5013f megasas: do not read DCMD opcode more than once from frame
Avoid TOC-TOU bugs by storing the DCMD opcode in the MegasasCmd.
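
A sketch of the read-once pattern behind the fix (the structures are
illustrative, not the megasas ones): copy the opcode out of guest-writable
memory a single time and only ever dispatch on the local copy.

    #include <stdint.h>

    struct frame {                 /* lives in guest-visible DMA memory */
        volatile uint32_t opcode;
    };

    struct cmd {
        uint32_t opcode;           /* device-private copy, taken once */
    };

    static void cmd_start(struct cmd *c, const struct frame *f)
    {
        c->opcode = f->opcode;     /* single read from shared memory */
        /* all later checks and dispatch use c->opcode only, so the guest
         * cannot change the value between the check and the use */
    }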

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[BR: BSC#1043296 CVE-2017-9503]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
8bb8f0a95f serial: fix memory leak in serial exit
The serial_exit_core function doesn't free some resources.
This can lead to a memory leak on hotplug and unplug. This
patch avoids this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-Id: <586cb5ab.f31d9d0a.38ac3.acf2@mx.google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8409dc884a)
[BR: BSC#1021741 CVE-2017-5579]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a4895f9d7a 9pfs: local: fix unlink of alien files in mapped-file mode
When trying to remove a file from a directory, both created in non-mapped
mode, the file remains and EBADF is returned to the guest.

This is a regression introduced by commit "df4938a6651b 9pfs: local:
unlinkat: don't follow symlinks" when fixing CVE-2016-9602. It changed the
way we unlink the metadata file from

    ret = remove("$dir/.virtfs_metadata/$name");
    if (ret < 0 && errno != ENOENT) {
         /* Error out */
    }
    /* Ignore absence of metadata */

to

    fd = openat("$dir/.virtfs_metadata")
    unlinkat(fd, "$name")
    if (ret < 0 && errno != ENOENT) {
         /* Error out */
    }
    /* Ignore absence of metadata */

If $dir was created in non-mapped mode, openat() fails with ENOENT and
we pass -1 to unlinkat(), which fails in turn with EBADF.

We just need to check the return of openat() and ignore ENOENT, in order
to restore the behaviour we had with remove().

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
[groug: rewrote the comments as suggested by Eric]
(cherry picked from commit 6a87e7929f)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
0d92ebe2e0 9pfs: local: forbid client access to metadata (CVE-2017-7493)
When using the mapped-file security mode, we shouldn't let the client mess
with the metadata. The current code already tries to hide the metadata dir
from the client by skipping it in local_readdir(). But the client can still
access or modify it through several other operations. This can be used to
escalate privileges in the guest.

Affected backend operations are:
- local_mknod()
- local_mkdir()
- local_open2()
- local_symlink()
- local_link()
- local_unlinkat()
- local_renameat()
- local_rename()
- local_name_to_path()

Other operations are safe because they are only passed a fid path, which
is computed internally in local_name_to_path().

This patch converts all the functions listed above to fail and return
EINVAL when being passed the name of the metadata dir. This may look
like a poor choice for errno, but there's no such thing as an illegal
path name on Linux and I could not think of anything better.

This fixes CVE-2017-7493.
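
A sketch of the check added to the listed operations (the helper name is
made up; the directory name matches the one quoted earlier in this log):

    #include <string.h>
    #include <errno.h>

    #define VIRTFS_META_DIR ".virtfs_metadata"

    /* Fail with EINVAL if a client-supplied name refers to the metadata dir. */
    static int check_not_metadata(const char *name)
    {
        if (!strcmp(name, VIRTFS_META_DIR)) {
            errno = EINVAL;
            return -1;
        }
        return 0;
    }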

Reported-by: Leo Gaspard <leo@gaspard.io>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 7a95434e0c)
[BR: BSC#1039495]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
05182638e6 audio: release capture buffers
AUD_add_capture() allocates two buffers which are never released.
Add the missing calls to AUD_del_capture().

Impact: Allows vnc clients to exhaust host memory by repeatedly
starting and stopping audio capture.

Fixes: CVE-2017-8309
Cc: P J P <ppandit@redhat.com>
Cc: Huawei PSIRT <PSIRT@huawei.com>
Reported-by: "Jiangxin (hunter, SCC)" <jiangxin1@huawei.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170428075612.9997-1-kraxel@redhat.com
(cherry picked from commit 3268a845f4)
[BR: BSC#1037242]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
ff0c0f12f4 usb: ohci: fix error return code in servicing iso td
It should return 1 if an error occurs when reading iso td.
This will avoid an infinite loop issue in ohci_service_ed_list.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899ac3e.1033240a.944d5.9a2d@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 26f670a244)
[BR: BSC#1042159 CVE-2017-9330, also includes fix from upstream
commit cf66ee8e where the test for failure of ohci_read_iso_td()
was wrong]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
84abd63e2d ide: ahci: call cleanup function in ahci unit
This avoids a memory leak when hot-unplugging the ahci device.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1488449293-80280-4-git-send-email-liqiang6-s@360.cn
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit d68f0f778e)
[BSC#1042801 CVE-2017-9373]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
cc0f8de694 ide: core: add cleanup function
As the pci ahci device can be hotplugged and unplugged, the ahci
unrealize function should free all the resources allocated in the
realize function. This patch adds ide_exit to free those resources.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1488449293-80280-3-git-send-email-liqiang6-s@360.cn
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit c9f086418a)
[BSC#1042801 CVE-2017-9373]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
03482fda68 xhci: guard xhci_kick_epctx against recursive calls
Track xhci_kick_epctx processing being active in a variable.  Check the
variable before calling xhci_kick_epctx from xhci_kick_ep.  Add an
assert to make sure we don't call recursively into xhci_kick_epctx.
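
A sketch of the re-entrancy guard described above (illustrative names, not
the xhci code itself):

    #include <assert.h>
    #include <stdbool.h>

    struct epctx {
        bool kick_active;
    };

    static void kick_epctx(struct epctx *ctx)
    {
        assert(!ctx->kick_active);      /* must never be entered recursively */
        ctx->kick_active = true;
        /* ... process transfer rings; this work may trigger further kicks ... */
        ctx->kick_active = false;
    }

    static void kick_ep(struct epctx *ctx)
    {
        if (ctx->kick_active) {         /* processing already in progress */
            return;
        }
        kick_epctx(ctx);
    }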

Cc: 1653384@bugs.launchpad.net
Fixes: 94b037f2a4
Reported-by: Fabian Lesniak <fabian@lesniak-it.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486035372-3621-1-git-send-email-kraxel@redhat.com
Message-id: 1485790607-31399-5-git-send-email-kraxel@redhat.com
(cherry picked from commit 96d87bdda3)
[BR: BSC#1042800 CVE-2017-9375]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
054f460e4d cirrus: fix off-by-one in cirrus_bitblt_rop_bkwd_transp_*_16
The switch from pointers to addresses (commit
026aeffcb4 and
ffaf857778) added
an off-by-one bug to 16bit backward blits.  Fix.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 1489735296-19047-1-git-send-email-kraxel@redhat.com
(cherry picked from commit f019722cbb)
[BR: BSC#1035406 CVE-2017-7980]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
0125f9ca1b cirrus: stop passing around src pointers in the blitter
Does basically the same as "cirrus: stop passing around dst pointers in
the blitter", just for the src pointer instead of the dst pointer.

For the src we have to care about cputovideo blits though and fetch the
data from s->cirrus_bltbuf instead of vga memory.  The cirrus_src*()
helper functions handle that.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489584487-3489-1-git-send-email-kraxel@redhat.com
(cherry picked from commit ffaf857778)
[BR: BSC#1035406 CVE-2017-7980]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
1d2ade350a cirrus: stop passing around dst pointers in the blitter
Instead pass around the address (aka offset into vga memory).  Calculate
the pointer in the rop_* functions, after applying the mask to the
address, to make sure the address stays within the valid range.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489574872-8679-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 026aeffcb4)
[BR: BSC#1035406 CVE-2017-7980]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
hangaohuai
ae7ad17ab0 fix :cirrus_vga fix OOB read case qemu Segmentation fault
check the validity of parameters in cirrus_bitblt_rop_fwd_transp_xxx
and cirrus_bitblt_rop_fwd_xxx to avoid the OOB read which causes a qemu segmentation fault.

After the fix, we will touch the assert in
cirrus_invalidate_region:
assert(off_cur_end >= off_cur);

Signed-off-by: fangying <fangying1@huawei.com>
Signed-off-by: hangaohuai <hangaohuai@huawei.com>
Message-id: 20170314063919.16200-1-hangaohuai@huawei.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 215902d7b6)
[BR: BSC#1034908 CVE-2017-7718]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Pranith Kumar
7ea1521c6b tcg/i386: Check the size of instruction being translated
This fixes the bug: 'user-to-root privesc inside VM via bad translation
caching' reported by Jann Horn here:
https://bugs.chromium.org/p/project-zero/issues/detail?id=1122

Reviewed-by: Richard Henderson <rth@twiddle.net>
CC: Peter Maydell <peter.maydell@linaro.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170323175851.14342-1-bobby.prani@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 30663fd26c)
[BR: BSC#1030624]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
02bcee6460 cirrus/vnc: zap bitblit support from console code.
There is a special code path (dpy_gfx_copy) to allow graphic emulation
notify user interface code about bitblit operations carryed out by
guests.  It is supported by cirrus and vnc server.  The intended purpose
is to optimize display scrolls and just send over the scroll op instead
of a full display update.

This is rarely used these days though because modern guests simply don't
use the cirrus blitter any more.  Any linux guest using the cirrus drm
driver doesn't.  Any windows guest newer than winxp doesn't ship with a
cirrus driver any more and thus uses the cirrus as simple framebuffer.

So this code tends to bitrot and bugs can go unnoticed for a long time.
See for example commit "3e10c3e vnc: fix qemu crash because of SIGSEGV"
which fixes a bug lingering in the code for almost a year, added by
commit "c7628bf vnc: only alloc server surface with clients connected".

Also the vnc server will throttle the frame rate in case it figures the
network can't keep up (send buffers are full).  This doesn't work with
dpy_gfx_copy, for any copy operation sent to the vnc client we have to
send all outstanding updates beforehand, otherwise the vnc client might
run the client side blit on outdated data and thereby corrupt the
display.  So this dpy_gfx_copy "optimization" might even make things
worse on slow network links.

Let's kill it once and for all.

Oh, and one more reason: Turns out (after writing the patch) we have a
security bug in that code path ...

Fixes: CVE-2016-9603
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1489494419-14340-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 50628d3479)
[BR: BSC#1028656]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
8de40e0696 usb: ohci: limit the number of link eds
The guest may build an infinite loop with linked EDs. This patch
limits the number of linked EDs to avoid this.
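
A sketch of the loop limit (ED_LINK_LIMIT and the structures below are
illustrative, not the actual OHCI code): cap how many guest-controlled list
links are followed so a cyclic list cannot stall the device model forever.

    #include <stdint.h>

    #define ED_LINK_LIMIT 4

    struct ed { uint32_t next; };   /* guest-provided endpoint descriptor */

    static int walk_ed_list(uint32_t head,
                            int (*read_ed)(uint32_t addr, struct ed *ed))
    {
        int visited = 0;

        for (uint32_t addr = head; addr != 0; ) {
            struct ed ed;

            if (++visited > ED_LINK_LIMIT) {
                return -1;           /* too many links: assume a guest-built loop */
            }
            if (read_ed(addr, &ed) < 0) {
                return -1;
            }
            /* ... service this endpoint descriptor here ... */
            addr = ed.next;
        }
        return 0;
    }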

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5899a02e.45ca240a.6c373.93c1@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 95ed56939e)
[BR: BSC#1028184 CVE-2017-6505]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gerd Hoffmann
377d636e3b xhci: apply limits to loops
Limits should be big enough that a normal guest should not hit them.
Add a tracepoint to log them, just in case.  Also, while at it,
log the existing link TRB limit too.

Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1486383669-6421-1-git-send-email-kraxel@redhat.com
(cherry picked from commit f89b60f6e5)
[BR: BSC#1025109 CVE-2017-5973]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/usb/hcd-xhci.c
	hw/usb/trace-events
2021-03-18 17:15:17 -06:00
Laszlo Ersek
650e166c42 dump: rebase from host-private RAMBlock offsets to guest-physical addresses
RAMBlock.offset                   --> GuestPhysBlock.target_start
RAMBlock.offset + RAMBlock.length --> GuestPhysBlock.target_end
RAMBlock.length                   --> GuestPhysBlock.target_end -
                                      GuestPhysBlock.target_start

"GuestPhysBlock.host_addr" is only used when writing the dump contents.

This patch enables "crash" to work with the vmcore by rebasing the vmcore
from the left side of the following diagram to the right side:

host-private
offset
relative
to ram_addr   RAMBlock                  guest-visible paddrs
            0 +-------------------+.....+-------------------+ 0
              |         ^         |     |        ^          |
              |       640 KB      |     |      640 KB       |
              |         v         |     |        v          |
  0x0000a0000 +-------------------+.....+-------------------+ 0x0000a0000
              |         ^         |     |XXXXXXXXXXXXXXXXXXX|
              |       384 KB      |     |XXXXXXXXXXXXXXXXXXX|
              |         v         |     |XXXXXXXXXXXXXXXXXXX|
  0x000100000 +-------------------+.....+-------------------+ 0x000100000
              |         ^         |     |        ^          |
              |       3583 MB     |     |      3583 MB      |
              |         v         |     |        v          |
  0x0e0000000 +-------------------+.....+-------------------+ 0x0e0000000
              |         ^         |.    |XXXXXXXXXXXXXXXXXXX|
              | above_4g_mem_size | .   |XXXX PCI hole XXXXX|
              |         v         |  .  |XXXX          XXXXX|
     ram_size +-------------------+   . |XXXX  512 MB  XXXXX|
                                   .   .|XXXXXXXXXXXXXXXXXXX|
                                    .   +-------------------+ 0x100000000
                                     .  |         ^         |
                                      . | above_4g_mem_size |
                                       .|         v         |
                                        +-------------------+ ram_size
                                                              + 512 MB

Related RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=981582

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 56c4bfb3f0)
[BR: BSC#1049785]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Laszlo Ersek
e881861139 dump: populate guest_phys_blocks
While the machine is paused, in guest_phys_blocks_append() we register a
one-shot MemoryListener, solely for the initial collection of the valid
guest-physical memory ranges that happens at listener registration time.

For each range that is reported to guest_phys_blocks_region_add(), we
attempt to merge the range with the preceding one.

Ranges can only be joined if they are contiguous in both guest-physical
address space, and contiguous in host virtual address space.

The "maximal" ranges that remain in the end constitute the guest-physical
memory map that the dump will be based on.
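
A sketch of the merge rule (illustrative structures, not the QEMU types): a
new range is folded into the previous block only when it is contiguous both
in guest-physical address space and in host virtual address space.

    #include <stdint.h>
    #include <stdbool.h>

    struct phys_block {
        uint64_t target_start;    /* guest-physical range [start, end) */
        uint64_t target_end;
        uint8_t *host_addr;       /* host mapping of target_start */
    };

    static bool try_merge(struct phys_block *prev,
                          uint64_t start, uint64_t end, uint8_t *host)
    {
        bool contiguous_guest = (start == prev->target_end);
        bool contiguous_host  =
            (host == prev->host_addr + (prev->target_end - prev->target_start));

        if (contiguous_guest && contiguous_host) {
            prev->target_end = end;    /* extend the previous block */
            return true;
        }
        return false;                  /* caller starts a new block */
    }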

Related RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=981582

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit c5d7f60f06)
[BR: BSC#1049785]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Laszlo Ersek
8d6c453a47 dump: introduce GuestPhysBlockList
The vmcore must use physical addresses that are visible to the guest, not
addresses that point into linear RAMBlocks. As first step, introduce the
list type into which we'll collect the physical mappings in effect at the
time of the dump.

Related RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=981582

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 5ee163e8ea)
[BR: BSC#1049785]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Laszlo Ersek
44145d968a dump: clamp guest-provided mapping lengths to ramblock sizes
Even a trusted & clean-state guest can map more memory than what it was
given. Since the vmcore contains RAMBlocks, mapping sizes should be
clamped to RAMBlock sizes. Otherwise such oversized mappings can exceed
the entire file size, and ELF parsers might refuse even the valid portion
of the PT_LOAD entry.

Related RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=981582

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 2cac260768)
[BR: BSC#1049785]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
40769097de Adjust some code for fixing bsc#1038396 and Add missing part of 616a6552
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a3e4c45773 cpu: introduce CPUClass::virtio_is_big_endian()
If we want to support targets that can change endianness (modern PPC and
ARM for the moment), we need to add a per-CPU class method to be called
from the virtio code. The virtio_ prefix in the name is a hint for people
to avoid misuse (i.e. calling it anywhere but from the virtio code).

The default behaviour is to return the compile-time default target
endianness.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit bf7663c4bd)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
e934bfd326 virtio: add subsections to the migration stream
There is a need to add some more fields to VirtIODevice that should be
migrated (broken status, endianness). The problem is that we do not
want to break compatibility while adding a new feature... This issue has
been addressed in the generic VMState code with the use of optional
subsections. As a *temporary* alternative to port the whole virtio
migration code to VMState, this patch mimics a similar subsectioning
ability for virtio, using the VMState code.

Since each virtio device is streamed in its own section, the idea is to
stream subsections between the end of the device section and the start
of the next sections. This allows an older QEMU to complain and exit
when fed with subsections:

Unknown savevm section type 5
load of migration failed

Suggested-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 6b321a3df5)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
8c08db6d3e cpu: Replace cpu_single_env with CPUState current_cpu
Move it to qom/cpu.h.

Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 4917cf4432)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
952453d8b8 cpu: Turn cpu_get_memory_mapping() into a CPUState hook
Change error reporting from return value to Error argument.

Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
[AF: Fixed cpu_get_memory_mapping() documentation]
Signed-off-by: Andreas Färber <afaerber@suse.de>

(cherry picked from commit a23bbfda75)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
e79f0d0e4d memory_mapping: Move MemoryMappingList typedef to qemu/typedefs.h
This will avoid issues with hwaddr and ram_addr_t when including
sysemu/memory_mapping.h for CONFIG_USER_ONLY, e.g., from qom/cpu.h.

Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 6d4d3ae77d)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
0b9396137a cpu: Turn cpu_paging_enabled() into a CPUState hook
Relocate assignment of x86 get_arch_id to have all hooks in one place.

Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 444d559078)
[LM: BSC#1038396]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:17 -06:00
Bruce Rogers
c21dbaa49e 9pfs: local: remove: use correct path component
Commit a0e640a8 introduced a path processing error.
Pass fstatat the dirpath based path component instead
of the entire path.

[BR: BSC#1045035]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
90063d8c7d 9pfs: local: set the path of the export root to "."
The local backend was recently converted to using "at*()" syscalls in order
to ensure all accesses happen below the shared directory. This requires that
we only pass relative paths, otherwise the dirfd argument to the "at*()"
syscalls is ignored and the path is treated as an absolute path in the host.
This is actually the case for paths in all fids, with the notable exception
of the root fid, whose path is "/". This causes the following backend ops to
act on the "/" directory of the host instead of the virtfs shared directory
when the export root is involved:
- lstat
- chmod
- chown
- utimensat

i.e., chmod /9p_mount_point in the guest will be converted to chmod / in the
host for example. This could cause security issues with a privileged QEMU.

All "*at()" syscalls are being passed an open file descriptor. In the case
of the export root, this file descriptor points to the path in the host that
was passed to -fsdev.

The fix is thus as simple as changing the path of the export root fid to be
"." instead of "/".

This is CVE-2017-7471.
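
The underlying semantics can be seen with a short stand-alone demo (unrelated
to the 9p code itself): an *at() call ignores its dirfd when the path is
absolute, so "/" escapes the directory the descriptor points to while "."
does not.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
        int dirfd = open("/tmp", O_RDONLY | O_DIRECTORY);  /* stand-in export root */
        struct stat st_abs, st_rel;

        if (dirfd < 0) {
            perror("open");
            return 1;
        }
        /* Absolute path: dirfd is ignored, this stats the host root. */
        fstatat(dirfd, "/", &st_abs, 0);
        /* Relative path: resolved against dirfd, this stats /tmp itself. */
        fstatat(dirfd, ".", &st_rel, 0);

        printf("inode of \"/\": %llu, inode of \".\": %llu\n",
               (unsigned long long)st_abs.st_ino,
               (unsigned long long)st_rel.st_ino);
        return 0;
    }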

Cc: qemu-stable@nongnu.org
Reported-by: Léo Gaspard <leo@gaspard.io>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 9c6b899f7a)
[BR: BSC#1034866 CVE-2017-7471]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
cf73a2e5d0 9pfs: xattr: fix memory leak in v9fs_list_xattr
Free 'orig_value' in error path.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 4ffcdef427)
[BR: BSC#1035950 CVE-2017-8086]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
1ffef00f49 9pfs: fix file descriptor leak
The v9fs_create() and v9fs_lcreate() functions are used to create a file
on the backend and to associate it to a fid. The fid shouldn't be already
in-use, otherwise both functions may silently leak a file descriptor or
allocated memory. The current code doesn't check that.

This patch ensures that the fid isn't already associated to anything
before using it.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
(reworded the changelog, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit d63fb193e7)
[BR: BSC#1032075 CVE-2017-7377]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
d8e7eacec1 9pfs: don't try to flush self and avoid QEMU hang on reset
According to the 9P spec [*], when a client wants to cancel a pending I/O
request identified by a given tag (uint16), it must send a Tflush message
and wait for the server to respond with a Rflush message before reusing this
tag for another I/O. The server may still send a completion message for the
I/O if it wasn't actually cancelled but the Rflush message must arrive after
that.

QEMU hence waits for the flushed PDU to complete before sending the Rflush
message back to the client.

If a client sends 'Tflush tag oldtag' and tag == oldtag, QEMU will then
allocate a PDU identified by tag, find it in the PDU list and wait for
this same PDU to complete... i.e. wait for a completion that will never
happen. This causes a tag and ring slot leak in the guest, and a PDU
leak in QEMU, all of them limited by the maximal number of PDUs (128).
But, worse, this causes QEMU to hang on device reset since v9fs_reset()
wants to drain all pending I/O.

This insane behavior is likely to denote a bug in the client, and it would
deserve an Rerror message to be sent back. Unfortunately, the protocol
allows it and requires all flush requests to succeed (only a Tflush response
is expected).

The only option is to detect when we have to handle a self-referencing
flush request and report success to the client right away.

[*] http://man.cat-v.org/plan_9/5/flush

Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit d5f2af7b95)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
41606dac49 9pfs: fix vulnerability in openat_dir() and local_unlinkat_common()
We should pass O_NOFOLLOW otherwise openat() will follow symlinks and make
QEMU vulnerable.

While here, we also fix local_unlinkat_common() to use openat_dir() for
the same reasons (it was a leftover in the original patchset actually).

This fixes CVE-2016-9602.
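
A sketch of the hardened directory open (the helper name is illustrative):

    #include <fcntl.h>

    static int openat_dir_nofollow(int dirfd, const char *name)
    {
        /* O_NOFOLLOW: fail with ELOOP if "name" is a symlink;
         * O_DIRECTORY: fail if it is not a directory. */
        return openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
    }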

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit b003fc0d8a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a47f831ad5 9pfs: fix O_PATH build break with older glibc versions
When O_PATH is used with O_DIRECTORY, it only acts as an optimization: the
openat() syscall simply finds the name in the VFS, and doesn't trigger the
underlying filesystem.

On systems that don't define O_PATH, because they have glibc version 2.13
or older for example, we can safely omit it. We don't want to deactivate
O_PATH globally though, in case it is used without O_DIRECTORY. This is done
with a dedicated macro.

Systems without O_PATH may thus fail to resolve names that involve
unreadable directories, compared to newer systems succeeding, but such
corner case failure is our only option on those older systems to avoid
the security hole of chasing symlinks inappropriately.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
(added last paragraph to changelog as suggested by Eric Blake)
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit 918112c02a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
c272662580 9pfs: don't use AT_EMPTY_PATH in local_set_cred_passthrough()
The name argument can never be an empty string, and dirfd always points to
the containing directory of the file name. AT_EMPTY_PATH is hence useless
here. Also it breaks build with glibc version 2.13 and older.

It is actually an oversight of a previous tentative patch to implement this
function. We can safely drop it.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Greg Kurz <groug@kaod.org>
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit b314f6a077)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
c0c0eba573 9pfs: fail local_statfs() earlier
If we cannot open the given path, we can return right away instead of
passing -1 to fstatfs() and close(). This will make Coverity happy.

(Coverity issue CID1371729)

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
(cherry picked from commit 23da0145cc)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
6df3ba047c 9pfs: fix fd leak in local_opendir()
Coverity issue CID1371731

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
(cherry picked from commit faab207f11)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
8c1013182c 9pfs: fix bogus fd check in local_remove()
This was spotted by Coverity as a fd leak. This is certainly true, but also
local_remove() would always return without doing anything, unless the fd is
zero, which is very unlikely.

(Coverity issue CID1371732)

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit b7361d46e7)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
6efdf0ce12 9pfs: local: drop unused code
Now that all the callbacks have been converted to use "at" syscalls, we
can drop this code.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c23d5f1d5b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
1065eea81f 9pfs: local: open2: don't follow symlinks
The local_open2() callback is vulnerable to symlink attacks because it
calls:

(1) open() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_open2() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively. Since local_open2() already opens
a descriptor to the target file, local_set_cred_passthrough() is
modified to reuse it instead of opening a new one.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to openat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a565fea565)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
5efdf5c89e 9pfs: local: mkdir: don't follow symlinks
The local_mkdir() callback is vulnerable to symlink attacks because it
calls:

(1) mkdir() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mkdir() to rely on opendir_nofollow() and
mkdirat() to fix (1), as well as local_set_xattrat(),
local_set_mapped_file_attrat() and local_set_cred_passthrough() to
fix (2), (3) and (4) respectively.

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mkdirat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3f3a16990b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
3551d84f50 9pfs: local: mknod: don't follow symlinks
The local_mknod() callback is vulnerable to symlink attacks because it
calls:

(1) mknod() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one
(4) local_post_create_passthrough() which calls in turn lchown() and
    chmod(), both functions also following symbolic links

This patch converts local_mknod() to rely on opendir_nofollow() and
mknodat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

A new local_set_cred_passthrough() helper based on fchownat() and
fchmodat_nofollow() is introduced as a replacement to
local_post_create_passthrough() to fix (4).

The mapped and mapped-file security modes are supposed to be identical,
except for the place where credentials and file modes are stored. While
here, we also make that explicit by sharing the call to mknodat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d815e72190)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
0ed94ea19c 9pfs: local: symlink: don't follow symlinks
The local_symlink() callback is vulnerable to symlink attacks because it
calls:

(1) symlink() which follows symbolic links for all path elements but the
    rightmost one
(2) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(3) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(4) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_symlink() to rely on opendir_nofollow() and
symlinkat() to fix (1), openat(O_NOFOLLOW) to fix (2), as well as
local_set_xattrat() and local_set_mapped_file_attrat() to fix (3) and
(4) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 38771613ea)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
f5c7500547 9pfs: local: chown: don't follow symlinks
The local_chown() callback is vulnerable to symlink attacks because it
calls:

(1) lchown() which follows symbolic links for all path elements but the
    rightmost one
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

This patch converts local_chown() to rely on open_nofollow() and
fchownat() to fix (1), as well as local_set_xattrat() and
local_set_mapped_file_attrat() to fix (2) and (3) respectively.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d369f20763)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
42fff2efbf 9pfs: local: chmod: don't follow symlinks
The local_chmod() callback is vulnerable to symlink attacks because it
calls:

(1) chmod() which follows symbolic links for all path elements
(2) local_set_xattr()->setxattr() which follows symbolic links for all
    path elements
(3) local_set_mapped_file_attr() which calls in turn local_fopen() and
    mkdir(), both functions following symbolic links for all path
    elements but the rightmost one

We would need fchmodat() to implement AT_SYMLINK_NOFOLLOW to fix (1). This
isn't the case on linux unfortunately: the kernel doesn't even have a flags
argument to the syscall :-\ It is impossible to fix it in userspace in
a race-free manner. This patch hence converts local_chmod() to rely on
open_nofollow() and fchmod(). This fixes the vulnerability but introduces
a limitation: the target file must be readable and/or writable for the call
to openat() to succeed.

It introduces a local_set_xattrat() replacement to local_set_xattr()
based on fsetxattrat() to fix (2), and a local_set_mapped_file_attrat()
replacement to local_set_mapped_file_attr() based on local_fopenat()
and mkdirat() to fix (3). No effort is made to factor out code because
both local_set_xattr() and local_set_mapped_file_attr() will be dropped
when all users have been converted to use the "at" versions.

This partly fixes CVE-2016-9602.
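
A sketch of the resulting pattern (illustrative helper; the real backend code
has more error handling): open the target with O_NOFOLLOW and change the mode
through the descriptor, since fchmodat() on Linux cannot be told not to
follow symlinks.

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static int chmod_nofollow(int dirfd, const char *name, mode_t mode)
    {
        int fd = openat(dirfd, name, O_RDONLY | O_NOFOLLOW);  /* rejects symlinks */
        if (fd < 0) {
            return -1;    /* also fails if the file is unreadable, as noted above */
        }
        int ret = fchmod(fd, mode);
        close(fd);
        return ret;
    }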

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e3187a45dd)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
66c1bca070 9pfs: local: link: don't follow symlinks
The local_link() callback is vulnerable to symlink attacks because it calls:

(1) link() which follows symbolic links for all path elements but the
    rightmost one
(2) local_create_mapped_attr_dir()->mkdir() which follows symbolic links
    for all path elements but the rightmost one

This patch converts local_link() to rely on opendir_nofollow() and linkat()
to fix (1), mkdirat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ad0b46e6ac)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
d5c103c2bb 9pfs: local: improve error handling in link op
When using the mapped-file security model, we also have to create a link
for the metadata file if it exists. In case of failure, we should roll back.

That's what this patch does.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6dd4b1f1d0)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
63b83ed531 9pfs: local: rename: use renameat
The local_rename() callback is vulnerable to symlink attacks because it
uses rename() which follows symbolic links in all path elements but the
rightmost one.

This patch simply transforms local_rename() into a wrapper around
local_renameat() which is symlink-attack safe.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d2767edec5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
5d26d9f2e7 9pfs: local: renameat: don't follow symlinks
The local_renameat() callback is currently a wrapper around local_rename()
which is vulnerable to symlink attacks.

This patch rewrites local_renameat() to have its own implementation, based
on local_opendir_nofollow() and renameat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 99f2cf4b2d)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a38dc42428 9pfs: local: lstat: don't follow symlinks
The local_lstat() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) getxattr() which follows symbolic links in all path elements
(3) local_mapped_file_attr()->local_fopen()->openat(O_NOFOLLOW) which
    follows symbolic links in all path elements but the rightmost
    one

This patch converts local_lstat() to rely on opendir_nofollow() and
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1), fgetxattrat_nofollow() to
fix (2).

A new local_fopenat() helper is introduced as a replacement to
local_fopen() to fix (3). No effort is made to factor out code
because local_fopen() will be dropped when all users have been
converted to call local_fopenat().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f9aef99b3e)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Patch
modified to also fix an existing error path to do proper cleanup.]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a601a931a9 9pfs: local: readlink: don't follow symlinks
The local_readlink() callback is vulnerable to symlink attacks because it
calls:

(1) open(O_NOFOLLOW) which follows symbolic links for all path elements but
    the rightmost one
(2) readlink() which follows symbolic links for all path elements but the
    rightmost one

This patch converts local_readlink() to rely on open_nofollow() to fix (1)
and opendir_nofollow(), readlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit bec1e9546e)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
fb55b5349f 9pfs: local: truncate: don't follow symlinks
The local_truncate() callback is vulnerable to symlink attacks because
it calls truncate() which follows symbolic links in all path elements.

This patch converts local_truncate() to rely on open_nofollow() and
ftruncate() instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ac125d993b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
d166a4bdd5 9pfs: local: statfs: don't follow symlinks
The local_statfs() callback is vulnerable to symlink attacks because it
calls statfs() which follows symbolic links in all path elements.

This patch converts local_statfs() to rely on open_nofollow() and fstatfs()
instead.

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 31e51d1c15)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
44ceec5b20 9pfs: local: utimensat: don't follow symlinks
The local_utimensat() callback is vulnerable to symlink attacks because it
calls qemu_utimens()->utimensat(AT_SYMLINK_NOFOLLOW) which follows symbolic
links in all path elements but the rightmost one or qemu_utimens()->utimes()
which follows symbolic links for all path elements.

This patch converts local_utimensat() to rely on opendir_nofollow() and
utimensat(AT_SYMLINK_NOFOLLOW) directly instead of using qemu_utimens().
It is hence assumed that the OS supports utimensat(), i.e. has glibc 2.6
or higher and linux 2.6.22 or higher, which seems reasonable nowadays.
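
A minimal sketch of the resulting call (illustrative, not the QEMU helper
itself; dirfd is assumed to come from opendir_nofollow()):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <time.h>

    static int utimensat_nofollow_sketch(int dirfd, const char *name,
                                         const struct timespec times[2])
    {
        /* the flag also keeps the rightmost component from being
         * followed if it is a symlink */
        return utimensat(dirfd, name, times, AT_SYMLINK_NOFOLLOW);
    }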

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a33eda0dd9)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
c04a6aae0c 9pfs: local: remove: don't follow symlinks
The local_remove() callback is vulnerable to symlink attacks because it
calls:

(1) lstat() which follows symbolic links in all path elements but the
    rightmost one
(2) remove() which follows symbolic links in all path elements but the
    rightmost one

This patch converts local_remove() to rely on opendir_nofollow(),
fstatat(AT_SYMLINK_NOFOLLOW) to fix (1) and unlinkat() to fix (2).

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a0e640a872)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
b3e4d49cc4 9pfs: local: unlinkat: don't follow symlinks
The local_unlinkat() callback is vulnerable to symlink attacks because it
calls remove() which follows symbolic links in all path elements but the
rightmost one.

This patch converts local_unlinkat() to rely on opendir_nofollow() and
unlinkat() instead.

Most of the code is moved to a separate local_unlinkat_common() helper
which will be reused in a subsequent patch to fix the same issue in
local_remove().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit df4938a665)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a47953ad34 9pfs: local: lremovexattr: don't follow symlinks
The local_lremovexattr() callback is vulnerable to symlink attacks because
it calls lremovexattr() which follows symbolic links in all path elements
but the rightmost one.

This patch introduces a helper to emulate the non-existing fremovexattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lremovexattr().

local_lremovexattr() is converted to use this helper and opendir_nofollow().
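
A rough sketch of the /proc/self/fd trick (illustrative only; names and
error handling differ from the real helper):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/xattr.h>

    static int fremovexattrat_nofollow_sketch(int dirfd, const char *name,
                                              const char *xattr)
    {
        char *proc_path;
        int ret;

        /* /proc/self/fd/<dirfd>/<name> resolves through the already
         * opened, trusted directory, so intermediate components cannot
         * be swapped for symlinks; lremovexattr() then refuses to
         * follow a symlink in the final component. */
        if (asprintf(&proc_path, "/proc/self/fd/%d/%s", dirfd, name) < 0) {
            return -1;
        }
        ret = lremovexattr(proc_path, xattr);
        free(proc_path);
        return ret;
    }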

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 72f0d0bf51)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
43397f9f2c 9pfs: local: lsetxattr: don't follow symlinks
The local_lsetxattr() callback is vulnerable to symlink attacks because
it calls lsetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fsetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lsetxattr().

local_lsetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3e36aba757)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
15ba579486 9pfs: local: llistxattr: don't follow symlinks
The local_llistxattr() callback is vulnerable to symlink attacks because
it calls llistxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing flistxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to llistxattr().

local_llistxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5507904e36)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
d00af4715d 9pfs: local: lgetxattr: don't follow symlinks
The local_lgetxattr() callback is vulnerable to symlink attacks because
it calls lgetxattr() which follows symbolic links in all path elements but
the rightmost one.

This patch introduces a helper to emulate the non-existing fgetxattrat()
function: it is implemented with /proc/self/fd which provides a trusted
path that can be safely passed to lgetxattr().

local_lgetxattr() is converted to use this helper and opendir_nofollow().

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 56ad3e54da)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
6173ab9591 9pfs: local: open/opendir: don't follow symlinks
The local_open() and local_opendir() callbacks are vulnerable to symlink
attacks because they call:

(1) open(O_NOFOLLOW) which follows symbolic links in all path elements but
    the rightmost one
(2) opendir() which follows symbolic links in all path elements

This patch converts both callbacks to use new helpers based on
openat_nofollow() to only open files and directories if they are
below the virtfs shared folder

This partly fixes CVE-2016-9602.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 996a0d76d7)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a27ec4ad5b 9pfs: local: keep a file descriptor on the shared folder
This patch opens the shared folder and caches the file descriptor, so that
it can be used to do symlink-safe path walk.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 0e35a37829)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
1a9beb2f55 9pfs: introduce relative_openat_nofollow() helper
When using the passthrough security mode, symbolic links created by the
guest are actual symbolic links on the host file system.

Since the resolution of symbolic links during path walk is supposed to
occur on the client side, the server should hence never receive any path
pointing to an actual symbolic link. This isn't guaranteed by the protocol
though, and malicious code in the guest can trick the server to issue
various syscalls on paths whose one or more elements are symbolic links.
In the case of the "local" backend using the "passthrough" or "none"
security modes, the guest can directly create symbolic links to arbitrary
locations on the host (as per spec). The "mapped-xattr" and "mapped-file"
security modes are also affected to a lesser extent as they require some
help from an external entity to create actual symbolic links on the host,
i.e. another guest using "passthrough" mode for example.

The current code hence relies on O_NOFOLLOW and "l*()" variants of system
calls. Unfortunately, this only applies to the rightmost path component.
A guest could maliciously replace any component in a trusted path with a
symbolic link. This could allow any guest to escape a virtfs shared folder.

This patch introduces a variant of the openat() syscall that successively
opens each path element with O_NOFOLLOW. When passing a file descriptor
pointing to a trusted directory, one is guaranteed to be returned a
file descriptor pointing to a path which is beneath the trusted directory.
This will be used by subsequent patches to implement symlink-safe path walk
for any access to the backend.

Symbolic links aren't the only threats actually: a malicious guest could
change a path element to point to other types of file with undesirable
effects:
- a named pipe or any other thing that would cause openat() to block
- a terminal device which would become QEMU's controlling terminal

These issues can be addressed with O_NONBLOCK and O_NOCTTY.

Two helpers are introduced: one to open intermediate path elements and one
to open the rightmost path element.
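
A simplified sketch of the walk (illustrative; the real helper also
asserts the path is relative, rejects '//', and distinguishes the
intermediate open from the final one):

    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* Walk "path" below "dirfd", opening every component with O_NOFOLLOW
     * so that a component swapped for a symlink makes the walk fail
     * instead of escaping the shared folder; O_NONBLOCK and O_NOCTTY
     * guard against FIFOs and terminal devices. */
    static int relative_openat_nofollow_sketch(int dirfd, const char *path)
    {
        int fd = dup(dirfd);
        char *copy = strdup(path);
        char *saveptr = NULL;
        char *elem;

        if (!copy) {
            close(fd);
            return -1;
        }
        for (elem = strtok_r(copy, "/", &saveptr);
             elem != NULL && fd >= 0;
             elem = strtok_r(NULL, "/", &saveptr)) {
            int next = openat(fd, elem,
                              O_RDONLY | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK);
            close(fd);
            fd = next;
        }
        free(copy);
        return fd;   /* fd of the rightmost element, or -1 on failure */
    }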

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(renamed openat_nofollow() to relative_openat_nofollow(),
 assert path is relative and doesn't contain '//',
 fixed side-effect in assert, Greg Kurz)
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit 6482a96163)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
7c8fdacdd4 9pfs: remove side-effects in local_open() and local_opendir()
If these functions fail, they should not change *fs. Let's use local
variables to fix this.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 21328e1e57)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
fe9601182f 9pfs: remove side-effects in local_init()
If this function fails, it should not modify *ctx.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 00c90bd1c2)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
d8e24d9f27 9pfs: local: move xattr security ops to 9p-xattr.c
These functions are always called indirectly. It really doesn't make sense
for them to sit in a header file.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 56fc494bdc)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
60718e51d9 9pfs: fix off-by-one error in PDU free list
The server can handle MAX_REQ - 1 PDUs at a time and the virtio-9p
device has a MAX_REQ sized virtqueue. If the client manages to fill
up the virtqueue, pdu_alloc() will fail and the request won't be
processed without any notice to the client (it actually causes the
linux 9p client to hang).

This has been there since the beginning (commit 9f10751365 "virtio-9p:
Add a virtio 9p device to qemu"), but it needs an aggressive workload to
run in the guest to show up.

We actually allocate MAX_REQ PDUs and I see no reason not to link them
all into the free list, so let's fix the init loop.
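
Roughly, the change is just the loop bound (a sketch; the surrounding
names approximate the QEMU code):

    /* before: one of the MAX_REQ allocated PDUs was never linked into
     * the free list, so a full virtqueue made pdu_alloc() fail */
    for (i = 0; i < (MAX_REQ - 1); i++) {
        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
    }

    /* after: link them all */
    for (i = 0; i < MAX_REQ; i++) {
        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
    }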

Reported-by: Tuomas Tynkkynen <tuomas@tuxera.com>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 0d78289c3d)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Halil Pasic
e19a47bf48 virtio: fix vq->inuse recalc after migr
Correct recalculation of vq->inuse after migration for the corner case
where the avail_idx has already wrapped but used_idx not yet.

Also change the type of the VirtQueue.inuse to unsigned int. This is
done to be consistent with other members representing sizes (VRing.num),
and because C99 guarantees max ring size < UINT_MAX but does not
guarantee max ring size < INT_MAX.
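
A sketch of the safe recalculation (illustrative; it only mirrors the
semantics described above):

    #include <stdint.h>

    /* last_avail_idx may already have wrapped past 65535 while used_idx
     * has not; doing the subtraction in uint16_t keeps the difference
     * correct modulo 2^16, and the unsigned result can never go
     * negative the way a plain int could. */
    static unsigned int recalc_inuse_sketch(uint16_t last_avail_idx,
                                            uint16_t used_idx)
    {
        return (uint16_t)(last_avail_idx - used_idx);
    }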

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Fixes: bccdef6b ("virtio: recalculate vq->inuse after migration")
CC: qemu-stable@nongnu.org
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e66bcc4081)
[BR: BSC#1020928]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
548950c1ae 9pfs: fix crash when fsdev is missing
If the user passes -device virtio-9p without the corresponding -fsdev, QEMU
dereferences a NULL pointer and crashes.

This is a 2.8 regression introduced by commit 702dbcc274.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Li Qiang <liq3ea@gmail.com>
(cherry picked from commit f2b58c4375)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefano Stabellini
3cc5d172ba 9pfs: move pdus to V9fsState
pdus are initialized and used in 9pfs common code. Move the array from
V9fsVirtioState to V9fsState.

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 583f21f8b9)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
dc62ce5e2f 9pfs: add cleanup operation in FileOperations
Currently, the backend of VirtFS doesn't have a cleanup
function. This will lead to resource leak issues if the backend
driver allocates resources. This patch addresses this issue.

Signed-off-by: Li Qiang <liq3ea@gmail.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 702dbcc274)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
f7b5ea7211 9pfs: drop excessive error message from virtfs_reset()
The virtfs_reset() function is called either when the virtio-9p device
gets reset, or when the client starts a new 9P session. In both cases,
if it finds fids from a previous session, the following is printed in
the monitor:

9pfs:virtfs_reset: One or more uncluncked fids found during reset

For example, if a linux guest with a mounted 9P share is reset from the
monitor with system_reset, the message will be printed. This is excessive
since these fids are now clunked and the state is clean.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 79decce35b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
eeb82dbacd 9pfs: fix integer overflow issue in xattr read/write
The v9fs_xattr_read() and v9fs_xattr_write() are passed a guest
originated offset: they must ensure this offset does not go beyond
the size of the extended attribute that was set in v9fs_xattrcreate().
Unfortunately, the current code implements these checks with unsafe
calculations on 32 and 64 bit values, which may allow a malicious
guest to cause OOB access anyway.

Fix this by comparing the offset and the xattr size, which are
both uint64_t, before trying to compute the effective number of bytes
to read or write.
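
A sketch of the safe comparison (illustrative; parameter names are not
the real ones):

    #include <stdint.h>

    /* Compare the guest-supplied offset against the xattr size as
     * uint64_t before deriving a byte count, so no intermediate
     * truncation to 32 bits or signed overflow can occur. */
    static uint64_t xattr_copy_len(uint64_t xattr_size, uint64_t offset,
                                   uint32_t max_count)
    {
        if (offset >= xattr_size) {
            return 0;                       /* nothing left to access */
        }
        uint64_t remaining = xattr_size - offset;
        return remaining < max_count ? remaining : max_count;
    }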

Suggested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-By: Guido Günther <agx@sigxcpu.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 7e55d65c56)
[BR: CVE-2016-9104 BSC#1007493]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
6d2cb4ac00 9pfs: fix memory leak in v9fs_write
If an error occurs when marshalling the transfer length to the guest, the
v9fs_write() function doesn't free an IO vector, thus leading to a memory
leak. This patch fixes the issue.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Greg Kurz <groug@kaod.org>
[groug, rephrased the changelog]
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit fdfcc9aeea)
[BR: CVE-2016-9106 BSC#1007495]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
13335933ae 9pfs: fix memory leak in v9fs_link
The v9fs_link() function keeps a reference on the source fid object. This
causes a memory leak since the reference never goes down to 0. This patch
fixes the issue.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Greg Kurz <groug@kaod.org>
[groug, rephrased the changelog]
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 4c1586787f)
[BR: CVE-2016-9105 BSC#1007494]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
1e7e415ed2 9pfs: fix memory leak in v9fs_xattrcreate
The 'fs.xattr.value' field in V9fsFidState object doesn't consider the
situation that this field has been allocated previously. Every time, it
will be allocated directly. This leads to a host memory leak issue if
the client sends another Txattrcreate message with the same fid number
before the fid from the previous time got clunked.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Greg Kurz <groug@kaod.org>
[groug, updated the changelog to indicate how the leak can occur]
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit ff55e94d23)
[BR: CVE-2016-9102 BSC#1007450]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
702e7c4530 9pfs: fix information leak in xattr read
9pfs uses g_malloc() to allocate the xattr memory space, if the guest
reads this memory before writing to it, this will leak host heap memory
to the guest. This patch avoids this.
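
The fix boils down to zero-initialising the buffer, roughly (a sketch,
not the literal patch):

    /* before: uninitialised heap memory could be read back by the guest */
    xattr_fidp->fs.xattr.value = g_malloc(size);

    /* after: hand out zeroed memory instead */
    xattr_fidp->fs.xattr.value = g_malloc0(size);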

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit eb68760285)
[BR: CVE-2016-9103 BSC#1007454]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
b6c386dfc0 virtio-9p: add reset handler
Virtio devices should implement the VirtIODevice->reset() function to
perform necessary cleanup actions and to bring the device to a quiescent
state.

In the case of the virtio-9p device, this means:
- emptying the list of active PDUs (i.e. draining all in-flight I/O)
- freeing all fids (i.e. close open file descriptors and free memory)

That's what this patch does.

The reset handler first waits for all active PDUs to complete. Since
completion happens in the QEMU global aio context, we just have to
loop around aio_poll() until the active list is empty.

The freeing part involves some actions to be performed on the backend,
like closing file descriptors or flushing extended attributes to the
underlying filesystem. The virtfs_reset() function already does the
job: it calls free_fid() for all open fids not involved in an ongoing
I/O operation. We are sure this is the case since we have drained
the PDU active list.

The current code implements all backend accesses with coroutines, but we
want to stay synchronous on the reset path. We can either change the
current code to be able to run when not in coroutine context, or create
a coroutine context and wait for virtfs_reset() to complete. This patch
goes for the latter because it results in simpler code.

Note that we also need to create a dummy PDU because it is also an API
to pass the FsContext pointer to all backend callbacks.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 0e44a0fd3f)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
36bfafa37e 9pfs: only free completed request if not flushed
If a PDU has a flush request pending, the current code calls pdu_free()
twice:

1) pdu_complete()->pdu_free() with pdu->cancelled set, which does nothing

2) v9fs_flush()->pdu_free() with pdu->cancelled cleared, which moves the
   PDU back to the free list.

This works but it complicates the logic of pdu_free().

With this patch, pdu_complete() only calls pdu_free() if no flush request
is pending, i.e. qemu_co_queue_next() returns false.

Since pdu_free() is now supposed to be called with pdu->cancelled cleared,
the check in pdu_free() is dropped and replaced by an assertion.

Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit f74e27bf0f)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a9f2ace76d 9pfs: drop useless check in pdu_free()
Out of the three users of pdu_free(), none ever passes a NULL pointer to
this function.

Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 6868a420c5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
dee5157d53 9pfs: fix potential host memory leak in v9fs_read
In 9pfs read dispatch function, it doesn't free two QEMUIOVector
objects, thus causing a potential memory leak. This patch avoids this.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit e95c9a493a)
[BR: CVE-2016-8577 BSC#1003893]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Li Qiang
0256b94251 9pfs: allocate space for guest originated empty strings
If a guest sends an empty string parameter to any 9P operation, the current
code unmarshals it into a V9fsString equal to { .size = 0, .data = NULL }.

This is unfortunate because it can cause NULL pointer dereference to happen
at various locations in the 9pfs code. And we don't want to check str->data
everywhere we pass it to strcmp() or any other function which expects a
dereferenceable pointer.

This patch enforces the allocation of genuine C empty strings instead, so
callers don't have to bother.

Out of all v9fs_iov_vunmarshal() users, only v9fs_xattrwalk() checks if
the returned string is empty. It now uses v9fs_string_size() since
name.data cannot be NULL anymore.
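
Conceptually, the unmarshal path now always produces a dereferenceable C
string; a simplified sketch (the struct is re-declared here only for
illustration):

    #include <glib.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct V9fsStringSketch {
        uint16_t size;
        char *data;
    } V9fsStringSketch;

    /* An empty 9P string becomes "" instead of { .size = 0, .data = NULL } */
    static void unmarshal_string_sketch(V9fsStringSketch *str,
                                        const char *buf, uint16_t size)
    {
        str->size = size;
        str->data = g_malloc(size + 1);
        if (size) {
            memcpy(str->data, buf, size);
        }
        str->data[size] = '\0';   /* size == 0 yields a genuine "" */
    }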

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
[groug, rewritten title and changelog,
 fix empty string check in v9fs_xattrwalk()]
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit ba42ebb863)
[BR: CVE-2016-8578 BSC#1003894]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
2474863600 9pfs: fix potential segfault during walk
If the call to fid_to_qid() returns an error, we will call v9fs_path_free()
on uninitialized paths.

It is a regression introduced by the following commit:

56f101ecce 9pfs: handle walk of ".." in the root directory

Let's fix this by initializing dpath and path before calling fid_to_qid().

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
[groug: updated the changelog to indicate this is regression and to provide
        the offending commit SHA1]
Signed-off-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 13fd08e631)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
357adfcd9e 9pfs: introduce v9fs_path_sprintf() helper
This helper is similar to v9fs_string_sprintf(), but it includes the
terminating NUL character in the size field.

This is to avoid doing v9fs_string_sprintf((V9fsString *) &path) and
then bumping the size.

Affected users are changed to use this new helper.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
(cherry picked from commit e3e83f2e21)
[BR: support patch for BSC#1034866]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
5f7e165eda 9pfs: handle walk of ".." in the root directory
The 9P spec at http://man.cat-v.org/plan_9/5/intro says:

All directories must support walks to the directory .. (dot-dot) meaning
parent directory, although by convention directories contain no explicit
entry for .. or . (dot).  The parent of the root directory of a server's
tree is itself.

This means that a client cannot walk further than the root directory
exported by the server. In other words, if the client wants to walk
"/.." or "/foo/../..", the server should answer like the request was
to walk "/".

This patch just does that:
- we cache the QID of the root directory at attach time
- during the walk we compare the QID of each path component with the root
  QID to detect if we're in a "/.." situation
- if so, we skip the current component and go to the next one
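
A sketch of the skip logic in the walk loop (names simplified, not the
exact QEMU code):

    for (name_idx = 0; name_idx < nwnames; name_idx++) {
        /* root_qid was cached at attach time; a QID's path field
         * uniquely identifies the file on the server */
        if (!strcmp(wnames[name_idx].data, "..") &&
            qid.path == root_qid.path &&
            qid.type == root_qid.type &&
            qid.version == root_qid.version) {
            continue;   /* walking ".." from "/" stays at "/" */
        }
        /* otherwise resolve this component as usual ... */
    }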

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 56f101ecce)
[BR: CVE-2016-7116 BSC#996441]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefan Hajnoczi
fcfd6ebc66 virtio: zero vq->inuse in virtio_reset()
vq->inuse must be zeroed upon device reset like most other virtqueue
fields.

In theory, virtio_reset() just needs assert(vq->inuse == 0) since
devices must clean up in-flight requests during reset (requests cannot
not be leaked!).

In practice, it is difficult to achieve vq->inuse == 0 across reset
because balloon, blk, 9p, etc implement various different strategies for
cleaning up requests.  Most devices call g_free(elem) directly without
telling virtio.c that the VirtQueueElement is cleaned up.  Therefore
vq->inuse is not decremented during reset.

This patch zeroes vq->inuse and trusts that devices are not leaking
VirtQueueElements across reset.

I will send a follow-up series that refactors request life-cycle across
all devices and converts vq->inuse = 0 into assert(vq->inuse == 0) but
this more invasive approach is not appropriate for stable trees.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Cc: qemu-stable <qemu-stable@nongnu.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ladi Prosek <lprosek@redhat.com>
(cherry picked from commit 4b7f91ed02)
[BR: BSC#1015048]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefan Hajnoczi
d3b42bcf40 virtio: decrement vq->inuse in virtqueue_discard()
virtqueue_discard() moves vq->last_avail_idx back so the element can be
popped again.  It's necessary to decrement vq->inuse to avoid "leaking"
the element count.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 58a83c6149)
[BR: BSC#1015048]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefan Hajnoczi
6881b77ddf virtio: recalculate vq->inuse after migration
The vq->inuse field is not migrated.  Many devices don't hold
VirtQueueElements across migration so it doesn't matter that vq->inuse
starts at 0 on the destination QEMU.

At least virtio-serial, virtio-blk, and virtio-balloon migrate while
holding VirtQueueElements.  For these devices we need to recalculate
vq->inuse upon load so the value is correct.

Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit bccdef6b1a)
[BR: BSC#1015048]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
a67b8ddfb3 9p: introduce the V9fsDir type
If we are to switch back to readdir(), we need a more complex type than
DIR * to be able to serialize concurrent accesses to the directory stream.

This patch introduces a placeholder type and fixes all users.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
(cherry picked from commit f314ea4e30)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Vincenzo Maffione
a93cf8810d virtio: cache used_idx in a VirtQueue field
Accessing used_idx in the VQ requires an expensive access to
guest physical memory. Before this patch, 3 accesses are normally
done for each pop/push/notify call. However, since the used_idx is
only written by us, we can track it in our internal data structure.

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Message-Id: <3d062ec54e9a7bf9fb325c1fd693564951f2b319.1450218353.git.v.maffione@gmail.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit b796fcd1bf)
[BR: BSC#1015048]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
3791dc6525 9pfs: introduce V9fsVirtioState
V9fsState now only contains generic fields. Introduce V9fsVirtioState
for virtio transport.  Change virtio-pci and virtio-ccw to use
V9fsVirtioState.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 00588a0aa2)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
02a91b01a3 9pfs: factor out v9fs_device_{,un}realize_common
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 2a0c56aa4c)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Note:
The previous reference to virtio_9p_get_features is left in place.]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
95bbacf64b 9pfs: rename virtio-9p.c to 9p.c
Now that file only contains generic code.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 60ce86c714)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
a7df3233ee 9pfs: rename virtio_9p_set_fd_limit to use v9fs_ prefix
It's not virtio specific.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
(cherry picked from commit 72a189770a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
193790b694 9pfs: move handle_9p_output and make it static function
It's only used in virtio device.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 0192cc5d79)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
1796e84666 9pfs: export pdu_{submit,alloc,free}
They will be used in later patches.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 4b311c5f0b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
5eeb0ec8ac 9pfs: factor out virtio_9p_push_and_notify
The new function resides in virtio specific file.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 0d3716b4e6)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
06de0af1de 9pfs: break out 9p.h from virtio-9p.h
Move out generic definitions from virtio-9p.h to 9p.h. Fix header
inclusions.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit ebe74f8ba2)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
08e06f889f 9pfs: break out virtio_init_iov_from_pdu
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 592707af7f)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
3198e15ff2 9pfs: factor out pdu_push_and_notify
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit f657b17a63)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
21fbcd8a76 9pfs: factor out virtio_pdu_{,un}marshal
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit fe9fa96d7c)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
dad4b21660 9pfs: make pdu_{,un}marshal proper functions
Factor out v9fs_iov_v{,un}marshal. Implement pdu_{,un}marshal with those
functions.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 0e2082d9e5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
39c96419f7 9pfs: PDU processing functions should start pdu_ prefix
This matches naming convention of pdu_marshal and pdu_unmarshal.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit dc295f8353)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
2ee0a32795 9pfs: PDU processing functions don't need to take V9fsState as argument
V9fsState can be referenced by pdu->s. Initialise that in device
realization function.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit ad38ce9ed1)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
0951f62a59 fsdev: rename virtio-9p-marshal.{c,h} to 9p-iov-marshal.{c,h}
And rename v9fs_marshal to v9fs_iov_marshal, v9fs_unmarshal to
v9fs_iov_unmarshal.

The rationale behind this change is that, this marshalling interface is
used both by virtio and proxy helper. Renaming files and functions to
reflect the true nature of this interface.

Xen transport is going to have its own marshalling interface.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 2209bd050a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
7184ee97ff fsdev: break out 9p-marshal.{c,h} from virtio-9p-marshal.{c,h}
Break out some generic functions for marshaling 9p state. Pure code
motion plus minor fixes for build system.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 829dd2861a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
f50d9dbe8a 9pfs: remove dead code
Some structures in virtio-9p.h have been unused since 2011 when relevant
functions switched to use coroutines.

The declaration of pdu_packunpack and function do_pdu_unpack are
useless.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 71042cffc0)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
e9bb27cf74 9pfs: merge hw/virtio/virtio-9p.h into hw/9pfs/virtio-9p.h
The deleted file only contained V9fsConf which wasn't virtio specific.
Merge that to the general header of 9pfs.

Fixed header inclusions as I went along.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 756cb74a59)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
dc2ee725ff 9pfs: rename virtio-9p-xattr{,-user}.{c,h} to 9p-xattr{,-user}.{c,h}
These three files are not virtio specific. Rename them to generic
names.

Fix comments and header inclusion in various files.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 267ae092e2)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
5e4a9d648b 9pfs: rename virtio-9p-proxy.{c,h} to 9p-proxy.{c,h}
Those two files are not virtio specific. Rename them to use generic
names.

Fix includes in various C files. Change define guards and comments
in header files.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 494a8ebe71)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
b0dc4234f3 9pfs: rename virtio-9p-posix-acl.c to 9p-posix-acl.c
This file is not virtio specific. Rename it to use generic name.

Fix comment and remove unneeded inclusion of virtio.h.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit d57b78002c)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
7d0e489d5d 9pfs: rename virtio-9p-local.c to 9p-local.c
This file is not virtio specific. Rename it to use generic name.

Fix comment and remove unneeded inclusion of virtio.h.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit f00d4f596b)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Wei Liu
c5a2335cfe 9pfs: rename virtio-9p-handle.c to 9p-handle.c
This file is not virtio specific. Rename it to use generic name.

Fix comment and remove unneeded inclusion of virtio.h.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
(cherry picked from commit 3b9ca04653)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
465556e58e virtio-9p-device: add minimal unrealize handler
Since commit 4652f1640e "virtio-9p: add savevm
handlers", if the user hot-unplugs a quiescent 9p device and live
migrates, the source QEMU crashes before migration completion...
This happens because virtio-9p devices have a realize handler which
calls virtio_init() and register_savevm().  Both calls store pointers
to the device internals, that get dereferenced during migration even
if the device got unplugged.

This patch simply adds an unrealize handler to perform minimal
cleanup and avoid the crash.  Hot unplug of non-quiescent 9p devices
is still not supported in QEMU, and not supported by linux guests
either.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 20151208155457.27775.69441.stgit@bahia.huguette.org
[PMM: rewrapped long lines in commit message]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 6cecf09373)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Bruce Rogers
c3e0978a73 coroutine: move into libqemuutil.a library
The coroutine files are currently referenced by the block-obj-y
variable. The coroutine functionality though is already used by
more than just the block code. eg migration code uses coroutine
yield. In the future the I/O channel code will also use the
coroutine yield functionality. Since the coroutine code is nicely
self-contained it can be easily built as part of the libqemuutil.a
library, making it widely available.

The headers are also moved into include/qemu, instead of the
include/block directory, since they are now part of the util
codebase, and the impl was never in the block/ directory
either.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602
In place of upstream commit 10817bf09d,
we just simulate the generalization of the qemu headers by creating
an include/qemu/coroutine.h, which simply includes
include/block/coroutine.h, nothing more. That satisfies our very limited needs]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Shannon Zhao
b929e52924 virtio-9p-device: move qdev properties into virtio-9p-device.c
As only one place in virtio-9p-device.c uses
DEFINE_VIRTIO_9P_PROPERTIES, there is no need to expose it. Inline it
into virtio-9p-device.c to avoid misuse.

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 83a84878da)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Michael S. Tsirkin
06565c79b3 virtio-9p: use standard headers
Drop code duplicated from standard headers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
(cherry picked from commit 8744a6a8d5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Michael S. Tsirkin
e6cd749943 virtio: use standard-headers
Drop a bunch of code duplicated from virtio_config.h and virtio_ring.h.
This makes us rename event index accessors which conflict,
as reusing the ones from virtio_ring.h isn't trivial.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
(cherry picked from commit e9600c6ca9)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Only
the part of the commit which seemed essential for the backport was
cherry-picked]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Michael S. Tsirkin
da2a529b13 virtio: use standard virtio_ring.h
Switch to virtio_ring.h from standard headers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 4fbe0f322d)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Michael S. Tsirkin
44dd6205ae include: import virtio headers from linux 4.0
Add files imported from linux-next (what will become linux 4.0) using
scripts/update-linux-headers.sh

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 9fbe302b2a)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Gonglei
8d73b914bf virtio-9p: use aliases instead of duplicate qdev properties
virtio-9p-pci all duplicate the qdev properties of their
V9fsState child. This approach does not work well with
string or pointer properties since we must be careful
about leaking or double-freeing them.

Use the QOM alias property to forward property accesses to the
V9fsState child.  This way no duplication is necessary.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 48833071d9)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefan Hajnoczi
f7ed5892ce qdev: add qdev_alias_all_properties()
The qdev_alias_all_properties() function creates QOM alias properties
for each qdev property on a DeviceState.  This is useful for parent
objects that wish to forward property accesses to their children.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
(cherry picked from commit 67cc7e0aac)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Stefan Hajnoczi
38aa4e05f4 qom: add object_property_add_alias()
Sometimes an object needs to present a property which is actually on
another object, or it needs to provide an alias name for an existing
property.

Examples:
  a.foo -> b.foo
  a.old_name -> a.new_name

The new object_property_add_alias() API allows objects to alias a
property on the same object or another object.  The source and target
names can be different.
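
A hypothetical usage sketch (object and property names are made up):

    /* forward accesses to a's "foo" to the same-named property on b,
     * and expose a's "new_name" under its historical name as well */
    object_property_add_alias(OBJECT(a), "foo",
                              OBJECT(b), "foo", &error_abort);
    object_property_add_alias(OBJECT(a), "old_name",
                              OBJECT(a), "new_name", &error_abort);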

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
(cherry picked from commit ef7c7ff6d4)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Rusty Russell
7eaaa4117b virtio: allow byte swapping for vring
Quoting original text from Rusty: "This is based on a simpler patch by Anthony
Liguori".

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
[ add VirtIODevice * argument to most helpers,
  Greg Kurz <gkurz@linux.vnet.ibm.com> ]
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit cee3ca0028)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
9947983130 virtio: memory accessors for endian-ambivalent targets
This is the virtio-access.h header file taken from Rusty's "endian-ambivalent
targets using legacy virtio" patch. It introduces helpers that should be used
when accessing vring data or by drivers for data that contains headers.
The virtio config space is also target endian, but the current code already
handles that with the virtio_is_big_endian() helper. There is no obvious
benefit at using the virtio accessors in this case.

Now we have two distinct paths: a fast inline one for fixed endian targets,
and a slow out-of-line one for targets that define the new TARGET_IS_BIENDIAN
macro.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
[ relicensed virtio-access.h to GPLv2+ on Rusty's request,
  pass &address_space_memory to physical memory accessors,
  per-device endianness,
  virtio tswap16 and tswap64 helpers,
  fastpath for fixed endian targets,
  Greg Kurz <gkurz@linux.vnet.ibm.com> ]
Cc: Cédric Le Goater <clg@fr.ibm.com>
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 0f5d1d2a49)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
430828e8b3 virtio: add endian-ambivalent support to VirtIODevice
Some CPU families can dynamically change their endianness. This means we
can have little endian ppc or big endian arm guests for example. This has
an impact on legacy virtio data structures since they are target endian.
We hence introduce a new property to track the endianness of each virtio
device. It is reasonably assumed that endianness won't change while the
device is in use: we hence capture the device endianness when it gets
reset.

We migrate this property in a subsection, after the device descriptor. This
means the load code must not rely on it until it is restored. As a consequence,
the vring sanity checks had to be moved after the call to vmstate_load_state().
We enforce paranoia by poisoning the property at the beginning of virtio_load().

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 616a655219)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Greg Kurz
3e6da8d653 exec: introduce target_words_bigendian() helper
We currently have a virtio_is_big_endian() helper that provides the target
endianness to the virtio code. As of today, the helper returns a fixed
compile-time value. Of course, this will have to change if we want to
support target endianness changes at run-time.

Let's move the TARGET_WORDS_BIGENDIAN bits out to a new helper and have
virtio_is_big_endian() implemented on top of it.

This patch doesn't change any functionality.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 98ed8ecfc9)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Peter Crosthwaite
70c46da6ed error: Add error_abort
Add a special Error * that can be passed to error handling APIs to
signal that any errors are fatal and should abort QEMU. There are two
advantages to this:

- allows for brevity when wishing to assert success of Error **
  accepting APIs. No need for this pattern:
        Error * local_err = NULL;
        api_call(foo, bar, &local_err);
        assert_no_error(local_err);
  This also removes the need for _nofail variants of APIs with
  asserting call sites now reduced to 1LOC.
- SIGABRT happens from within the offending API. When a fatal error
  occurs in an API call (when the caller is asserting success), failure
  often means the API itself is broken. With the abort happening in the
  API call now, the stack frames into the call are available at debug
  time. In the assert_no_error scheme the abort happens after the fact.

The exact semantic is that when an error is raised, if the argument
Error ** matches &error_abort, then the abort occurs immediately. The
error message is reported.

For error_propagate, if the destination error is &error_abort, then
the abort happens at propagation time.
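
With &error_abort the pattern shown above shrinks to a single line, e.g.
(illustrative, reusing the hypothetical api_call from the example):

    /* any error raised inside api_call() aborts immediately, with the
     * offending stack frames still available to the debugger */
    api_call(foo, bar, &error_abort);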

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
(cherry picked from commit 5d24ee70bc)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
6f6f454785 virtio: Convert exit to unrealize
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 306ec6c3ce)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Only
part of this patch is used, and it also retains some of the old
interface to allow old and new to co-exist]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
bb37892302 virtio: Complete converting VirtioDevice to QOM realize
Drop VirtioDeviceClass::init.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0ba94b6f94)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Patch
altered to allow init to continue to be in use.]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
32886d6ede virtio-9p: Convert to QOM realize
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 59be75227d)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
4455b77144 virtio: Start converting VirtioDevice to QOM realize
Temporarily allow either VirtioDeviceClass::init or
VirtioDeviceClass::realize.

Introduce VirtioDeviceClass::unrealize for symmetry.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1d244b42d2)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Patch
modified to maintain both old and new ways to init/realize]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
d546524e1f virtio-9p: QOM realize preparations
Avoid unnecessary VIRTIO_DEVICE().

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0f3657ec36)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Paolo Bonzini
90ed6cfdab virtio-bus: cleanup plug/unplug interface
Right now we have these pairs:

- virtio_bus_plug_device/virtio_bus_destroy_device.  The first
  takes a VirtIODevice, the second takes a VirtioBusState

- device_plugged/device_unplug callbacks in the VirtioBusClass
  (here it's just the naming that is inconsistent)

- virtio_bus_destroy_device is not called by anyone (and since
  it calls qdev_free, it would be called by the proxies---but
  then the callback is useless since the proxies can do whatever
  they want before calling virtio_bus_destroy_device)

And there is a k->init but no k->exit, hence virtio_device_exit is
overwritten by subclasses (except virtio-9p).  This cleans it up by:

- renaming the device_unplug callback to device_unplugged

- renaming virtio_bus_plug_device to virtio_bus_device_plugged,
  matching the callback name

- renaming virtio_bus_destroy_device to virtio_bus_device_unplugged,
  removing the qdev_free, making it take a VirtIODevice and calling it
  from virtio_device_exit

- adding a k->exit callback

virtio_device_exit is still overwritten, the next patches will fix that.

Cc: qemu-stable@nongnu.org
Acked-by: Andreas Faerber <afaerber@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 5e96f5d2f8)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
yinyin
bb02c6a957 virtio: virtqueue_get_avail_bytes: fix desc_pa when loop over the indirect descriptor table
virtqueue_get_avail_bytes: when an indirect descriptor is found, we need to loop over it.
           /* loop over the indirect descriptor table */
           indirect = 1;
           max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
           num_bufs = i = 0;
           desc_pa = vring_desc_addr(desc_pa, i);
But this initializes i to 0 and then uses i to update desc_pa, so we always get:
desc_pa = vring_desc_addr(desc_pa, 0);
The last two lines should be swapped.
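
For clarity, the corrected ordering described above would look roughly
like this (a sketch of the intent, not the verbatim patch):

           /* loop over the indirect descriptor table */
           indirect = 1;
           max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
           desc_pa = vring_desc_addr(desc_pa, i); /* use i before it is reset */
           num_bufs = i = 0;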

Cc: qemu-stable@nongnu.org
Signed-off-by: Yin Yin <yin.yin@cs2c.com.cn>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1ae2757c6c)
[BR: Fix and/or infrastructure for BSC#1038396]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:17 -06:00
Andreas Färber
868047039e virtio-9p-device: Avoid freeing uninitialized memory
In virtio_9p_device_init() there are six 'goto out' statements that will
lead to v9fs_path_free() attempting to free an uninitialized path.data
field. The easiest way to trigger this is: qemu-system-x86_64 -device virtio-9p-pci

Fix this by moving v9fs_path_init() before any goto out.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Andreas Färber <afaerber@suse.de>
Message-id: 1375315187-16534-1-git-send-email-afaerber@suse.de
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 27915efb97)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
M. Mohan Kumar
ac6fb856f2 hw/9pfs: Fix memory leak in error path
Fix a few more memory leaks in virtio-9p-device.c detected using valgrind.

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Message-id: 1372929678-14341-1-git-send-email-mohan@in.ibm.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 92304bf399)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
60babba4d3 virtio: add virtio_device_set_child_bus_name.
Add virtio_device_set_child_bus_name function.

It will be used with virtio-serial-x and virtio-scsi-x to set the
child bus name before calling virtio-x-device's init.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Message-id: 1367330931-12994-3-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 1034e9cf4d)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
a1a9e82370 virtio: drop unused function prototypes.
This removes the unused prototypes in virtio.h.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1366791683-5350-8-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit fca0a70cdb)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
8c90c1e6b8 virtio-9p: cleanup: QOM casts.
As the virtio-9p-pci is switched to the new API, we can use QOM casts.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1366708123-19626-5-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 13daf6cad0)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
121b959a2b virtio-9p: cleanup: init function.
This removes the old init function as it is no longer needed.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1366708123-19626-4-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit e8111e5055)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
8663156735 virtio-9p-pci: switch to the new API.
Here virtio-9p-pci is modified for the new API. The virtio-9p-pci device
extends virtio-pci. It creates and connects a virtio-9p-device during
init. The properties are not changed.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1366708123-19626-3-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 234a336f9e)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
c3b9f7e9b8 virtio-9p: add the virtio-9p device.
Create virtio-9p-device, which extends virtio-device, so it can be connected
to the virtio bus.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1366708123-19626-2-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit e7303c4303)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Hans de Goede
df41258a5e virtio-9p: Fix virtio-9p no longer building after hw-dirs branch merge
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Message-id: 1365495755-10902-1-git-send-email-hdegoede@redhat.com
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 93b48c201e)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
7d3bd60d64 hw: move headers to include/
Many of these should be cleaned up with proper qdev-/QOM-ification.
Right now there are many catch-all headers in include/hw/ARCH depending
on cpu.h, and this makes it necessary to compile these files per-target.
However, fixing this does not belong in these patches.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0d09e41a51)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. This patch
is severely edited to only include the changes needed for this backport]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
d092dee64e virtio-pci: fix hot unplug.
Hot unplug failed because it tried to free the virtio device twice.

This fixes the issue by removing the call to virtio_bus_destroy_device.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1363624648-16906-4-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 10479a8089)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
b63bdb8817 virtio-x-bus: fix allow_hotplug assertion.
This sets allow_hotplug for each existing virtio-x-bus, allowing the
refactored devices to be hot-pluggable.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Message-id: 1363624648-16906-3-git-send-email-fred.konrad@greensocs.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit cbd19063e7)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
KONRAD Frederic
dc635dfd63 virtio: make virtio device's structures public.
These structures must be made public to avoid two memory allocations for
refactored virtio devices.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Message-id: 1363624648-16906-2-git-send-email-fred.konrad@greensocs.com

Changes V4 <- V3:
   * Rebased on current git.

Changes V3 <- V2:
    * Style correction spotted by Andreas (virtio-scsi.h).
    * Style correction for virtio-net.h.

Changes V2 <- V1:
    * Move the dataplane include into the header (virtio-blk).
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit f1b24e840f)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Stefan Hajnoczi
8afcf31a2a main-loop: add qemu_get_aio_context()
It is very useful to get the main loop AioContext, which is a static
variable in main-loop.c.

I'm not sure whether qemu_get_aio_context() will be necessary in the
future once devices focus on using their own AioContext instead of the
main loop AioContext, but for now it allows us to refactor code to
support multiple AioContext while actually passing the main loop
AioContext.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 5f3aa1ff47)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
7d4b291c57 hw: include hw header files with full paths
Done with this script:

cd hw
for i in `find . -name '*.h' | sed 's/^..//'`; do
  echo '\,^#.*include.*["<]'$i'[">], s,'$i',hw/&,'
done | sed -i -f - `find . -type f`

This is so that paths remain valid as files are moved.

Instead, files in hw/dataplane are referenced with the relative path.
We know they are not going to move to include/, and they are the only
include files that are in subdirectories _and_ move.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 83c9f4ca79)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602. Very
severely trimmed to only include what is needful for this backporting
effort]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
8d465a0d5c virtio-9p: remove PCI dependencies from hw/9pfs/
Also move the 9p.h file to 9pfs/virtio-9p-device.h, for consistency
with the corresponding .c file.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 60653b28f5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
55742cff0b virtio-9p: use CONFIG_VIRTFS, not CONFIG_LINUX
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7e6b14dfb5)
[BR: Fix and/or infrastructure for BSC#1020427 CVE-2016-9602]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
332d63dad6 scsi: make default command timeout user-settable
By default, SCSI commands are sent with an infinite timeout,
which essentially disables any command abort mechanism on the
host and causes the guest to stall.
This patch adds a new option 'timeout' for scsi-generic and
scsi-disk which allows the user to set the timeout value to
something sensible.

Instead of disabling command aborts by setting the command timeout
to infinity, we should set it to '0' by default, allowing
the host to fall back to its default values.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Ulrich Obergfell
2c4624a124 scsi-disk: fix bug in scsi_block_new_request() introduced by commit 137745c
This patch fixes a bug in scsi_block_new_request() that was introduced
by commit 137745c5c6. If the host cache
is used - i.e. if BDRV_O_NOCACHE is _not_ set - the 'break' statement
needs to be executed to 'fall back' to SG_IO.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2fe5a9f73b)
[LM: BSC#1031051]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
8e6e2e8155 megasas: fix guest-triggered memory leak
If the guest sets the sglist size to a value >=2GB, megasas_handle_dcmd
will return MFI_STAT_MEMORY_NOT_AVAILABLE without freeing the memory.
Avoid this by returning only the status from map_dcmd, and loading
cmd->iov_size in the caller.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 765a707000)
[BR: CVE-2017-5856 BSC#1023053]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Li Qiang
bb43e90ce0 watchdog: 6300esb: add exit function
When the Intel 6300ESB watchdog is hot unplugged, the timer allocated
in realize isn't freed, causing a memory leak. This patch avoids
this by adding an exit function.
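
A minimal sketch of the kind of exit function described (type and macro
names here are assumptions, not the exact patch):

    static void i6300esb_exit(PCIDevice *dev)
    {
        I6300State *d = WATCHDOG_6300ESB(dev);   /* hypothetical QOM cast */

        /* release the timer allocated in realize */
        timer_del(d->timer);
        timer_free(d->timer);
    }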

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-Id: <583cde9c.3223ed0a.7f0c2.886e@mx.google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit eb7a20a361)
[BR: CVE-2016-10155 BSC#1021129]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
21aa22ccb3 usb: ccid: check ccid apdu length
The CCID device emulator uses Application Protocol Data Units (APDU)
to exchange commands and responses to and from the host.
The length of these units cannot be greater than 65536. Add a
check to ensure this; it also avoids a potential integer
overflow in emulated_apdu_from_guest.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 20170202192228.10847-1-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c7dfbf3225)
[BR: CVE-2017-5898 BSC#1023907]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
a84a506957 cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo
CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination
and blit width, at all.  Oops.  Fix it.

Security impact: high.

The missing blit destination check allows writing to host memory.
Basically same as CVE-2014-8106 for the other blit variants.

The missing blit width check allows overflowing cirrus_bltbuf,
with the attractive target cirrus_srcptr (current cirrus_bltbuf write
position) being located right after cirrus_bltbuf in CirrusVGAState.

Due to cirrus emulation writing cirrus_bltbuf bytewise the attacker
hasn't full control over cirrus_srcptr though, only one byte can be
changed.  Once the first byte has been modified further writes land
elsewhere.

[ This is CVE-2017-2620 / XSA-209  - Ian Jackson ]

Fixed compilation by removing extra parameter to blit_is_unsafe. -iwj

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
[BR: BSC#1024972]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
5deab92cdb cirrus: fix patterncopy checks
The blit_region_is_unsafe checks don't work correctly for the
patterncopy source.  It's a fixed-sized region, which doesn't
depend on cirrus_blt_{width,height}.  So go do the check in
cirrus_bitblt_common_patterncopy instead, then tell blit_is_unsafe that
it doesn't need to verify the source.  Also handle the case where we
blit from cirrus_bitbuf correctly.

This patch replaces 5858dd1801.

Security impact:  I think for the most part we err on the safe side this
time, refusing blits which should have been allowed.

Only exception is placing the blit source at the end of the video ram,
so cirrus_blt_srcaddr + 256 goes beyond the end of video memory.  But
even in that case I'm not fully sure this actually allows read access to
host memory.  To trick the commit 5858dd18 security checks one has to
pick very small cirrus_blt_{width,height} values, which in turn implies
only a fraction of the blit source will actually be used.

Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-id: 1486645341-5010-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 95280c31cd)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Li Qiang
05909e4bfb cirrus: fix oob access issue (CVE-2017-2615)
When doing a bitblt copy in backward mode, we should subtract the
blit width first, just as it is added in forward mode. This
avoids OOB access before the start of the VGA vram.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>

[ kraxel: with backward blits (negative pitch) addr is the topmost
          address, so check it as-is against vram size ]

Cc: qemu-stable@nongnu.org
Cc: P J P <ppandit@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Wolfgang Bumiller <w.bumiller@proxmox.com>
Fixes: d3532a0db0 (CVE-2014-8106)
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485938101-26602-1-git-send-email-kraxel@redhat.com
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
(cherry picked from commit 62d4c6bd52)
[BR: CVE-2017-2615 BSC#1023004]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
b0f2dfb575 cirrus_vga: fix off-by-one in blit_region_is_unsafe
The "max" value is being compared with >=, but addr + width points to
the first byte that will _not_ be copied.  Laszlo suggested using a
"greater than" comparison, instead of subtracting one like it is
already done above for the height, so that max remains always positive.

The mistake is "safe"---it will reject some blits, but will never cause
out-of-bounds writes.
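
In other words, the comparison changes roughly as follows (a sketch of
the idea, not the literal diff):

    /* before: also rejects a blit whose last byte ends exactly at vram_size */
    if (addr + width >= vram_size) {
        return true;    /* unsafe */
    }

    /* after: addr + width is the first byte NOT copied, so '>' is enough */
    if (addr + width > vram_size) {
        return true;    /* unsafe */
    }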

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Message-id: 1455121059-18280-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit d2ba7ecb34)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
57bf00d878 cirrus: fix blit address mask handling
Apply the cirrus_addr_mask to cirrus_blt_dstaddr and cirrus_blt_srcaddr
right after assigning them, in cirrus_bitblt_start(), instead of having
this all over the place in the cirrus code, and missing a few places.

Reported-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1485338996-17095-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 60cd23e851)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Wolfgang Bumiller
a9fd1684ae cirrus: handle negative pitch in cirrus_invalidate_region()
cirrus_invalidate_region() calls memory_region_set_dirty()
on a per-line basis, always ranging from off_begin to
off_begin+bytesperline. With a negative pitch off_begin
marks the top most used address and thus we need to do an
initial shift backwards by a line for negative pitches of
backward blits, otherwise the first iteration covers the
line going from the start offset forwards instead of
backwards.
Additionally since the start address is inclusive, if we
shift by a full `bytesperline` we move to the first address
*not* included in the blit, so we only shift by one less
than bytesperline.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Message-id: 1485352137-29367-1-git-send-email-w.bumiller@proxmox.com

[ kraxel: codestyle fixes ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit f153b563f8)
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Bruce Rogers
4e95d1c44e display: cirrus: ignore source pitch value as needed in blit_is_unsafe
Commit 4299b90 added a check which is too broad, given that the source
pitch value is not required to be initialized for solid fill operations.
This patch refines the blit_is_unsafe() check to ignore source pitch in
that case. After applying the above commit as a security patch, we
noticed the SLES 11 SP4 guest gui failed to initialize properly.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Message-id: 20170109203520.5619-1-brogers@suse.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 913a87885f)
[BR: BSC#1016779]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
cba145c494 display: cirrus: check vga bits per pixel(bpp) value
In the Cirrus CLGD 54xx VGA emulator, if the cirrus graphics mode is VGA,
'cirrus_get_bpp' returns zero (0), which could lead to a divide
by zero error while copying pixel data. The same could occur
via blit pitch values. Add a check to avoid it.
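
The check being described amounts to something like this (a sketch;
the surrounding function and field names are assumptions):

    depth = cirrus_get_bpp(&s->vga) / 8;   /* may be 0 in plain VGA mode */
    if (depth == 0) {
        return true;                       /* treat the blit as unsafe */
    }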

Reported-by: Huawei PSIRT <psirt@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1476776717-24807-1-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 4299b90e9b)
[BR: CVE-2016-9921 CVE-2016-9922 BSC#1014702 BSC#1015169]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Li Qiang
75a2db63a9 usbredir: free vm_change_state_handler in usbredir destroy dispatch
The usbredir destroy dispatch function doesn't free the vm change
state handler registered in the usbredir_realize function. This
leads to a memory leak. This patch avoids it.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 58216976.d0236b0a.77b99.bcd6@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 07b026fd82)
[BR: CVE-2016-9907 BSC#1014109]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Li Qiang
17a95a18d9 usb: ehci: fix memory leak in ehci_init_transfer
In the ehci_init_transfer function, if 'cpage' is bigger than 4,
the previously allocated 'p->sgl' isn't freed, leading to
a memory leak. This patch avoids it.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Message-id: 5821c0f4.091c6b0a.e0c92.e811@mx.google.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 791f97758e)
[BR: CVE-2016-9911 BSC#1014111]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
25343fb99a net: mcf: check receive buffer size register value
The ColdFire Fast Ethernet Controller uses a receive buffer size
register (EMRBR) to hold the maximum size of all receive buffers.
It is set by the user before any operation. If it is set to
zero, the ColdFire emulator goes into an infinite loop while
receiving data in mcf_fec_receive. Add a check to avoid it.

Reported-by: Wjjzhang <wjjzhang@tencent.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 77d54985b8)
[BR: CVE-2016-9776 BSC#1013285]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
P J P
1fcbe9577b dma: rc4030: limit interval timer reload value
The JAZZ RC4030 chipset emulator has a periodic timer and an
associated interval reload register. The reload value is used
as a divider when computing the timer's next tick value. If the
reload value is large, it could lead to a divide by zero error.
Limit the interval reload value to avoid it.

Reported-by: Huawei PSIRT <psirt@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[BR: CVE-2016-8667 BSC#1004702]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
30f303353c audio: intel-hda: check stream entry count during transfer
The Intel HDA emulator uses a stream of buffers during DMA data
transfers. Each entry has a buffer length and a buffer pointer
position, which are used to derive the number of bytes to 'copy'.
If this length and buffer pointer were the same, 'copy' could be
set to zero (0), leading to an infinite loop. Add a check to
avoid it.

Reported-by: Huawei PSIRT <psirt@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1476949224-6865-1-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 0c0fc2b5fd)
[BR: CVE-2016-8909 BSC#1006536]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
e7e9758890 net: rtl8139: limit processing of ring descriptors
The RTL8139 ethernet controller in C+ mode supports multiple
descriptor rings, each with a maximum of 64 descriptors. While
processing the transmit descriptor ring in 'rtl8139_cplus_transmit',
it does not limit the descriptor count and runs forever. Add a
check to avoid it.
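
A sketch of the kind of limit being added (the loop shape is assumed):

    int txcount = 0;

    while (rtl8139_cplus_transmit_one(s)) {
        if (++txcount >= 64) {      /* a ring holds at most 64 descriptors */
            break;
        }
    }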

Reported-by: Andrew Henderson <hendersa@icculus.org>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit c7c3591669)
[BR: CVE-2016-8910 BSC#1006538]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Li Qiang
faea87b9ea net: eepro100: fix memory leak in device uninit
The exit dispatch of the eepro100 network card device doesn't free
the 's->vmstate' field which was allocated in device realize, thus
leading to a host memory leak. This patch avoids it.

Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 2634ab7fe2)
[BR: CVE-2016-9101 BSC#1007391]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
77cee1b17f net: pcnet: check rx/tx descriptor ring length
The AMD PC-Net II emulator has a set of control and status (CSR)
registers. Of these, CSR76 and CSR78 hold the receive and transmit
descriptor ring lengths respectively. This ring length could range
from 1 to 65535. Setting the ring length to zero leads to an infinite
loop in pcnet_rdra_addr() or pcnet_transmit(). Add a check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 34e29ce754)
[BR: CVE-2016-7909 BSC#1002557]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
b89561e8b9 char: serial: check divider value against baud base
The 16550A UART device uses an oscillator to generate a frequency
(baud base), which decides the communication speed. This speed can
be changed by dividing it by a divider. If the divider is
greater than the baud base, the speed is set to zero, leading to a
divide by zero error. Add a check to avoid it.
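
The guard described above is essentially (a sketch; the field names are
assumptions):

    if (s->divider == 0 || s->divider > s->baudbase) {
        return;    /* ignore values that would yield a zero speed */
    }
    speed = s->baudbase / s->divider;   /* now guaranteed to be non-zero */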

Reported-by: Huawei PSIRT <psirt@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1476251888-20238-1-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3592fe0c91)
[BR: CVE-2016-8669 BSC#1004707]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
da70237f21 xhci: limit the number of link trbs we are willing to process
Needed to avoid running in circles forever in case the guest builds
an endless loop with link TRBs.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Tested-by: P J P <ppandit@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1476096382-7981-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 05f43d44e4)
[BR: CVE-2016-8576 BSC#1003878]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
e5d8696ae8 net: mcf: limit buffer descriptor count
The ColdFire Fast Ethernet Controller uses buffer descriptors to manage
data flow to and from the receive and transmit queues. While transmitting
packets, it could continue to read buffer descriptors if a buffer
descriptor has a length of zero and crafted values in bd.flags.
Set an upper limit on the number of buffer descriptors.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 070c4b92b8)
[BR: CVE-2016-7908 BSC#1002550]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
c3f64a4897 vmsvga: correct bitmap and pixmap size checks
When processing the svga command DEFINE_CURSOR in vmsvga_fifo_run,
the computed BITMAP and PIXMAP sizes are checked against the
'cursor.mask[]' and 'cursor.image[]' array sizes in bytes.
Correct these checks to avoid OOB memory access.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1473338754-15430-1-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 167d97a3de)
[BR: CVE-2016-7170 BSC#998516]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
af282c8fb6 vmsvga: more cursor checks
Check the cursor size more carefully.  Also switch to unsigned while
at it, so the values can't be negative.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 5829b09720)
[BR: support patch for CVE-2016-7170 BSC#998516]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
chaojianhu
b12ad4db7d hw/net: Fix a heap overflow in xlnx.xps-ethernetlite
The .receive callback of xlnx.xps-ethernetlite doesn't check the length
of the data before calling memcpy. As a result, the NetClientState object
on the heap will be overflowed. All versions of qemu with
xlnx.xps-ethernetlite are affected.
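
The missing bounds check amounts to something like this (the buffer name
and size are placeholders, not the actual fields):

    if (size > RXBUF_SIZE) {    /* frame larger than the receive buffer */
        return -1;              /* drop it instead of overflowing */
    }
    memcpy(s->rxbuf, buf, size);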

Reported-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: chaojianhu <chaojianhu@hotmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit a0d1cbdacf)
[BR: CVE-2016-7161 BSC#1001151]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Petr Matousek
40e9bde33f vnc: sanitize bits_per_pixel from the client
A bits_per_pixel value of less than 8 could result in accessing
non-initialized buffers later in the code, due to the expectation
that the bytes_per_pixel value used to initialize these buffers is
never zero.

To fix this, check that bits_per_pixel from the client is one of the
values that the RFB protocol specification allows.
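
A sketch of the sanity check (the accepted set follows the RFB
specification; the error handling shown is simplified):

    switch (bits_per_pixel) {
    case 8:
    case 16:
    case 32:
        break;
    default:
        vnc_client_error(vs);   /* reject clients sending other values */
        return;
    }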

This is CVE-2014-7815.

Signed-off-by: Petr Matousek <pmatouse@redhat.com>

[ kraxel: apply codestyle fix ]

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit e6908bfe8e)
[BR: BSC#902737]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	ui/vnc.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
4ee6bf080e virtio: check vring descriptor buffer length
The virtio back end uses a set of buffers to facilitate I/O operations.
An infinite loop unfolds in virtqueue_pop() if a buffer is
of zero size. Add a check to avoid it.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 1e7aed7014)
[BR: CVE-2016-6490 BSC#991466]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/virtio.c
2021-03-18 17:15:16 -06:00
Stefan Hajnoczi
11c3b6e4ab virtio: error out if guest exceeds virtqueue size
A broken or malicious guest can submit more requests than the virtqueue
size permits, causing unbounded memory allocation in QEMU.

The guest can submit requests without bothering to wait for completion
and is therefore not bound by virtqueue size.  This requires reusing
vring descriptors in more than one request, which is not allowed by the
VIRTIO 1.0 specification.

In "3.2.1 Supplying Buffers to The Device", the VIRTIO 1.0 specification
says:

  1. The driver places the buffer into free descriptor(s) in the
     descriptor table, chaining as necessary

and

  Note that the above code does not take precautions against the
  available ring buffer wrapping around: this is not possible since the
  ring buffer is the same size as the descriptor table, so step (1) will
  prevent such a condition.

This implies that placing more buffers into the virtqueue than the
descriptor table size is not allowed.

QEMU is missing the check to prevent this case.  Processing a request
allocates a VirtQueueElement leading to unbounded memory allocation
controlled by the guest.

Exit with an error if the guest provides more requests than the
virtqueue size permits.  This bounds memory allocation and makes the
buggy guest visible to the user.
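
The added guard is essentially of this form (a sketch; exact error
handling differs between QEMU versions):

    if (vq->inuse >= vq->vring.num) {
        error_report("Virtqueue size exceeded");
        exit(1);    /* later versions mark the device broken instead */
    }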

This patch fixes CVE-2016-5403 and was reported by Zhenhao Hong from 360
Marvel Team, China.

Reported-by: Zhenhao Hong <hongzhenhao@360.cn>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit afd9096eb1)
[BR: CVE-2016-5403 BSC#991080]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Paolo Bonzini
35319a3197 scsi: esp: respect FIFO invariant after message phase
The FIFO contains two bytes; hence the write ptr should be two bytes ahead
of the read pointer.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d020aa504c)
[BR: CVE-2016-5238 BSC#982959]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
ac5d72b393 scsi: megasas: null terminate bios version buffer
While reading information via 'megasas_ctrl_get_info' routine,
a local bios version buffer isn't null terminated. Add the
terminating null byte to avoid any OOB access.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 844864fbae)
[BR: CVE-2016-5337 BSC#983961]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/megasas.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
3ba1a7bc0e scsi: esp: check TI buffer index before read/write
The 53C9X Fast SCSI Controller (FSC) comes with internal 16-byte
FIFO buffers. One is used to handle commands and the other is for
information transfer. Three control variables 'ti_rptr',
'ti_wptr' and 'ti_size' are used to control r/w access to the
information transfer buffer ti_buf[TI_BUFSZ=16]:

'ti_rptr' is the read index, where reads occur.
'ti_wptr' is the write index, where writes occur.
'ti_size' indicates the total number of bytes to be read from the buffer.

While reading/writing to this buffer, the index could exceed its
size. Add a check to avoid OOB r/w access.

Reported-by: Huawei PSIRT <psirt@huawei.com>
Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1465230883-22303-1-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ff589551c8)
[BR: CVE-2016-5338 BSC#983982]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
6aefc57b17 vmsvga: don't process more than 1024 fifo commands at once
vmsvga_fifo_run is called in regular intervals (on each display update)
and will resume where it left off.  So we can simply exit the loop,
without having to worry about how processing will continue.

Fixes: CVE-2016-4453
Cc: qemu-stable@nongnu.org
Cc: P J P <ppandit@redhat.com>
Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1464592161-18348-5-git-send-email-kraxel@redhat.com
(cherry picked from commit 4e68a0ee17)
[BR: CVE-2016-4453 BSC#982223]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
1263662655 vmsvga: move fifo sanity checks to vmsvga_fifo_length
Sanity checks are applied when the fifo is enabled by the guest
(SVGA_REG_CONFIG_DONE write).  Which doesn't help much if the guest
changes the fifo registers afterwards.  Move the checks to
vmsvga_fifo_length so they are done each time qemu is about to read
from the fifo.

Fixes: CVE-2016-4454
Cc: qemu-stable@nongnu.org
Cc: P J P <ppandit@redhat.com>
Reported-by: 李强 <liqiang6-s@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1464592161-18348-2-git-send-email-kraxel@redhat.com
(cherry picked from commit 5213602678)
[BR: CVE-2016-4454 BSC#982222]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Peter Lieven
670d4a7c7b block/iscsi: avoid potential overflow of acb->task->cdb
At least in the path via virtio-blk, the maximum size is not
restricted.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <1464080368-29584-1-git-send-email-pl@kamp.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a6b3167fa0)
[BR: CVE-2016-5126 BSC#982285]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	block/iscsi.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
b66921534f scsi: megasas: check 'read_queue_head' index value
While doing MegaRAID SAS controller command frame lookup, routine
'megasas_lookup_frame' uses 'read_queue_head' value as an index
into 'frames[MEGASAS_MAX_FRAMES=2048]' array. Limit its value
within array bounds to avoid any OOB access.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1464179110-18593-1-git-send-email-ppandit@redhat.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b60bdd1f1e)
[BR: CVE-2016-5107 BSC#982019]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/megasas.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
fdd886a365 scsi: megasas: initialise local configuration data buffer
When reading MegaRAID SAS controller configuration via MegaRAID
Firmware Interface(MFI) commands, routine megasas_dcmd_cfg_read
uses an uninitialised local data buffer. Initialise this buffer
to avoid stack information leakage.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1464178304-12831-1-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d37af74073)
[BR: CVE-2016-5105 BSC#982017]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
e5c5403858 scsi: megasas: use appropriate property buffer size
When setting MegaRAID SAS controller properties via MegaRAID
Firmware Interface(MFI) commands, a user supplied size parameter
is used to set property value. Use appropriate size value to avoid
OOB access issues.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1464172291-2856-2-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1b85898025)
[BR: CVE-2016-5106 BSC#982018]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
f2a69101c7 ohci: allocate timer only once.
Allocate the timer once, at init time, instead of allocating/freeing
it all the time when starting/stopping the bus.  This simplifies the
code, and also fixes bugs (a memory leak) due to missing checks of
whether the timer is already allocated or not.

Cc: Prasad J Pandit <pjp@fedoraproject.org>
Reported-by: Zuozhi Fzz <zuozhi.fzz@alibaba-inc.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit fa1298c2d6)
[BR: CVE-2016-2391 BSC#967013]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/usb/hcd-ohci.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
b2c865b536 usb: check USB configuration descriptor object
When processing remote NDIS control message packets, the USB Net
device emulator checks whether the USB configuration descriptor
object is of RNDIS type (2), but it does not check if it is null,
which leads to a null dereference error. Add a check to avoid it.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1455188480-14688-1-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 80eecda8e5)
[BR: CVE-2016-2392 BSC#967012]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
P J P
0e9c0b22a3 e1000: Avoid infinite loop in processing transmit descriptor (CVE-2015-6815)
While processing transmit descriptors, it could lead to an infinite
loop if 'bytes' were to become zero; add a check to avoid it.

[The guest can force 'bytes' to 0 by setting the hdr_len and mss
descriptor fields to 0.
--Stefan]

Signed-off-by: P J P <pjp@fedoraproject.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 1441383666-6590-1-git-send-email-stefanha@redhat.com
(cherry picked from commit b947ac2bf2)
[BR: CVE-2015-6815 BSC#944697]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Kevin Wolf
6a675ebf45 ahci: Fix FLUSH command
AHCI couldn't cope with asynchronous commands that aren't doing DMA; it
simply wouldn't complete them. Due to the bug fixed in commit f68ec837,
FLUSH commands would seem to have completed immediately even if they
were still running on the host. After the commit, they would simply hang
and never unset the BSY bit, rendering AHCI unusable on any OS sending
flushes.

This patch adds another callback for the completion of asynchronous
commands. This is what AHCI really wants to use for its command
completion logic rather than a DMA completion callback.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a62eaa26c1)
[LM: BNC#982356]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
87df91c34d esp: check dma length before reading scsi command(CVE-2016-4441)
The 53C9X Fast SCSI Controller(FSC) comes with an internal 16-byte
FIFO buffer. It is used to handle command and data transfer.
Routine get_cmd() uses DMA to read scsi commands into this buffer.
Add check to validate DMA length against buffer size to avoid any
overrun.
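
The validation described amounts to something like this (a sketch; buf
and dmalen are assumed to come from the surrounding function):

    if (dmalen > sizeof(buf)) {             /* buf: the 16-byte command buffer */
        return 0;                           /* refuse oversized DMA reads */
    }
    s->dma_memory_read(s->dma_opaque, buf, dmalen);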

Fixes CVE-2016-4441.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1463654371-11169-3-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6c1fef6b59)
[BR: CVE-2016-4441 BSC#980723]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
c5f968a13b esp: check command buffer length before write(CVE-2016-4439)
The 53C9X Fast SCSI Controller(FSC) comes with an internal 16-byte
FIFO buffer. It is used to handle command and data transfer. While
writing to this command buffer 's->cmdbuf[TI_BUFSZ=16]', a check
was missing to validate input length. Add check to avoid OOB write
access.

Fixes CVE-2016-4439.

Reported-by: Li Qiang <liqiang6-s@360.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <1463654371-11169-2-git-send-email-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c98c6c105f)
[BR: CVE-2016-4439 BSC#980711]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
59d47733a5 vga: add sr_vbe register set
Commit "fd3c136 vga: make sure vga register setup for vbe stays intact
(CVE-2016-3712)." causes a regression.  The win7 installer is unhappy
because it can't freely modify vga registers any more while in vbe mode.

This patch introduces a new sr_vbe register set.  The vbe_update_vgaregs
will fill sr_vbe[] instead of sr[].  Normal vga register reads and
writes go to sr[].  Any sr register read access happens through a new
sr() helper function which will read from sr_vbe[] with vbe active and
from sr[] otherwise.

This way we can allow guests to update sr[] registers as they want,
without allowing them to disrupt vbe video modes.
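
The new helper is essentially (a sketch of the described approach):

    static inline uint8_t sr(VGACommonState *s, int idx)
    {
        /* while VBE is active, read the protected shadow copy */
        return vbe_enabled(s) ? s->sr_vbe[idx] : s->sr[idx];
    }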

Cc: qemu-stable@nongnu.org
Reported-by: Thomas Lamprecht <thomas@lamprecht.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1463475294-14119-1-git-send-email-kraxel@redhat.com
(cherry picked from commit 94ef4f337f)
[BR: BSC#978160 CVE-2016-3712]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/display/vga.c
	hw/display/vga_int.h
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
4c292728ce vga: make sure vga register setup for vbe stays intact (CVE-2016-3712).
Call vbe_update_vgaregs() when the guest touches GFX, SEQ or CRT
registers, to make sure the vga registers will always have the
values needed by vbe mode.  This makes sure the sanity checks
applied by vbe_fixup_regs() are effective.

Without this guests can muck with shift_control, can turn on planar
vga modes or text mode emulation while VBE is active, making qemu
take code paths meant for CGA compatibility, but with the very
large display widths and heigts settable using VBE registers.

Which is good for one buffer overflow or another.  Not that
critical, as these are typically read overflows happening somewhere
in the display code.  So guests can DoS by crashing qemu with a
segfault, but it is probably not possible to break out of the VM.

Fixes: CVE-2016-3712
Reported-by: Zuozhi Fzz <zuozhi.fzz@alibaba-inc.com>
Reported-by: P J P <ppandit@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
[BR: BSC#978160 CVE-2016-3712]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
46e2949b4b vga: update vga register setup on vbe changes
Call the new vbe_update_vgaregs() function on vbe configuration
changes, to make sure vga registers are up-to-date.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
[BR: BSC#978160 CVE-2016-3712]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
948754c8d1 vga: factor out vga register setup
When enabling vbe mode qemu will setup a bunch of vga registers to make
sure the vga emulation operates in correct mode for a linear
framebuffer.  Move that code to a separate function so we can call it
from other places too.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
[BR: BSC#978160 CVE-2016-3712]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
69f29d65de vga: add vbe_enabled() helper
Makes code a bit easier to read.
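
The helper boils down to (a sketch):

    static inline bool vbe_enabled(VGACommonState *s)
    {
        return s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED;
    }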

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
[BR: BSC#978160 CVE-2016-3712]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
7676b7308b vga: fix banked access bounds checking (CVE-2016-3710)
vga allows banked access to video memory using the window at 0xa0000
and it supports different access modes with different address
calculations.

The VBE bochs extensions support banked access too, using the
VBE_DISPI_INDEX_BANK register.  The code tries to take the different
address calculations into account and applies different limits to
VBE_DISPI_INDEX_BANK depending on the current access mode.

Which is probably effective in stopping misprogramming by accident.
But from a security point of view it is completely useless, as an
attacker can easily change access modes after setting the bank register.

Drop the bogus check, add range checks to vga_mem_{readb,writeb}
instead.

Fixes: CVE-2016-3710
Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
[BR: BSC#978158 CVE-2016-3710]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
2f96715adf i386: kvmvapic: initialise imm32 variable
When processing Task Priority Register (TPR) access, it could leak
the automatic stack variable 'imm32' in patch_instruction().
Initialise the variable to avoid it.

Reported by: Donghai Zdh <donghai.zdh@alibaba-inc.com>

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[BR: BSC#975700 CVE-2016-4020]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
9f61701313 net: mipsnet: check packet length against buffer
When receiving packets over the MIPSnet network device, it uses a
receive buffer of 1514 bytes. In case the controller
accepts large (MTU) packets, it could lead to memory corruption.
Add a check to avoid it.

Reported by: Oleksandr Bazhaniuk <oleksandr.bazhaniuk@intel.com>

Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[BR: BSC#975136 CVE-2016-4002]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
d1a8028b43 net: stellaris_enet: check packet length against receive buffer
When receiving packets over the Stellaris ethernet controller, it
uses a receive buffer of 2048 bytes. In case the controller
accepts large (MTU) packets, it could lead to memory corruption.
Add a check to avoid it.

Reported-by: Oleksandr Bazhaniuk <oleksandr.bazhaniuk@intel.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1460095428-22698-1-git-send-email-ppandit@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 3a15cc0e1e)
[BR: BSC#975128 CVE-2016-4001 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
cd1897fd36 net: check packet payload length
While computing IP checksum, 'net_checksum_calculate' reads
payload length from the packet. It could exceed the given 'data'
buffer size. Add a check to avoid it.

Reported-by: Liu Ling <liuling-it@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 362786f14a)
[BR: BSC#970037 CVE-2016-2857]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Ladi Prosek
9b4f1ab7f0 rng: add request queue support to rng-random
Requests are now created in the RngBackend parent class and the
code path is shared by both rng-egd and rng-random.

This commit fixes the rng-random implementation which processed
only one request at a time and simply discarded all but the most
recent one. In the guest this manifested as delayed completion
of reads from virtio-rng, i.e. a read was completed only after
another read was issued.

By switching rng-random to use the same request queue as rng-egd,
the unsafe stack-based allocation of the entropy buffer is
eliminated and replaced with g_malloc.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-5-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 60253ed1e6)
[BR: BSC#970036 CVE-2016-2858 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Ladi Prosek
a6390bf22c rng: move request queue cleanup from RngEgd to RngBackend
RngBackend is now in charge of cleaning up the linked list on
instance finalization. It also exposes a function to finalize
individual RngRequest instances, called by its child classes.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-4-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 9f14b0add1)
[BR: support patch for BSC#970036 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	backends/rng.c
2021-03-18 17:15:16 -06:00
Ladi Prosek
e9d1ef475f rng: move request queue from RngEgd to RngBackend
The 'requests' field now lives in the RngBackend parent class.
There are no functional changes in this commit.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-3-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 74074e8a7c)
[BR: support patch for BSC#970036 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	include/sysemu/rng.h
2021-03-18 17:15:16 -06:00
Ladi Prosek
3933ac6171 rng: remove the unused request cancellation code
rng_backend_cancel_requests had no callers and none of the code
deleted in this commit ever ran.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Message-Id: <1456994238-9585-2-git-send-email-lprosek@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 3c52ddcdc5)
[BR: support patch for BSC#970036 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
39ffe45d06 net: ne2000: check ring buffer control registers
The Ne2000 NIC uses a ring buffer of NE2000_MEM_SIZE (49152)
bytes to process network packets. The PSTART & PSTOP registers
define the ring buffer size & location. Setting these registers
to invalid values could lead to an infinite loop or OOB r/w
access issues. Add a check to avoid it.

Reported-by: Yang Hongke <yanghongke@huawei.com>
Tested-by: Yang Hongke <yanghongke@huawei.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 415ab35a44)
[BR: BSC#969350 CVE-2016-2841 (source path modified)]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
a9d3db9e35 usb: check RNDIS buffer offsets & length
When processing remote NDIS control message packets,
the USB Net device emulator uses a fixed length(4096) data buffer.
The incoming informationBufferOffset & Length combination could
overflow and cross that range. Check control message buffer
offsets and length to avoid it.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-id: 1455648821-17340-3-git-send-email-ppandit@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit fe3c546c5f)
[BR: BSC#967969 CVE-2016-2538]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Petr Matousek
f892bc471e i8254: fix out-of-bounds memory access in pit_ioport_read()
Due to converting PIO to the new memory read/write API, we no longer
provide separate I/O region lengths for read and write operations. As a
result, reading from the PIT Mode/Command register will end up accessing
pit->channels with an invalid index.

Fix this by ignoring read from the Mode/Command register.

This is CVE-2015-3214.

Reported-by: Matt Tait <matttait@google.com>
Fixes: 0505bcdec8
Cc: qemu-stable@nongnu.org
Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d4862a87e3)
[AF: bsc#934069]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
John Snow
178b3b92c8 ide: Correct handling of malformed/short PRDTs
This impacts both BMDMA and AHCI HBA interfaces for IDE.
Currently, we confuse the difference between a PRDT having
"0 bytes" and a PRDT having "0 complete sectors."

When we receive an incomplete sector, inconsistent error checking
leads to an infinite loop wherein the call succeeds, but it
didn't give us enough bytes -- leading us to re-call the
DMA chain over and over again. This leads to, in the BMDMA case,
leaked memory for short PRDTs, and infinite loops and resource
usage in the AHCI case.

The .prepare_buf() callback is reworked to return the number of
bytes that it successfully prepared. 0 is a valid, non-error
answer that means the table was empty and described no bytes.
-1 indicates an error.

Our current implementation uses the io_buffer in IDEState to
ultimately describe the size of a prepared scatter-gather list.
Even though the AHCI PRDT/SGList can be as large as 256GiB, the
AHCI command header limits transactions to just 4GiB. ATA8-ACS3,
however, defines the largest transaction to be an LBA48 command
that transfers 65,536 sectors. With a 512 byte sector size, this
is just 32MiB.

Since our current state structures use the int type to describe
the size of the buffer, and this state is migrated as int32, we
are limited to describing 2GiB buffer sizes unless we change the
migration protocol.

For this reason, this patch begins to unify the assertions in the
IDE pathways that the scatter-gather list provided by either the
AHCI PRDT or the PCI BMDMA PRDs can only describe, at a maximum,
2GiB. This should be resilient enough unless we need a sector
size that exceeds 32KiB.

Further, the likelihood of any guest operating system actually
attempting to transfer this much data in a single operation is
very slim.

To this end, the IDEState variables have been updated to more
explicitly clarify our maximum supported size. Callers to the
prepare_buf callback have been reworked to understand the new
return code, and all versions of the prepare_buf callback have
been adjusted accordingly.

Lastly, the ahci_populate_sglist helper, relied upon by the
AHCI implementation of .prepare_buf() as well as the PCI
implementation of the callback have had overflow assertions
added to help make clear the reasonings behind the various
type changes.

[Added %d -> %"PRId64" fix John sent because off_pos changed from int to
int64_t.
--Stefan]

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1414785819-26209-4-git-send-email-jsnow@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3251bdcf1c)
[BR: BSC#928393 CVE-2014-9718]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/ide/ahci.c
	hw/ide/core.c
	hw/ide/internal.h
	hw/ide/macio.c
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
9a54f3039d vmware-vga: use vmsvga_verify_rect in vmsvga_fill_rect
Add verification to vmsvga_fill_rect, re-enable HW_FILL_ACCEL.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Don Koch <dkoch@verizon.com>
(cherry picked from commit bd9ccd8517)
[AF: bsc#901508, change vmsvga_verify_rect() argument]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
79d5f1f7ac vmware-vga: use vmsvga_verify_rect in vmsvga_copy_rect
Add verification to vmsvga_copy_rect, re-enable HW_RECT_ACCEL.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Don Koch <dkoch@verizon.com>
(cherry picked from commit 61b41b4c20)
[AF: bsc#901508, change vmsvga_verify_rect() argument]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
9e91a2c7d5 vmware-vga: use vmsvga_verify_rect in vmsvga_update_rect
Switch vmsvga_update_rect over to use vmsvga_verify_rect.  Slight change
in behavior:  We don't try to automatically fixup rectangles any more.
In case we find invalid update requests we'll do a full-screen update
instead.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Don Koch <dkoch@verizon.com>
(cherry picked from commit 1735fe1edb)
[AF: bsc#901508, surface_{width,height}() -> ds_get_{width,height}()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
75bad7d22c vmware-vga: add vmsvga_verify_rect
Add verification function for rectangles, returning
true if verification passes and false otherwise.
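
A minimal sketch of such a check (illustrative; the real vmsvga_verify_rect()
takes the display state and differs in detail):

    #include <stdbool.h>

    #define RECT_DIM_MAX 16384          /* arbitrary cap for the sketch */

    /* true if the x/y/w/h rectangle lies entirely inside a w_max by h_max
     * surface; the caps above keep the sums from overflowing an int. */
    static bool rect_ok(int x, int y, int w, int h, int w_max, int h_max)
    {
        if (x < 0 || y < 0 || w <= 0 || h <= 0) {
            return false;
        }
        if (x > RECT_DIM_MAX || y > RECT_DIM_MAX ||
            w > RECT_DIM_MAX || h > RECT_DIM_MAX) {
            return false;
        }
        return x + w <= w_max && y + h <= h_max;
    }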

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Don Koch <dkoch@verizon.com>
(cherry picked from commit 07258900fd)
[AF: bsc#901508, ds_get_{width,height}() -> surface_{width,height}()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
89f795cd96 vmware-vga: CVE-2014-3689: turn off hw accel
Quick & easy stopgap for CVE-2014-3689:  We just compile out the
hardware acceleration functions which lack sanity checks.  Thankfully
we have capability bits for them (SVGA_CAP_RECT_COPY and
SVGA_CAP_RECT_FILL), so guests should deal just fine, in theory.

Subsequent patches will add the missing checks and re-enable the
hardware acceleration emulation.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Don Koch <dkoch@verizon.com>
(cherry picked from commit 83afa38eb2)
[AF: bsc#901508]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-18 17:15:16 -06:00
Laszlo Ersek
071d6d468b e1000: eliminate infinite loops on out-of-bounds transfer start
The start_xmit() and e1000_receive_iov() functions implement DMA transfers
iterating over a set of descriptors that the guest's e1000 driver
prepares:

- the TDLEN and RDLEN registers store the total size of the descriptor
  area,

- while the TDH and RDH registers store the offset (in whole tx / rx
  descriptors) into the area where the transfer is supposed to start.

Each time a descriptor is processed, the TDH or RDH register is bumped
(as appropriate for the transfer direction).

QEMU already contains logic to deal with bogus transfers submitted by the
guest:

- Normally, the transmit case wants to increase TDH from its initial value
  to TDT. (TDT is allowed to be numerically smaller than the initial TDH
  value; wrapping at or above TDLEN bytes to zero is normal.) The failsafe
  that QEMU currently has here is a check against reaching the original
  TDH value again -- a complete wraparound, which should never happen.

- In the receive case RDH is increased from its initial value until
  "total_size" bytes have been received; preferably in a single step, or
  in "s->rxbuf_size" byte steps, if the latter is smaller. However, null
  RX descriptors are skipped without receiving data, while RDH is
  incremented just the same. QEMU tries to prevent an infinite loop
  (processing only null RX descriptors) by detecting whether RDH assumes
  its original value during the loop. (Again, wrapping from RDLEN to 0 is
  normal.)

What both directions miss is that the guest could program TDLEN and RDLEN
so low, and the initial TDH and RDH so high, that these registers will
immediately be truncated to zero, and then never reassume their initial
values in the loop -- a full wraparound will never occur.

The condition that expresses this is:

  xdh_start >= s->mac_reg[XDLEN] / sizeof(desc)

i.e., TDH or RDH start out after the last whole rx or tx descriptor that
fits into the TDLEN or RDLEN sized area.

This condition could be checked before we enter the loops, but
pci_dma_read() / pci_dma_write() knows how to fill in buffers safely for
bogus DMA addresses, so we just extend the existing failsafes with the
above condition.
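
Expressed as a small helper, the extra failsafe looks roughly like this
(illustrative; register handling and type names are simplified):

    #include <stdint.h>
    #include <stdbool.h>

    /* The ring is bogus if the saved head index already points past the last
     * whole descriptor that fits into the XDLEN-sized area: the head would be
     * truncated to zero and could never reassume its start value.
     * desc_size stands for sizeof(desc) and is never zero. */
    static bool desc_ring_bogus(uint32_t xdh_start, uint32_t xdlen_bytes,
                                uint32_t desc_size)
    {
        return xdh_start >= xdlen_bytes / desc_size;
    }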

This is CVE-2016-1981.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Prasad Pandit <ppandit@redhat.com>
Cc: Michael Roth <mdroth@linux.vnet.ibm.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: qemu-stable@nongnu.org
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1296044
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit dd793a7488)
[BR: in above description e1000_receive_iov is e1000_receive in backport]
[BR: BSC#963782 CVE-2016-1981]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/net/e1000.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
b10e717d42 usb: ehci: add capability mmio write function
USB Ehci emulation supports host controller capability registers.
But its mmio '.write' function was missing, which led to a null
pointer dereference issue. Add a do-nothing 'ehci_caps_write'
definition to avoid it; it does nothing because the capability
registers are Read Only (RO).
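
The shape of such a handler is simply an empty MMIO write callback (sketch
with simplified types; the real one uses QEMU's MemoryRegionOps signature):

    #include <stdint.h>

    /* Capability registers are read-only: accept the write, change nothing.
     * Having a non-NULL handler is what prevents the NULL dereference. */
    static void ehci_caps_write_sketch(void *opaque, uint64_t addr,
                                       uint64_t val, unsigned size)
    {
        (void)opaque;
        (void)addr;
        (void)val;
        (void)size;
    }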

Reported-by: Zuozhi Fzz <zuozhi.fzz@alibaba-inc.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[BR: BSC#964413 CVE-2016-2198]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Wolfgang Bumiller
d2f89ad738 hmp: fix sendkey out of bounds write (CVE-2015-8619)
When processing the 'sendkey' command, the hmp_sendkey routine null
terminates the 'keyname_buf' array. This results in an OOB
write if 'keyname_len' were to fall outside the
'keyname_buf' array.

Since the keyname's length is known the keyname_buf can be
removed altogether by adding a length parameter to
index_from_key() and using it for the error output as well.

Reported-by: Ling Liu <liuling-it@360.cn>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Message-Id: <20160113080958.GA18934@olga>
[Comparison with "<" dumbed down, test for junk after strtoul()
tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>

(cherry picked from commit 64ffbe04ea)
[BR: BSC#960334 CVE-2015-8619]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hmp.c
	include/ui/console.h
	ui/input-legacy.c
2021-03-18 17:15:16 -06:00
Peter Lieven
8959003f68 ui/vnc: limit client_cut_text msg payload size
Currently a malicious client could define a payload
size of 2^32 - 1 bytes and send up to that much
data to the vnc server. The server would allocate
that amount of memory, which could easily create an
out-of-memory condition.

This patch limits the payload size to 1MB max.
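
A sketch of the size cap (the 1MB figure is the one given above; everything
else is a stand-in):

    #include <stdint.h>
    #include <stdbool.h>

    #define CUT_TEXT_MAX (1u * 1024 * 1024)   /* 1MB cap on client_cut_text */

    /* Reject oversized payloads before any allocation happens. */
    static bool cut_text_len_ok(uint32_t announced_len)
    {
        return announced_len <= CUT_TEXT_MAX;
    }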

Please note that client_cut_text messages are currently
silently ignored.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit f9a70e7939)
[CYL: BSC#944463 CVE-2015-5239]
Signed-off-by: Chunyan Liu <cyliu@suse.com>
2021-03-18 17:15:16 -06:00
Chunyan Liu
f8cbdbf983 vbe: rework sanity checks
Plug a bunch of holes in the bochs dispi interface parameter checking.
Add a function doing verification on all registers.  Call that
unconditionally on every register write.  That way we should catch
everything, even changing one register affecting the valid range of
another register.

Some of the holes have been added by commit
e9c6149f6a.  Before that commit the
maximum possible framebuffer (VBE_DISPI_MAX_XRES * VBE_DISPI_MAX_YRES *
32 bpp) has been smaller than the qemu vga memory (8MB) and the checking
for VBE_DISPI_MAX_XRES + VBE_DISPI_MAX_YRES + VBE_DISPI_MAX_BPP was ok.

Some of the holes have been there forever, such as
VBE_DISPI_INDEX_X_OFFSET and VBE_DISPI_INDEX_Y_OFFSET register writes
lacking any verification.

Security impact:

(1) Guest can make the ui (gtk/vnc/...) use memory ranges outside the vga
frame buffer as source  ->  host memory leak.  Memory isn't leaked to
the guest but to the vnc client though.

(2) Qemu will segfault in case the memory range happens to include
unmapped areas  ->  Guest can DoS itself.

The guest can not modify host memory, so I don't think this can be used
by the guest to escape.

CVE-2014-3615

Cc: qemu-stable@nongnu.org
Cc: secalert@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
(cherry picked from commit c1b886c45d)
[CYL: BSC#895528 CVE-2014-3615]
Signed-off-by: Chunyan Liu <cyliu@suse.com>
2021-03-18 17:15:16 -06:00
Chunyan Liu
65cae03d31 vbe: make bochs dispi interface return the correct memory size with qxl
VgaState->vram_size is the size of the pci bar.  In case of qxl not the
whole pci bar can be used as vga framebuffer.  Add a new variable
vbe_size to handle that case.  By default (if unset) it equals
vram_size, but qxl can set vbe_size to something else.

This makes sure VBE_DISPI_INDEX_VIDEO_MEMORY_64K returns correct results
and sanity checks are done with the correct size too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
(cherry picked from commit c1b886c45d)
[CYL: BSC#895528 CVE-2014-3615]
Signed-off-by: Chunyan Liu <cyliu@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
e8d412a913 ide: ahci: reset ncq object to unused on error
When processing NCQ commands, AHCI device emulation prepares an
NCQ transfer object, to which an aio control block (aiocb) object
is assigned in 'execute_ncq_command'. When the NCQ command is
invalid, the 'aiocb' object is not assigned and the NCQ transfer
object is left marked as 'used'. This leads to a use-after-free
error in 'bdrv_aio_cancel_async' via 'ahci_reset_port'.
Reset the NCQ transfer object to 'unused' to avoid it.

[Maintainer edit: s/ACHI/AHCI/ in the commit message. --js]

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1452282511-4116-1-git-send-email-ppandit@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit 4ab0359a8a)
[LM: BSC#961333 CVE-2016-1568]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
John Snow
31fff2a787 ahci: add ncq_err helper
Set some appropriate error bits for NCQ for us.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1435016308-6150-4-git-send-email-jsnow@redhat.com
(cherry picked from commit a55c8231d0)
[LM: BSC#961333 CVE-2016-1568]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
536dec7447 net: ne2000: fix bounds check in ioport operations
While doing ioport r/w operations, ne2000 device emulation suffers
from OOB r/w errors. Update respective array bounds check to avoid
OOB access.

Reported-by: Ling Liu <liuling-it@360.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit aa7f9966df)
[LM: BSC#960725 CVE-2015-8743]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Marc-André Lureau
7f0879dc5a msix: implement pba write (but read-only)
qpci_msix_pending() writes on pba region, causing qemu to SEGV:

  Program received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0x7ffff7fba8c0 (LWP 25882)]
  0x0000000000000000 in ?? ()
  (gdb) bt
  #0  0x0000000000000000 in  ()
  #1  0x00005555556556c5 in memory_region_oldmmio_write_accessor (mr=0x5555579f3f80, addr=0, value=0x7fffffffbf68, size=4, shift=0, mask=4294967295, attrs=...) at /home/elmarco/src/qemu/memory.c:434
  #2  0x00005555556558e1 in access_with_adjusted_size (addr=0, value=0x7fffffffbf68, size=4, access_size_min=1, access_size_max=4, access=0x55555565563e <memory_region_oldmmio_write_accessor>, mr=0x5555579f3f80, attrs=...) at /home/elmarco/src/qemu/memory.c:506
  #3  0x00005555556581eb in memory_region_dispatch_write (mr=0x5555579f3f80, addr=0, data=0, size=4, attrs=...) at /home/elmarco/src/qemu/memory.c:1176
  #4  0x000055555560b6f9 in address_space_rw (as=0x555555eff4e0 <address_space_memory>, addr=3759147008, attrs=..., buf=0x7fffffffc1b0 "", len=4, is_write=true) at /home/elmarco/src/qemu/exec.c:2439
  #5  0x000055555560baa2 in cpu_physical_memory_rw (addr=3759147008, buf=0x7fffffffc1b0 "", len=4, is_write=1) at /home/elmarco/src/qemu/exec.c:2534
  #6  0x000055555564c005 in cpu_physical_memory_write (addr=3759147008, buf=0x7fffffffc1b0, len=4) at /home/elmarco/src/qemu/include/exec/cpu-common.h:80
  #7  0x000055555564cd9c in qtest_process_command (chr=0x55555642b890, words=0x5555578de4b0) at /home/elmarco/src/qemu/qtest.c:378
  #8  0x000055555564db77 in qtest_process_inbuf (chr=0x55555642b890, inbuf=0x55555641b340) at /home/elmarco/src/qemu/qtest.c:569
  #9  0x000055555564dc07 in qtest_read (opaque=0x55555642b890, buf=0x7fffffffc2e0 "writel 0xe0100800 0x0\n", size=22) at /home/elmarco/src/qemu/qtest.c:581
  #10 0x000055555574ce3e in qemu_chr_be_write (s=0x55555642b890, buf=0x7fffffffc2e0 "writel 0xe0100800 0x0\n", len=22) at qemu-char.c:306
  #11 0x0000555555751263 in tcp_chr_read (chan=0x55555642bcf0, cond=G_IO_IN, opaque=0x55555642b890) at qemu-char.c:2876
  #12 0x00007ffff64c9a8a in g_main_context_dispatch (context=0x55555641c400) at gmain.c:3122

(without this patch, this can be reproduced with the ivshmem qtest)

Implement an empty mmio write to avoid the crash.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 43b11a91dd)
[LM: BSC#958917 CVE-2015-7549]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Gerd Hoffmann
b0948314b2 ehci: apply limit to iTD/sidt descriptors
Commit "156a2e4 ehci: make idt processing more robust" tries to avoid a
DoS by the guest (create a circular iTD queue and let qemu ehci
emulation run in circles forever).  Unfortunately this has two problems:
First it misses the case of siTDs, and second it reportedly breaks
FreeBSD.

So let's go for a different approach: just count the number of iTDs and
siTDs we have seen per frame and apply a limit.  That should really
catch all cases now.

Reported-by: 杜少博 <dushaobo@360.cn>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 1ae3f2f178)
[BR: BSC#976109 CVE-2016-4037]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Michael S. Tsirkin
339105813a virtio-serial: fix ANY_LAYOUT
Don't assume a specific layout for control messages.
Required by virtio 1.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 7882080388)
[LM: BSC#940929 CVE-2015-5745]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
Prasad J Pandit
cdb4131a16 ui: vnc: avoid floating point exception
While sending 'SetPixelFormat' messages to a VNC server,
the client could set the 'red-max', 'green-max' and 'blue-max'
values to be zero. This leads to a floating point exception in
write_png_palette while doing frame buffer updates.
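
One way to picture the guard (a sketch, not the exact upstream change):
reject a pixel format whose colour maxima are zero before any palette math
runs.

    #include <stdint.h>
    #include <stdbool.h>

    /* A zero maximum would later be used as a divisor; refuse it up front. */
    static bool pixel_format_ok(uint16_t red_max, uint16_t green_max,
                                uint16_t blue_max)
    {
        return red_max != 0 && green_max != 0 && blue_max != 0;
    }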

Reported-by: Lian Yihan <lianyihan@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4c65fed8bd)
[LM: BSC#958491 CVE-2015-8504]
Signed-off-by: Lin Ma <lma@suse.com>
2021-03-18 17:15:16 -06:00
P J P
483c94a8c7 scsi: initialise info object with appropriate size
While processing the controller 'CTRL_GET_INFO' command, the routine
'megasas_ctrl_get_info' overflows the '&info' object. Use its
appropriate size to null-initialise it.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <alpine.LFD.2.20.1512211501420.22471@wniryva>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: P J P <ppandit@redhat.com>
(cherry picked from commit 36fef36b91)
[BR: BSC#961556 CVE-2015-8613]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/scsi/megasas.c
2021-03-18 17:15:16 -06:00
P J P
a9f5400b1b i386: avoid null pointer dereference
When an I/O port write operation is invoked from the hmp interface,
'current_cpu' remains null, as it is not called from the cpu_exec()
loop. This leads to a null pointer dereference in the vapic_write
routine. Add a check to avoid it.
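
The shape of the check is a simple early return when no vCPU is executing
(sketch only; the surrounding function and parameters are simplified):

    #include <stdint.h>

    /* hmp-initiated port writes arrive with no executing vCPU, so bail out
     * instead of dereferencing a NULL current_cpu. */
    static void vapic_write_sketch(void *current_cpu, uint64_t addr,
                                   uint32_t data)
    {
        if (!current_cpu) {
            return;             /* not called from the cpu_exec() loop */
        }
        (void)addr;
        (void)data;
        /* ... normal vAPIC write handling would follow ... */
    }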

Reported-by: Ling Liu <liuling-it@360.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <alpine.LFD.2.20.1512181129320.9805@wniryva>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: P J P <ppandit@redhat.com>
(cherry picked from commit 4c1396cb57)
[BR: BSC#962320 CVE-2016-1922]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/i386/kvmvapic.c
2021-03-18 17:15:16 -06:00
Prasad J Pandit
3de75379ef fw_cfg: add check to validate current entry value
When processing firmware configurations, an OOB r/w access occurs
if 's->cur_entry' is set to an invalid value (FW_CFG_INVALID=0xffff).
Add a check to validate 's->cur_entry' to avoid such access.
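
A sketch of the kind of validation meant here (FW_CFG_INVALID is the value
from the description; the table size and lookup helper are illustrative):

    #include <stdint.h>
    #include <stddef.h>

    #define FW_CFG_INVALID     0xffff
    #define FW_CFG_NUM_ENTRIES 0x200     /* illustrative table size */

    /* Only dereference the entry table when cur_entry is a real, in-range
     * index; anything else is rejected instead of being used as an index. */
    static const char *fw_cfg_lookup_sketch(const char *table[], uint16_t cur_entry)
    {
        if (cur_entry == FW_CFG_INVALID || cur_entry >= FW_CFG_NUM_ENTRIES) {
            return NULL;
        }
        return table[cur_entry];
    }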

Reported-by: Donghai Zdh <donghai.zdh@alibaba-inc.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
[BR: BSC#961691 CVE-2016-1714]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Andreas Färber
26f8c6e533 ide: Set BSY bit during FLUSH
The implementation of the ATA FLUSH command invokes a flush at the block
layer, which may on raw files on POSIX entail a synchronous fdatasync().
This may in some cases take so long that the SLES 11 SP1 guest driver
reports I/O errors and filesystems get corrupted or remounted read-only.

Avoid this by setting BUSY_STAT, so that the guest is made aware we are
in the middle of an operation and no ATA commands are attempted to be
processed concurrently.

Addresses BNC#637297.

Suggested-by: Gonglei (Arei) <arei.gonglei@huawei.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f68ec8379e)
[BSC#936132]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
John Snow
fc266e683d ide: fix ATAPI command permissions
We're a little too lenient with what we'll let an ATAPI drive handle.
Clamp down on the IDE command execution table to remove CD_OK permissions
from commands that are not and have never been ATAPI commands.

For ATAPI command validity, please see:
- ATA4 Section 6.5 ("PACKET Command feature set")
- ATA8/ACS Section 4.3 ("The PACKET feature set")
- ACS3 Section 4.3 ("The PACKET feature set")

ACS3 has a historical command validity table in Table B.4
("Historical Command Assignments") that can be referenced to find when
a command was introduced, deprecated, obsoleted, etc.

The only reference for ATAPI command validity is by checking that
version's PACKET feature set section.

ATAPI was introduced by T13 into ATA4, all commands retired prior to ATA4
therefore are assumed to have never been ATAPI commands.

Mandatory commands, as listed in ATA8-ACS3, are:

- DEVICE RESET
- EXECUTE DEVICE DIAGNOSTIC
- IDENTIFY DEVICE
- IDENTIFY PACKET DEVICE
- NOP
- PACKET
- READ SECTOR(S)
- SET FEATURES

Optional commands as listed in ATA8-ACS3, are:

- FLUSH CACHE
- READ LOG DMA EXT
- READ LOG EXT
- WRITE LOG DMA EXT
- WRITE LOG EXT

All other commands are illegal to send to an ATAPI device and should
be rejected by the device.

CD_OK removal justifications:

0x06 WIN_DSM              Defined in ACS2. Not valid for ATAPI.
0x21 WIN_READ_ONCE        Retired in ATA5. Not ATAPI in ATA4.
0x94 WIN_STANDBYNOW2      Retired in ATA4. Did not coexist with ATAPI.
0x95 WIN_IDLEIMMEDIATE2   Retired in ATA4. Did not coexist with ATAPI.
0x96 WIN_STANDBY2         Retired in ATA4. Did not coexist with ATAPI.
0x97 WIN_SETIDLE2         Retired in ATA4. Did not coexist with ATAPI.
0x98 WIN_CHECKPOWERMODE2  Retired in ATA4. Did not coexist with ATAPI.
0x99 WIN_SLEEPNOW2        Retired in ATA4. Did not coexist with ATAPI.
0xE0 WIN_STANDBYNOW1      Not part of ATAPI in ATA4, ACS or ACS3.
0xE1 WIN_IDLEIMMDIATE     Not part of ATAPI in ATA4, ACS or ACS3.
0xE2 WIN_STANDBY          Not part of ATAPI in ATA4, ACS or ACS3.
0xE3 WIN_SETIDLE1         Not part of ATAPI in ATA4, ACS or ACS3.
0xE4 WIN_CHECKPOWERMODE1  Not part of ATAPI in ATA4, ACS or ACS3.
0xE5 WIN_SLEEPNOW1        Not part of ATAPI in ATA4, ACS or ACS3.
0xF8 WIN_READ_NATIVE_MAX  Obsoleted in ACS3. Not ATAPI in ATA4 or ACS.

This patch fixes a divide by zero fault that can be caused by sending
the WIN_READ_NATIVE_MAX command to an ATAPI drive, which causes it to
attempt to use zeroed CHS values to perform sector arithmetic.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1441816082-21031-1-git-send-email-jsnow@redhat.com
CC: qemu-stable@nongnu.org
(cherry picked from commit d9033e1d3a)
[BR: CVE-2015-6855 BSC#945404]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Jason Wang
c7bdf03a5a virtio-net: correctly drop truncated packets
When a packet is truncated during receiving, we drop the packet but
neither discard the descriptor nor add and signal a used
descriptor. This leads to several issues:

- sg mappings are leaked
- rx will be stalled if a lot of packets were truncated

In order to be consistent with vhost, fix by discarding the descriptor
in this case.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

(cherry picked from commit 0cf33fb6b4)
[BR: BSC#947159 CVE-2015-7295]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Jason Wang
d846293609 virtio: introduce virtqueue_discard()
This patch introduces virtqueue_discard() to discard a descriptor and
unmap the sgs. This will be used by the patch that will discard
the descriptor when a packet is truncated.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

(cherry picked from commit 29b9f5efd7)
[BR: BSC#947159 CVE-2015-7295]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
Jason Wang
2953bff061 virtio: introduce virtqueue_unmap_sg()
Factor out sg unmapping logic. This will be reused by the patch that
can discard descriptors.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Andrew James <andrew.james@hpe.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

(cherry picked from commit ce31746157)
[BR: BSC#947159 CVE-2015-7295]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:15:16 -06:00
P J P
028f5d1153 net: avoid infinite loop when receiving packets(CVE-2015-5278)
The Ne2000 NIC uses a ring buffer of NE2000_MEM_SIZE (49152)
bytes to process network packets. While receiving packets
via ne2000_receive() routine, a local 'index' variable
could exceed the ring buffer size, leading to an infinite
loop situation.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: P J P <pjp@fedoraproject.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 737d2b3c41)
[BR: BSC#945989]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/net/ne2000.c
2021-03-18 17:15:16 -06:00
P J P
ab49866dd1 net: add checks to validate ring buffer pointers(CVE-2015-5279)
The Ne2000 NIC uses a ring buffer of NE2000_MEM_SIZE (49152)
bytes to process network packets. While receiving packets
via ne2000_receive() routine, a local 'index' variable
could exceed the ring buffer size, which could lead to a
memory buffer overflow. Added other checks at initialisation.
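
Schematically, the bound amounts to (sketch only; the real ne2000_receive()
wraps between the guest-programmed start and stop pages):

    #include <stdint.h>
    #include <stdbool.h>

    #define NE2000_MEM_SIZE 49152

    /* A write of 'len' bytes at 'index' must stay inside the ring memory;
     * otherwise the packet is dropped. */
    static bool ne2000_write_in_bounds(uint32_t index, uint32_t len)
    {
        return index < NE2000_MEM_SIZE && len <= NE2000_MEM_SIZE - index;
    }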

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: P J P <pjp@fedoraproject.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 9bbdbc66e5)
[BR: BSC#945987]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/net/ne2000.c
2021-03-18 17:15:16 -06:00
Bruce Rogers
5e40fd2e0e savevm: Produce message in case of failed migration to assist user
Include-If: %if 0%{?sp_version} == 4

Unfortunately we had to break migration for previous releases of
kvm on SLE11 SP4, in order to be compatible with SLE11 SP3 as
well as SLE12. In order to help the users understand and get
past this issue, we will output a message when the migration
fails, pointing them to TID # 7017048, which will provide details
about the issue.

NOTE: As soon as the vast majority of users are already past the
likelihood of still hitting this issue, we should remove this
patch.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-18 17:14:24 -06:00
Jason Wang
dc618750bd pcnet: fix rx buffer overflow(CVE-2015-7512)
Backends could provide a packet whose length is greater than the buffer
size. Check for this and truncate the packet to avoid rx buffer
overflow in this case.

Cc: Prasad J Pandit <pjp@fedoraproject.org>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
[BR: BSC#957162]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Chunyan Liu
651e2a9b57 eepro100: Prevent two endless loops
http://lists.nongnu.org/archive/html/qemu-devel/2015-11/msg04592.html
shows an example of how an endless loop in the function
action_command can be achieved.

During my code review, I noticed a 2nd case which can result in an
endless loop.

Reported-by: Qinghao Tang <luodalongde@gmail.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 00837731d2)
(CYL: BSC#956829 CVE-2015-8345)
Signed-off-by: Chunyan Liu <cyliu@suse.com>

Conflicts:
    hw/net/eepro100.c
2021-03-17 21:09:20 -06:00
Alexander Graf
c546895fdc kvmclock: Ensure time in migration never goes backward
When we migrate we ask the kernel about its current belief on what the guest
time would be. However, I've seen cases where the kvmclock guest structure
indicates a time more recent than the kvm returned time.

To make sure we never go backwards, calculate what the guest would have
seen as time at the point of migration and use that value instead of the
kernel returned one when it's more recent.
This bases the view of the kvmclock after migration on the
same foundation in host as well as guest.
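
The rule reduces to a comparison of the two clock readings (sketch; the
actual patch derives the guest view from the kvmclock structure):

    #include <stdint.h>

    /* Prefer the guest-visible clock value whenever it is ahead of what the
     * kernel reports, so the migrated clock never jumps backwards. */
    static uint64_t kvmclock_pick_sketch(uint64_t kernel_ns, uint64_t guest_ns)
    {
        return guest_ns > kernel_ns ? guest_ns : kernel_ns;
    }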

Signed-off-by: Alexander Graf <agraf@suse.de>
Cc: qemu-stable@nongnu.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9a48bcd1b8)
[BSC#947164, BSC#953187]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Marcelo Tosatti
41f6bcba97 kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec calculation
Ensure proper env->tsc value for kvmclock_current_nsec calculation.

Reported-by: Marcin Gibuła <m.gibula@beyond.pl>
Analyzed-by: Marcin Gibuła <m.gibula@beyond.pl>
Cc: qemu-stable@nongnu.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 317b0a6d8b)
[BSC#947164, BSC#953187]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Marcelo Tosatti
f46a6f4c71 Introduce cpu_clean_all_dirty
Introduce cpu_clean_all_dirty, to force subsequent cpu_synchronize_all_states
to read in-kernel register state.

Cc: qemu-stable@nongnu.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit de9d61e83d)
[BSC#947164, BSC#953187]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	cpus.c
	include/sysemu/kvm.h
	kvm-all.c
2021-03-17 21:09:20 -06:00
Marcelo Tosatti
44a8bb827f kvmclock: clock should count only if vm is running
kvmclock should not count while vm is paused, because:

1) if the vm is paused for long periods, timekeeping
math can overflow while converting the (large) clocksource
delta to nanoseconds.

2) Users rely on CLOCK_MONOTONIC to count run time, that is,
time which OS has been in a runnable state (see CLOCK_BOOTTIME).

Change kvmclock driver so as to save clock value when vm transitions
from runnable to stopped state, and to restore clock value from stopped
to runnable transition.

Cc: qemu-stable@nongnu.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 00f4d64ee7)
[BSC#947164, BSC#953187]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Kevin Wolf
a2afcb9f7e ide: Clear DRQ after handling all expected accesses
This is additional hardening against an end_transfer_func that fails to
clear the DRQ status bit. The bit must be unset as soon as the PIO
transfer has completed, so it's better to do this in a central place
instead of duplicating the code in all commands (and forgetting it in
some).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
[BR: BSC#938344]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Kevin Wolf
02fd0e8c47 ide/atapi: Fix START STOP UNIT command completion
The command must be completed on all code paths. START STOP UNIT with
pwrcnd set should succeed without doing anything.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
[BR: BSC#938344]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Kevin Wolf
2b33e451b0 ide: Check array bounds before writing to io_buffer (CVE-2015-5154)
If the end_transfer_func of a command is called because enough data has
been read or written for the current PIO transfer, and it fails to
correctly call the command completion functions, the DRQ bit in the
status register and s->end_transfer_func may remain set. This allows the
guest to access further bytes in s->io_buffer beyond s->data_end, and
eventually overflowing the io_buffer.

One case where this currently happens is emulation of the ATAPI command
START STOP UNIT.

This patch fixes the problem by adding explicit array bounds checks
before accessing the buffer instead of relying on end_transfer_func to
function correctly.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
[BR: BSC#938344]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Michael Tokarev
d4268af7a5 slirp: use less predictable directory name in /tmp for smb config (CVE-2015-4037)
In this version I used mkdtemp(3) which is:

        _BSD_SOURCE
        || /* Since glibc 2.10: */
            (_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)

(POSIX.1-2008), so should be available on systems we care about.

While at it, reset the resulting directory name within the smb structure
on error so the cleanup function won't try to remove a directory which we
failed to create.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
(cherry picked from commit 8b8f1c7e9d)
[BR: BSC#932267]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Petr Matousek
ee9fd13005 pcnet: force the buffer access to be in bounds during tx
4096 is the maximum length per TMD and it is also currently the size of
the relay buffer pcnet driver uses for sending the packet data to QEMU
for further processing. With packet spanning multiple TMDs it can
happen that the overall packet size will be bigger than sizeof(buffer),
which results in memory corruption.

Fix this by only allowing a maximum of sizeof(buffer) bytes to be queued.
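
One way to picture the bound (illustrative sketch; the names and the
truncate-rather-than-drop policy here are assumptions, the commit itself
only queues up to sizeof(buffer) bytes in total):

    #include <stdint.h>
    #include <string.h>

    #define XMIT_BUF_SIZE 4096   /* the relay buffer mentioned above */

    /* Append at most the remaining space, so a packet spanning many TMDs can
     * never write past the end of the relay buffer.  Assumes used <= size. */
    static size_t tx_append_sketch(uint8_t buf[XMIT_BUF_SIZE], size_t used,
                                   const uint8_t *frag, size_t frag_len)
    {
        size_t space = XMIT_BUF_SIZE - used;
        size_t n = frag_len < space ? frag_len : space;

        memcpy(buf + used, frag, n);
        return used + n;
    }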

This is CVE-2015-3209.

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Matt Tait <matttait@google.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[BR: BSC#932770]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Gonglei
cb3e3659d0 pcnet: fix Negative array index read
s->xmit_pos may be assigned a negative value (-1),
but in this branch s->xmit_pos is used as an index into
the array s->buffer. Let's add a check for s->xmit_pos.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7b50d00911)
[BR: BSC#932770]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/net/pcnet.c
2021-03-17 21:09:20 -06:00
Petr Matousek
1a16c42ecc fdc: force the fifo access to be in bounds of the allocated buffer
During processing of certain commands such as FD_CMD_READ_ID and
FD_CMD_DRIVE_SPECIFICATION_COMMAND the fifo memory access could
get out of bounds leading to memory corruption with values coming
from the guest.

Fix this by making sure that the index is always bounded by the
allocated memory.
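
The essence of the fix is that every fifo access goes through a bounded
index, along the lines of (illustrative sketch, not the actual fdctrl code):

    #include <stdint.h>
    #include <stddef.h>

    /* Keep any computed position inside the allocated fifo, whatever values
     * the guest managed to feed into the command state machine. */
    static uint8_t fifo_read_sketch(const uint8_t *fifo, size_t fifo_size,
                                    size_t pos)
    {
        return fifo[pos % fifo_size];
    }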

This is CVE-2015-3456.

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
[AF: BSC#929339]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Daniel P. Berrange
9b91ab56ba CVE-2015-1779: limit size of HTTP headers from websockets clients
The VNC server websockets decoder will read and buffer data from
websockets clients until it sees the end of the HTTP headers,
as indicated by \r\n\r\n. In theory this allows a malicious client to
trick QEMU into consuming an arbitrary amount of RAM. In practice,
because QEMU runs g_strstr_len() across the buffered header data,
it will spend increasingly long burning CPU time searching for
the substring match and less & less time reading data. So while
this does cause arbitrary memory growth, the bigger problem is
that QEMU will be burning 100% of available CPU time.

A novnc websockets client typically sends headers of around
512 bytes in length. As such it is reasonable to place a 4096
byte limit on the amount of data buffered while searching for
the end of HTTP headers.
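
A sketch of the limit (the 4096 figure is the one given above; the buffer
handling around it is simplified):

    #include <stddef.h>
    #include <stdbool.h>

    #define WS_HANDSHAKE_MAX 4096   /* bytes buffered while waiting for \r\n\r\n */

    /* Give up on clients that never terminate their HTTP headers instead of
     * buffering, and repeatedly re-scanning, an ever-growing amount of data. */
    static bool ws_handshake_len_ok(size_t buffered)
    {
        return buffered <= WS_HANDSHAKE_MAX;
    }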

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 2cdb5e142f)
[AF: BSC#924018]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Daniel P. Berrange
0d4de284e8 CVE-2015-1779: incrementally decode websocket frames
The logic for decoding websocket frames wants to fully
decode the frame header and payload, before allowing the
VNC server to see any of the payload data. There is no
size limit on websocket payloads, so this allows a
malicious network client to consume 2^64 bytes in memory
in QEMU. It can trigger this denial of service before
the VNC server even performs any authentication.

The fix is to decode the header, and then incrementally
decode the payload data as it is needed. With this fix
the websocket decoder will allow at most 4k of data to
be buffered before decoding and processing payload.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
[ kraxel: fix frequent spurious disconnects, suggested by Peter Maydell ]
[ kraxel: fix 32bit build ]
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit a2bebfd6e0)
[AF: BSC#924018]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Paolo Bonzini
157e595970 virtio-blk: do not relay a previous driver's WCE configuration to the current
The following sequence happens:
- the SeaBIOS virtio-blk driver does not support the WCE feature, which
causes QEMU to disable writeback caching

- the Linux virtio-blk driver resets the device, finds WCE is available
but writeback caching is disabled; tells block layer to not send cache
flush commands

- the Linux virtio-blk driver sets the DRIVER_OK bit, which causes
writeback caching to be re-enabled, but the Linux virtio-blk driver does
not know of this side effect and cache flushes remain disabled

The bug is at the third step.  If the guest does know about CONFIG_WCE,
QEMU should ignore the WCE feature's state.  The guest will control the
cache mode solely using configuration space.  This change makes Linux
do flushes correctly, but Linux will keep SeaBIOS's writethrough mode.

Hence, whenever the guest is reset, the cache mode of the disk should
be reset to whatever was specified in the "-drive" option.  With this
change, the Linux virtio-blk driver finds that writeback caching is
enabled, and tells the block layer to send cache flush commands
appropriately.

Reported-by: Rusty Russell <rusty@au1.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
[BR: Fixes bsc#920571]
(cherry picked from commit ef5bc96268)
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	hw/virtio-blk.c
	hw/virtio-blk.h
2021-03-17 21:09:20 -06:00
8652076a8a migration: Fix incorrect state information for migrate_cancel
In qemu 1.4.x, when performing migrate_cancel during migration,
if migrate_fd_cancel in the main thread is scheduled to run before
the buffered_file_thread thread calls migrate_fd_put_buffer, s->state
will be modified to MIG_STATE_CANCELLED by the main thread; then
migrate_fd_put_buffer in the buffered_file_thread thread will return
-EIO because s->state != MIG_STATE_ACTIVE. This return value will
trigger migrate_fd_error to set s->state = MIG_STATE_ERROR.

The patch fixes this issue.

Signed-off-by: Lin Ma <lma@suse.com>
[AF: BNC#843074]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Michael S. Tsirkin
4f6297ffde cpu: verify that block->host is set
If it isn't, access at an offset will cause memory corruption.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit b78accf614)

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:20 -06:00
Michael S. Tsirkin
1eb10086b3 cpu: assert host pointer offset within block
Make accesses safer in case we missed some
check somewhere.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit fd5f3b6367)

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:20 -06:00
Michael S. Tsirkin
d19868d2ae exec: add wrapper for host pointer access
Host pointer accesses force pointer math; let's
add a wrapper to make them safer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 1240be2435)
Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:20 -06:00
Michael S. Tsirkin
774865abda migration: fix parameter validation on ram load
During migration, the values read from the migration stream during ram load
are not validated -- especially the offset in host_from_stream_offset() and
also the length of the writes in the callers of said function.

To fix this, we need to make sure that the [offset, offset + length]
range fits into one of the allocated memory regions.

Validating addr < len should be sufficient since data seems to always be
managed in TARGET_PAGE_SIZE chunks.
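
Schematically, the validation amounts to (sketch; the block lookup and field
names are simplified):

    #include <stdint.h>
    #include <stdbool.h>

    /* A write of 'length' bytes at 'offset' is acceptable only if the whole
     * [offset, offset + length) range fits inside the block's host mapping.
     * The first two checks also keep the sum from overflowing for any
     * realistic block size. */
    static bool ram_load_range_ok(uint64_t offset, uint64_t length,
                                  uint64_t block_length)
    {
        if (offset >= block_length || length > block_length) {
            return false;
        }
        return offset + length <= block_length;
    }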

Fixes: CVE-2014-7840

Note: follow-up patches add extra checks on each block->host access.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
(cherry picked from commit 0be839a270)
Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:20 -06:00
Gerd Hoffmann
4c4e7b58f9 cirrus: don't overflow CirrusVGAState->cirrus_bltbuf
This is CVE-2014-8106.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit bf25983345)
[AF: BNC#907805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Gerd Hoffmann
42f0b2c45e cirrus: fix blit region check
Issues:
 * Doesn't check pitches correctly in case it is negative.
 * Doesn't check width at all.

Turn the macro into functions while at it; also factor out the check
for one region, which we can then simply call twice for src + dst.

This is CVE-2014-8106.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d3532a0db0)
[AF: BNC#907805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:20 -06:00
Peter Lieven
7e1b8bf7ee migration: do not overwrite zero pages
On incoming migration, do not memset pages to zero if they already read as zero.
Doing so would allocate a new zero page and consume memory unnecessarily. Even
if we madvise MADV_DONTNEED later, this will only deallocate the memory
asynchronously.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 211ea74022)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
68fcd8b903 Revert "migration: do not sent zero pages in bulk stage"
Not sending zero pages breaks migration if a page is zero
at the source but not at the destination. This can e.g. happen
if different BIOS versions are used at source and destination.
It has also been reported that migration on pseries is completely
broken with this patch.

This effectively reverts commit f1c72795af.

Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:

	arch_init.c

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 9ef051e553)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
62d134cd3a migration: use XBZRLE only after bulk stage
at the beginning of migration all pages are marked dirty and
in the first round a bulk migration of all pages is performed.

currently all these pages are copied to the page cache regardless
of whether they are frequently updated or not. this doesn't make sense
since most of these pages are never transferred again.

this patch changes the XBZRLE transfer to only be used after
the bulk stage has been completed. that means a page is added
to the page cache the second time it is transferred and XBZRLE
can benefit from the third time of transfer.

since the page cache is likely smaller than the number of pages
it's also likely that in the second round the page is missing in the
cache due to collisions in the bulk phase.

on the other hand a lot of unnecessary mallocs, memdups and frees
are saved.

the following results have been taken earlier while executing
the test program from docs/xbzrle.txt. (+) with the patch and (-)
without. (thanks to Eric Blake for reformatting and comments)

+ total time: 22185 milliseconds
- total time: 22410 milliseconds

Shaved 0.3 seconds, better than 1%!

+ downtime: 29 milliseconds
- downtime: 21 milliseconds

Not sure why downtime seemed worse, but probably not the end of the world.

+ transferred ram: 706034 kbytes
- transferred ram: 721318 kbytes

Fewer bytes sent - good.

+ remaining ram: 0 kbytes
- remaining ram: 0 kbytes
+ total ram: 1057216 kbytes
- total ram: 1057216 kbytes
+ duplicate: 108556 pages
- duplicate: 105553 pages
+ normal: 175146 pages
- normal: 179589 pages
+ normal bytes: 700584 kbytes
- normal bytes: 718356 kbytes

Fewer normal bytes...

+ cache size: 67108864 bytes
- cache size: 67108864 bytes
+ xbzrle transferred: 3127 kbytes
- xbzrle transferred: 630 kbytes

...and more compressed pages sent - good.

+ xbzrle pages: 117811 pages
- xbzrle pages: 21527 pages
+ xbzrle cache miss: 18750
- xbzrle cache miss: 179589

And very good improvement on the cache miss rate.

+ xbzrle overflow : 0
- xbzrle overflow : 0

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5cc11c46cf)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
d8e61c490d migration: do not search dirty pages in bulk stage
Avoid searching for dirty pages; just increment the
page offset. All pages are dirty anyway.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 70c8652bf3)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
7e43f15817 migration: do not sent zero pages in bulk stage
During the bulk stage of ram migration, if a page is a
zero page, do not send it at all.
The memory at the destination reads as zero anyway.

Even if there is an madvise with QEMU_MADV_DONTNEED
at the target upon receipt of a zero page, I have observed
that the target starts swapping if the memory is overcommitted.
It seems that the pages are dropped asynchronously.

this patch also updates QMP to return the number of
skipped pages in MigrationStats.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit f1c72795af)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
3d163225bf migration: add an indicator for bulk state of ram migration
the first round of ram transfer is special since all pages
are dirty and thus all memory pages are transferred to
the target. this patch adds a boolean variable to track
this stage.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 78d07ae7ac)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
583c04d8af migration: search for zero instead of dup pages
virtually all dup pages are zero pages. remove
the special is_dup_page() function and use the
optimized buffer_find_nonzero_offset() function
instead.

here buffer_find_nonzero_offset() is used directly
to avoid the unnecessary additional checks in
buffer_is_zero().

raw performance gain checking 1 GByte of zeroed memory
over is_dup_page() is approx. 10-12% with SSE2
and 8-10% with unsigned long arithmetic.

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3edcd7e6eb)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
5f829735d9 cutils: add a function to find non-zero content in a buffer
this adds buffer_find_nonzero_offset() which is a SSE2/Altivec
optimized function that searches for non-zero content in a
buffer.

the function starts full unrolling only after the first few chunks have
been checked one by one. analyzing real memory page data has revealed
that non-zero pages are non-zero within the first 256-512 bits in
most cases. as this function is also heavily used to check for zero memory
pages this tweak has been made to avoid the high setup costs of the fully
unrolled check for non-zero pages.

due to the optimizations used in the function there are restrictions
on buffer address and search length. the function
can_use_buffer_find_nonzero_content() can be used to check if
the function can be used safely.
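
Stripped of the SSE2/Altivec paths and the unrolling heuristics, the scan is
essentially (sketch; alignment and length restrictions as described above):

    #include <stddef.h>

    /* Return the byte offset of the first non-zero unsigned-long-sized chunk,
     * or 'len' if the whole buffer is zero.  'buf' is assumed naturally
     * aligned and 'len' a multiple of sizeof(unsigned long). */
    static size_t find_nonzero_offset_sketch(const void *buf, size_t len)
    {
        const unsigned long *p = buf;
        size_t i;

        for (i = 0; i < len / sizeof(unsigned long); i++) {
            if (p[i] != 0) {
                return i * sizeof(unsigned long);
            }
        }
        return len;
    }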

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 41a259bd2b)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Lieven
51381a8f5e move vector definitions to qemu-common.h
vector optimizations will now be used at various places
not just in is_dup_page() in arch_init.c

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit c61ca00ada)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Juan Quintela
79b7d17769 migration: Improve QMP documentation
Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 817c60457f)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:20 -06:00
Peter Crosthwaite
9205eaf678 iov: Factor out hexdumper
Factor out the hexdumper functionality from iov for all to use. Useful for
creating verbose debug printfery that dumps packet data.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: faaac219c55ea586d3f748befaf5a2788fd271b8.1361853677.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 6ff66f50f0)
[LM: BNC#878350]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:19 -06:00
Tony Breeds
0522639474 block/raw-posix: use seek_hole ahead of fiemap
try_fiemap() uses FIEMAP_FLAG_SYNC which has a significant performance
impact.

Prefer seek_hole() over fiemap() to avoid this impact where possible.
seek_hole is more widely used and, arguably, has potential to be
optimised in the kernel.

Reported-By: Michael Steffens <michael_steffens@posteo.de>
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: Pádraig Brady <pbrady@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7c15903789)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	block/raw-posix.c
2021-03-17 21:09:19 -06:00
Tony Breeds
1d4a975d59 block/raw-posix: Fix disk corruption in try_fiemap
Using fiemap without FIEMAP_FLAG_SYNC is a known corrupter.

Add the FIEMAP_FLAG_SYNC flag to the FS_IOC_FIEMAP ioctl.  This has
the downside of significantly reducing performance.

Reported-By: Michael Steffens <michael_steffens@posteo.de>
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Max Reitz <mreitz@redhat.com>
Cc: Pádraig Brady <pbrady@redhat.com>
Cc: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 38c4d0aea3)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:19 -06:00
Max Reitz
2026c9d65d block/raw-posix: Try both FIEMAP and SEEK_HOLE
The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if
FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA are not even
compiled in in this case. However, there may be implementations which
support the latter but not the former (e.g., NFSv4.2) as well as vice
versa.

To cover both cases, try FIEMAP first (as this will return -ENOTSUP if
not supported instead of returning a failsafe value (everything
allocated as a single extent)) and if that does not work, fall back to
SEEK_HOLE/SEEK_DATA.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 4f11aa8a40)
[BR: BNC#908381]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	block/raw-posix.c
2021-03-17 21:09:19 -06:00
85b5143a9d qdev: Validate hex properties
strtoul(l) might overflow, in which case it'll return '-1' and set
the appropriate error code. So update the calls to strtoul(l) when
parsing hex properties to avoid silent overflows.
And we should be using an intermediate variable to avoid clobbering
the passed-in pointer on error.
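
A sketch of the safer parse described here (hedged; the property plumbing is
left out and the helper name is illustrative):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Parse a hex property into *out only when the whole string converts
     * cleanly and without overflow; *out is left untouched on error. */
    static int parse_hex32_sketch(const char *str, uint32_t *out)
    {
        char *end;
        unsigned long val;

        errno = 0;
        val = strtoul(str, &end, 16);
        if (errno != 0 || end == str || *end != '\0' || val > UINT32_MAX) {
            return -1;
        }
        *out = (uint32_t)val;   /* only clobber the caller's value on success */
        return 0;
    }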

Signed-off-by: Hannes Reinecke <hare@suse.de>

Backported the patch from:
http://lists.nongnu.org/archive/html/qemu-devel/2013-11/msg03950.html
[LM: BNC#852397]
Signed-off-by: Lin Ma <lma@suse.com>
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:19 -06:00
Amos Kong
c8992c2856 add a boot option to do strict boot
Seabios already added a new device type to halt booting.
Qemu can add "HALT" at the end of bootindex string, then
seabios will halt booting after trying to boot from all
selected devices.

This patch adds a new boot option to configure whether to boot
from un-selected devices.

This option only takes effect when the boot priority is changed by
bootindex options; the old style (-boot order=..) will still
try to boot from un-selected devices.

v2: add HALT entry in get_boot_devices_list()
v3: rebase to latest qemu upstream

Signed-off-by: Amos Kong <akong@redhat.com>
Message-id: 1363674207-31496-1-git-send-email-akong@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c8a6ae8bb9)
Signed-off-by: Lin Ma <lma@suse.com>
[BR: BNC#900084]
Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:19 -06:00
Amos Kong
386ce302c5 monitor: introduce query-command-line-options
Libvirt has no way to probe if an option or property is supported.
This patch introduces a new qmp command to query command line
option information. An hmp command isn't added because it's not needed.

Signed-off-by: Amos Kong <akong@redhat.com>
CC: Luiz Capitulino <lcapitulino@redhat.com>
CC: Osier Yang <jyang@redhat.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 1f8f987d34)
[BR: BNC#899144]
Signed-off-by: Bruce Rogers <brogers@suse.com>

Conflicts:
	qapi-schema.json
2021-03-17 21:09:19 -06:00
Petr Matousek
4a8248c367 slirp: udp: fix NULL pointer dereference because of uninitialized socket
When guest sends udp packet with source port and source addr 0,
uninitialized socket is picked up when looking for matching and already
created udp sockets, and later passed to sosendto() where NULL pointer
dereference is hit during so->slirp->vnetwork_mask.s_addr access.

Fix this by checking that the socket is not just a socket stub.

This is CVE-2014-3640.

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Xavier Mehrenberger <xavier.mehrenberger@airbus.com>
Reported-by: Stephane Duverger <stephane.duverger@eads.net>
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-id: 20140918063537.GX9321@dhcp-25-225.brq.redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 01f7cecf00)
[AF: BNC#897654]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
f0fa303aaf usb: fix up post load checks
Correct post load checks:
1. dev->setup_len == sizeof(dev->data_buf)
    seems fine, no need to fail migration
2. When state is DATA, passing index > len
   will cause memcpy with negative length,
   resulting in heap overflow

First of the issues was reported by dgilbert.
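
A sketch of the tightened validation, written over plain integers rather than the real USBDevice fields (illustrative only, not the verbatim patch):

    #include <stdbool.h>
    #include <stddef.h>

    /* Accept the loaded setup state only if later copies into data_buf are safe. */
    static bool setup_state_valid(int setup_index, int setup_len, size_t data_buf_size)
    {
        return setup_index >= 0 && setup_len >= 0 &&
               setup_index <= setup_len &&
               (size_t)setup_len <= data_buf_size;  /* == sizeof(data_buf) is still OK */
    }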

Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 719ffe1f5f)
[AF: BNC#878541]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
33d5bbb8e7 qcow1: Validate image size (CVE-2014-0223)
A huge image size could cause s->l1_size to overflow. Make sure that
images never require a L1 table larger than what fits in s->l1_size.

This cannot only cause unbounded allocations, but also the allocation of
a too small L1 table, resulting in out-of-bounds array accesses (both
reads and writes).

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 46485de0cb)
[AF: BNC#877645; error_setg() -> qerror_report()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
687ff44c54 qcow1: Validate L2 table size (CVE-2014-0222)
Too large L2 table sizes cause unbounded allocations. Images actually
created by qemu-img only have 512 byte or 4k L2 tables.

To keep things consistent with cluster sizes, allow ranges between 512
bytes and 64k (in fact, down to 1 entry = 8 bytes is technically
working, but L2 table sizes smaller than a cluster don't make a lot of
sense).

This also means that the number of bytes on the virtual disk that are
described by the same L2 table is limited to at most 8k * 64k or 2^29,
preventively avoiding any integer overflows.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
(cherry picked from commit 42eb58179b)
[AF: BNC#877642; error_setg() -> qerror_report()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
dce4a6fc84 qcow1: Check maximum cluster size
Huge values for header.cluster_bits cause unbounded allocations (e.g.
for s->cluster_cache) and crash qemu this way. Less huge values may
survive those allocations, but can cause integer overflows later on.

The only cluster sizes that qemu can create are 4k (for standalone
images) and 512 (for images with backing files), so we can limit it
to 64k.
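
A sketch of the kind of bound described above (the field name follows the commit text; this is not the verbatim patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Accept only 512 byte (2^9) .. 64 KB (2^16) clusters at open time. */
    static bool qcow1_cluster_bits_valid(uint32_t cluster_bits)
    {
        return cluster_bits >= 9 && cluster_bits <= 16;
    }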

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
(cherry picked from commit 7159a45b2b)
[AF: error_setg() -> qerror_report(), disabled iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
4fb4d6b29b KVM: Extend dynamic MSI route flush
We have a limited number of IRQ routing entries. To ensure that we don't
exceed that number, we flush dynamically created MSI route entries when
we realize that we're running out of space.

However, we track the GSI count incorrectly. This patch adds a safety net
to make sure we're flushing all dynamic MSI routes when we realize that we
would be exceeding the number space.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
afb255de4a x86 XSAVE: Reconstruct xsave cpuid leafs
Different virtual CPUs implement different capabilities of features that
get reflected in XSAVE depth. So instead of passing in the maximum XSAVE
capabilities, we should only tell the guest as much as it has to see for
the respective chosen vcpu type.

This patch is heavily based on commit 2560f19f but adjusted to apply on
our ancient code base.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
d2b641e419 x86 PMU: Disable vPMU cpuid exposure
On x86 we expose all KVM PMU capabilities back into KVM. Unfortunately our
vPMU emulation breaks Windows Server 2012 R2.

Because we're lacking support for all the vPMU MSRs anyway, disable all PMU
CPUID flags as well, so that Windows is happy and we don't get a half-way
implemented PMU exposed into our guest.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
921b44a765 KVM: Fix GSI number space limit
KVM tells us the number of GSIs it can handle inside the kernel. That value is
basically KVM_MAX_IRQ_ROUTES. However when we try to set the GSI mapping table,
it checks for

    r = -EINVAL;
    if (routing.nr >= KVM_MAX_IRQ_ROUTES)
        goto out;

erroring out even when we're only using all of the GSIs. To make sure we never
hit that limit, let's reduce the number of GSIs we get from KVM by one.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
cae32837da virtio: validate config_len on load
Malformed input can have config_len in migration stream
exceed the array size allocated on destination, the
result will be heap overflow.

To fix, ensure that config_len matches on both sides.
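
The shape of the check, in the same fragment style as the snippets elsewhere in this log (illustrative, not the verbatim patch):

    /* in virtio_load(): refuse state whose config size differs from ours */
    uint32_t config_len = qemu_get_be32(f);
    if (config_len != vdev->config_len) {
        return -EINVAL;
    }
    qemu_get_buffer(f, vdev->config, vdev->config_len);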

CVE-2014-0182

Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>

--

v2: use %ix and %zx to print config_len values
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit a890a2f913)
[AF: BNC#874788]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
81eb2c4322 virtio-net: out-of-bounds buffer write on load
CVE-2013-4149 QEMU 1.3.0 out-of-bounds buffer write in
virtio_net_load()@hw/net/virtio-net.c

>         } else if (n->mac_table.in_use) {
>             uint8_t *buf = g_malloc0(n->mac_table.in_use);

We are allocating a buffer of size n->mac_table.in_use

>             qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);

and then reading n->mac_table.in_use * ETH_ALEN bytes into that
n->mac_table.in_use byte buffer, corrupting memory.

If an adversary controls the state, then the memory written there is
controlled by the adversary.

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 98f93ddd84)
[AF: BNC#864649]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael Roth
9a18e51d76 openpic: avoid buffer overrun on incoming migration
CVE-2013-4534

opp->nb_cpus is read from the wire and used to determine how many
IRQDest elements to read into opp->dst[]. If the value exceeds the
length of opp->dst[], MAX_CPU, opp->dst[] can be overrun with arbitrary
data from the wire.

Fix this by failing migration if the value read from the wire exceeds
MAX_CPU.
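
Roughly, the load path gains a bound check of this shape (illustrative fragment, not the verbatim patch):

    /* after opp->nb_cpus has been read from the stream: */
    if (opp->nb_cpus > MAX_CPU) {
        return -EINVAL;   /* fail migration instead of overrunning opp->dst[] */
    }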

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 73d963c0a7)
[AF: BNC#864811; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
0698489509 ssi-sd: fix buffer overrun on invalid state load
CVE-2013-4537

s->arglen is taken from the wire and used as an index
in ssi_sd_transfer().

Validate it before access.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit a9c380db3b)
[AF: BNC#864391]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Peter Maydell
fde2b7d50e savevm: Ignore minimum_version_id_old if there is no load_state_old
At the moment we require vmstate definitions to set minimum_version_id_old
to the same value as minimum_version_id if they do not provide a
load_state_old handler. Since the load_state_old functionality is
required only for a handful of devices that need to retain migration
compatibility with a pre-vmstate implementation, this means the bulk
of devices have pointless boilerplate. Relax the definition so that
minimum_version_id_old is ignored if there is no load_state_old handler.

Note that under the old scheme we would segfault if the vmstate
specified a minimum_version_id_old that was less than minimum_version_id
but did not provide a load_state_old function, and the incoming state
specified a version number between minimum_version_id_old and
minimum_version_id. Under the new scheme this will just result in
our failing the migration.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 767adce2d9)
[AF: Backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
e6cb432c45 usb: sanity check setup_index+setup_len in post_load
CVE-2013-4541

s->setup_len and s->setup_index are fed into usb_packet_copy as
size/offset into s->data_buf, it's possible for invalid state to exploit
this to load arbitrary data.

setup_len and setup_index should be checked to make sure
they are not negative.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 9f8e9895c5)
[AF: BNC#864802]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
fe7ba41ebc vmstate: s/VMSTATE_INT32_LE/VMSTATE_INT32_POSITIVE_LE/
As the macro verifies the value is positive, rename it
to make its purpose clearer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3476436a44)
[AF: backported (target-arm doesn't use VMState yet)]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
61274a8253 virtio-scsi: fix buffer overrun on invalid state load
CVE-2013-4542

hw/scsi/scsi-bus.c invokes load_request.

 virtio_scsi_load_request does:
    qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));

this probably can make elem invalid, for example,
make in_num or out_num huge, then:

    virtio_scsi_parse_req(s, vs->cmd_vqs[n], req);

will do:

    if (req->elem.out_num > 1) {
        qemu_sgl_init_external(req, &req->elem.out_sg[1],
                               &req->elem.out_addr[1],
                               req->elem.out_num - 1);
    } else {
        qemu_sgl_init_external(req, &req->elem.in_sg[1],
                               &req->elem.in_addr[1],
                               req->elem.in_num - 1);
    }

and this will access out of array bounds.

Note: this adds security checks within assert calls since
SCSIBusInfo's load_request cannot fail.
For now simply disable builds with NDEBUG - there seems
to be little value in supporting these.

Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3c3ce98142)
[AF: BNC#864804, backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
b84de338f9 zaurus: fix buffer overrun on invalid state load
CVE-2013-4540

Within scoop_gpio_handler_update, if prev_level has a high bit set, then
we get bit > 16 and that causes a buffer overrun.

Since prev_level comes from the wire indirectly, this can
happen on an invalid state load.

Similarly for gpio_level and gpio_dir.

To fix, limit to 16 bit.
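
One way to realize the "limit to 16 bit" above is to mask the loaded fields (a sketch; the actual patch may instead narrow the field types):

    /* scoop exposes 16 GPIO lines, so only the low 16 bits are meaningful */
    s->gpio_level &= 0xffff;
    s->gpio_dir   &= 0xffff;
    s->prev_level &= 0xffff;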

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 52f91c3723)
[AF: BNC#864801]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
ea0170cca4 tsc210x: fix buffer overrun on invalid state load
CVE-2013-4539

s->precision, nextprecision, function and nextfunction
come from wire and are used
as idx into resolution[] in TSC_CUT_RESOLUTION.

Validate after load to avoid buffer overrun.

Cc: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5193be3be3)
[AF: BNC#864805]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
99604c72ed ssd0323: fix buffer overun on invalid state load
CVE-2013-4538

s->cmd_len is used as an index in ssd0323_transfer() to store a 32-bit field.
This field might then be supplied by the guest to overwrite a
return addr somewhere. The same goes for the row/col fields, which are indices
into the framebuffer array.

To fix, validate after load.

Additionally, validate that the row/col_start/end are within bounds;
otherwise the guest can provoke an overrun by either setting the _end
field so large that the row++ increments just walk off the end of the
array, or by setting the _start value to something bogus and then
letting the "we hit end of row" logic reset row to row_start.

For completeness, validate mode as well.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit ead7a57df3)
[AF: BNC#864769]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
36cfdceeb6 pxa2xx: avoid buffer overrun on incoming migration
CVE-2013-4533

s->rx_level is read from the wire and used to determine how many bytes
to subsequently read into s->rx_fifo[]. If s->rx_level exceeds the
length of s->rx_fifo[] the buffer can be overrun with arbitrary data
from the wire.

Fix this by validating rx_level against the size of s->rx_fifo.

Cc: Don Koch <dkoch@verizon.com>
Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Don Koch <dkoch@verizon.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit caa881abe0)
[AF: BNC#864655]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
a60c88f157 virtio: validate num_sg when mapping
CVE-2013-4535
CVE-2013-4536

In both virtio-block and virtio-serial,
VirtQueueElements are read in as buffers and passed to
virtqueue_map_sg(), where num_sg is taken from the wire and can force
writes to indices beyond VIRTQUEUE_MAX_SIZE.

To fix, validate num_sg.
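
The bound inside virtqueue_map_sg() ends up looking roughly like this (illustrative, not the verbatim patch):

    if (num_sg > VIRTQUEUE_MAX_SIZE) {
        error_report("virtio: map attempt out of bounds: %u", num_sg);
        exit(1);   /* the state is untrusted; bail out rather than overrun sg[] */
    }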

Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 36cf2a3713)
[AF: BNC#864665]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael Roth
f0fa74faab virtio: avoid buffer overrun on incoming migration
CVE-2013-6399

vdev->queue_sel is read from the wire, and later used in the
emulation code as an index into vdev->vq[]. If the value of
vdev->queue_sel exceeds the length of vdev->vq[], currently
allocated to be VIRTIO_PCI_QUEUE_MAX elements, subsequent PIO
operations such as VIRTIO_PCI_QUEUE_PFN can be used to overrun
the buffer with arbitrary data originating from the source.

Fix this by failing migration if the value from the wire exceeds
VIRTIO_PCI_QUEUE_MAX.
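
The check added to the load path is roughly (illustrative fragment):

    /* after vdev->queue_sel has been read from the stream: */
    if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) {
        return -1;   /* fail the migration rather than index past vdev->vq[] */
    }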

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 4b53c2c72c)
[AF: BNC#864814]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
0f15235aa3 vmstate: fix buffer overflow in target-arm/machine.c
CVE-2013-4531

cpreg_vmstate_indexes is a VARRAY_INT32. A negative value for
cpreg_vmstate_array_len will cause a buffer overflow.

VMSTATE_INT32_LE was supposed to protect against this
but doesn't because it doesn't validate that input is
non-negative.

Fix this macro to validate the value appropriately.

The only other user of VMSTATE_INT32_LE doesn't
ever use negative numbers so it doesn't care.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit d2ef4b61fe)
[AF: BNC#864796, backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dr. David Alan Gilbert
39acd639aa Fix vmstate_info_int32_le comparison/assign
Fix the comparison of vmstate_info_int32_le so that it succeeds if the loaded
value is (l)ess than or (e)qual.

When the comparison succeeds, assign the loaded value.
  This is a change in behaviour, but I think it matches the original intent:
  the idea is to check that the version/size of the thing you're loading is
  less than some limit, but you might well want to do something based on
  the actual version/size in the file.

Fix up the comment and name text.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 24a370ef23)
[AF: Backported from vmstate.c]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
f9e6598dbf pl022: fix buffer overun on invalid state load
CVE-2013-4530

pl022.c did not bounds check tx_fifo_head and
rx_fifo_head after loading them from the file and
before they are used to index into an array.

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit d8d0a0bc7e)
[AF: BNC#864682]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
5b2547bea6 hw/pci/pcie_aer.c: fix buffer overruns on invalid state load
CVE-2013-4529
hw/pci/pcie_aer.c    pcie aer log can overrun the buffer if log_num is
                     too large

There are two issues in this file:
1. log_max from the remote can be larger than the local one;
the buffer will then be overrun with data coming from the state file.
2. log_num can be too large; then we get data corruption,
again with an overflow, but not adversary controlled.

Fix both issues.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5f691ff91d)
[AF: BNC#864678]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
07054ebc47 hpet: fix buffer overrun on invalid state load
CVE-2013-4527 hw/timer/hpet.c buffer overrun

hpet timers are stored as a VARRAY with a uint8 size, but the static array has 32 entries

To fix, make sure num_timers is valid using VMSTATE_VALID hook.
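
The validation hook boils down to a predicate over the loaded timer count, roughly (a sketch over a plain integer; the real hook receives the device state, and the exact bounds used by the patch may differ):

    #include <stdbool.h>
    #include <stdint.h>

    /* true means the incoming state is acceptable; the static array has 32 slots */
    static bool hpet_num_timers_valid(uint8_t num_timers)
    {
        return num_timers >= 1 && num_timers <= 32;
    }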

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 3f1c49e213)
[AF: BNC#864673]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
8253e0d82e vmstate: add VMSTATE_VALIDATE
Validate state using VMS_ARRAY with num = 0 and VMS_MUST_EXIST

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 4082f0889b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
23adb56bf0 vmstate: add VMS_MUST_EXIST
Can be used to verify a required field exists or validate
state in some other way.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5bf81c8d63)
[AF: Backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
c4cd1f6e7a ahci: fix buffer overrun on invalid state load
CVE-2013-4526

Within hw/ide/ahci.c, VARRAY refers to ports which is also loaded.  So
we use the old version of ports to read the array but then allow any
value for ports.  This can cause the code to overflow.

There's no reason to migrate ports - it never changes.
So just make sure it matches.

Reported-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit ae2158ad6c)
[AF: BNC#864671]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
a2b29c3489 virtio: out-of-bounds buffer write on invalid state load
CVE-2013-4151 QEMU 1.0 out-of-bounds buffer write in
virtio_load@hw/virtio/virtio.c

So we have this code since way back when:

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);

The array of vqs has size VIRTIO_PCI_QUEUE_MAX, so
on invalid input this will write beyond the end of the buffer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit cc45995294)
[AF: BNC#864653]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
d058533c65 virtio-net: out-of-bounds buffer write on invalid state load
CVE-2013-4150 QEMU 1.5.0 out-of-bounds buffer write in
virtio_net_load()@hw/net/virtio-net.c

This code is in hw/net/virtio-net.c:

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

The number of vqs is max_queues, so if we get invalid input here,
for example max_queues = 2 and curr_queues = 3, we get a
write beyond the end of the buffer, with data that comes from the
wire.

This might be used to corrupt qemu memory in hard to predict ways.
Since we have lots of function pointers around, RCE might be possible.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit eea750a562)
[AF: BNC#864650]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
4c6198d076 virtio-net: fix buffer overflow on invalid state load
CVE-2013-4148 QEMU 1.0 integer conversion in
virtio_net_load()@hw/net/virtio-net.c

Deals with loading a corrupted savevm image.

>         n->mac_table.in_use = qemu_get_be32(f);

in_use is an int, so it can become negative when assigned a 32-bit unsigned value.

>         /* MAC_TABLE_ENTRIES may be different from the saved image */
>         if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {

passing this check ^^^

>             qemu_get_buffer(f, n->mac_table.macs,
>                             n->mac_table.in_use * ETH_ALEN);

with such an in_use value, "n->mac_table.in_use * ETH_ALEN" can become
positive again and bigger than mac_table.macs. For example 0x81000000
satisfies this condition when ETH_ALEN is 6.

Fix it by making the value unsigned.
For consistency, change first_multi as well.

Note: all call sites were audited to confirm that
making them unsigned didn't cause any issues:
it turns out we actually never do math on them,
so it's easy to validate because both values are
always <= MAC_TABLE_ENTRIES.

Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 71f7fe48e1)
[AF: BNC#864812; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Michael S. Tsirkin
ae2d6aa056 virtio-net: fix guest-triggerable buffer overrun
When VM guest programs multicast addresses for
a virtio net card, it supplies a 32 bit
entries counter for the number of addresses.
These addresses are read into tail portion of
a fixed macs array which has size MAC_TABLE_ENTRIES,
at offset equal to in_use.

To avoid overflow of this array by guest, qemu attempts
to test the size as follows:
-    if (in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {

however, as mac_data.entries is uint32_t, this sum
can overflow, e.g. if in_use is 1 and mac_data.entries
is 0xffffffff then in_use + mac_data.entries will be 0.
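
An overflow-safe way to express the same bound, relying on the invariant that in_use never exceeds MAC_TABLE_ENTRIES (a sketch, not the verbatim patch):

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        /* the uint32_t addition can no longer wrap; copying is safe */
    }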

Qemu will then read guest supplied buffer into this
memory, overflowing buffer on heap.

CVE-2014-0150

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1397218574-25058-1-git-send-email-mst@redhat.com
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit edc2438512)
[AF: Resolves BNC#873235; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Benoît Canet
0c8626fc06 ide: Correct improper smart self test counter reset in ide core.
The SMART self test counter was incorrectly being reset to zero,
not 1. This had the effect that on every 21st SMART EXECUTE OFFLINE:
 * We would write off the beginning of a dynamically allocated buffer
 * We forgot the SMART history
Fix this.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
Message-id: 1397336390-24664-1-git-send-email-benoit.canet@irqsave.net
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Cc: qemu-stable@nongnu.org
Acked-by: Kevin Wolf <kwolf@redhat.com>
[PMM: tweaked commit message as per suggestions from Markus]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

(cherry picked from commit 940973ae0b)
[AF: Addresses CVE-2014-2894; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Jason Wang
8643a13d4a virtio: properly validate address before accessing config
There are several issues in the current checking:

- The check was based on the subtraction of unsigned values, which can overflow
- It was done after .{set|get}_config(), which can lead to a crash when config_len
  is zero since vdev->config is NULL

Fix this by:

- Validating the address in virtio_pci_config_{read|write}() before
  .{set|get}_config
- Using addition instead of subtraction to do the validation

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Petr Matousek <pmatouse@redhat.com>
Message-id: 1367905369-10765-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 5f5a131865)
[AF: BNC#817593, CVE-2013-2016; backported]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
a125af5b32 parallels: Sanity check for s->tracks (CVE-2014-0142)
This avoids a possible division by zero.

Convert s->tracks to unsigned as well because it feels better than
surviving just because the results of calculations with s->tracks are
converted to unsigned anyway.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 9302e863aa)
[AF: BNC#870439; error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
b6d96fa269 parallels: Fix catalog size integer overflow (CVE-2014-0143)
The first test case would cause a huge memory allocation, leading to a
qemu abort; the second one to a too small malloc() for the catalog
(smaller than s->catalog_size), which causes a read-only out-of-bounds
array access and on big endian hosts an endianess conversion for an
undefined memory area.

The sample image used here is not an original Parallels image. It was
created using a hex editor on the basis of the struct that qemu uses.
Good enough for trying to crash the driver, but not for ensuring
compatibility.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit afbcc40bee)
[AF: BNC#870439; error_setg() -> qerror_report(), dropped iotests]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
7ce6986d8e qcow2: Limit snapshot table size
Even with a limit of 64k snapshots, each snapshot could have a filename
and an ID with up to 64k, which would still lead to pretty large
allocations, which could potentially lead to qemu aborting. Limit the
total size of the snapshot table to an average of 1k per entry when
the limit of 64k snapshots is fully used. This should be plenty for any
reasonable user.

This also fixes potential integer overflows of s->snapshot_size.

Suggested-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5dae6e30c5)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
8932352174 qcow2: Check maximum L1 size in qcow2_snapshot_load_tmp() (CVE-2014-0143)
This avoids an unbounded allocation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6a83f8b5be)
[AF: BNC#870439; rebased on report_unsupported(),
     error_setg() -> error_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
b6f0cb127e qcow2: Fix L1 allocation size in qcow2_snapshot_load_tmp() (CVE-2014-0145)
For the L1 table to loaded for an internal snapshot, the code allocated
only enough memory to hold the currently active L1 table. If the
snapshot's L1 table is actually larger than the current one, this leads
to a buffer overflow.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c05e4667be)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
6868abd2ad qcow2: Fix NULL dereference in qcow2_open() error path (CVE-2014-0146)
The qcow2 code assumes that s->snapshots is non-NULL if s->nb_snapshots
!= 0. By having the initialisation of both fields separated in
qcow2_open(), any error occuring in between would cause the error path
to dereference NULL in qcow2_free_snapshots() if the image had any
snapshots.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 11b128f406)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
144778eb92 qcow2: Fix copy_sectors() with VM state
bs->total_sectors is not the highest possible sector number that could
be involved in a copy on write operation: VM state is after the end of
the virtual disk. This resulted in wrong values for the number of
sectors to be copied (n).

The code that checks for the end of the image isn't required any more
because the code hasn't been calling the block layer's bdrv_read() for a
long time; instead, it directly calls qcow2_readv(), which doesn't error
out on VM state sector numbers.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6b7d4c5558)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
8d3c5b239a block: Limit request size (CVE-2014-0143)
Limiting the size of a single request to INT_MAX not only fixes a
direct integer overflow in bdrv_check_request() (which would only
trigger bad behaviour with ridiculously huge images, as in close to
2^64 bytes), but can also prevent overflows in all block drivers.
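
The clamp in the request checking path is roughly (illustrative; BDRV_SECTOR_SIZE is QEMU's 512 byte sector size):

    /* reject any single request larger than INT_MAX bytes */
    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }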

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8f4754ede5)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
d60d49abd9 dmg: prevent chunk buffer overflow (CVE-2014-0145)
Both compressed and uncompressed I/O is buffered.  dmg_open() calculates
the maximum buffer size needed from the metadata in the image file.

There is currently a buffer overflow since ->lengths[] is accounted
against the maximum compressed buffer size but actually uses the
uncompressed buffer:

  switch (s->types[chunk]) {
  case 1: /* copy */
      ret = bdrv_pread(bs->file, s->offsets[chunk],
                       s->uncompressed_chunk, s->lengths[chunk]);

We must account against the maximum uncompressed buffer size for type=1
chunks.

This patch fixes the maximum buffer size calculation to take into
account the chunk type.  It is critical that we update the correct
maximum since there are two buffers ->compressed_chunk and
->uncompressed_chunk.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f0dce23475)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
01c88e1e46 dmg: use uint64_t consistently for sectors and lengths
The DMG metadata is stored as uint64_t, so use the same type for
sector_num.  int was a particularly poor choice since it is only 32-bit
and would truncate large values.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 686d7148ec)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
02ceca2065 dmg: sanitize chunk length and sectorcount (CVE-2014-0145)
Chunk length and sectorcount are used for decompression buffers as well
as the bdrv_pread() count argument.  Ensure that they have reasonable
values so neither memory allocation nor conversion from uint64_t to int
will cause problems.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c165f77580)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
d3086a5380 dmg: use appropriate types when reading chunks
Use the right types instead of signed int:

  size_t new_size;

  This is a byte count for g_realloc() that is calculated from uint32_t
  and size_t values.

  uint32_t chunk_count;

  Use the same type as s->n_chunks, which is used together with
  chunk_count.

This patch is a cleanup and does not fix bugs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit eb71803b04)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
c6bf1d09dc dmg: drop broken bdrv_pread() loop
It is not necessary to check errno for EINTR and the block layer does
not produce short reads.  Therefore we can drop the loop that attempts
to read a compressed chunk.

The loop is buggy because it incorrectly adds the transferred bytes
twice:

  do {
      ret = bdrv_pread(...);
      i += ret;
  } while (ret >= 0 && ret + i < s->lengths[chunk]);

Luckily we can drop the loop completely and perform a single
bdrv_pread().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b404bf8542)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
c1d52eaa3c dmg: prevent out-of-bounds array access on terminator
When a terminator is reached the base for offsets and sectors is stored.
The following records that are processed will use this base value.

If the first record we encounter is a terminator, then calculating the
base values would result in out-of-bounds array accesses.  Don't do
that.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 73ed27ec28)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
9adbfc9661 dmg: coding style and indentation cleanup
Clean up the mix of tabs and spaces, as well as the coding style
violations in block/dmg.c.  There are no semantic changes since this
patch simply reformats the code.

This patch is necessary before we can make meaningful changes to this
file, due to the inconsistent formatting and confusing indentation.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2c1885adcf)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
2fcca8dc9f qcow2: Fix new L1 table size check (CVE-2014-0143)
The size in bytes is assigned to an int later, so check that instead of
the number of entries.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit cab60de930)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
ac79fe7e15 qcow2: Catch some L1 table index overflows
This catches the situation that is described in the bug report at
https://bugs.launchpad.net/qemu/+bug/865518 and goes like this:

    $ qemu-img create -f qcow2 huge.qcow2 $((1024*1024))T
    Formatting 'huge.qcow2', fmt=qcow2 size=1152921504606846976 encryption=off cluster_size=65536 lazy_refcounts=off
    $ qemu-io /tmp/huge.qcow2 -c "write $((1024*1024*1024*1024*1024*1024 - 1024)) 512"
    Segmentation fault

With this patch applied the segfault will be avoided, however the case
will still fail, though gracefully:

    $ qemu-img create -f qcow2 /tmp/huge.qcow2 $((1024*1024))T
    Formatting 'huge.qcow2', fmt=qcow2 size=1152921504606846976 encryption=off cluster_size=65536 lazy_refcounts=off
    qemu-img: The image size is too large for file format 'qcow2'

Note that even long before these overflow checks kick in, you get
insanely high memory usage (up to INT_MAX * sizeof(uint64_t) = 16 GB for
the L1 table), so with somewhat smaller image sizes you'll probably see
qemu aborting for a failed g_malloc().

If you need huge image sizes, you should increase the cluster size to
the maximum of 2 MB in order to get higher limits.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2cf7cfa1cd)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
9d98758f5c qcow2: Protect against some integer overflows in bdrv_check
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 0abe740f1d)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
ed835bfeeb qcow2: Fix types in qcow2_alloc_clusters and alloc_clusters_noref
In order to avoid integer overflows.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit bb572aefbd)
[AF: BNC#870439; rebased]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
61108e6f74 qcow2: Check new refcount table size on growth
If the size becomes larger than what qcow2_open() would accept, fail the
growing operation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2b5d5953ee)
[AF: BNC#870439; rebased on report_unsupported()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
db3621a1c5 qcow2: Avoid integer overflow in get_refcount (CVE-2014-0143)
This ensures that the checks catch all invalid cluster indexes
instead of returning the refcount of a wrong cluster.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit db8a31d11d)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
e5f4152b4d qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)
free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapshot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b106ad9185)
[AF: BNC#870439; dropped QCOW2_DISCARD_NEVER argument to
     qcow2_free_clusters() and update_refcount(), dropped iotests]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
452e3119e7 qcow2: Fix backing file name length check
len could become negative and would pass the check then. Nothing bad
happened because bdrv_pread() happens to return an error for negative
length values, but make variables for sizes unsigned anyway.

This patch also changes the behaviour to error out on invalid lengths
instead of silently truncating it to 1023.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6d33e8e7dc)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
c53fabbda9 qcow2: Validate active L1 table offset and size (CVE-2014-0144)
This avoids an unbounded allocation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 2d51c32c4b)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
390abc9bbe qcow2: Validate snapshot table offset/size (CVE-2014-0144)
This avoids unbounded memory allocation and fixes a potential buffer
overflow on 32 bit hosts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ce48f2f441)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
6c9ede177a qcow2: Validate refcount table offset
The end of the refcount table must not exceed INT64_MAX so that integer
overflows are avoided.

Also check for misaligned refcount table. Such images are invalid and
probably the result of data corruption. Error out to avoid further
corruption.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8c7de28305)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
5ad5d31414 qcow2: Check refcount table size (CVE-2014-0144)
Limit the in-memory reference count table size to 8 MB, it's enough in
practice. This fixes an unbounded allocation as well as a buffer
overflow in qcow2_refcount_init().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5dab2faddc)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
d0c7a2de8f qcow2: Check backing_file_offset (CVE-2014-0144)
Header, header extension and the backing file name must all be stored in
the first cluster. Setting the backing file to a much higher value
allowed header extensions to become much bigger than we want them to be
(unbounded allocation).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a1b3955c94)
[AF: BNC#870439; error_setg() -> report_unsupported(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
7c628f176e qcow2: Check header_length (CVE-2014-0144)
This fixes an unbounded allocation for s->unknown_header_fields.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 24342f2cae)
[AF: BNC#870439; error_setg() -> unsupported_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Fam Zheng
152474e9cd curl: check data size before memcpy to local buffer. (CVE-2014-0144)
curl_read_cb is callback function for libcurl when data arrives. The
data size passed in here is not guaranteed to be within the range of
request we submitted, so we may overflow the guest IO buffer. Check the
real size we have before memcpy to buffer to avoid overflow.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 6d4b9e55fc)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Jeff Cody
054771e1b4 vdi: add bounds checks for blocks_in_image and disk_size header fields (CVE-2014-0144)
The maximum blocks_in_image is 0xffffffff / 4, which also limits the
maximum disk_size for a VDI image to 1024TB.  Note that this is the maximum
size that QEMU will currently support with this driver, not necessarily the
maximum size allowed by the image format.

This also fixes an incorrect error message, a bug introduced by commit
5b7aa9b56d (Reported by Stefan Weil)

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 63fa06dc97)
[AF: BNC#870439; changed error_setg() -> logout()]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
85c8e398c4 vpc: Validate block size (CVE-2014-0142)
This fixes some cases of division by zero crashes.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5e71dfad76)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Jeff Cody
38026b9b14 vpc/vhd: add bounds check for max_table_entries and block_size (CVE-2014-0144)
This adds checks to make sure that max_table_entries and block_size
are in sane ranges.  Memory is allocated based on max_table_entries,
and block_size is used to calculate indices into that allocated
memory, so if these values are incorrect that can lead to potential
unbounded memory allocation, or invalid memory accesses.

Also, the allocation of the pagetable is changed from g_malloc0()
to qemu_blockalign().

Signed-off-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 97f1c45c6f)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
d67e105f86 bochs: Fix bitmap offset calculation
32 bit truncation could let us access the wrong offset in the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit a9ba36a45d)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
ac2a60feb2 bochs: Check extent_size header field (CVE-2014-0142)
This fixes two possible division by zero crashes: In bochs_open() and in
seek_to_sector().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 8e53abbc20)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
c612c16a66 bochs: Check catalog_size header field (CVE-2014-0143)
It should neither become negative nor allow unbounded memory
allocations. This fixes aborts in g_malloc() and an s->catalog_bitmap
buffer overflow on big endian hosts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e3737b820b)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
3b37554011 bochs: Use unsigned variables for offsets and sizes (CVE-2014-0147)
Gets us rid of integer overflows resulting in negative sizes which
aren't correctly checked.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 246f65838d)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Kevin Wolf
12aaa72057 bochs: Unify header structs and make them QEMU_PACKED
This is an on-disk structure, so offsets must be accurate.

Before this patch, sizeof(bochs) != sizeof(header_v1), which makes the
memcpy() between both invalid. We're lucky enough that the destination
buffer happened to be the larger one, and the memcpy size to be taken
from the smaller one, so we didn't get a buffer overflow in practice.

This patch unifies the both structures, eliminating the need to do a
memcpy in the first place. The common fields are extracted to the top
level of the struct and the actually differing part gets a union of the
two versions.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 3dd8a6763b)
[AF: BNC#870439]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
b95824b29d block/cloop: fix offsets[] size off-by-one
cloop stores the number of compressed blocks in the n_blocks header
field.  The file actually contains n_blocks + 1 offsets, where the extra
offset is the end-of-file offset.

The following line in cloop_read_block() results in an out-of-bounds
offsets[] access:

    uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];

This patch allocates and loads the extra offset so that
cloop_read_block() works correctly when the last block is accessed.

Notice that we must free s->offsets[] unconditionally now since there is
always an end-of-file offset.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 42d43d35d9)
[AF: BNC#870439; dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
48d335bdd0 block/cloop: refuse images with bogus offsets (CVE-2014-0144)
The offsets[] array allows efficient seeking and tells us the maximum
compressed data size.  If the offsets are bogus the maximum compressed
data size will be unrealistic.

This could cause g_malloc() to abort and bogus offsets mean the image is
broken anyway.  Therefore we should refuse such images.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f56b9bc3ae)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
8816866725 block/cloop: refuse images with huge offsets arrays (CVE-2014-0144)
Limit offsets_size to 512 MB so that:

1. g_malloc() does not abort due to an unreasonable size argument.

2. offsets_size does not overflow the bdrv_pread() int size argument.

This limit imposes a maximum image size of 16 TB at 256 KB block size.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7b103b36d6)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
d910bde971 block/cloop: prevent offsets_size integer overflow (CVE-2014-0143)
The following integer overflow in offsets_size can lead to out-of-bounds
memory stores when n_blocks has a huge value:

    uint32_t n_blocks, offsets_size;
    [...]
    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    [...]
    s->n_blocks = be32_to_cpu(s->n_blocks);

    /* read offsets */
    offsets_size = s->n_blocks * sizeof(uint64_t);
    s->offsets = g_malloc(offsets_size);

    [...]

    for(i=0;i<s->n_blocks;i++) {
        s->offsets[i] = be64_to_cpu(s->offsets[i]);

offsets_size can be smaller than n_blocks due to integer overflow.
Therefore s->offsets[] is too small when the for loop byteswaps offsets.

This patch refuses to open files if offsets_size would overflow.

Note that changing the type of offsets_size is not a fix since 32-bit
hosts still only have 32-bit size_t.
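
A sketch of the overflow guard, placed before the multiplication quoted
above (the exact form in the patch may differ):

    if (s->n_blocks > UINT32_MAX / sizeof(uint64_t)) {
        return -EINVAL;     /* n_blocks * 8 would not fit in 32 bits */
    }
    offsets_size = s->n_blocks * sizeof(uint64_t);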

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 509a41bab5)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
ed924dda52 block/cloop: validate block_size header field (CVE-2014-0144)
Avoid unbounded s->uncompressed_block memory allocation by checking that
the block_size header field has a reasonable value.  Also enforce the
assumption that the value is a non-zero multiple of 512.

These constraints conform to cloop 2.639's code so we accept existing
image files.
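
Sketched out, the constraints read roughly like this (the upper bound shown
is illustrative):

    if (s->block_size == 0 || (s->block_size % 512) != 0) {
        return -EINVAL;             /* must be a non-zero multiple of 512 */
    }
    if (s->block_size > 64 * 1024 * 1024) {
        return -EINVAL;             /* cap the uncompressed_block allocation */
    }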

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit d65f97a82c)
[AF: BNC#870439; changed error_setg() -> qerror_report(), dropped iotest]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Asias He
f48539108e scsi: Allocate SCSITargetReq r->buf dynamically [CVE-2013-4344]
r->buf is hardcoded to 2056 bytes, which is (256 + 1) * 8, allowing 256 LUNs at
most. If more than 256 LUNs are specified by the user, we get a buffer
overflow in scsi_target_emulate_report_luns.

To fix, we allocate the buffer dynamically.
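
Roughly, the idea is the following (a sketch with an illustrative helper,
not the exact patch; a REPORT LUNS reply carries an 8-byte header plus
8 bytes per LUN):

    n_luns = count_luns_on_target(r->req.dev);      /* hypothetical helper */
    r->len = (n_luns + 1) * 8;                      /* header + one entry per LUN */
    r->buf = g_malloc0(r->len);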

Signed-off-by: Asias He <asias@redhat.com>
Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 846424350b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Gerd Hoffmann
8c68c03a81 usb: sanity check setup_index+setup_len in post_load
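
The idea is to reject incoming migration state whose setup_index/setup_len
could index outside setup_buf[]; roughly (a sketch, not necessarily the exact
bounds used by the patch):

    static int usb_device_post_load(void *opaque, int version_id)
    {
        USBDevice *dev = opaque;

        if (dev->setup_index < 0 || dev->setup_len < 0 ||
            dev->setup_index > dev->setup_len ||
            dev->setup_len > sizeof(dev->setup_buf)) {
            return -EINVAL;     /* refuse bogus migration data */
        }
        return 0;
    }
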
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c60174e847)
[AF: BNC#864802 / CVE-2013-4541]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
7691134cff s390-ccw.img: Fix sporadic errors with ccw boot image - initialize css
We have to set the cssid to 0, otherwise the stsch code will
return an operand exception without the m bit. In the same way
we should set m=0.

This case was triggered in some cases during reboot, if for some
reason the location of blk_schid.cssid contains 1 and m was 0.
Turns out that the qemu elf loader does not zero out the bss section
on reboot.

The symptom was a dump of the old kernel with several areas
overwritten. The bootloader does not register a program check
handler, so a bios exception jumped back into the old kernel.

Let's just use a local struct with a designated initializer. That
will guarantee that all other subelements are initialized to 0.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 5d739a4787)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
39f3f0461f s390-ccw.img: Fix sporadic reboot hangs: Initialize next_idx
The current code does not initialize next_idx in the virtio ring.
As the ccw bios will always use guest memory at a fixed location,
this queue index might be != 0 after a reboot.
Let's make the initialization explicit.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit d1028f1b5b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
abe2609e03 s390/ipl: Fix waiting for virtio processing
The guest side must not manipulate the index for the used buffers. Instead,
remember the state of the used buffer locally and wait until it has moved.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 441ea695f9)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
8eb51e7d14 s390/ipl: Fix spurious errors in virtio
With the ccw ipl code sometimes an error message like
"virtio: trying to map MMIO memory" or
"Guest moved used index from %u to %u" appeared. Turns out
that the ccw bios did not zero out the vring, which might
cause stale values in avail->idx and friends, especially
on reboot.

Let's zero out the relevant fields. To activate the patch we
need to rebuild s390-ccw.img as well.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-id: 1369309901-418-1-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 39c93c67c5)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dominik Dingel
de942d0024 S390: BIOS boot from given device
Use the passed device; if there is no device, use the first applicable device.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ff151f4ec9)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
d08d0016d4 s390-ccw.img: Get queue config from host.
Ask the host about the configuration instead of guessing it.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit abbbe3de4a)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
3ac361387f s390-ccw.img: Rudimentary error checking.
Try to handle at least some of the errors that may happen.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0f3f1f302f)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
0fe3cae764 s390-ccw.img: Enhance drain_irqs().
- Use tpi + tsch to get interrupts.
- Return an error if the irb indicates problems.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 776e7f0f21)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
73e87d269c s390-ccw.img: Detect devices with stsch.
stsch is the canonical way to detect devices. As a bonus, we can
abort the loop if we get cc 3, and we need to check only the valid
devices (dnv set).

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 22d67ab55a)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
75c54b01b4 s390-ccw.img: Fix compile warning in s390 ccw virtio code
Let's fix this gcc warning:

virtio.c: In function ‘vring_send_buf’:
virtio.c:125:35: error: operation on ‘vr->next_idx’ may be undefined
[-Werror=sequence-point]

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit dc03640b58)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
700961a5ce s390-ccw.img: Take care of the elf->img transition
We have to call strip with s390-ccw.elf as input and
s390-ccw.img as output

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 6328801f19)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
1927c5a842 s390-ccw.img: replace while loop with a disabled wait on s390 bios
Don't waste cpu power on an error condition. Let's stop the guest
with a disabled wait.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 7f61cbc108)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
6dcbbf3e70 S390: ccw firmware: Add Makefile
This patch adds a makefile, so we can build our ccw firmware. Also
add the resulting binaries to .gitignore, so that nobody is annoyed
they might be in the tree.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit b462fcd57c)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
e84b764b58 S390: ccw firmware: Add bootmap interpreter
On s390, there is an architected boot map format that we can read to
boot a certain entry off the disk. Implement a simple reader for this
that always boots the first (default) entry.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 685d49a63e)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
c00bd5404d S390: ccw firmware: Add glue header
Like all great programs, we have to call between different functions in
different object files. And all of them need a common ground of defines.

Provide a file that provides these defines.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit c9c39d3b5e)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
1a197d51b2 S390: ccw firmware: Add virtio device drivers
In order to boot, we need to be able to access a virtio-blk device through
the CCW bus. Implement support for this.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 1e17c2c15b)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
b1768718de S390: ccw firmware: Add sclp output
In order to communicate with the user, we need an I/O mechanism the user
can read. Implement SCLP ASCII support, which happens to be the default
in the s390 ccw machine.

This file is missing read support for now. It can only print messages.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0369b2eb07)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
b2a9daf83c S390: ccw firmware: Add main program
This C file is the main driving piece of the s390 ccw firmware. It
provides a search for a workable block device, sets it as the default
to boot off of and boots from it.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 92f2ca38b0)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
213624f969 S390: ccw firmware: Add start assembly
We want to write most of our code in C, so add a small assembly
stub that jumps straight into C code for us to continue booting.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 80fea6e893)
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Bruce Rogers
6104825124 vnc: provide fake color map
Our current VNC code does not handle color maps (aka non-true-color) at all
and aborts if a client requests them. There are 2 major issues with this:

 1) A VNC viewer on an 8-bit X11 system may request color maps
 2) RealVNC _always_ starts requesting color maps, then moves on to full color

In order to support these 2 use cases, let's just create a fake color map
that covers exactly our normal true color 8 bit color space. That way we don't
lose anything over a client that wants true color.

Reported-by: Sascha Wehnert <swehnert@suse.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Andreas Färber
7cc4beb7e8 acpi_piix4: Fix migration from SLE11 SP2
qemu-kvm 0.15 uses the same GPE format as qemu 1.4, but as version 2
rather than 3.

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Andreas Färber
c45f394fa1 i8254: Fix migration from SP2
qemu-kvm 0.15 had a VMSTATE_UINT32(flags, PITState) field that
qemu 1.4 does not have.

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Andreas Färber
28cd4b8a28 vga: Raise VRAM to 16 MiB for pc-0.15 and below
qemu-kvm.git commit a7fe0297840908a4fd65a1cf742481ccd45960eb
(Extend vram size to 16MB) deviated from qemu.git since kvm-61, and only
in commit 9e56edcf8d (vga: raise default
vgamem size) did qemu.git adjust the VRAM size for v1.2.

Add compatibility properties so that up to and including pc-0.15 we
maintain migration compatibility with qemu-kvm rather than QEMU and
from pc-1.0 on with QEMU (last qemu-kvm release was 1.2).

Addresses part of BNC#812836.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Jan Kiszka
7f0e8c8d44 pcnet: Flush queued packets on end of STOP state
Analogously to other NICs, we have to inform the network layer when
the can_receive handler will no longer report 0. Without this, we may
get stuck waiting on queued incoming packets.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ee76c1f821)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Stefan Hajnoczi
be93f8e544 rtl8139: flush queued packets when RxBufPtr is written
Net queues support efficient "receive disable".  For example, tap's file
descriptor will not be polled while its peer has receive disabled.  This
saves CPU cycles for needlessly copying and then dropping packets which
the peer cannot receive.

rtl8139 is missing the qemu_flush_queued_packets() call that wakes the
queue up when receive becomes possible again.

As a result, the Windows 7 guest driver reaches a state where the
rtl8139 cannot receive packets.  The driver has actually refilled the
receive buffer but we never resume reception.

The bug can be reproduced by running a large FTP 'get' inside a Windows
7 guest:

  $ qemu -netdev tap,id=tap0,...
         -device rtl8139,netdev=tap0

The Linux guest driver does not trigger the bug, probably due to a
different buffer management strategy.
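
The fix amounts to waking the queue from the RxBufPtr write handler once the
guest has refilled the receive buffer, roughly (a sketch; the exact call
site may differ):

    /* after the guest updates RxBufPtr, reception may be possible again */
    qemu_flush_queued_packets(qemu_get_queue(s->nic));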

Reported-by: Oliver Francke <oliver.francke@filoo.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 00b7ade807)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
04e0bfdd48 s390/ipl: Fix boot order
The latest ipl code adaptions collided with some of the virtio
refactoring rework. This resulted in always booting the first
disk. Let's fix booting from a given ID.
The new code also checks for command lines without bootindex to
avoid random behaviour when accessing dev_st (==0).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 5c8ded6ef5)

[AF: virtio refactoring not applicable, revert dev_st cast change]
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
a9fb15b8ea s390x/css: Fix concurrent sense.
Fix an off-by-one error when indicating availablity of concurrent
sense data.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 8312976e73)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Cornelia Huck
a3831a43e3 virtio-ccw: Fix unsetting of indicators.
Interpretation of the ccws to register (configuration) indicators contained
a thinko: We want to disallow reading from 0, but setting the indicator
pointer to 0 is fine.

Let's fix the handling for CCW_CMD_SET{,_CONF}_IND.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit d1db1fa8df)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Christian Borntraeger
bcb164497b s390/virtio-ccw: Fix virtio reset
On virtio reset we must reset the indicator to avoid stale interrupts,
e.g. after a reset.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dominik Dingel
1bd8effe86 S390: Add virtio-blk boot
If no kernel IPL entry is specified, boot the bios and pass, if available,
device information for the first boot device (as given by the boot index).

The provided information will be used in the next commit from the BIOS.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ba1509c0a9)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dominik Dingel
4f6d9028cd Common: Add quick access to first boot device
Instead of manually parsing the boot_list as a character stream,
we can access the nth boot device, specified by the position in the
boot order.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 7dc5af5545)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dominik Dingel
25e275bdcc S390: Merging s390_ipl_cpu and s390_ipl_reset
There is no use in having this split into two functions.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 2c4c71ee3a)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Dominik Dingel
7ba7628d75 S390: BIOS check for file
Add a check if the BIOS blob exists before trying to load.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 1f7de85330)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
a7183f4e57 S390: CCW: Use new, working firmware by default
Since we now have working firmware for s390-ccw in the tree, we can
default to it on our s390-ccw machine, rendering it more useful.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit ba747cc8f3)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
8d97a9e990 S390: IPL: Use different firmware for different machines
We have a virtio-s390 and a virtio-ccw machine in QEMU. Both use vastly
different ways to do I/O. Having the same firmware blob for both doesn't
really make any sense.

Instead, let's parametrize the firmware file name, so that we can have
different blobs for different machines.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit d0249ce5a8)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
c272ed2b2c S390: IPL: Support ELF firmware
Our firmware blob is always a raw file that we load at a fixed address today.
Support loading an ELF blob instead that we can map high up in memory.

This way we don't have to be so conscious about size constraints.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 3325995640)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
4c7b66406c S390: Make IPL reset address dynamic
We can have different load addresses for different blobs we boot with.
Make the reset IP dynamic, so that we can handle things more flexibly.

Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 74ad2d22c1)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
70b243d20e Dictzip: Compile in block bucket, so qemu-img gets support as well
Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
95a7b57853 Dictzip: Fix potential endless loop
Before, we reserved streams on request submission, going into a potential
endless loop if we run more than 4 parallel requests. There's no need to.
The decoding callback is synchronized already.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Alexander Graf
3ba7da378a Dictzip: Fix endianness issues
Yikes - when running on big endian systems, we had some serious bugs
exposed. This patch gets us rolling on those again.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:19 -06:00
Tim Hardeck
45e0967553 TLS support for VNC Websockets
Added TLS support to the VNC QEMU Websockets implementation.
VNC-TLS needs to be enabled for this feature to be used.

The required certificates are specified as in case of VNC-TLS
with the VNC parameter "x509=<path>".

If the server certificate isn't signed by a root authority, it needs to
be manually imported in the browser because, at least in the case of Firefox
and Chrome, there is no user dialog; the connection just gets canceled.

As a side note, VEncrypt over Websockets doesn't work at the moment because
TLS can't be stacked in the current implementation. (It also didn't work
before.) Nevertheless, to my knowledge there is no HTML5 VNC client which
supports it, and the Websocket connection can be encrypted with regular TLS
now, so it should be fine for most use cases.

Signed-off-by: Tim Hardeck <thardeck@suse.de>
Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Message-id: 1366727581-5772-1-git-send-email-thardeck@suse.de
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 0057a0d590)

[BNC#821819 / FATE#315032]
Signed-off-by: Tim Hardeck <thardeck@suse.de>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:18 -06:00
Bruce Rogers
563b2c3b48 increase x86_64 physical bits to 42
Allow for guests with higher amounts of RAM. The current thought
is that 2TB specified on the qemu command line would be an appropriate
limit. Note that this requires the next higher bit value since
the highest address is actually more than 2TB due to the pci
memory hole.

Signed-off-by: Bruce Rogers <brogers@suse.com>
2021-03-17 21:09:18 -06:00
Jan Kiszka
fac90bfc6d target-i386: Improve -cpu ? features output
We were missing a bunch of feature lists. Fix this by simply dumping
the meta list feature_word_info.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 3af60be28c)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:18 -06:00
Jan Kiszka
6c662cc172 target-i386: Fix including "host" in -cpu ? output
kvm_enabled() cannot be true at this point because accelerators are
initialized much later during init. Also, hiding this makes it very hard
to discover for users. Simply dump unconditionally if CONFIG_KVM is set.

Add explanation for "host" CPU type.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
(cherry picked from commit 21ad77892d)

Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:18 -06:00
Cornelia Huck
a6e68a82b3 virtio-ccw: Wire up virtio-rng.
Make virtio-rng devices available for s390-ccw-virtio machines.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:18 -06:00
Stefan Berger
b8f274bd95 rng-random: Use qemu_open / qemu_close
In the rng backend use qemu_open and qemu_close rather than POSIX
open/close.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Andreas Färber <afaerber@suse.de>
2021-03-17 21:09:18 -06:00
Alexander Graf
9e32a0a2bc s390: Remove legacy s390-virtio machine type
We don't want to confuse users by offering the legacy, broken
machine type -M s390-virtio. Let's just not include the machine
description in the first place.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:18 -06:00
Alexander Graf
fe583ee43a s390: Default virtio-blk to ccw
When spawning a drive with -drive if=virtio on s390x, we want to
create a ccw device by default, as that is our default machine.

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:09:18 -06:00
Alexander Graf
6452d71938 s390: Update s390-zipl.rom with a ccw capable version
Include-If: %if 0%{?patch-possibly-applied-elsewhere}

Source compiled from commit 3c3828f74e11 of:

  git://repo.or.cz/s390-tools.git virtio-ccw-zipl

Signed-off-by: Alexander Graf <agraf@suse.de>
2021-03-17 21:08:26 -06:00
Bruce Rogers
a4d378b1ce Add syscalls to white list which allow sdl output
Signed-off-by: Bruce Rogers <brogers@suse.com>
2013-06-12 16:23:07 +02:00
Alexander Graf
de8526eefe s390: restrict early printk to legacy s390 machine
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:07 +02:00
Alexander Graf
3ab66401c3 Legacy Patch kvm-qemu-preXX-report-default-mac-used.patch 2013-06-12 16:23:06 +02:00
Bruce Rogers
f6af7357df s390: set s390-ccw as the default machine type for s390
Signed-off-by: Bruce Rogers <brogers@suse.com>
2013-06-12 16:23:06 +02:00
Alexander Graf
c8af02494d s390: default virtio aliases to ccw bus
When running with the s390-ccw machine, we need to make sure we
spawn virtio-ccw devices for -net and -drive. Change the aliases
accordingly.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:06 +02:00
Alexander Graf
1bb261f222 Legacy Patch kvm-studio-vnc.patch 2013-06-12 16:23:06 +02:00
Alexander Graf
f0922ef574 Legacy Patch kvm-studio-slirp-nooutgoing.patch 2013-06-12 16:23:06 +02:00
Alexander Graf
a6aa2f9c54 Make char muxer more robust wrt small FIFOs
Virtio-Console can only process one character at a time. Using it on S390
gave me strange "lags" where, on each key press, I got the character I had
pressed before. So I typed in "abc" and only received "a", then pressed "d"
but the guest received "b", and so on.

While the stdio driver calls a poll function that just processes on its
queue in case virtio-console can't take multiple characters at once, the
muxer does not have such callbacks, so it can't empty its queue.

To work around that limitation, I introduced a new timer that only gets
active when the guest can not receive any more characters. In that case
it polls again after a while to check if the guest is now receiving input.

This patch fixes input when using -nographic on s390 for me.
2013-06-12 16:23:06 +02:00
Alexander Graf
bf5b9c24c5 Implement early printk in virtio-console
On our S390x Virtio machine we don't have anywhere to display early printks
on, because we don't know about VGA or serial ports.

So instead we just forward everything to the virtio console that we created
anyways.

Signed-off-by: Alexander Graf <agraf@suse.de>

Conflicts:

	hw/s390-virtio.c
2013-06-12 16:23:06 +02:00
Alexander Graf
63d9acbe99 Legacy Patch kvm-qemu-tweak-sandboxing-syscall-whitelist.patch 2013-06-12 16:23:06 +02:00
Andreas Färber
632f4958a0 Raise soft address space limit to hard limit
For SLES we want users to be able to use large memory configurations
with KVM without fiddling with ulimit -Sv.

Signed-off-by: Andreas Färber <afaerber@suse.de>
2013-06-12 16:23:06 +02:00
Alexander Graf
66db770b81 console: add question-mark escape operator
Some termcaps (found using SLES11SP1) use [? sequences. According to man
console_codes (http://linux.die.net/man/4/console_codes) the question mark
is a nop and should simply be ignored.

This patch does exactly that, rendering screen output readable when
outputting guest serial consoles to the graphical console emulator.

Signed-off-by: Alexander Graf <agraf@suse.de>
2013-06-12 16:23:05 +02:00
Alexander Graf
363b2fc4f0 Legacy Patch kvm-qemu-preXX-dictzip3.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
287b7249b6 Add tar container format
Tar is a very widely used format to store data in. Sometimes people even put
virtual machine images in there.

So it makes sense for qemu to be able to read from tar files. I implemented a
reader written from scratch that also knows about the GNU sparse format, which
is what pigz creates.

This version checks for filenames that end on well-known extensions. The logic
could be changed to search for filenames given on the command line, but that
would require changes to more parts of qemu.

The tar reader in conjunction with dzip gives us the chance to download
tar'ed up virtual machine images (even via http) and instantly make use of
them.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Bruce Rogers <brogers@novell.com>

Conflicts:

	block/Makefile.objs
2013-06-12 16:23:05 +02:00
Alexander Graf
ea55d53b1b Add support for DictZip enabled gzip files
DictZip is an extension to the gzip format that allows random seeks in gzip
compressed files by cutting the file into pieces and storing the piece offsets
in the "extra" header of the gzip format.

Thanks to that extension, we can use gzip compressed files as block backend,
though only in read mode.

This makes a lot of sense when stacked with tar files that can then be shipped
to VM users. If a VM image is inside a tar file that is inside a DictZip
enabled gzip file, the user can run the tar.gz file as is without having to
extract the image first.

Tar patch follows.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Bruce Rogers <brogers@novell.com>

Conflicts:

	block/Makefile.objs
2013-06-12 16:23:05 +02:00
Alexander Graf
8e4eb52196 Legacy Patch kvm-qemu-make-rtl8139-default-nic.patch
We need this, but perhaps we can drop in SLES 12.
2013-06-12 16:23:05 +02:00
Alexander Graf
5efb9ade7e Legacy Patch kvm-qemu-enable-kvm-acceleration.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
73aab0ebf2 Legacy Patch kvm-qemu-avoid-redunant-declaration-error.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
bad6f6bc3f Legacy Patch kvm-qemu-provide-__u64-for-broken-sys-capability-h.patch 2013-06-12 16:23:05 +02:00
Alexander Graf
e536419507 Legacy Patch kvm-qemu-avoid-deprecated-gnutls-types.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
1bb6f0527b Legacy Patch kvm-qemu-madvise-DONTFORK-for-tight-memory-migration.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
a45b1c7069 Legacy Patch kvm-qemu-default-memsize.patch 2013-06-12 16:23:04 +02:00
Alexander Graf
827326be7b Legacy Patch qemu-datadir.diff 2013-06-12 16:23:04 +02:00
Michael Roth
89400a80f5 update VERSION for 1.4.2
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-23 17:12:44 -05:00
Hervé Poussineau
e85b521519 ppc: do not register IABR SPR twice for 603e
IABR SPR is already registered in gen_spr_603(), called from init_proc_603E().

Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 16:30:36 -05:00
Aneesh Kumar K.V
f890185392 hw/9pfs: use O_NOFOLLOW for mapped readlink operation
With mapped security models like mapped-xattr and mapped-file, we save the
symlink target as file contents. Now if we ever expose a normal directory
with a mapped security model and find real symlinks in the export path, never
follow them and return a proper error.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 16:23:43 -05:00
Aneesh Kumar K.V
745f6c0ef7 hw/9pfs: Fix segfault with 9p2000.u
When the guest tries to chmod a block or char device file over 9pfs,
the qemu process segfaults. With the 9p2000.u protocol we use wstat to
change mode bits, and the client doesn't send extension information for
chmod. We need to check the size field to determine whether extension
info is present or not.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Acked-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-20 11:25:00 -05:00
Josh Durgin
0182df5ae5 rbd: add an asynchronous flush
The existing bdrv_co_flush_to_disk implementation uses rbd_flush(),
which is synchronous and causes the main qemu thread to block until it
is complete. This results in unresponsiveness and extra latency for
the guest.

Fix this by using an asynchronous version of flush.  This was added to
librbd with a special #define to indicate its presence, since it will
be backported to stable versions. Thus, there is no need to check the
version of librbd.

Implement this as bdrv_aio_flush, since it matches other aio functions
in the rbd block driver, and leave out bdrv_co_flush_to_disk when the
asynchronous version is available.

Reported-by: Oliver Francke <oliver@filoo.de>
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit dc7588c1eb)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 15:52:55 -05:00
Paolo Bonzini
7f28f0f1f6 qemu-iotests: add tests for rebasing zero clusters
If zero clusters are erroneously treated as unallocated, "qemu-img rebase"
will copy the backing file's contents onto the cluster.

The bug existed also in image streaming, but since the root cause was in
qcow2's is_allocated implementation it is enough to test it with qemu-img.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit acbf30ec60)

Conflicts:

	tests/qemu-iotests/group

* fixed up to account for tests 48/49 being missing from 1.4

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 13:10:52 -05:00
Luiz Capitulino
45bbe1fa89 virtio-balloon: fix integer overflow in BALLOON_CHANGE QMP event
Because dev->actual is uint32_t, the expression 'dev->actual <<
VIRTIO_BALLOON_PFN_SHIFT' is truncated to 32 bits. This overflows when
dev->actual >= 1048576.

To reproduce:

 1. Start a VM with a QMP socket and 5G of RAM
 2. Connect to the QMP socket, negotiate capabilities and issue:

   { "execute":"balloon", "arguments": { "value": 1073741824 } }

 3. Watch for BALLOON_CHANGE QMP events, the last one will incorrectly be:

   { "timestamp": { "seconds": 1366228965, "microseconds": 245466 },
     "event": "BALLOON_CHANGE", "data": { "actual": 5368709120 } }

To fix it this commit casts it to ram_addr_t, which is ram_size's type.
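
A sketch of the change (the cast widens the operand before the shift so the
result is no longer truncated to 32 bits):

    /* before: dev->actual << VIRTIO_BALLOON_PFN_SHIFT, truncated to 32 bits */
    ram_addr_t actual = (ram_addr_t)dev->actual << VIRTIO_BALLOON_PFN_SHIFT;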

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit dcc6ceffc0)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-17 12:02:18 -05:00
Paolo Bonzini
06efdc4f4d qemu-timer: move timeBeginPeriod/timeEndPeriod to os-win32
These are needed for any of the Win32 alarm timer implementations.
They are not tied to mmtimer exclusively.

Jacob tested this patch with both mmtimer and Win32 timers.

Cc: qemu-stable@nongnu.org
Tested-by: Jacob Kroon <jacob.kroon@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Weil <sw@weilnetz.de>
(cherry picked from commit 0727b86754)

Conflicts:

	os-win32.c

* updated to retain cpu affinity settings for 1.4

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 17:22:34 -05:00
Brad Smith
0c70b5ad59 configure: Don't fall back to gthread coroutine backend
This is a back port of 7c2acc7062 to the
1.4 stable branch without needing the new error_exit() function.

configure: Don't fall back to gthread coroutine backend

The gthread coroutine backend is broken and does not produce a working
QEMU; it is only useful for some very limited debugging situations.
Clean up the backend selection logic in configure so that it now runs
"if on windows use windows; else prefer ucontext; else sigaltstack".

To do this we refactor the configure code to separate out "test
whether we have a working ucontext", "pick a default if user didn't
specify" and "validate that user didn't specify something invalid",
rather than having all three of these run together. We also simplify
the Makefile logic so it just links in the backend the configure
script selects.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1365419487-19867-3-git-send-email-peter.maydell@linaro.org
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Brad Smith <brad@comstyle.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 14:35:48 -05:00
Hans de Goede
b90fd157f7 usb-redir: Fix crash on migration with no client connected
If no client is connected on the src side, then we won't receive a
parser during migration; in this case usbredir_post_load() should be a nop,
rather than trying to dereference the NULL dev->parser pointer.
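
A sketch of the early-out (names follow the description above; the
surrounding restore logic is elided):

    static int usbredir_post_load(void *priv, int version_id)
    {
        USBRedirDevice *dev = priv;

        if (!dev->parser) {
            return 0;   /* no client was connected on the source; nothing to restore */
        }
        /* ... existing parser/state restore continues here ... */
        return 0;
    }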

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 3713e1485e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 12:06:36 -05:00
Cole Robinson
7322cb17fa docs: Fix generating qemu-doc.html with texinfo 5
LC_ALL=C makeinfo --no-headers --no-split --number-sections --html qemu-doc.texi -o qemu-doc.html
./qemu-options.texi:1521: unknown command `list'
./qemu-options.texi:1521: table requires an argument: the formatter for @item
./qemu-options.texi:1521: warning: @table has text but no @item

This is for 1.4 stable only; master isn't affected, as it was fixed by
another commit (which isn't appropriate for stable):

commit 5d6768e3b8
Author: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Date:   Fri Feb 22 12:39:51 2013 +0900

    sheepdog: accept URIs

Signed-off-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-16 12:04:13 -05:00
Laszlo Ersek
1d7723ffc7 qga: unlink just created guest-file if fchmod() or fdopen() fails on it
We shouldn't allow guest filesystem pollution on error paths.

Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit 2b72001806)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 16:18:25 -05:00
Laszlo Ersek
67b460a404 qga: distinguish binary modes in "guest_file_open_modes" map
In Windows guests this may make a difference.

Since the original patch (commit c689b4f1) sought to be pedantic and to
consider theoretical corner cases of portability, we should fix it up
where it failed to come through in that pursuit.

Suggested-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
(cherry picked from commit 8fe6bbca71)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 16:18:15 -05:00
Peter Maydell
84247bbe28 translate-all.c: Remove cpu_unlink_tb()
The (unsafe) function cpu_unlink_tb() is now unused, so we can simply
remove it and any code that was only used by it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 3a808cc407)

Conflicts:
	translate-all.c

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:38 -05:00
Peter Maydell
2ebcc590c9 Handle CPU interrupts by inline checking of a flag
Fix some of the nasty TCG race conditions and crashes by implementing
cpu_exit() as setting a flag which is checked at the start of each TB.
This avoids crashes if a thread or signal handler calls cpu_exit()
while the execution thread is itself modifying the TB graph (which
may happen in system emulation mode as well as in linux-user mode
with a multithreaded guest binary).

This fixes the crashes seen in LP:668799; however there are another
class of crashes described in LP:1098729 which stem from the fact
that in linux-user with a multithreaded guest all threads will
use and modify the same global TCG date structures (including the
generated code buffer) without any kind of locking. This means that
multithreaded guest binaries are still in the "unsupported"
category.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 378df4b237)

Conflicts:
	exec.c
	include/qom/cpu.h
	translate-all.c
	include/exec/gen-icount.h

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

Conflicts:
	cpu-exec.c

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:21 -05:00
Peter Maydell
69001b3145 cpu-exec: wrap tcg_qemu_tb_exec() in a fn to restore the PC
If tcg_qemu_tb_exec() returns a value whose low bits don't indicate a
link to an indexed next TB, this means that the TB execution never
started (eg because the instruction counter hit zero).  In this case the
guest PC has to be reset to the address of the start of the TB.
Refactor the cpu-exec code to make all tcg_qemu_tb_exec() calls pass
through a wrapper function which does this restoration if necessary.

Note that the apparent change in cpu_exec_nocache() from calling
cpu_pc_from_tb() with the old TB to calling it with the TB returned by
do_tcg_qemu_tb_exec() is safe, because in the nocache case we can
guarantee that the TB we try to execute is not linked to any others,
so the only possible returned TB is the one we started at. That is,
we should arguably previously have included in cpu_exec_nocache() an
assert((next_tb & ~TB_EXIT_MASK) == tb), since the API requires restore
from next_tb but we were using tb.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 77211379d7)

Conflicts:
	cpu-exec.c

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:48:14 -05:00
Peter Maydell
3accab7365 tcg: Document tcg_qemu_tb_exec() and provide constants for low bit uses
Document tcg_qemu_tb_exec(). In particular, its return value is a
combination of a pointer to the next translation block and some
extra information in the low two bits. Provide some #defines for
the values passed in these bits to improve code clarity.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>

(cherry picked from commit 0980011b4f)

Conflicts:
	tcg/tcg.h

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 15:47:53 -05:00
Laszlo Ersek
60259539ee qga: set umask 0077 when daemonizing (CVE-2013-2007)
The qemu guest agent creates a bunch of files with insecure permissions
when started in daemon mode. For example:

  -rw-rw-rw- 1 root root /var/log/qemu-ga.log
  -rw-rw-rw- 1 root root /var/run/qga.state
  -rw-rw-rw- 1 root root /var/log/qga-fsfreeze-hook.log

In addition, at least all files created with the "guest-file-open" QMP
command, and all files created with shell output redirection (or
otherwise) by utilities invoked by the fsfreeze hook script are affected.

For now mask all file mode bits for "group" and "others" in
become_daemon().

Temporarily, for compatibility reasons, stick with the 0666 file-mode in
case of files newly created by the "guest-file-open" QMP call. Do so
without changing the umask temporarily.
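
The core of the change in become_daemon() is a single umask() call (POSIX
constants from <sys/stat.h>):

    umask(S_IRWXG | S_IRWXO);   /* equivalent to umask 0077 */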

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c689b4f1ba)

Conflicts:

	qga/commands-posix.c

*update includes to match stable

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:30:33 -05:00
Aurelien Jarno
93399d0827 tcg/optimize: fix setcond2 optimization
When setcond2 is rewritten into setcond, the state of the destination
temp should be reset, so that a copy of the previous value is not
used instead of the result.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 66e61b55f1)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:03:45 -05:00
Richard Sandiford
074dd56a01 target-mips: Fix accumulator arguments to gen_helper_dmult(u)
gen_muldiv was passing int accumulator arguments directly
to gen_helper_dmult(u).  This patch fixes it to use TCGs,
via the gen_helper_0e2i wrapper.

Fixes an --enable-debug-tcg build failure reported by Juergen Lock.

Signed-off-by: Richard Sandiford <rdsandiford@googlemail.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 13:01:34 -05:00
Andreas Färber
d10d2510b9 configure: Pick up libseccomp include path
openSUSE 12.3 has seccomp.h in /usr/include/libseccomp-1.0.1,
so add `pkg-config --cflags libseccomp` output to QEMU_CFLAGS.

Cc: qemu-stable@nongnu.org
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 372e47e9b5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-14 05:30:53 -05:00
Cornelia Huck
5613bda4ac virtio-ccw: Check indicators location.
If a guest neglected to register (secondary) indicators but still runs
with notifications enabled, we might end up writing to guest zero;
avoid this by checking for valid indicators and only writing to the
guest and generating an interrupt if indicators have been setup.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit 7c4869761d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:53:19 -05:00
Jason Wang
c5675a98bb tap: properly initialize vhostfds
Only tap->vhostfd was checked in net_init_tap_one(), but tap->vhostfds was
forgotten; this leads qemu to ignore all fds passed by management through
vhostfds and to try to create the vhost_net device itself. Fix by adding this
check as well.

Reported-by: Michal Privoznik <mprivozn@redhat.com>
Cc: Michal Privoznik <mprivozn@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7873df408d)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:52:06 -05:00
Amit Shah
e355efd962 rng random backend: check for -EAGAIN errors on read
Not handling EAGAIN triggers the assert

qemu/backends/rng-random.c:44:entropy_available: assertion failed: (len != -1)
Aborted (core dumped)

This happens when starting a guest with '-device virtio-rng-pci',
issuing a 'cat /dev/hwrng' in the guest, while also doing 'cat
/dev/random' on the host.

Reported-by: yunpingzheng <yunzheng@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Message-id: eacda84dfaf2d99cf6d250b678be4e4d6c2088fb.1366108096.git.amit.shah@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit acbbc03661)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:50:35 -05:00
Andreas Färber
4d7f4556fc qdev: Fix QOM unrealize behavior
Since commit 249d41720b (qdev: Prepare
"realized" property) setting realized = true would register the device's
VMStateDescription, but realized = false would not unregister it. Fix that.

Moving the code from unparenting also revealed that we were calling
DeviceClass::init through DeviceClass::realize as interim solution but
DeviceClass::exit still at unparenting time with a realized check.
Make this symmetrical by implementing DeviceClass::unrealize to call it,
while we're setting realized = false in the unparenting path.
The only other unrealize user is mac_nvram, which can safely override it.

Thus, mark DeviceClass::exit as obsolete, new devices should implement
DeviceClass::unrealize instead.

Cc: qemu-stable@nongnu.org
Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Andreas Färber <afaerber@suse.de>
Message-id: 1366043650-9719-1-git-send-email-afaerber@suse.de
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit fe6c211781)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:48:56 -05:00
Stefan Hajnoczi
0486c27a36 nbd: unlock mutex in nbd_co_send_request() error path
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6760c47aa4)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-05-13 11:47:07 -05:00
Michael Roth
57105f7480 update VERSION for 1.4.1
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-15 14:18:25 -05:00
Daniel P. Berrange
6e8865313f Add -f FMT / --format FMT arg to qemu-nbd
Currently the qemu-nbd program will auto-detect the format of
any disk it is given. This behaviour is known to be insecure.
For example, if qemu-nbd initially exposes a 'raw' file to an
unprivileged app, and that app runs

   'qemu-img create -f qcow2 -o backing_file=/etc/shadow /dev/nbd0'

then the next time the app is started, the qemu-nbd will now
detect it as a 'qcow2' file and expose /etc/shadow to the
unprivileged app.

The only way to avoid this is to explicitly tell qemu-nbd what
disk format to use on the command line, completely disabling
auto-detection. This patch adds a '-f' / '--format' arg for
this purpose, mirroring what is already available via qemu-img
and qemu commands.

  qemu-nbd --format raw -p 9000 evil.img

will now always use raw, regardless of what format 'evil.img'
looks like it contains

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
[Use errx, not err. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

*fixed conflict due to bdrv_open() not supporting "options" param
in v1.4.1

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-09 10:00:20 -05:00
Richard Sandiford
6d0b135a98 target-mips: Fix accumulator selection for MIPS16 and microMIPS
Add accumulator arguments to gen_HILO and gen_muldiv, rather than
extracting the accumulator directly from ctx->opcode.  The extraction
was only right for the standard encoding: MIPS16 doesn't have access
to the DSP registers, while microMIPS encodes the accumulator register
in a different field (bits 14 and 15).

Passing the accumulator register is probably an over-generalisation
for division and 64-bit multiplication, which never access anything
other than HI and LO, and which always pass 0 as the new argument.
Separating them felt a bit fussy though.

Signed-off-by: Richard Sandiford <rdsandiford@googlemail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 26135ead80)

Conflicts:
	target-mips/translate.c

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-09 09:59:17 -05:00
Brad Smith
d89f9ba43b Allow clock_gettime() monotonic clock to be utilized on more OS's
Allow the clock_gettime() code using the monotonic clock to be utilized on
more POSIX-compliant OSes. This started as a fix for OpenBSD, which was
listed in one function's previous hard-coded list of supported OSes but
not in the other's.

Signed-off-by: Brad Smith <brad@comstyle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20130405003748.GH884@rox.home.comstyle.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit d05ef16045)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-06 16:38:15 -05:00
Eduardo Habkost
46f9071a23 target-i386: Check for host features before filter_features_for_kvm()
commit 5ec01c2e96 broke "-cpu ..,enforce",
as it has moved kvm_check_features_against_host() after the
filter_features_for_kvm() call. filter_features_for_kvm() removes all
features not supported by the host, so this effectively made
kvm_check_features_against_host() impossible to fail.

This patch changes the call so we check for host feature support before
filtering the feature bits.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Message-id: 1364935692-24004-1-git-send-email-ehabkost@redhat.com
Cc: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit a509d632c8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-05 14:01:33 -05:00
Jason Wang
f85e082a36 help: add docs for missing 'queues' option of tap
Cc: Markus Armbruster <armbru@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-id: 1361545072-30426-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit ec3960148f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-05 13:57:17 -05:00
Paolo Bonzini
da78a1bc7a compiler: fix warning with GCC 4.8.0
GCC 4.8.0 introduces a new warning:

    block/qcow2-snapshot.c: In function 'qcow2_write_snapshots':
    block/qcow2-snapshot.c:252:18: error: typedef 'qemu_build_bug_on__253'
              locally defined but not used [-Werror=unused-local-typedefs]
         QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                  ^
    cc1: all warnings being treated as errors

(Caret diagnostics aren't perfect yet with macros... :)) Work around it
with __attribute__((unused)).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1364391272-1128-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 99835e0084)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 19:53:21 -05:00
Peter Lieven
2b92aa36d1 block: complete all IOs before resizing a device
This patch ensures that all pending IOs are completed
before a device is resized. This is especially important
if a device is shrunk, as the bdrv_check_request()
result is invalidated.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 92b7a08d64)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:36:43 -05:00
Peter Lieven
e4cce2d3e9 Revert "block: complete all IOs before .bdrv_truncate"
bdrv_truncate() is also called from readv/writev commands on
self-growing file-based storage. This would result in requests waiting
for themselves to complete.

This reverts commit 9a665b2b86.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5c916681ae)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:35:43 -05:00
Gerd Hoffmann
d15b1aa30c qxl: better vga init in enter_vga_mode
Ask the vga core to update the display.  Will trigger dpy_gfx_resize
if needed.  More complete than just calling dpy_gfx_resize.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit c099e7aa02)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:33:55 -05:00
Markus Armbruster
65fe29ec00 doc: Fix texinfo @table markup in qemu-options.hx
End tables before headings, start new ones afterwards.  Fixes
incorrect indentation of headings "File system options" and "Virtual
File system pass-through options" in manual page and qemu-doc.

Normalize markup some to increase chances it survives future edits.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1360781383-28635-5-git-send-email-armbru@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit c70a01e449)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:29:28 -05:00
Bruce Rogers
888e036eb4 acpi: initialize s4_val used in s4 shutdown
While investigating why a 32 bit Windows 2003 guest wasn't able to
successfully perform a shutdown /h, it was discovered that commit
afafe4bbe0 inadvertently dropped the
initialization of the s4_val used to handle s4 shutdown.
Initialize the value as before.

Signed-off-by: Bruce Rogers <brogers@suse.com>
Message-id: 1364928100-487-1-git-send-email-brogers@suse.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 560e639652)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 17:24:55 -05:00
Petar Jovanovic
d019dd928c target-mips: fix rndrashift_short_acc and code for EXTR_ instructions
Fix for rndrashift_short_acc to set correct value to higher 64 bits.
This change also corrects conditions when bit 23 of the DSPControl register
is set.

The existing test files have been extended with several examples that
trigger the issues. One bug/example in the test file for EXTR_RS_W has been
found and reported by Klaus Peichl.

Signed-off-by: Petar Jovanovic <petar.jovanovic@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 8b758d0568)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:58:41 -05:00
Petar Jovanovic
dac077f0e6 target-mips: fix DSP overflow macro and affected routines
The previous implementation incorrectly used the same macro to detect overflow
for addition and subtraction. This patch distinguishes between the two and
creates separate macros. The affected routines are changed
accordingly.

This change also includes additions to the existing tests for SUBQ_S_PH and
SUBQ_S_W that would trigger the fixed issue, and it removes dead code from
the test file. The last test case in subq_s_w.c is a bug found, reported,
and isolated by Klaus Peichl from Dolby.

Signed-off-by: Petar Jovanovic <petar.jovanovic@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 20c334a797)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:32:39 -05:00
Petar Jovanovic
b09a673164 target-mips: fix for sign-issue in MULQ_W helper
Correct sign-propagation before multiplication in MULQ_W helper.
The change also fixes previously incorrect expected values in the
tests for MULQ_RS.W and MULQ_S.W.

Signed-off-by: Petar Jovanovic <petarj@mips.com>
Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit a345481baa)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:31:57 -05:00
Petar Jovanovic
79a4dd4085 target-mips: fix for incorrect multiplication with MULQ_S.PH
The change corrects a sign-related issue with MULQ_S.PH. It also includes
extension to the already existing test which will trigger the issue.

Signed-off-by: Petar Jovanovic <petarj@mips.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 9c19eb1e20)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 16:31:23 -05:00
Hans de Goede
57e929c19c usb-tablet: Don't claim wakeup capability for USB-2 version
Our ehci code does not implement wakeup support, so claiming support for
it with usb-tablet in USB-2 mode causes all tablet events to get lost.

http://bugzilla.redhat.com/show_bug.cgi?id=929068

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit aa1c9e971e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:33:52 -05:00
Stefan Hajnoczi
27c71355fb chardev: clear O_NONBLOCK on SCM_RIGHTS file descriptors
When we receive a file descriptor over a UNIX domain socket the
O_NONBLOCK flag is preserved.  Clear the O_NONBLOCK flag and rely on
QEMU file descriptor users like migration, SPICE, VNC, block layer, and
others to set non-blocking only when necessary.

This change ensures we don't accidentally expose O_NONBLOCK in the QMP
API.  QMP clients should not need to get the non-blocking state
"correct".

A recent real-world example was when libvirt passed a non-blocking TCP
socket for migration where we expected a blocking socket.  The source
QEMU produced a corrupted migration stream since its code did not cope
with non-blocking sockets.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit e374f7f816171f9783c1d9d00a041f26379f1ac6)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
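
A minimal, generic POSIX sketch of the operation described above (not the
actual QEMU helper): clear O_NONBLOCK on a descriptor received over
SCM_RIGHTS so later users see a blocking fd by default.

    #include <fcntl.h>

    /* Returns 0 on success, -1 on error (errno set by fcntl). */
    static int clear_nonblock(int fd)
    {
        int flags = fcntl(fd, F_GETFL);
        if (flags == -1) {
            return -1;
        }
        return fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
    }
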
Stefan Hajnoczi
283b7de6a5 qemu-socket: set passed fd non-blocking in socket_connect()
socket_connect() sets non-blocking on TCP or UNIX domain sockets if a
callback function is passed.  Do the same for file descriptor passing,
otherwise we could unexpectedly be using a blocking file descriptor.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 35fb94fa292173a3e1df0768433e06912a2a88e4)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Stefan Hajnoczi
a1cb89f3fe net: ensure "socket" backend uses non-blocking fds
There are several code paths in net_init_socket() depending on how the
socket is created: file descriptor passing, UDP multicast, TCP, or UDP.
Some of these support both listen and connect.

Not all code paths set the socket to non-blocking.  This patch addresses
the file descriptor passing and UDP cases which were missing
socket_set_nonblock(fd) calls.

I considered moving socket_set_nonblock(fd) to a central location but it
turns out the code paths are different enough to require non-blocking at
different places.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f05b707279dc7c29ab10d9d13dbf413df6ec22f1)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Stefan Hajnoczi
68f9df5990 oslib-posix: rename socket_set_nonblock() to qemu_set_nonblock()
The fcntl(fd, F_SETFL, O_NONBLOCK) flag is not specific to sockets.
Rename to qemu_set_nonblock() just like qemu_set_cloexec().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 399f1c8f8af1f6f8b18ef4e37169c6301264e467)

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Conflicts:
	block/sheepdog.c

socket_set_block()/socket_set_nonblock() calls in different locations

	include/qemu/sockets.h

socket_set_nodelay() does not exist in v1.4.0, messes up diff context

	qemu-char.c

glib G_IO_IN events are not used in v1.4.0, messes up diff context

	savevm.c

qemu_fopen_socket() only has read mode in v1.4.0, qemu_set_block() not
necessary.

	slirp/misc.c

unportable setsockopt() calls in v1.4.0 mess up diff context

	slirp/tcp_subr.c

file was reformatted, diff context is messed up

	ui/vnc.c

old dcl->idle instead of vd->dcl.idle messes up diff context

Added:
	migration-tcp.c, migration-unix.c

qemu_fopen_socket() write mode does not exist yet, qemu_set_block() call
is needed here.
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-04 15:17:32 -05:00
Gerd Hoffmann
0135796271 update seabios to 1.7.2.1
Alex Williamson (3):
      seabios q35: Enable all PIRQn IRQs at startup
      seabios q35: Add new PCI slot to irq routing function
      seabios: Add a dummy PCI slot to irq mapping function

Avik Sil (1):
      USB-EHCI: Fix null pointer assignment

Kevin O'Connor (4):
      Update tools/acpi_extract.py to handle iasl 20130117 release.
      Fix Makefile - don't reference "out/" directly, instead use "$(OUT)".
      build: Don't require $(OUT) to be a sub-directory of the main
directory.
      Verify CC is valid during build tests.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 5c75fb1002)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:34:06 -05:00
Peter Maydell
799a34a48b linux-user/syscall.c: Don't warn about unimplemented get_robust_list
The nature of the kernel ABI for the get_robust_list and set_robust_list
syscalls means we cannot implement them in QEMU. Make get_robust_list
silently return ENOSYS rather than using the default "print message and
then fail ENOSYS" code path, in the same way we already do for
set_robust_list, and add a comment documenting why we do this.

This silences warnings which were being produced for emulating
even trivial programs like 'ls' in x86-64-on-x86-64.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit e9a970a831)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:28:53 -05:00
Peter Maydell
8378910554 linux-user: make bogus negative iovec lengths fail EINVAL
If the guest passes us a bogus negative length for an iovec, fail
EINVAL rather than proceeding blindly forward. This fixes some of
the error-case tests for readv and writev in the LTP.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit dfae8e00f8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 16:23:52 -05:00
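
A hedged sketch of the kind of length check involved; the helper name and
shape are illustrative, not the actual linux-user lock_iovec() code.

    #include <errno.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* Reject iovec entries whose length is bogus (negative when viewed as a
     * signed size) before doing any copying. */
    static int validate_iovec(const struct iovec *iov, int iovcnt)
    {
        int i;
        for (i = 0; i < iovcnt; i++) {
            if ((ssize_t)iov[i].iov_len < 0) {
                return -EINVAL;
            }
        }
        return 0;
    }
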
John Rigby
7a238b9fbd linux-user: fix futex strace of FUTEX_CLOCK_REALTIME
Handle same as existing FUTEX_PRIVATE_FLAG.

Signed-off-by: John Rigby <john.rigby@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit bfb669f39f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:49:19 -05:00
John Rigby
02493ee490 linux-user/syscall.c: handle FUTEX_WAIT_BITSET in do_futex
Upstream libc has recently changed to start using
FUTEX_WAIT_BITSET instead of FUTEX_WAIT and this
is causing do_futex to return -TARGET_ENOSYS.

Pass bitset in val3 to sys_futex which will be
ignored by kernel for the FUTEX_WAIT case.

Signed-off-by: John Rigby <john.rigby@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
(cherry picked from commit cce246e0a2)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:48:35 -05:00
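
A hedged sketch of the host-side call (illustrative wrapper, not the actual
do_futex() code): FUTEX_WAIT_BITSET carries the wake bitset in the final
(val3) argument, which the kernel ignores for plain FUTEX_WAIT, so it can be
passed through unconditionally.

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    static long do_futex_wait(int *uaddr, int op, int val,
                              const struct timespec *timeout, int val3)
    {
        /* args: uaddr, op, val, timeout/val2, uaddr2, val3 */
        return syscall(SYS_futex, uaddr, op, val, timeout, NULL, val3);
    }
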
Stefan Hajnoczi
7d47b243d6 qcow2: flush refcount cache correctly in qcow2_write_snapshots()
Since qcow2 metadata is cached we need to flush the caches, not just the
underlying file.  Use bdrv_flush(bs) instead of bdrv_flush(bs->file).

Also add the error return path when bdrv_flush() fails and move the
flush after checking for qcow2_alloc_clusters() failure so that the
qcow2_alloc_clusters() error return value takes precedence.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit f6977f1556)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:47:09 -05:00
Stefan Hajnoczi
02ea844746 qcow2: flush refcount cache correctly in alloc_refcount_block()
update_refcount() affects the refcount cache; it does not write to disk.
Therefore bdrv_flush(bs->file) does nothing.  We need to flush the
refcount cache in order to write out the refcount updates!

While we're here also add error returns when qcow2_cache_flush() fails.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9991923b26)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:45:40 -05:00
Peter Lieven
0fcf00b55c page_cache: fix memory leak
XBZRLE-encoded migration introduced an MRU page cache
mechanism. Unfortunately, cached items were never freed in
case of a collision in the page cache on cache_insert().

This led to out-of-memory conditions during XBZRLE migration
if the page cache was small and there were a lot of collisions
in the cache.

Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Orit Wasserman <owasserm@redhat.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 32a1c08b60)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:44:43 -05:00
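
The idea of the fix, as a self-contained sketch (an illustrative
direct-mapped cache, not the actual page_cache.c code): free the previous
occupant of a slot before overwriting it on a collision.

    #include <stdlib.h>
    #include <string.h>

    typedef struct CacheItem {
        unsigned long addr;
        void *data;
    } CacheItem;

    static void cache_insert(CacheItem *slots, size_t nslots,
                             unsigned long addr, const void *page, size_t len)
    {
        CacheItem *it = &slots[addr % nslots];
        free(it->data);            /* free(NULL) is a no-op; fixes the leak */
        it->data = malloc(len);
        if (it->data) {
            memcpy(it->data, page, len);
            it->addr = addr;
        }
    }
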
Orit Wasserman
5610ef5863 Fix page_cache leak in cache_resize
Signed-off-by: Orit Wasserman <owasserm@redhat.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 0db65d624e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:44:02 -05:00
Christian Borntraeger
7a687aed28 virtio-blk: fix unplug + virsh reboot
virtio-blk registers a vmstate change handler. Unfortunately this
handler is not unregistered on unplug, leading to some random
crashes if the system is restarted, e.g. via virsh reboot.
Let's unregister the vmstate change handler if the device is removed.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 69b302b204)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:41:50 -05:00
Mark Cave-Ayland
b91aee5810 ide/macio: Fix macio DMA initialisation.
Commit 07a7484e5d accidentally introduced a bug
in the initialisation of the second macio DMA device which could cause some
DMA operations to segfault QEMU.

CC: Andreas Färber <afaerber@suse.de>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Acked-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 02d583c723)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 15:39:59 -05:00
Andreas Färber
e09b99b54f target-ppc: Fix CPU_POWERPC_MPC8547E
It was defined to ..._MPC8545E_v21 rather than ..._MPC8547E_v21.
Due to both resolving to CPU_POWERPC_e500v2_v21 this did not show.

Fixing this nonetheless helps with QOM'ifying CPU aliases.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 0136d715ad)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:53:18 -05:00
David Gibson
611c7f2c3a pseries: Add cleanup hook for PAPR virtual LAN device
Currently the spapr-vlan device does not supply a cleanup call for its
NetClientInfo structure.  With current qemu versions, that leads to a SEGV
on exit, when net_cleanup() attempts to call the cleanup handlers on all
net clients.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 156dfaded8)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:51:39 -05:00
Michal Privoznik
4e4566ce78 configure: Require at least spice-protocol-0.12.3
As of 5a49d3e9 we assume SPICE_PORT_EVENT_BREAK to be defined.
However, it is not defined in 0.12.2, which is what we currently
require, but in 0.12.3.  Therefore, to prevent a build failure, we
must adjust our minimum requirement.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 358689fe29)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:48:51 -05:00
Paolo Bonzini
43e00611bc qemu-bridge-helper: force usage of a very high MAC address for the bridge
Linux uses the lowest enslaved MAC address as the MAC address of
the bridge.  Set the MAC address to a high value so that it does not
affect the MAC address of the bridge.

Changing the MAC address of the bridge could cause a few seconds
of network downtime.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1363971468-21154-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 226ecabfbd)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:31:58 -05:00
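
The effect can be illustrated with a short sketch: give the helper's tap a
locally administered, numerically very high address so the bridge (which
adopts the lowest enslaved MAC) never takes it over. The exact value below
is an assumption for illustration, not necessarily the one the helper uses.

    #include <stdint.h>
    #include <string.h>

    /* 0xfe in the first octet = locally administered unicast; all-ones in
     * the remaining octets keeps it higher than any real NIC address. */
    static void pick_high_mac(uint8_t mac[6])
    {
        static const uint8_t high[6] = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff };
        memcpy(mac, high, sizeof(high));
    }
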
Cornelia Huck
3c3de7c6b4 virtio-ccw: Queue sanity check for notify hypercall.
Verify that the virtio-ccw notify hypercall passed a reasonable
value for queue.

Cc: qemu-stable@nongnu.org
Reported-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
(cherry picked from commit b57ed9bf07)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:30:51 -05:00
Yeongkyoon Lee
b0da310a69 tcg: Fix occasional TCG broken problem when ldst optimization enabled
is_tcg_gen_code() checks the upper limit of the TCG-generated code range
incorrectly, so TCG could occasionally break, but only when
CONFIG_QEMU_LDST_OPTIMIZATION is enabled. The reason is that
code_gen_buffer_max_size does not cover the upper range up to
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE); thus code_gen_buffer_max_size should be
replaced with code_gen_buffer_size.

CC: qemu-stable@nongnu.org
Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 52ae646d4a)

Conflicts:

	translate-all.c

*modified to use non-tcg-ctx version of code_gen_* variables

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:28:39 -05:00
Peter Crosthwaite
d26efd2d39 qga/main.c: Don't use g_key_file_get/set_int64
These functions don't exist until glib version 2.26. QEMU is currently only
mandating glib 2.12.

This patch replaces the functions with g_key_file_get/set_integer.

Unbreaks the build on Ubuntu 10.04 and RHEL 5.6.

The regression was introduced by 39097daf15.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 1363323879-682-1-git-send-email-peter.crosthwaite@xilinx.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 4f30649618)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:18:47 -05:00
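
A hedged GLib sketch of the replacement pattern, assuming a plain int is
wide enough for the stored value; g_key_file_get_integer()/set_integer()
predate 2.26, unlike the int64 variants.

    #include <glib.h>

    static int load_counter(GKeyFile *kf, const char *group, const char *key)
    {
        GError *err = NULL;
        gint v = g_key_file_get_integer(kf, group, key, &err);
        if (err) {
            g_error_free(err);   /* missing key: fall back to 0 */
            return 0;
        }
        return v;
    }

    static void store_counter(GKeyFile *kf, const char *group,
                              const char *key, int v)
    {
        g_key_file_set_integer(kf, group, key, v);
    }
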
Michael Roth
f305d504ab qemu-ga: use key-value store to avoid recycling fd handles after restart
Hosts hold on to handles provided by guest-file-open for periods that can
span beyond the life of the qemu-ga process that issued them. Since these
are issued starting from 0 on every restart, we run the risk of issuing
duplicate handles after restarts/reboots.

As a result, users with a stale copy of these handles may end up
reading/writing corrupted data due to their existing handles effectively
being re-assigned to an unexpected file or offset.

We unfortunately do not issue handles as strings, but as integers, so a
solution such as using UUIDs can't be implemented without introducing a
new interface.

As a workaround, we fix this by implementing a persistent key-value store
that will be used to track the value of the last handle that was issued
across restarts/reboots to avoid issuing duplicates.

The store is automatically written to the same directory we currently
set via --statedir to track fsfreeze state, and so should be applicable
for stable releases where this flag is supported.

A follow-up can use this same store for handling fsfreeze state, but
that change is cosmetic and left out for now.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org

* fixed guest_file_handle_add() return value from uint64_t to int64_t
(cherry picked from commit 39097daf15)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:16:31 -05:00
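
A self-contained sketch of the underlying idea, persisting the last issued
handle across restarts; the file format and error handling here are
assumptions for illustration, not the actual key-value store code.

    #include <inttypes.h>
    #include <stdio.h>

    static int64_t load_last_handle(const char *path)
    {
        int64_t v = 0;
        FILE *f = fopen(path, "r");
        if (f) {
            if (fscanf(f, "%" SCNd64, &v) != 1) {
                v = 0;
            }
            fclose(f);
        }
        return v;
    }

    /* Issue handles starting after the persisted value so a restarted agent
     * never reuses a handle an old client may still hold. */
    static int64_t issue_handle(const char *path)
    {
        int64_t next = load_last_handle(path) + 1;
        FILE *f = fopen(path, "w");
        if (f) {
            fprintf(f, "%" PRId64 "\n", next);
            fclose(f);
        }
        return next;
    }
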
Paolo Bonzini
d3652a1b28 qcow2: make is_allocated return true for zero clusters
Otherwise, live migration of the top layer will miss zero clusters and
let the backing file show through.  This also matches what is done in qed.

QCOW2_CLUSTER_ZERO clusters are invalid in v2 image files.  Check this
directly in qcow2_get_cluster_offset instead of replicating the test
everywhere.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 381b487d54)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 11:00:19 -05:00
David Gibson
51943504d5 pseries: Add compatible property to root of device tree
Currently, for the pseries machine the device tree supplied by qemu to SLOF
and from there to the guest does not include a 'compatible' property at the
root level.  Usually that works fine, since in this case the compatible
property doesn't really give any information not already found in the
'device_type' or 'model' properties.

However, the lack of 'compatible' confuses the bootloader install in the
SLES11 SP2 and SLES11 SP3 installers.  This patch therefore adds a token
'compatible' property to work around that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Cc: qemu-stable@nongnu.org
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit d63919c93e)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:59:03 -05:00
Christian Borntraeger
4d1cdb9efd Allow virtio-net features for legacy s390 virtio bus
Enable all virtio-net features for the legacy s390 virtio bus. This also fixes
kernel BUG at /usr/src/packages/BUILD/kernel-default-3.0.58/linux-3.0/drivers/s390/kvm/kvm_virtio.c:121!

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 35569cea79)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:57:53 -05:00
Cole Robinson
c3b81e01b8 rtc-test: Fix test failures with recent glib
As of glib 2.35.4, glib changed its logic for ordering test cases:

https://bugzilla.gnome.org/show_bug.cgi?id=694487

This was causing failures in rtc-test. Group the reordered test
cases into their own suite, which maintains the original ordering.

CC: qemu-stable@nongnu.org
Signed-off-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit eeb29fb9aa)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:56:19 -05:00
Paolo Bonzini
99b1f39bd2 scsi-disk: do not complete canceled UNMAP requests
Canceled requests should never be completed, and doing that could cause
accesses to a NULL hba_private field.

Cc: qemu-stable@nongnu.org
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d0242eadc5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:54:35 -05:00
Paolo Bonzini
f23ab037c7 scsi: do not call scsi_read_data/scsi_write_data for a canceled request
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6f6710aa99)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:53:33 -05:00
Paolo Bonzini
0c918dd600 iscsi: look for pkg-config file too
Due to library conflicts, Fedora will have to put libiscsi in
/usr/lib/iscsi.  Simplify configuration by using a pkg-config
file.  The Fedora package will distribute one, and the patch
to add it has been sent to upstream libiscsi as well.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3c33ea9640)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:52:33 -05:00
Paolo Bonzini
a8b090ef08 scsi-disk: handle io_canceled uniformly and correctly
Always check it immediately after calling bdrv_acct_done, and
always do a "goto done" in case the "done" label has to free
some memory---as is the case for scsi_unmap_complete in the
previous patch.

This patch could fix problems that happen when a request is
split into multiple parts, and one of them is canceled.  Then
the next part is fired, but the HBA's cancellation callbacks have
fired already.  Whether this happens or not, depends on how the
block/ driver implements AIO cancellation.  It it does a simple
bdrv_drain_all() or similar, then it will not have a problem.
If it only cancels the given AIOCB, this scenario could happen.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0c92e0e6b6)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:50:28 -05:00
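
The control-flow pattern described above, as a hedged, self-contained sketch
(generic request type, not the real scsi-disk code): check the canceled flag
right after accounting and always leave through the cleanup label.

    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct Req {
        bool canceled;
        void *tmp_buf;
    } Req;

    static void complete_io(Req *r)
    {
        /* accounting for the transfer would happen here */
        if (r->canceled) {
            goto done;          /* never complete a canceled request */
        }
        /* ... signal completion to the guest only for live requests ... */
    done:
        free(r->tmp_buf);       /* cleanup runs on every path */
        r->tmp_buf = NULL;
    }
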
Michael Roth
4a38944326 qemu-ga: make guest-sync-delimited available during fsfreeze
We currently maintain a whitelist of commands that are safe during
fsfreeze. During fsfreeze, we disable all commands that aren't part of
that whitelist.

guest-sync-delimited meets the criteria for being whitelisted, and is
also required for qemu-ga clients that rely on guest-sync-delimited for
re-syncing the channel after a timeout.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit c5dcb6ae23)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:49:47 -05:00
Markus Armbruster
b7ff1a7a00 qmp: netdev_add is like -netdev, not -net, fix documentation
Cc: qemu-stable@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit af347aa5a5)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:43:46 -05:00
Gerd Hoffmann
d49fed4c55 vga: fix byteswapping.
In case host and guest endianness differ the vga code first creates
a shared surface (using qemu_create_displaysurface_from), then patches
the surface format to indicate that the bytes must be swapped.

The switch to pixman broke that hack as the format patching isn't
propagated into the pixman image, so ui code using the pixman image
directly (such as vnc) uses the wrong format.

Fix that by adding a byteswap parameter to
qemu_create_displaysurface_from, so we'll use the correct format
when creating the surface (and the pixman image) and don't have
to patch the format afterwards.

[ v2: unbreak xen build ]

Cc: qemu-stable@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk
Cc: agraf@suse.de
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 1361349432-23884-1-git-send-email-kraxel@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit b1424e0381)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:34:41 -05:00
Jason Wang
cebb8ebe41 help: add docs for multiqueue tap options
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-id: 1361354641-51969-1-git-send-email-jasowang@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
(cherry picked from commit 2ca81baa0b)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:32:37 -05:00
Jason Wang
3b39a11cde net: reduce the unnecessary memory allocation of multiqueue
Edivaldo reports a problem: the array of NetClientState in NICState is too
large - MAX_QUEUE_NUM (1024) entries - which wastes memory even if multiqueue
is not used.

Instead of using static arrays, solve this issue by allocating the queues on
demand for both the NetClientState array in NICState and the VirtIONetQueue
array in VirtIONet.

Tested by myself, with a single virtio-net-pci device. The memory allocation
is almost the same as before multiqueue was merged.

Cc: Edivaldo de Araujo Pereira <edivaldoapereira@yahoo.com.br>
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f6b26cf257)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:28:29 -05:00
Igor Mitsyanko
ec9f828341 qemu-char.c: fix waiting for telnet connection message
The current colon position in the "waiting for telnet connection" message template
produces messages like:
QEMU waiting for connection on: telnet::127.0.0.16666,server

After moving the colon to the right, we get correct messages like:
QEMU waiting for connection on: telnet:127.0.0.1:6666,server

Signed-off-by: Igor Mitsyanko <i.mitsyanko@gmail.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit e5545854dd)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:25:05 -05:00
Jason Wang
332e93417a tap: forbid creating multiqueue tap when hub is used
Obviously, the hub does not support multiqueue tap. So this patch forbids
creating a multiqueue tap when a hub is used, to prevent a crash with command
lines such as "-net tap,queues=2".

Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit ce675a7579)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:07:24 -05:00
Peter Lieven
e6b795f34e block: complete all IOs before .bdrv_truncate
bdrv_truncate() invalidates the bdrv_check_request() result for
in-flight requests, so there had better be none.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Reported-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9a665b2b86)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:05:31 -05:00
Paolo Bonzini
51968b8503 coroutine: trim down nesting level in perf_nesting test
20000 nested coroutines require 20 GB of virtual address space.
Only nest 1000 of them so that the test (only enabled with
"-m perf" on the command line) runs on 32-bit machines too.

Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 027003152f)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 10:04:32 -05:00
Andreas Färber
80d8b5da48 target-ppc: Fix "G2leGP3" PVR
Unlike derived PVR constants mapped to CPU_POWERPC_G2LEgp3, the
"G2leGP3" model definition itself used the CPU_POWERPC_G2LEgp1 PVR.

Fixing this will allow aliasing CPU_POWERPC_G2LEgp3-using types to
"G2leGP3".

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit bfe6d5b0da)

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2013-04-02 09:52:13 -05:00
2312 changed files with 167473 additions and 317391 deletions

.gitignore

@@ -1,74 +1,63 @@
/config-devices.*
/config-all-devices.*
/config-all-disas.*
/config-host.*
/config-target.*
/config.status
/trace/generated-tracers.h
/trace/generated-tracers.c
/trace/generated-tracers-dtrace.h
/trace/generated-tracers.dtrace
/trace/generated-events.h
/trace/generated-events.c
/trace/generated-ust-provider.h
/trace/generated-ust.c
/libcacard/trace/generated-tracers.c
config-devices.*
config-all-devices.*
config-all-disas.*
config-host.*
config-target.*
trace/generated-tracers.h
trace/generated-tracers.c
trace/generated-tracers-dtrace.h
trace/generated-tracers-dtrace.dtrace
libcacard/trace/generated-tracers.c
*-timestamp
/*-softmmu
/*-darwin-user
/*-linux-user
/*-bsd-user
*-softmmu
*-darwin-user
*-linux-user
*-bsd-user
libdis*
libuser
/linux-headers/asm
/qapi-generated
/qapi-types.[ch]
/qapi-visit.[ch]
/qmp-commands.h
/qmp-marshal.c
/qemu-doc.html
/qemu-tech.html
/qemu-doc.info
/qemu-tech.info
/qemu.1
/qemu.pod
/qemu-img.1
/qemu-img.pod
/qemu-img
/qemu-nbd
/qemu-nbd.8
/qemu-nbd.pod
/qemu-options.def
/qemu-options.texi
/qemu-img-cmds.texi
/qemu-img-cmds.h
/qemu-io
/qemu-ga
/qemu-bridge-helper
/qemu-monitor.texi
/qmp-commands.txt
/vscclient
/test-bitops
/test-coroutine
/test-int128
/test-opts-visitor
/test-qmp-input-visitor
/test-qmp-output-visitor
/test-string-input-visitor
/test-string-output-visitor
/test-visitor-serialization
/fsdev/virtfs-proxy-helper
/fsdev/virtfs-proxy-helper.1
/fsdev/virtfs-proxy-helper.pod
/.gdbinit
linux-headers/asm
qapi-generated
qapi-types.[ch]
qapi-visit.[ch]
qmp-commands.h
qmp-marshal.c
qemu-doc.html
qemu-tech.html
qemu-doc.info
qemu-tech.info
qemu.1
qemu.pod
qemu-img.1
qemu-img.pod
qemu-img
qemu-nbd
qemu-nbd.8
qemu-nbd.pod
qemu-options.def
qemu-options.texi
qemu-img-cmds.texi
qemu-img-cmds.h
qemu-io
qemu-ga
qemu-bridge-helper
qemu-monitor.texi
vscclient
QMP/qmp-commands.txt
test-coroutine
test-qmp-input-visitor
test-qmp-output-visitor
test-string-input-visitor
test-string-output-visitor
test-visitor-serialization
fsdev/virtfs-proxy-helper
fsdev/virtfs-proxy-helper.1
fsdev/virtfs-proxy-helper.pod
.gdbinit
*.a
*.aux
*.cp
*.dvi
*.exe
*.dll
*.so
*.mo
*.fn
*.ky
*.log
@@ -82,35 +71,29 @@ libuser
*.tp
*.vr
*.d
!/scripts/qemu-guest-agent/fsfreeze-hook.d
!scripts/qemu-guest-agent/fsfreeze-hook.d
*.o
*.lo
*.la
*.pc
.libs
.sdk
*.swp
*.orig
.pc
*.gcda
*.gcno
patches
/pc-bios/bios-pq/status
/pc-bios/vgabios-pq/status
/pc-bios/optionrom/linuxboot.asm
/pc-bios/optionrom/linuxboot.bin
/pc-bios/optionrom/linuxboot.raw
/pc-bios/optionrom/linuxboot.img
/pc-bios/optionrom/multiboot.asm
/pc-bios/optionrom/multiboot.bin
/pc-bios/optionrom/multiboot.raw
/pc-bios/optionrom/multiboot.img
/pc-bios/optionrom/kvmvapic.asm
/pc-bios/optionrom/kvmvapic.bin
/pc-bios/optionrom/kvmvapic.raw
/pc-bios/optionrom/kvmvapic.img
/pc-bios/s390-ccw/s390-ccw.elf
/pc-bios/s390-ccw/s390-ccw.img
pc-bios/bios-pq/status
pc-bios/vgabios-pq/status
pc-bios/optionrom/linuxboot.bin
pc-bios/optionrom/linuxboot.raw
pc-bios/optionrom/linuxboot.img
pc-bios/optionrom/multiboot.bin
pc-bios/optionrom/multiboot.raw
pc-bios/optionrom/multiboot.img
pc-bios/optionrom/kvmvapic.bin
pc-bios/optionrom/kvmvapic.raw
pc-bios/optionrom/kvmvapic.img
pc-bios/s390-ccw/s390-ccw.elf
pc-bios/s390-ccw/s390-ccw.img
.stgit-*
cscope.*
tags

.gitmodules

@@ -1,27 +1,24 @@
[submodule "roms/vgabios"]
path = roms/vgabios
url = git://git.qemu-project.org/vgabios.git/
url = git://git.qemu.org/vgabios.git/
[submodule "roms/seabios"]
path = roms/seabios
url = git://git.qemu-project.org/seabios.git/
url = git://git.qemu.org/seabios.git/
[submodule "roms/SLOF"]
path = roms/SLOF
url = git://git.qemu-project.org/SLOF.git
url = git://git.qemu.org/SLOF.git
[submodule "roms/ipxe"]
path = roms/ipxe
url = git://git.qemu-project.org/ipxe.git
url = git://git.qemu.org/ipxe.git
[submodule "roms/openbios"]
path = roms/openbios
url = git://git.qemu-project.org/openbios.git
url = git://git.qemu.org/openbios.git
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = git://github.com/rth7680/qemu-palcode.git
url = git://repo.or.cz/qemu-palcode.git
[submodule "roms/sgabios"]
path = roms/sgabios
url = git://git.qemu-project.org/sgabios.git
url = git://git.qemu.org/sgabios.git
[submodule "pixman"]
path = pixman
url = git://anongit.freedesktop.org/pixman
[submodule "dtc"]
path = dtc
url = git://git.qemu-project.org/dtc.git

.mailmap

@@ -2,8 +2,7 @@
# into proper addresses so that they are counted properly in git shortlog output.
#
Andrzej Zaborowski <balrogg@gmail.com> balrog <balrog@c046a42c-6fe2-441c-8c8c-71466251a162>
Anthony Liguori <anthony@codemonkey.ws> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Anthony Liguori <anthony@codemonkey.ws> Anthony Liguori <aliguori@us.ibm.com>
Anthony Liguori <aliguori@us.ibm.com> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
Aurelien Jarno <aurelien@aurel32.net> aurel32 <aurel32@c046a42c-6fe2-441c-8c8c-71466251a162>
Blue Swirl <blauwirbel@gmail.com> blueswir1 <blueswir1@c046a42c-6fe2-441c-8c8c-71466251a162>
Edgar E. Iglesias <edgar.iglesias@gmail.com> edgar_igl <edgar_igl@c046a42c-6fe2-441c-8c8c-71466251a162>

.travis.yml

@@ -1,72 +0,0 @@
language: c
python:
- "2.4"
compiler:
- gcc
- clang
env:
global:
- TEST_CMD="make check"
- EXTRA_CONFIG=""
# Development packages, EXTRA_PKGS saved for additional builds
- CORE_PKGS="libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev"
- NET_PKGS="libseccomp-dev libgnutls-dev libssh2-1-dev libspice-server-dev libspice-protocol-dev libnss3-dev"
- GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
- EXTRA_PKGS=""
matrix:
- TARGETS=alpha-softmmu,alpha-linux-user
- TARGETS=arm-softmmu,arm-linux-user
- TARGETS=aarch64-softmmu,aarch64-linux-user
- TARGETS=cris-softmmu
- TARGETS=i386-softmmu,x86_64-softmmu
- TARGETS=lm32-softmmu
- TARGETS=m68k-softmmu
- TARGETS=microblaze-softmmu,microblazeel-softmmu
- TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
- TARGETS=moxie-softmmu
- TARGETS=or32-softmmu,
- TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
- TARGETS=s390x-softmmu
- TARGETS=sh4-softmmu,sh4eb-softmmu
- TARGETS=sparc-softmmu,sparc64-softmmu
- TARGETS=unicore32-softmmu
- TARGETS=xtensa-softmmu,xtensaeb-softmmu
before_install:
- git submodule update --init --recursive
- sudo apt-get update -qq
- sudo apt-get install -qq ${CORE_PKGS} ${NET_PKGS} ${GUI_PKGS} ${EXTRA_PKGS}
script: "./configure --target-list=${TARGETS} ${EXTRA_CONFIG} && make && ${TEST_CMD}"
matrix:
# We manually include a number of additional build for non-standard bits
include:
# Debug related options
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-debug"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
compiler: gcc
# Currently configure doesn't force --disable-pie
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_PKGS="sparse"
EXTRA_CONFIG="--enable-sparse"
compiler: gcc
# All the trace backends (apart from dtrace)
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=stderr"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=simple"
compiler: gcc
- env: TARGETS=i386-softmmu,x86_64-softmmu
EXTRA_CONFIG="--enable-trace-backend=ftrace"
TEST_CMD=""
compiler: gcc
# This disabled make check for the ftrace backend which needs more setting up
# Currently broken on 12.04 due to mis-packaged liburcu and changed API, will be pulled.
#- env: TARGETS=i386-softmmu,x86_64-softmmu
# EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
# EXTRA_CONFIG="--enable-trace-backend=ust"

Changelog

@@ -1,6 +1,6 @@
This file documents changes for QEMU releases 0.12 and earlier.
For changelog information for later releases, see
http://wiki.qemu-project.org/ChangeLog or look at the git history for
http://wiki.qemu.org/ChangeLog or look at the git history for
more detailed information.

HACKING

@@ -40,23 +40,8 @@ speaking, the size of guest memory can always fit into ram_addr_t but
it would not be correct to store an actual guest physical address in a
ram_addr_t.
For CPU virtual addresses there are several possible types.
vaddr is the best type to use to hold a CPU virtual address in
target-independent code. It is guaranteed to be large enough to hold a
virtual address for any target, and it does not change size from target
to target. It is always unsigned.
target_ulong is a type the size of a virtual address on the CPU; this means
it may be 32 or 64 bits depending on which target is being built. It should
therefore be used only in target-specific code, and in some
performance-critical built-per-target core code such as the TLB code.
There is also a signed version, target_long.
abi_ulong is for the *-user targets, and represents a type the size of
'void *' in that target's ABI. (This may not be the same as the size of a
full CPU virtual address in the case of target ABIs which use 32 bit pointers
on 64 bit CPUs, like sparc32plus.) Definitions of structures that must match
the target's ABI must use this type for anything that on the target is defined
to be an 'unsigned long' or a pointer type.
There is also a signed version, abi_long.
Use target_ulong (or abi_ulong) for CPU virtual addresses, however
devices should not need to use target_ulong.
Of course, take all of the above with a grain of salt. If you're about
to use some system interface that requires a type like size_t, pid_t or
@@ -93,15 +78,16 @@ avoided.
Use of the malloc/free/realloc/calloc/valloc/memalign/posix_memalign
APIs is not allowed in the QEMU codebase. Instead of these routines,
use the GLib memory allocation routines g_malloc/g_malloc0/g_new/
g_new0/g_realloc/g_free or QEMU's qemu_memalign/qemu_blockalign/qemu_vfree
g_new0/g_realloc/g_free or QEMU's qemu_vmalloc/qemu_memalign/qemu_vfree
APIs.
Please note that g_malloc will exit on allocation failure, so there
is no need to test for failure (as you would have to with malloc).
Calling g_malloc with a zero size is valid and will return NULL.
Memory allocated by qemu_memalign or qemu_blockalign must be freed with
qemu_vfree, since breaking this will cause problems on Win32.
Memory allocated by qemu_vmalloc or qemu_memalign must be freed with
qemu_vfree, since breaking this will cause problems on Win32 and user
emulators.
4. String manipulation

LICENSE

@@ -1,21 +1,16 @@
The following points clarify the QEMU license:
1) QEMU as a whole is released under the GNU General Public License,
version 2.
1) QEMU as a whole is released under the GNU General Public License
2) Parts of QEMU have specific licenses which are compatible with the
GNU General Public License, version 2. Hence each source file contains
its own licensing information. Source files with no licensing information
are released under the GNU General Public License, version 2 or (at your
option) any later version.
GNU General Public License. Hence each source file contains its own
licensing information.
As of July 2013, contributions under version 2 of the GNU General Public
License (and no later version) are only accepted for the following files
or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.
Many hardware device emulation sources are released under the BSD license.
3) The Tiny Code Generator (TCG) is released under the BSD license
(see license headers in files).
4) QEMU is a trademark of Fabrice Bellard.
Fabrice Bellard and the QEMU team
Fabrice Bellard.

MAINTAINERS

@@ -50,7 +50,8 @@ Descriptions of section entries:
General Project Administration
------------------------------
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
M: Paul Brook <paul@codesourcery.com>
Guest CPU cores (TCG):
----------------------
@@ -58,118 +59,87 @@ Alpha
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: target-alpha/
F: hw/alpha/
ARM
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: target-arm/
F: hw/arm/
F: hw/cpu/a*mpcore.c
CRIS
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: target-cris/
F: hw/cris/
LM32
M: Michael Walle <michael@walle.cc>
S: Maintained
F: target-lm32/
F: hw/lm32/
F: hw/char/lm32_*
M68K
S: Orphan
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: target-m68k/
F: hw/m68k/
MicroBlaze
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: target-microblaze/
F: hw/microblaze/
MIPS
M: Aurelien Jarno <aurelien@aurel32.net>
S: Odd Fixes
F: target-mips/
F: hw/mips/
Moxie
M: Anthony Green <green@moxielogic.com>
S: Maintained
F: target-moxie/
OpenRISC
M: Jia Liu <proljc@gmail.com>
S: Maintained
F: target-openrisc/
F: hw/openrisc/
PowerPC
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: target-ppc/
F: hw/ppc/
S390
M: Richard Henderson <rth@twiddle.net>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target-s390x/
F: hw/s390x/
SH4
M: Aurelien Jarno <aurelien@aurel32.net>
S: Odd Fixes
F: target-sh4/
F: hw/sh4/
SPARC
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: target-sparc/
F: hw/sparc/
F: hw/sparc64/
UniCore32
M: Guan Xuetao <gxt@mprc.pku.edu.cn>
S: Maintained
F: target-unicore32/
F: hw/unicore32/
X86
M: qemu-devel@nongnu.org
S: Odd Fixes
F: target-i386/
F: hw/i386/
Xtensa
M: Max Filippov <jcmvbkbc@gmail.com>
W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
S: Maintained
F: target-xtensa/
F: hw/xtensa/
Guest CPU Cores (KVM):
----------------------
Overall
M: Gleb Natapov <gleb@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Marcelo Tosatti <mtosatti@redhat.com>
L: kvm@vger.kernel.org
S: Supported
F: kvm-*
F: */kvm.*
ARM
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: target-arm/kvm.c
PPC
M: Alexander Graf <agraf@suse.de>
S: Maintained
@@ -219,171 +189,162 @@ F: *win32*
ARM Machines
------------
Allwinner-a10
M: Li Guang <lig.fnst@cn.fujitsu.com>
S: Maintained
F: hw/*/allwinner-a10*
F: include/hw/*/allwinner-a10*
F: hw/arm/cubieboard.c
Exynos
M: Evgeny Voevodin <e.voevodin@samsung.com>
M: Maksim Kozlov <m.kozlov@samsung.com>
M: Igor Mitsyanko <i.mitsyanko@gmail.com>
M: Igor Mitsyanko <i.mitsyanko@samsung.com>
M: Dmitry Solodkiy <d.solodkiy@samsung.com>
S: Maintained
F: hw/*/exynos*
F: hw/exynos*
Calxeda Highbank
M: Mark Langsdorf <mark.langsdorf@calxeda.com>
S: Supported
F: hw/arm/highbank.c
F: hw/net/xgmac.c
Canon DIGIC
M: Antony Pavlov <antonynpavlov@gmail.com>
S: Maintained
F: include/hw/arm/digic.h
F: hw/*/digic*
F: hw/highbank.c
F: hw/xgmac.c
Gumstix
M: qemu-devel@nongnu.org
S: Orphan
F: hw/arm/gumstix.c
F: hw/gumstix.c
i.MX31
M: Peter Chubb <peter.chubb@nicta.com.au>
S: Odd fixes
F: hw/*/imx*
F: hw/arm/kzm.c
F: hw/imx*
F: hw/kzm.c
Integrator CP
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/arm/integratorcp.c
F: hw/integratorcp.c
Mainstone
M: qemu-devel@nongnu.org
S: Orphan
F: hw/arm/mainstone.c
F: hw/mainstone.c
Musicpal
M: Jan Kiszka <jan.kiszka@web.de>
S: Maintained
F: hw/arm/musicpal.c
F: hw/musicpal.c
nSeries
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/arm/nseries.c
F: hw/nseries.c
Palm
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/arm/palm.c
F: hw/palm.c
Real View
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/arm/realview*
F: hw/realview*
Spitz
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/arm/spitz.c
F: hw/spitz.c
Stellaris
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/stellaris*
F: hw/stellaris.c
Versatile PB
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/versatile*
F: hw/versatilepb.c
Xilinx Zynq
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/arm/xilinx_zynq.c
F: hw/misc/zynq_slcr.c
F: hw/*/cadence_*
F: hw/ssi/xilinx_spips.c
F: hw/xilinx_zynq.c
F: hw/zynq_slcr.c
F: hw/cadence_*
F: hw/xilinx_spips.c
CRIS Machines
-------------
Axis Dev88
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/cris/axis_dev88.c
F: hw/*/etraxfs_*.c
F: hw/axis_dev88.c
etraxfs
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/etraxfs.c
LM32 Machines
-------------
EVR32 and uclinux BSP
M: Michael Walle <michael@walle.cc>
S: Maintained
F: hw/lm32/lm32_boards.c
F: hw/lm32_boards.c
milkymist
M: Michael Walle <michael@walle.cc>
S: Maintained
F: hw/lm32/milkymist.c
F: hw/milkymist.c
M68K Machines
-------------
an5206
S: Orphan
F: hw/m68k/an5206.c
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/an5206.c
dummy_m68k
S: Orphan
F: hw/m68k/dummy_m68k.c
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/dummy_m68k.c
mcf5208
S: Orphan
F: hw/m68k/mcf5208.c
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/mcf5208.c
MicroBlaze Machines
-------------------
petalogix_s3adsp1800
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/microblaze/petalogix_s3adsp1800_mmu.c
F: hw/petalogix_s3adsp1800.c
petalogix_ml605
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/microblaze/petalogix_ml605_mmu.c
F: hw/petalogix_ml605_mmu.c
MIPS Machines
-------------
Jazz
M: Hervé Poussineau <hpoussin@reactos.org>
S: Maintained
F: hw/mips/mips_jazz.c
F: hw/mips_jazz.c
Malta
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: hw/mips/mips_malta.c
F: hw/mips_malta.c
Mipssim
M: qemu-devel@nongnu.org
S: Orphan
F: hw/mips/mips_mipssim.c
F: hw/mips_mipssim.c
R4000
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: hw/mips/mips_r4k.c
OpenRISC Machines
-----------------
or1k-sim
M: Jia Liu <proljc@gmail.com>
S: Maintained
F: hw/openrisc/openrisc_sim.c
F: hw/mips_r4k.c
PowerPC Machines
----------------
@@ -391,13 +352,13 @@ PowerPC Machines
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/ppc405_boards.c
F: hw/ppc405_boards.c
Bamboo
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/ppc440_bamboo.c
F: hw/ppc440_bamboo.c
e500
M: Alexander Graf <agraf@suse.de>
@@ -413,206 +374,178 @@ M: Scott Wood <scottwood@freescale.com>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/ppc/mpc8544ds.c
F: hw/ppc/mpc8544_guts.c
F: hw/mpc8544_guts.c
New World
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/mac_newworld.c
F: hw/pci-host/uninorth.c
F: hw/pci-bridge/dec.[hc]
F: hw/misc/macio/
F: hw/unin_pci.c
F: hw/dec_pci.[hc]
Old World
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/mac_oldworld.c
F: hw/pci-host/grackle.c
F: hw/misc/macio/
F: hw/grackle_pci.c
PReP
M: Andreas Färber <andreas.faerber@web.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/prep.c
F: hw/pci-host/prep.[hc]
F: hw/isa/pc87312.[hc]
F: hw/prep_pci.[hc]
F: hw/pc87312.[hc]
sPAPR
M: David Gibson <david@gibson.dropbear.id.au>
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/*/spapr*
F: include/hw/*/spapr*
F: hw/*/xics*
F: include/hw/*/xics*
F: pc-bios/spapr-rtas/*
F: hw/spapr*
virtex_ml507
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/virtex_ml507.c
F: hw/virtex_ml507.c
SH4 Machines
------------
R2D
M: Magnus Damm <magnus.damm@gmail.com>
S: Maintained
F: hw/sh4/r2d.c
F: hw/r2d.c
Shix
M: Magnus Damm <magnus.damm@gmail.com>
S: Orphan
F: hw/sh4/shix.c
F: hw/shix.c
SPARC Machines
--------------
Sun4m
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: hw/sparc/sun4m.c
F: hw/sun4m.c
Sun4u
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: hw/sparc64/sun4u.c
F: hw/sun4u.c
Leon3
M: Fabien Chouteau <chouteau@adacore.com>
S: Maintained
F: hw/sparc/leon3.c
F: hw/*/grlib*
F: hw/leon3.c
F: hw/grlib*
S390 Machines
-------------
S390 Virtio
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: hw/s390x/s390-*.c
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Supported
F: hw/s390x/s390-virtio-ccw.c
F: hw/s390x/css.[hc]
T: git git://github.com/cohuck/qemu virtio-ccw-upstr
F: hw/s390-*.c
UniCore32 Machines
-------------
PKUnity-3 SoC initramfs-with-busybox
M: Guan Xuetao <gxt@mprc.pku.edu.cn>
S: Maintained
F: hw/*/puv3*
F: hw/puv3*
F: hw/unicore32/
X86 Machines
------------
PC
M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: include/hw/i386/
F: hw/i386/
F: hw/pci-host/piix.c
F: hw/pci-host/q35.c
F: hw/pci-host/pam.c
F: include/hw/pci-host/q35.h
F: include/hw/pci-host/pam.h
F: hw/isa/piix4.c
F: hw/isa/lpc_ich9.c
F: hw/i2c/smbus_ich9.c
F: hw/acpi/piix4.c
F: hw/acpi/ich9.c
F: include/hw/acpi/ich9.h
F: include/hw/acpi/piix.h
F: hw/pc.[ch]
F: hw/pc_piix.c
Xtensa Machines
---------------
sim
M: Max Filippov <jcmvbkbc@gmail.com>
S: Maintained
F: hw/xtensa/xtensa_sim.c
F: hw/xtensa_sim.c
Avnet LX60
M: Max Filippov <jcmvbkbc@gmail.com>
S: Maintained
F: hw/xtensa/xtensa_lx60.c
F: hw/xtensa_lx60.c
Devices
-------
IDE
M: Kevin Wolf <kwolf@redhat.com>
S: Odd Fixes
F: include/hw/ide.h
F: hw/ide/
OMAP
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/omap*
F: hw/omap*
PCI
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: include/hw/pci/*
F: hw/pci/*
F: hw/acpi/*
F: hw/pci*
F: hw/piix*
ppc4xx
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/ppc4*.c
F: hw/ppc4xx*.[hc]
ppce500
M: Alexander Graf <agraf@suse.de>
M: Scott Wood <scottwood@freescale.com>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/ppc/e500*
F: hw/ppce500_*
SCSI
M: Paolo Bonzini <pbonzini@redhat.com>
S: Supported
F: include/hw/scsi*
F: hw/scsi/*
F: hw/virtio-scsi.*
F: hw/scsi*
T: git git://github.com/bonzini/qemu.git scsi-next
LSI53C895A
S: Orphan
F: hw/scsi/lsi53c895a.c
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: hw/lsi53c895a.c
SSI
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/ssi/*
F: hw/block/m25p80.c
F: hw/ssi.*
F: hw/m25p80.c
USB
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: hw/usb/*
F: hw/usb*
VFIO
M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
F: hw/misc/vfio.c
F: hw/vfio*
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/*/*vhost*
F: hw/vhost*
virtio
M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: hw/*/virtio*
F: hw/virtio*
virtio-9p
M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
@@ -625,40 +558,32 @@ virtio-blk
M: Kevin Wolf <kwolf@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: hw/block/virtio-blk.c
virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
S: Supported
F: hw/s390x/virtio-ccw.[hc]
T: git git://github.com/cohuck/qemu virtio-ccw-upstr
F: hw/virtio-blk*
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
nvme
M: Keith Busch <keith.busch@intel.com>
S: Supported
F: hw/block/nvme*
F: hw/virtio-serial*
F: hw/virtio-console*
Xilinx EDK
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/*/xilinx_*
F: include/hw/xilinx.h
F: hw/xilinx_axi*
F: hw/xilinx_uartlite.c
F: hw/xilinx_intc.c
F: hw/xilinx_ethlite.c
F: hw/xilinx_timer.c
F: hw/xilinx.h
F: hw/xilinx_spi.c
Subsystems
----------
Audio
M: Vassili Karpov (malc) <av1474@comtv.ru>
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: audio/
F: hw/audio/
Block
M: Kevin Wolf <kwolf@redhat.com>
@@ -666,12 +591,9 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: block*
F: block/
F: hw/block/
T: git git://repo.or.cz/qemu/kevin.git block
T: git git://github.com/stefanha/qemu.git block
Character Devices
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Maintained
F: qemu-char.c
@@ -679,20 +601,14 @@ CPU
M: Andreas Färber <afaerber@suse.de>
S: Supported
F: qom/cpu.c
F: include/qom/cpu.h
F: include/qemu/cpu.h
F: target-i386/cpu.c
ICC Bus
M: Igor Mammedov <imammedo@redhat.com>
S: Supported
F: include/hw/cpu/icc_bus.h
F: hw/cpu/icc_bus.c
Device Tree
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device_tree.[ch]
F: device-tree.[ch]
GDB stub
M: qemu-devel@nongnu.org
@@ -703,50 +619,39 @@ F: gdb-xml/
SPICE
M: Gerd Hoffmann <kraxel@redhat.com>
S: Supported
F: include/ui/qemu-spice.h
F: ui/qemu-spice.h
F: ui/spice-*.c
F: audio/spiceaudio.c
F: hw/display/qxl*
F: hw/qxl*
Graphics
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Maintained
F: ui/
Cocoa graphics
M: Andreas Färber <andreas.faerber@web.de>
M: Peter Maydell <peter.maydell@linaro.org>
S: Odd Fixes
F: ui/cocoa.m
Main loop
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: vl.c
Human Monitor (HMP)
Monitor (QMP/HMP)
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Maintained
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: monitor.c
F: hmp.c
F: hmp-commands.hx
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
Network device layer
M: Anthony Liguori <aliguori@amazon.com>
M: Anthony Liguori <aliguori@us.ibm.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Maintained
F: net/
T: git git://github.com/stefanha/qemu.git net
Netmap network backend
M: Luigi Rizzo <rizzo@iet.unipi.it>
M: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
M: Vincenzo Maffione <v.maffione@gmail.com>
W: http://info.iet.unipi.it/~luigi/netmap/
S: Maintained
F: net/netmap.c
Network Block Device (NBD)
M: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
@@ -755,30 +660,6 @@ F: nbd.*
F: qemu-nbd.c
T: git git://github.com/bonzini/qemu.git nbd-next
QAPI
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
S: Maintained
F: qapi/
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
QAPI Schema
M: Eric Blake <eblake@redhat.com>
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: qapi-schema.json
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
QMP
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Maintained
F: qmp.c
F: monitor.c
F: qmp-commands.hx
F: QMP/
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
SLIRP
M: Jan Kiszka <jan.kiszka@siemens.com>
S: Maintained
@@ -799,12 +680,6 @@ M: Blue Swirl <blauwirbel@gmail.com>
S: Odd Fixes
F: scripts/checkpatch.pl
Seccomp
M: Eduardo Otubo <otubo@linux.vnet.ibm.com>
S: Supported
F: qemu-seccomp.c
F: include/sysemu/seccomp.h
Usermode Emulation
------------------
BSD user
@@ -821,21 +696,19 @@ Tiny Code Generator (TCG)
-------------------------
Common code
M: qemu-devel@nongnu.org
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/
AArch64 target
M: Claudio Fontana <claudio.fontana@huawei.com>
M: Claudio Fontana <claudio.fontana@gmail.com>
S: Maintained
F: tcg/aarch64/
ARM target
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: tcg/arm/
HPPA target
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/hppa/
i386 target
M: qemu-devel@nongnu.org
S: Maintained
@@ -876,73 +749,25 @@ TCI target
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: tcg/tci/
F: tci.c
Stable branches
---------------
Stable 1.0
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-1.0.git
T: git git://git.qemu.org/qemu-stable-1.0.git
S: Orphan
Stable 0.15
L: qemu-stable@nongnu.org
M: Andreas Färber <afaerber@suse.de>
T: git git://git.qemu-project.org/qemu-stable-0.15.git
S: Supported
T: git git://git.qemu.org/qemu-stable-0.15.git
S: Orphan
Stable 0.14
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-0.14.git
T: git git://git.qemu.org/qemu-stable-0.14.git
S: Orphan
Stable 0.10
L: qemu-stable@nongnu.org
T: git git://git.qemu-project.org/qemu-stable-0.10.git
T: git git://git.qemu.org/qemu-stable-0.10.git
S: Orphan
Block drivers
-------------
VMDK
M: Fam Zheng <famz@redhat.com>
S: Supported
F: block/vmdk.c
RBD
M: Josh Durgin <josh.durgin@inktank.com>
S: Supported
F: block/rbd.c
Sheepdog
M: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
M: Liu Yuan <namei.unix@gmail.com>
L: sheepdog@lists.wpkg.org
S: Supported
F: block/sheepdog.c
VHDX
M: Jeff Cody <jcody@redhat.com>
S: Supported
F: block/vhdx*
VDI
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: block/vdi.c
iSCSI
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Lieven <pl@kamp.de>
S: Supported
F: block/iscsi.c
NFS
M: Peter Lieven <pl@kamp.de>
S: Maintained
F: block/nfs.c
SSH
M: Richard W.M. Jones <rjones@redhat.com>
S: Supported
F: block/ssh.c

Makefile

@@ -19,23 +19,10 @@ seems to have been used for an in-tree build. You can fix this by running \
endif
endif
CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
CONFIG_ALL=y
-include config-all-devices.mak
-include config-all-disas.mak
include $(SRC_PATH)/rules.mak
config-host.mak: $(SRC_PATH)/configure
@echo $@ is out-of-date, running configure
@# TODO: The next lines include code which supports a smooth
@# transition from old configurations without config.status.
@# This code can be removed after QEMU 1.7.
@if test -x config.status; then \
./config.status; \
else \
sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh; \
fi
@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
else
config-host.mak:
ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
@@ -48,20 +35,12 @@ GENERATED_HEADERS = config-host.h qemu-options.def
GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c
GENERATED_HEADERS += trace/generated-events.h
GENERATED_SOURCES += trace/generated-events.c
GENERATED_HEADERS += trace/generated-tracers.h
ifeq ($(TRACE_BACKEND),dtrace)
GENERATED_HEADERS += trace/generated-tracers-dtrace.h
endif
GENERATED_SOURCES += trace/generated-tracers.c
ifeq ($(TRACE_BACKEND),ust)
GENERATED_HEADERS += trace/generated-ust-provider.h
GENERATED_SOURCES += trace/generated-ust.c
endif
# Don't try to regenerate Makefile or configure
# We don't generate any of them
Makefile: ;
@@ -77,7 +56,7 @@ LIBS+=-lz $(LIBS_TOOLS)
HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qmp-commands.txt
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 QMP/qmp-commands.txt
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -87,17 +66,14 @@ endif
SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory) BUILD_DIR=$(BUILD_DIR)
SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_DIRS))
SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %-config-devices.mak.d, $(TARGET_DIRS))
SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %/config-devices.mak.d, $(TARGET_DIRS))
ifeq ($(SUBDIR_DEVICES_MAK),)
config-all-devices.mak:
$(call quiet-command,echo '# no devices' > $@," GEN $@")
else
config-all-devices.mak: $(SUBDIR_DEVICES_MAK)
$(call quiet-command, sed -n \
's|^\([^=]*\)=\(.*\)$$|\1:=$$(findstring y,$$(\1)\2)|p' \
$(SUBDIR_DEVICES_MAK) | sort -u > $@, \
" GEN $@")
$(call quiet-command,cat $(SUBDIR_DEVICES_MAK) | grep =y | sort -u > $@," GEN $@")
endif
-include $(SUBDIR_DEVICES_MAK_DEP)
@@ -125,31 +101,21 @@ endif
defconfig:
rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK)
-include config-all-devices.mak
-include config-all-disas.mak
CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
CONFIG_ALL=y
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/Makefile.objs
endif
dummy := $(call unnest-vars,, \
stub-obj-y \
util-obj-y \
qga-obj-y \
block-obj-y \
block-obj-m \
common-obj-y \
common-obj-m)
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/tests/Makefile
endif
ifeq ($(CONFIG_SMARTCARD_NSS),y)
include $(SRC_PATH)/libcacard/Makefile
endif
all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
@@ -157,10 +123,6 @@ qemu-options.def: $(SRC_PATH)/qemu-options.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@")
SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))
$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
$(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
subdir-%:
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $* V="$(V)" TARGET_DIR="$*/" all,)
@@ -174,16 +136,6 @@ pixman/Makefile: $(SRC_PATH)/pixman/configure
$(SRC_PATH)/pixman/configure:
(cd $(SRC_PATH)/pixman; autoreconf -v --install)
DTC_MAKE_ARGS=-I$(SRC_PATH)/dtc VPATH=$(SRC_PATH)/dtc -C dtc V="$(V)" LIBFDT_srcdir=$(SRC_PATH)/dtc/libfdt
DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS)
DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
subdir-dtc:dtc/libfdt dtc/tests
$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)
dtc/%:
mkdir -p $@
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y)
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
@@ -196,21 +148,17 @@ recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)
$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.o")
$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.lo")
version.o: $(SRC_PATH)/version.rc config-host.h
$(call quiet-command,$(WINDRES) -I. -o $@ $<," RC $(TARGET_DIR)$@")
Makefile: $(version-obj-y) $(version-lobj-y)
version-obj-$(CONFIG_WIN32) += version.o
Makefile: $(version-obj-y)
######################################################################
# Build libraries
libqemustub.a: $(stub-obj-y)
libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o
block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
libqemuutil.a: $(util-obj-y)
######################################################################
@@ -218,11 +166,11 @@ qemu-img.o: qemu-img-cmds.h
qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o cmd.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
@@ -247,10 +195,10 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
qapi-types.c qapi-types.h :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -b < $<, " GEN $@")
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." < $<, " GEN $@")
qapi-visit.c qapi-visit.h :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." -b < $<, " GEN $@")
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." < $<, " GEN $@")
qmp-commands.h qmp-marshal.c :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o "." < $<, " GEN $@")
@@ -267,11 +215,8 @@ clean:
rm -f qemu-options.def
find . -name '*.[oda]' -type f -exec rm -f {} +
find . -name '*.l[oa]' -type f -exec rm -f {} +
find . -name '*$(DSOSUF)' -type f -exec rm -f {} +
find . -name '*.mo' -type f -exec rm -f {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -f fsdev/*.pod
rm -rf .libs */.libs
rm -f $(TOOLS) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -Rf .libs
rm -f qemu-img-cmds.h
@# May not be present in GENERATED_HEADERS
rm -f trace/generated-tracers-dtrace.dtrace*
@@ -280,6 +225,7 @@ clean:
rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
rm -rf qapi-generated
rm -rf qga/qapi-generated
$(MAKE) -C tests/tcg clean
for d in $(ALL_SUBDIRS); do \
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
rm -f $$d/qemu-options.def; \
@@ -295,7 +241,6 @@ qemu-%.tar.bz2:
distclean: clean
rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
rm -f config-all-devices.mak config-all-disas.mak
rm -f po/*.mo
rm -f roms/seabios/config.mak roms/vgabios/config.mak
rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
@@ -307,29 +252,24 @@ distclean: clean
for d in $(TARGET_DIRS); do \
rm -rf $$d || exit 1 ; \
done
rm -Rf .sdk
if test -f pixman/config.log; then make -C pixman distclean; fi
if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi
KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
ar de en-us fi fr-be hr it lv nl pl ru th \
common de-ch es fo fr-ca hu ja mk nl-be pt sl tr \
bepo cz
bepo
ifdef INSTALL_BLOBS
BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
acpi-dsdt.aml q35-acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
qemu-icon.bmp \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
multiboot.bin linuxboot.bin kvmvapic.bin \
s390-zipl.rom \
s390-ccw.img \
spapr-rtas.bin slof.bin \
palcode-clipper
else
@@ -339,16 +279,13 @@ endif
install-doc: $(DOCS)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
$(INSTALL_DATA) QMP/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
ifdef CONFIG_POSIX
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
ifneq ($(TOOLS),)
$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
$(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
endif
endif
ifdef CONFIG_VIRTFS
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) fsdev/virtfs-proxy-helper.1 "$(DESTDIR)$(mandir)/man1"
@@ -357,31 +294,17 @@ endif
install-datadir:
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)"
install-localstatedir:
ifdef CONFIG_POSIX
ifneq (,$(findstring qemu-ga,$(TOOLS)))
$(INSTALL_DIR) "$(DESTDIR)$(qemu_localstatedir)"/run
endif
endif
install-confdir:
$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"
install-sysconfig: install-datadir install-confdir
$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
install-datadir install-localstatedir
install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig install-datadir
$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
ifneq ($(TOOLS),)
$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
endif
ifneq ($(CONFIG_MODULES),)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
for s in $(patsubst %.mo,%$(DSOSUF),$(modules-m)); do \
$(INSTALL_PROG) $(STRIP_OPT) $$s "$(DESTDIR)$(qemu_moddir)/$${s//\//-}"; \
done
endif
ifneq ($(HELPERS-y),)
$(INSTALL_DIR) "$(DESTDIR)$(libexecdir)"
$(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)"
@@ -390,16 +313,13 @@ ifneq ($(BLOBS),)
set -e; for x in $(BLOBS); do \
$(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(qemu_datadir)"; \
done
endif
ifeq ($(CONFIG_GTK),y)
$(MAKE) -C po $@
endif
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/keymaps"
set -e; for x in $(KEYMAPS); do \
$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
done
for d in $(TARGET_DIRS); do \
$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
$(MAKE) -C $$d $@ || exit 1 ; \
done
# various test targets
@@ -408,8 +328,7 @@ test speed: all
.PHONY: TAGS
TAGS:
rm -f $@
find "$(SRC_PATH)" -name '*.[hc]' -exec etags --append {} +
find "$(SRC_PATH)" -name '*.[hc]' -print0 | xargs -0 etags
cscope:
rm -f ./cscope.*
@@ -439,7 +358,7 @@ qemu-options.texi: $(SRC_PATH)/qemu-options.hx
qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
QMP/qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@," GEN $@")
qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
@@ -478,61 +397,6 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
qemu-img.texi qemu-nbd.texi qemu-options.texi \
qemu-monitor.texi qemu-img-cmds.texi
ifdef CONFIG_WIN32
INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
nsisflags = -V2 -NOCD
ifneq ($(wildcard $(SRC_PATH)/dll),)
ifeq ($(ARCH),x86_64)
# 64 bit executables
DLL_PATH = $(SRC_PATH)/dll/w64
nsisflags += -DW64
else
# 32 bit executables
DLL_PATH = $(SRC_PATH)/dll/w32
endif
endif
.PHONY: installer
installer: $(INSTALLER)
INSTDIR=/tmp/qemu-nsis
$(INSTALLER): $(SRC_PATH)/qemu.nsi
make install prefix=${INSTDIR}
ifdef SIGNCODE
(cd ${INSTDIR}; \
for i in *.exe; do \
$(SIGNCODE) $${i}; \
done \
)
endif # SIGNCODE
(cd ${INSTDIR}; \
for i in qemu-system-*.exe; do \
arch=$${i%.exe}; \
arch=$${arch#qemu-system-}; \
echo Section \"$$arch\" Section_$$arch; \
echo SetOutPath \"\$$INSTDIR\"; \
echo File \"\$${BINDIR}\\$$i\"; \
echo SectionEnd; \
done \
) >${INSTDIR}/system-emulations.nsh
makensis $(nsisflags) \
$(if $(BUILD_DOCS),-DCONFIG_DOCUMENTATION="y") \
$(if $(CONFIG_GTK),-DCONFIG_GTK="y") \
-DBINDIR="${INSTDIR}" \
$(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
-DSRCDIR="$(SRC_PATH)" \
-DOUTFILE="$(INSTALLER)" \
$(SRC_PATH)/qemu.nsi
rm -r ${INSTDIR}
ifdef SIGNCODE
$(SIGNCODE) $(INSTALLER)
endif # SIGNCODE
endif # CONFIG_WIN
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))


@@ -13,14 +13,16 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qapi-types.o qapi-visit.o
block-obj-y += qemu-io-cmds.o
block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
block-obj-y += qemu-coroutine-sleep.o
block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
block-obj-m = block/
ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
# only pull in the actual virtio-9p device if we also enabled virtio.
CONFIG_REALLY_VIRTFS=y
endif
######################################################################
# smartcard
@@ -30,7 +32,6 @@ libcacard-y += libcacard/vcard.o libcacard/vreader.o
libcacard-y += libcacard/vcard_emul_nss.o
libcacard-y += libcacard/vcard_emul_type.o
libcacard-y += libcacard/card_7816.o
libcacard-y += libcacard/vcardt.o
######################################################################
# Target independent part of system emulation. The long term path is to
@@ -38,18 +39,15 @@ libcacard-y += libcacard/vcardt.o
# single QEMU executable should support all CPUs and machines.
ifeq ($(CONFIG_SOFTMMU),y)
common-obj-y = blockdev.o blockdev-nbd.o block/
common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
common-obj-y += net/
common-obj-y += qdev-monitor.o device-hotplug.o
common-obj-y += readline.o
common-obj-$(CONFIG_WIN32) += os-win32.o
common-obj-$(CONFIG_POSIX) += os-posix.o
common-obj-$(CONFIG_LINUX) += fsdev/
common-obj-y += migration.o migration-tcp.o
common-obj-y += vmstate.o
common-obj-y += qemu-file.o
common-obj-$(CONFIG_RDMA) += migration-rdma.o
common-obj-y += qemu-char.o #aio.o
common-obj-y += block-migration.o
common-obj-y += page_cache.o xbzrle.o
@@ -65,8 +63,8 @@ common-obj-y += ui/
common-obj-y += bt-host.o bt-vhci.o
common-obj-y += dma-helpers.o
common-obj-y += qtest.o
common-obj-y += vl.o
common-obj-y += tpm.o
common-obj-$(CONFIG_SLIRP) += slirp/
@@ -79,15 +77,10 @@ common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)
######################################################################
# qapi
common-obj-y += qmp-marshal.o
common-obj-y += qmp-marshal.o qapi-visit.o qapi-types.o
common-obj-y += qmp.o hmp.o
endif
######################################################################
# some qapi visitors are used by both system and user emulation:
common-obj-y += qapi-visit.o qapi-types.o
#######################################################################
# Target-independent parts used in system and user emulation
common-obj-y += qemu-log.o
@@ -96,15 +89,23 @@ common-obj-y += hw/
common-obj-y += qom/
common-obj-y += disas/
######################################################################
# Resource file for Windows executables
version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
######################################################################
# guest agent
# FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
# by libqemuutil.a. These should be moved to a separate .json schema.
qga-obj-y = qga/ qapi-types.o qapi-visit.o
qga-vss-dll-obj-y = qga/
vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
QEMU_CFLAGS+=$(GLIB_CFLAGS)
nested-vars += \
stub-obj-y \
util-obj-y \
qga-obj-y \
block-obj-y \
common-obj-y
dummy := $(call unnest-vars)


@@ -1,8 +1,8 @@
# -*- Mode: makefile -*-
include ../config-host.mak
include config-target.mak
include config-devices.mak
include config-target.mak
include $(SRC_PATH)/rules.mak
$(call set-vpath, $(SRC_PATH))
@@ -15,14 +15,14 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/include
ifdef CONFIG_USER_ONLY
# user emulator name
QEMU_PROG=qemu-$(TARGET_NAME)
QEMU_PROG=qemu-$(TARGET_ARCH2)
else
# system emulator name
ifneq (,$(findstring -mwindows,$(libs_softmmu)))
ifneq (,$(findstring -mwindows,$(LIBS)))
# Terminate program name with a 'w' because the linker builds a windows executable.
QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF)
QEMU_PROGW=qemu-system-$(TARGET_ARCH2)w$(EXESUF)
endif # windows executable
QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
QEMU_PROG=qemu-system-$(TARGET_ARCH2)$(EXESUF)
endif
PROGS=$(QEMU_PROG)
@@ -31,11 +31,15 @@ PROGS+=$(QEMU_PROGW)
endif
STPFILES=
ifndef CONFIG_HAIKU
LIBS+=-lm
endif
config-target.h: config-target.h-timestamp
config-target.h-timestamp: config-target.mak
ifdef CONFIG_TRACE_SYSTEMTAP
stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp
stap: $(QEMU_PROG).stp
ifdef CONFIG_USER_ONLY
TARGET_TYPE=user
@@ -43,24 +47,14 @@ else
TARGET_TYPE=system
endif
$(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backend=$(TRACE_BACKEND) \
--binary=$(bindir)/$(QEMU_PROG) \
--target-name=$(TARGET_NAME) \
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp-installed")
$(QEMU_PROG).stp: $(SRC_PATH)/trace-events
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backend=$(TRACE_BACKEND) \
--binary=$(realpath .)/$(QEMU_PROG) \
--target-name=$(TARGET_NAME) \
--binary=$(bindir)/$(QEMU_PROG) \
--target-arch=$(TARGET_ARCH) \
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp")
else
stap:
endif
@@ -79,8 +73,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(CONFIG_GDBSTUB_XML) += gdbstub-xml.o
#########################################################
# Linux user emulator target
@@ -99,7 +92,7 @@ endif #CONFIG_LINUX_USER
ifdef CONFIG_BSD_USER
QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR)
QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
obj-y += bsd-user/
obj-y += gdbstub.o user-exec.o
@@ -109,27 +102,36 @@ endif #CONFIG_BSD_USER
#########################################################
# System emulator target
ifdef CONFIG_SOFTMMU
CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y)
CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y)
CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y)
CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y)
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
obj-y += qtest.o
obj-y += hw/
obj-$(CONFIG_FDT) += device_tree.o
obj-$(CONFIG_KVM) += kvm-all.o
obj-$(CONFIG_NO_KVM) += kvm-stub.o
obj-y += memory.o savevm.o cputlb.o
obj-y += memory_mapping.o
obj-y += dump.o
LIBS+=$(libs_softmmu)
obj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += memory_mapping.o
obj-$(CONFIG_HAVE_CORE_DUMP) += dump.o
obj-$(CONFIG_NO_GET_MEMORY_MAPPING) += memory_mapping-stub.o
obj-$(CONFIG_NO_CORE_DUMP) += dump-stub.o
LIBS+=-lz
# xen support
obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
obj-$(call lnot,$(CONFIG_XEN)) += xen-stub.o
obj-$(CONFIG_NO_XEN) += xen-stub.o
# Hardware support
ifeq ($(TARGET_NAME), sparc64)
ifeq ($(TARGET_ARCH), sparc64)
obj-y += hw/sparc64/
else
obj-y += hw/$(TARGET_BASE_ARCH)/
endif
main.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h
endif # CONFIG_SOFTMMU
@@ -137,30 +139,13 @@ endif # CONFIG_SOFTMMU
# Workaround for http://gcc.gnu.org/PR55489, see configure.
%/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)
dummy := $(call unnest-vars,,obj-y)
nested-vars += obj-y
# we are making another call to unnest-vars with different vars, protect obj-y,
# it can be overridden in subdir Makefile.objs
obj-y-save := $(obj-y)
block-obj-y :=
common-obj-y :=
# This resolves all nested paths, so it must come last
include $(SRC_PATH)/Makefile.objs
dummy := $(call unnest-vars,.., \
block-obj-y \
block-obj-m \
common-obj-y \
common-obj-m)
# Now restore obj-y
obj-y := $(obj-y-save)
all-obj-y = $(obj-y) $(common-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
ifndef CONFIG_HAIKU
LIBS+=-lm
endif
all-obj-y = $(obj-y)
all-obj-y += $(addprefix ../, $(common-obj-y))
ifdef QEMU_PROGW
# The linker builds a windows executable. Make also a console executable.
@@ -199,7 +184,7 @@ endif
endif
ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
$(INSTALL_DATA) $(QEMU_PROG).stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
endif
GENERATED_HEADERS += config-target.h

QMP/README (new file)

@@ -0,0 +1,88 @@
QEMU Monitor Protocol
=====================
Introduction
-------------
The QEMU Monitor Protocol (QMP) allows applications to communicate with
QEMU's Monitor.
QMP is JSON[1] based and currently has the following features:
- Lightweight, text-based, easy to parse data format
- Asynchronous messages support (ie. events)
- Capabilities Negotiation
For detailed information on QMP's usage, please, refer to the following files:
o qmp-spec.txt QEMU Monitor Protocol current specification
o qmp-commands.txt QMP supported commands (auto-generated at build-time)
o qmp-events.txt List of available asynchronous events
There is also a simple Python script called 'qmp-shell' available.
IMPORTANT: It's strongly recommended to read the 'Stability Considerations'
section in the qmp-commands.txt file before making any serious use of QMP.
[1] http://www.json.org
Usage
-----
To enable QMP, you need a QEMU monitor instance in "control mode". There are
two ways of doing this.
The simplest one is using the '-qmp' command-line option. The following
example makes QMP available on localhost port 4444:
$ qemu [...] -qmp tcp:localhost:4444,server
However, in order to have more complex combinations, like multiple monitors,
the '-mon' command-line option should be used along with the '-chardev' one.
For instance, the following example creates one user monitor on stdio and one
QMP monitor on localhost port 4444.
$ qemu [...] -chardev stdio,id=mon0 -mon chardev=mon0,mode=readline \
-chardev socket,id=mon1,host=localhost,port=4444,server \
-mon chardev=mon1,mode=control
Please, refer to QEMU's manpage for more information.
Simple Testing
--------------
To manually test QMP one can connect with telnet and issue commands by hand:
$ telnet localhost 4444
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}, "capabilities": []}}
{ "execute": "qmp_capabilities" }
{"return": {}}
{ "execute": "query-version" }
{"return": {"qemu": {"micro": 50, "minor": 13, "major": 0}, "package": ""}}
Development Process
-------------------
When changing QMP's interface (by adding new commands, events or modifying
existing ones) it's mandatory to update the relevant documentation, which is
one (or more) of the files listed in the 'Introduction' section*.
Also, it's strongly recommended to send the documentation patch first, before
doing any code change. This is so because:
1. Avoids the code dictating the interface
2. Review can improve your interface. Letting that happen before
you implement it can save you work.
* The qmp-commands.txt file is generated from the qmp-commands.hx one, which
is the file that should be edited.
Homepage
--------
http://wiki.qemu.org/QMP


@@ -33,7 +33,7 @@
# $ qemu-ga-client fsfreeze freeze
# 2 filesystems frozen
#
# See also: http://wiki.qemu-project.org/Features/QAPI/GuestAgent
# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent
#
import base64
@@ -267,9 +267,7 @@ def main(address, cmd, args):
print('Hint: qemu is not running?')
sys.exit(1)
if cmd == 'fsfreeze' and args[0] == 'freeze':
client.sync(60)
elif cmd != 'ping':
if cmd != 'ping':
client.sync()
globals()['_cmd_' + cmd](client, args)


@@ -1,4 +1,4 @@
QEMU Machine Protocol Events
QEMU Monitor Protocol Events
============================
BALLOON_CHANGE
@@ -18,28 +18,6 @@ Example:
"data": { "actual": 944766976 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
BLOCK_IMAGE_CORRUPTED
---------------------
Emitted when a disk image is being marked corrupt.
Data:
- "device": Device name (json-string)
- "msg": Informative message (e.g., reason for the corruption) (json-string)
- "offset": If the corruption resulted from an image access, this is the access
offset into the image (json-int)
- "size": If the corruption resulted from an image access, this is the access
size (json-int)
Example:
{ "event": "BLOCK_IMAGE_CORRUPTED",
"data": { "device": "ide0-hd0",
"msg": "Prevented active L1 table overwrite", "offset": 196608,
"size": 65536 },
"timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
BLOCK_IO_ERROR
--------------
@@ -158,24 +136,6 @@ Example:
Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
event.
DEVICE_DELETED
--------------
Emitted whenever the device removal completion is acknowledged
by the guest.
At this point, it's safe to reuse the specified device ID.
Device removal can be initiated by the guest or by HMP/QMP commands.
Data:
- "device": device name (json-string, optional)
- "path": device path (json-string)
{ "event": "DEVICE_DELETED",
"data": { "device": "virtio-net-pci-0",
"path": "/machine/peripheral/virtio-net-pci-0" },
"timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
DEVICE_TRAY_MOVED
-----------------
@@ -194,76 +154,6 @@ Data:
},
"timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
GUEST_PANICKED
--------------
Emitted when guest OS panic is detected.
Data:
- "action": Action that has been taken (json-string, currently always "pause").
Example:
{ "event": "GUEST_PANICKED",
"data": { "action": "pause" } }
NIC_RX_FILTER_CHANGED
---------------------
The event is emitted once until the query command is executed,
the first event will always be emitted.
Data:
- "name": net client name (json-string)
- "path": device path (json-string)
{ "event": "NIC_RX_FILTER_CHANGED",
"data": { "name": "vnet0",
"path": "/machine/peripheral/vnet0/virtio-backend" },
"timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
}
QUORUM_FAILURE
--------------
Emitted by the Quorum block driver if it fails to establish a quorum.
Data:
- "reference": device name if defined else node name.
- "sector-num": Number of the first sector of the failed read operation.
- "sector-count": Failed read operation sector count.
Example:
{ "event": "QUORUM_FAILURE",
"data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
QUORUM_REPORT_BAD
-----------------
Emitted to report a corruption of a Quorum file.
Data:
- "error": Error message (json-string, optional)
Only present on failure. This field contains a human-readable
error message. There are no semantics other than that the
block layer reported an error and clients should not try to
interpret the error string.
- "node-name": The graph node name of the block driver state.
- "sector-num": Number of the first sector of the failed read operation.
- "sector-count": Failed read operation sector count.
Example:
{ "event": "QUORUM_REPORT_BAD",
"data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
RESET
-----
@@ -295,8 +185,7 @@ Emitted when the guest changes the RTC time.
Data:
- "offset": Offset between base RTC clock (as specified by -rtc base), and
new RTC clock value (json-number)
- "offset": delta against the host UTC in seconds (json-number)
Example:
@@ -518,7 +407,7 @@ Data: None.
Example:
{ "event": "WAKEUP",
{ "event": "WATCHDOG",
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
WATCHDOG


@@ -31,7 +31,6 @@
# (QEMU)
import qmp
import json
import readline
import sys
import pprint
@@ -92,7 +91,7 @@ class QMPShell(qmp.QEMUMonitorProtocol):
"""
Build a QMP input object from a user provided command-line in the
following format:
< command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
"""
cmdargs = cmdline.split()
@@ -100,41 +99,16 @@ class QMPShell(qmp.QEMUMonitorProtocol):
for arg in cmdargs[1:]:
opt = arg.split('=')
try:
if(len(opt) > 2):
opt[1] = '='.join(opt[1:])
value = int(opt[1])
except ValueError:
if opt[1] == 'true':
value = True
elif opt[1] == 'false':
value = False
elif opt[1].startswith('{'):
value = json.loads(opt[1])
else:
value = opt[1]
optpath = opt[0].split('.')
parent = qmpcmd['arguments']
curpath = []
for p in optpath[:-1]:
curpath.append(p)
d = parent.get(p, {})
if type(d) is not dict:
raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
parent[p] = d
parent = d
if optpath[-1] in parent:
if type(parent[optpath[-1]]) is dict:
raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
else:
raise QMPShellError('Cannot set "%s" multiple times' % opt[0])
parent[optpath[-1]] = value
value = opt[1]
qmpcmd['arguments'][opt[0]] = value
return qmpcmd
def _execute_cmd(self, cmdline):
try:
qmpcmd = self.__build_cmd(cmdline)
except Exception, e:
print 'Error while parsing command line: %s' % e
except:
print 'command format: <command-name> ',
print '[arg-name1=arg1] ... [arg-nameN=argN]'
return True
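To make the parsing above concrete, this is roughly what __build_cmd() returns
for a sample shell line (the command and its parameters are hypothetical, chosen
only for illustration):

    # Typed at the (QEMU) prompt:
    #   device_add driver=e1000 id=net0
    # __build_cmd() splits on whitespace and '=' and returns:
    qmpcmd = {
        'execute': 'device_add',
        'arguments': {'driver': 'e1000', 'id': 'net0'},
    }
    # With the dotted-key handling shown above, a key such as dns.0=8.8.8.8
    # would instead become a nested dictionary: {'dns': {'0': '8.8.8.8'}}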


@@ -1,17 +1,21 @@
QEMU Machine Protocol Specification
QEMU Monitor Protocol Specification - Version 0.1
1. Introduction
===============
This document specifies the QEMU Machine Protocol (QMP), a JSON-based protocol
which is available for applications to operate QEMU at the machine-level.
This document specifies the QEMU Monitor Protocol (QMP), a JSON-based protocol
which is available for applications to control QEMU at the machine-level.
To enable QMP support, QEMU has to be run in "control mode". This is done by
starting QEMU with the appropriate command-line options. Please, refer to the
QEMU manual page for more information.
2. Protocol Specification
=========================
This section details the protocol format. For the purpose of this document
"Client" is any application which is using QMP to communicate with QEMU and
"Server" is QEMU itself.
"Client" is any application which is communicating with QEMU in control mode,
and "Server" is QEMU itself.
JSON data structures, when mentioned in this document, are always in the
following format:
@@ -43,14 +47,14 @@ that the connection has been successfully established and that the Server is
ready for capabilities negotiation (for more information refer to section
'4. Capabilities Negotiation').
The greeting message format is:
The format is:
{ "QMP": { "version": json-object, "capabilities": json-array } }
Where,
- The "version" member contains the Server's version information (the format
is the same of the query-version command)
is the same of the 'query-version' command)
- The "capabilities" member specify the availability of features beyond the
baseline specification
@@ -79,7 +83,10 @@ of a command execution: success or error.
2.4.1 success
-------------
The format of a success response is:
The success response is issued when the command execution has finished
without errors.
The format is:
{ "return": json-object, "id": json-value }
@@ -89,12 +96,15 @@ The format of a success response is:
in a per-command basis or an empty json-object if the command does not
return data
- The "id" member contains the transaction identification associated
with the command execution if issued by the Client
with the command execution (if issued by the Client)
2.4.2 error
-----------
The format of an error response is:
The error response is issued when the command execution could not be
completed because of an error condition.
The format is:
{ "error": { "class": json-string, "desc": json-string }, "id": json-value }
@@ -104,7 +114,7 @@ The format of an error response is:
- The "desc" member is a human-readable error message. Clients should
not attempt to parse this message.
- The "id" member contains the transaction identification associated with
the command execution if issued by the Client
the command execution (if issued by the Client)
NOTE: Some errors can occur before the Server is able to read the "id" member,
in these cases the "id" member will not be part of the error response, even
@@ -114,9 +124,9 @@ if provided by the client.
-----------------------
As a result of state changes, the Server may send messages unilaterally
to the Client at any time. They are called "asynchronous events".
to the Client at any time. They are called 'asynchronous events'.
The format of asynchronous events is:
The format is:
{ "event": json-string, "data": json-object,
"timestamp": { "seconds": json-number, "microseconds": json-number } }
@@ -137,37 +147,36 @@ qmp-events.txt file.
===============
This section provides some examples of real QMP usage, in all of them
"C" stands for "Client" and "S" stands for "Server".
'C' stands for 'Client' and 'S' stands for 'Server'.
3.1 Server greeting
-------------------
S: { "QMP": { "version": { "qemu": { "micro": 50, "minor": 6, "major": 1 },
"package": ""}, "capabilities": []}}
S: {"QMP": {"version": {"qemu": "0.12.50", "package": ""}, "capabilities": []}}
3.2 Simple 'stop' execution
---------------------------
C: { "execute": "stop" }
S: { "return": {} }
S: {"return": {}}
3.3 KVM information
-------------------
C: { "execute": "query-kvm", "id": "example" }
S: { "return": { "enabled": true, "present": true }, "id": "example"}
S: {"return": {"enabled": true, "present": true}, "id": "example"}
3.4 Parsing error
------------------
C: { "execute": }
S: { "error": { "class": "GenericError", "desc": "Invalid JSON syntax" } }
S: {"error": {"class": "GenericError", "desc": "Invalid JSON syntax" } }
3.5 Powerdown event
-------------------
S: { "timestamp": { "seconds": 1258551470, "microseconds": 802384 },
"event": "POWERDOWN" }
S: {"timestamp": {"seconds": 1258551470, "microseconds": 802384}, "event":
"POWERDOWN"}
4. Capabilities Negotiation
----------------------------
@@ -175,17 +184,17 @@ S: { "timestamp": { "seconds": 1258551470, "microseconds": 802384 },
When a Client successfully establishes a connection, the Server is in
Capabilities Negotiation mode.
In this mode only the qmp_capabilities command is allowed to run, all
other commands will return the CommandNotFound error. Asynchronous
messages are not delivered either.
In this mode only the 'qmp_capabilities' command is allowed to run, all
other commands will return the CommandNotFound error. Asynchronous messages
are not delivered either.
Clients should use the qmp_capabilities command to enable capabilities
Clients should use the 'qmp_capabilities' command to enable capabilities
advertised in the Server's greeting (section '2.2 Server Greeting') they
support.
When the qmp_capabilities command is issued, and if it does not return an
When the 'qmp_capabilities' command is issued, and if it does not return an
error, the Server enters in Command mode where capabilities changes take
effect, all commands (except qmp_capabilities) are allowed and asynchronous
effect, all commands (except 'qmp_capabilities') are allowed and asynchronous
messages are delivered.
5 Compatibility Considerations
@@ -236,7 +245,7 @@ arguments, errors, asynchronous events, and so forth.
Any new names downstream wishes to add must begin with '__'. To
ensure compatibility with other downstreams, it is strongly
recommended that you prefix your downstream names with '__RFQDN_' where
recommended that you prefix your downstram names with '__RFQDN_' where
RFQDN is a valid, reverse fully qualified domain name which you
control. For example, a qemu-kvm specific monitor command would be:


@@ -1,5 +1,5 @@
# QEMU Monitor Protocol Python class
#
#
# Copyright (C) 2009, 2010 Red Hat Inc.
#
# Authors:
@@ -171,12 +171,7 @@ class QEMUMonitorProtocol:
pass
self.__sock.setblocking(1)
if not self.__events and wait:
ret = self.__json_read(only_event=True)
if ret == None:
# We are in blocking mode, if don't get anything, something
# went wrong
raise QMPConnectError("Error while reading from socket")
self.__json_read(only_event=True)
return self.__events
def clear_events(self):
@@ -193,9 +188,3 @@ class QEMUMonitorProtocol:
def settimeout(self, timeout):
self.__sock.settimeout(timeout)
def get_sock_fd(self):
return self.__sock.fileno()
def is_scm_available(self):
return self.__sock.family == socket.AF_UNIX
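For context, the QEMUMonitorProtocol class this diff touches is typically
driven along the following lines; this is a usage sketch only, and the method
names assumed here (connect(), cmd(), get_events(), close()) should be checked
against the qmp.py module in the tree being built:

    from qmp import QEMUMonitorProtocol

    mon = QEMUMonitorProtocol(('localhost', 4444))   # or the path of a UNIX socket
    mon.connect()                                    # read greeting, negotiate capabilities
    print(mon.cmd('query-status'))                   # e.g. {'return': {'status': 'running', ...}}
    for event in mon.get_events(wait=False):         # drain queued asynchronous events
        print(event)
    mon.close()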

README

@@ -1,3 +1,3 @@
Read the documentation in qemu-doc.html or on http://wiki.qemu-project.org
Read the documentation in qemu-doc.html or on http://wiki.qemu.org
- QEMU team

TODO (new file)

@@ -0,0 +1,37 @@
General:
-------
- cycle counter for all archs
- cpu_interrupt() win32/SMP fix
- merge PIC spurious interrupt patch
- warning for OS/2: must not use 128 MB memory (merge bochs cmos patch ?)
- config file (at least for windows/Mac OS X)
- update doc: PCI infos.
- basic VGA optimizations
- better code fetch
- do not resize vga if invalid size.
- TLB code protection support for PPC
- disable SMC handling for ARM/SPARC/PPC (not finished)
- see undefined flags for BTx insn
- keyboard output buffer filling timing emulation
- tests for each target CPU
- fix all remaining thread lock issues (must put TBs in a specific invalid
state, find a solution for tb_flush()).
ppc specific:
------------
- TLB invalidate not needed if msr_pr changes
- enable shift optimizations ?
linux-user specific:
-------------------
- remove threading support as it cannot work at this point
- improve IPC syscalls
- more syscalls (in particular all 64 bit ones, IPCs, fix 64 bit
issues, fix 16 bit uid issues)
- use kernel traps for unaligned accesses on ARM ?
lower priority:
--------------
- int15 ah=86: use better timing
- use -msoft-float on ARM


@@ -1 +1 @@
1.7.50
1.4.2


@@ -23,8 +23,8 @@ struct AioHandler
GPollFD pfd;
IOHandler *io_read;
IOHandler *io_write;
AioFlushHandler *io_flush;
int deleted;
int pollfds_idx;
void *opaque;
QLIST_ENTRY(AioHandler) node;
};
@@ -46,6 +46,7 @@ void aio_set_fd_handler(AioContext *ctx,
int fd,
IOHandler *io_read,
IOHandler *io_write,
AioFlushHandler *io_flush,
void *opaque)
{
AioHandler *node;
@@ -82,11 +83,11 @@ void aio_set_fd_handler(AioContext *ctx,
/* Update handler with latest information */
node->io_read = io_read;
node->io_write = io_write;
node->io_flush = io_flush;
node->opaque = opaque;
node->pollfds_idx = -1;
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
node->pfd.events |= (io_write ? G_IO_OUT : 0);
}
aio_notify(ctx);
@@ -94,10 +95,12 @@ void aio_set_fd_handler(AioContext *ctx,
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
EventNotifierHandler *io_read)
EventNotifierHandler *io_read,
AioFlushEventNotifierHandler *io_flush)
{
aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
(IOHandler *)io_read, NULL, notifier);
(IOHandler *)io_read, NULL,
(AioFlushHandler *)io_flush, notifier);
}
bool aio_pending(AioContext *ctx)
@@ -107,6 +110,13 @@ bool aio_pending(AioContext *ctx)
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
int revents;
/*
* FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
* main-loop.c is still select based (due to the slirp legacy).
* If main-loop.c ever switches to poll, G_IO_ERR should be
* tested too. Dispatching G_IO_ERR to both handlers should be
* okay, since handlers need to be ready for spurious wakeups.
*/
revents = node->pfd.revents & node->pfd.events;
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
return true;
@@ -119,12 +129,30 @@ bool aio_pending(AioContext *ctx)
return false;
}
static bool aio_dispatch(AioContext *ctx)
bool aio_poll(AioContext *ctx, bool blocking)
{
static struct timeval tv0;
AioHandler *node;
bool progress = false;
fd_set rdfds, wrfds;
int max_fd = -1;
int ret;
bool busy, progress;
progress = false;
/*
* If there are callbacks left that have been queued, we need to call them.
* Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops).
*/
if (aio_bh_poll(ctx)) {
blocking = false;
progress = true;
}
/*
* Then dispatch any pending callbacks from the GSource.
*
* We have to walk very carefully in case qemu_aio_set_fd_handler is
* called while we're walking.
*/
@@ -138,19 +166,12 @@ static bool aio_dispatch(AioContext *ctx)
revents = node->pfd.revents & node->pfd.events;
node->pfd.revents = 0;
if (!node->deleted &&
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
node->io_read) {
/* See comment in aio_pending. */
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
node->io_read(node->opaque);
/* aio_notify() does not count as progress */
if (node->opaque != &ctx->notifier) {
progress = true;
}
progress = true;
}
if (!node->deleted &&
(revents & (G_IO_OUT | G_IO_ERR)) &&
node->io_write) {
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
node->io_write(node->opaque);
progress = true;
}
@@ -166,77 +187,83 @@ static bool aio_dispatch(AioContext *ctx)
}
}
/* Run our timers */
progress |= timerlistgroup_run_timers(&ctx->tlg);
return progress;
}
bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
int ret;
bool progress;
progress = false;
/*
* If there are callbacks left that have been queued, we need to call them.
* Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops).
*/
if (aio_bh_poll(ctx)) {
blocking = false;
progress = true;
}
if (aio_dispatch(ctx)) {
progress = true;
}
if (progress && !blocking) {
return true;
}
ctx->walking_handlers++;
g_array_set_size(ctx->pollfds, 0);
FD_ZERO(&rdfds);
FD_ZERO(&wrfds);
/* fill pollfds */
/* fill fd sets */
busy = false;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
node->pollfds_idx = -1;
if (!node->deleted && node->pfd.events) {
GPollFD pfd = {
.fd = node->pfd.fd,
.events = node->pfd.events,
};
node->pollfds_idx = ctx->pollfds->len;
g_array_append_val(ctx->pollfds, pfd);
/* If there aren't pending AIO operations, don't invoke callbacks.
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
* wait indefinitely.
*/
if (!node->deleted && node->io_flush) {
if (node->io_flush(node->opaque) == 0) {
continue;
}
busy = true;
}
if (!node->deleted && node->io_read) {
FD_SET(node->pfd.fd, &rdfds);
max_fd = MAX(max_fd, node->pfd.fd + 1);
}
if (!node->deleted && node->io_write) {
FD_SET(node->pfd.fd, &wrfds);
max_fd = MAX(max_fd, node->pfd.fd + 1);
}
}
ctx->walking_handlers--;
/* No AIO operations? Get us out of here */
if (!busy) {
return progress;
}
/* wait until next event */
ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
ctx->pollfds->len,
blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);
ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
/* if we have any readable fds, dispatch event */
if (ret > 0) {
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
if (node->pollfds_idx != -1) {
GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
node->pollfds_idx);
node->pfd.revents = pfd->revents;
/* we have to walk very carefully in case
* qemu_aio_set_fd_handler is called while we're walking */
node = QLIST_FIRST(&ctx->aio_handlers);
while (node) {
AioHandler *tmp;
ctx->walking_handlers++;
if (!node->deleted &&
FD_ISSET(node->pfd.fd, &rdfds) &&
node->io_read) {
node->io_read(node->opaque);
progress = true;
}
if (!node->deleted &&
FD_ISSET(node->pfd.fd, &wrfds) &&
node->io_write) {
node->io_write(node->opaque);
progress = true;
}
tmp = node;
node = QLIST_NEXT(node, node);
ctx->walking_handlers--;
if (!ctx->walking_handlers && tmp->deleted) {
QLIST_REMOVE(tmp, node);
g_free(tmp);
}
}
}
/* Run dispatch even if there were no readable fds to run timers */
if (aio_dispatch(ctx)) {
progress = true;
}
return progress;
assert(progress || busy);
return true;
}
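For readers unfamiliar with the pattern, the select()-based aio_poll() above
boils down to the following Python sketch (illustration with invented names,
not QEMU code; the real function additionally handles the io_flush "busy"
test, deletion-safe list walking and bottom halves):

    import select

    def aio_poll(handlers, blocking):
        # Collect the descriptors that have read/write handlers attached.
        rdfds = [h.fd for h in handlers if h.io_read]
        wrfds = [h.fd for h in handlers if h.io_write]

        timeout = None if blocking else 0          # None blocks, 0 polls once
        r, w, _ = select.select(rdfds, wrfds, [], timeout)

        progress = False
        for h in handlers:
            if h.io_read and h.fd in r:
                h.io_read(h.opaque)
                progress = True
            if h.io_write and h.fd in w:
                h.io_write(h.opaque)
                progress = True
        return progress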


@@ -23,6 +23,7 @@
struct AioHandler {
EventNotifier *e;
EventNotifierHandler *io_notify;
AioFlushEventNotifierHandler *io_flush;
GPollFD pfd;
int deleted;
QLIST_ENTRY(AioHandler) node;
@@ -30,7 +31,8 @@ struct AioHandler {
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *e,
EventNotifierHandler *io_notify)
EventNotifierHandler *io_notify,
AioFlushEventNotifierHandler *io_flush)
{
AioHandler *node;
@@ -71,6 +73,7 @@ void aio_set_event_notifier(AioContext *ctx,
}
/* Update handler with latest information */
node->io_notify = io_notify;
node->io_flush = io_flush;
}
aio_notify(ctx);
@@ -93,9 +96,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
bool progress;
bool busy, progress;
int count;
int timeout;
progress = false;
@@ -109,9 +111,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
progress = true;
}
/* Run timers */
progress |= timerlistgroup_run_timers(&ctx->tlg);
/*
* Then dispatch any pending callbacks from the GSource.
*
@@ -127,11 +126,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
if (node->pfd.revents && node->io_notify) {
node->pfd.revents = 0;
node->io_notify(node->e);
/* aio_notify() does not count as progress */
if (node->e != &ctx->notifier) {
progress = true;
}
progress = true;
}
tmp = node;
@@ -152,8 +147,19 @@ bool aio_poll(AioContext *ctx, bool blocking)
ctx->walking_handlers++;
/* fill fd sets */
busy = false;
count = 0;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
/* If there aren't pending AIO operations, don't invoke callbacks.
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
* wait indefinitely.
*/
if (!node->deleted && node->io_flush) {
if (node->io_flush(node->e) == 0) {
continue;
}
busy = true;
}
if (!node->deleted && node->io_notify) {
events[count++] = event_notifier_get_handle(node->e);
}
@@ -161,13 +167,15 @@ bool aio_poll(AioContext *ctx, bool blocking)
ctx->walking_handlers--;
/* No AIO operations? Get us out of here */
if (!busy) {
return progress;
}
/* wait until next event */
while (count > 0) {
int ret;
timeout = blocking ?
qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
int timeout = blocking ? INFINITE : 0;
int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
/* if we have any signaled events, dispatch event */
if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
@@ -188,11 +196,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
node->io_notify) {
node->io_notify(node->e);
/* aio_notify() does not count as progress */
if (node->e != &ctx->notifier) {
progress = true;
}
progress = true;
}
tmp = node;
@@ -210,14 +214,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
events[ret - WAIT_OBJECT_0] = events[--count];
}
if (blocking) {
/* Run the timers a second time. We do this because otherwise aio_wait
* will not note progress - and will stop a drain early - if we have
* a timer that was not ready to run entering g_poll but is ready
* after g_poll. This will only do anything if a timer has expired.
*/
progress |= timerlistgroup_run_timers(&ctx->tlg);
}
return progress;
assert(progress || busy);
return true;
}


@@ -35,22 +35,20 @@
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "hw/audiodev.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/gdbstub.h"
#include "hw/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "hw/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"
#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
@@ -67,7 +65,7 @@ int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
int graphic_depth = 15;
#endif
@@ -87,8 +85,6 @@ int graphic_depth = 32;
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
@@ -106,9 +102,6 @@ int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);
/***********************************************************/
/* ram save/restore */
@@ -120,7 +113,7 @@ static void check_guest_throttling(void);
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
static struct defconfig_file {
const char *filename;
@@ -128,11 +121,10 @@ static struct defconfig_file {
bool userconfig;
} default_config_files[] = {
{ CONFIG_QEMU_CONFDIR "/qemu.conf", true },
{ CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
{ CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
{ NULL }, /* end of list */
};
static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
int qemu_read_default_config_files(bool userconfig)
{
@@ -148,13 +140,14 @@ int qemu_read_default_config_files(bool userconfig)
return ret;
}
}
return 0;
}
static inline bool is_zero_range(uint8_t *p, uint64_t size)
static inline bool is_zero_page(uint8_t *p)
{
return buffer_find_nonzero_offset(p, size) == size;
return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
TARGET_PAGE_SIZE;
}
/* struct contains XBZRLE cache and a static page
@@ -164,22 +157,20 @@ static struct {
uint8_t *encoded_buf;
/* buffer for storing page content */
uint8_t *current_buf;
/* buffer used for XBZRLE decoding */
uint8_t *decoded_buf;
/* Cache for XBZRLE */
PageCache *cache;
} XBZRLE = {
.encoded_buf = NULL,
.current_buf = NULL,
.decoded_buf = NULL,
.cache = NULL,
};
/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
int64_t xbzrle_cache_resize(int64_t new_size)
{
if (new_size < TARGET_PAGE_SIZE) {
return -1;
}
if (XBZRLE.cache != NULL) {
return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
TARGET_PAGE_SIZE;
@@ -273,34 +264,6 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return size;
}
/* This is the last block that we have visited searching for dirty pages
*/
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
* The important thing is that a stale (not-yet-0'd) page be replaced
* by the new data.
* As a bonus, if the page wasn't in the cache it gets added so that
* when a small write is made into the 0'd page it gets XBZRLE sent
*/
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
if (ram_bulk_stage || !migrate_use_xbzrle()) {
return;
}
/* We don't care if this fails to allocate a new cache page
* as long as it updated an old one */
cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
}
#define ENCODING_FLAG_XBZRLE 0x1
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -312,9 +275,8 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
if (!cache_is_cached(XBZRLE.cache, current_addr)) {
if (!last_stage) {
if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
return -1;
}
cache_insert(XBZRLE.cache, current_addr,
g_memdup(current_data, TARGET_PAGE_SIZE));
}
acct_info.xbzrle_cache_miss++;
return -1;
@@ -357,14 +319,25 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
return bytes_sent;
}
/* This is the last block that we have visited searching for dirty pages
*/
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
ram_addr_t start)
{
unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
unsigned long next;
@@ -381,10 +354,11 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
return (next - base) << TARGET_PAGE_BITS;
}
static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
ram_addr_t offset)
{
bool ret;
int nr = addr >> TARGET_PAGE_BITS;
int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
ret = test_and_set_bit(nr, migration_bitmap);
@@ -394,98 +368,41 @@ static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
return ret;
}
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
ram_addr_t addr;
unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
/* start address is aligned at the start of a word? */
if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
int k;
int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
for (k = page; k < page + nr; k++) {
if (src[k]) {
unsigned long new_dirty;
new_dirty = ~migration_bitmap[k];
migration_bitmap[k] |= src[k];
new_dirty &= src[k];
migration_dirty_pages += ctpopl(new_dirty);
src[k] = 0;
}
}
} else {
for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
if (cpu_physical_memory_get_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION)) {
cpu_physical_memory_reset_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION);
migration_bitmap_set_dirty(start + addr);
}
}
}
}
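The word-aligned fast path above merges a whole word of the global dirty bitmap into the migration bitmap at once and counts only the pages that were not already marked, via ~migration_bitmap[k] masked with src[k] followed by a population count. A self-contained illustration of that bit arithmetic (__builtin_popcountl stands in for QEMU's ctpopl()):

#include <stdio.h>

int main(void)
{
    unsigned long migration_word = 0x00f0UL; /* pages already queued for migration */
    unsigned long src_word       = 0x0ff0UL; /* pages the memory core reports dirty */

    /* Bits dirty in src but not yet set in the migration bitmap. */
    unsigned long new_dirty = ~migration_word & src_word;

    migration_word |= src_word;   /* merge, as the loop above does */
    src_word = 0;                 /* and clear the source word */

    printf("newly dirty pages: %d\n", __builtin_popcountl(new_dirty)); /* prints 4 */
    return 0;
}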
/* Needs iothread lock! */
static void migration_bitmap_sync(void)
{
RAMBlock *block;
ram_addr_t addr;
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
int64_t end_time;
int64_t bytes_xfer_now;
if (!bytes_xfer_prev) {
bytes_xfer_prev = ram_bytes_transferred();
}
if (!start_time) {
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
start_time = qemu_get_clock_ms(rt_clock);
}
trace_migration_bitmap_sync_start();
address_space_sync_dirty_bitmap(&address_space_memory);
memory_global_sync_dirty_bitmap(get_system_memory());
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->mr->ram_addr, block->length);
for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
if (memory_region_test_and_clear_dirty(block->mr,
addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION)) {
migration_bitmap_set_dirty(block->mr, addr);
}
}
}
trace_migration_bitmap_sync_end(migration_dirty_pages
- num_dirty_pages_init);
num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
end_time = qemu_get_clock_ms(rt_clock);
/* more than 1 second = 1000 milliseconds */
if (end_time > start_time + 1000) {
if (migrate_auto_converge()) {
/* The following detection logic can be refined later. For now:
Check to see if the dirtied bytes are 50% more than the approx.
amount of bytes that just got transferred since the last time we
were in this routine. If that happens >N times (for now N==4)
we turn on the throttle down logic */
bytes_xfer_now = ram_bytes_transferred();
if (s->dirty_pages_rate &&
(num_dirty_pages_period * TARGET_PAGE_SIZE >
(bytes_xfer_now - bytes_xfer_prev)/2) &&
(dirty_rate_high_cnt++ > 4)) {
trace_migration_throttle();
mig_throttle_on = true;
dirty_rate_high_cnt = 0;
}
bytes_xfer_prev = bytes_xfer_now;
} else {
mig_throttle_on = false;
}
s->dirty_pages_rate = num_dirty_pages_period * 1000
/ (end_time - start_time);
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
start_time = end_time;
num_dirty_pages_period = 0;
}
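To make the throttling condition above concrete: with 4 KiB target pages, dirtying 30,000 pages in the last interval amounts to roughly 117 MiB, so if only 100 MiB were transferred in the same interval, num_dirty_pages_period * TARGET_PAGE_SIZE exceeds (bytes_xfer_now - bytes_xfer_prev) / 2 and the high-dirty-rate counter advances (the dirty_pages_rate and counter checks are left out of this sketch; page size assumed 4 KiB):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t page_size = 4096;                 /* assumed target page size */
    uint64_t dirty_pages_period = 30000;             /* pages dirtied this interval */
    uint64_t bytes_xfer_now  = 900 * 1024 * 1024ULL;
    uint64_t bytes_xfer_prev = 800 * 1024 * 1024ULL; /* 100 MiB sent this interval */

    int counter_advances = dirty_pages_period * page_size >
                           (bytes_xfer_now - bytes_xfer_prev) / 2;

    printf("dirty bytes %llu, threshold %llu, counter advances: %d\n",
           (unsigned long long)(dirty_pages_period * page_size),
           (unsigned long long)((bytes_xfer_now - bytes_xfer_prev) / 2),
           counter_advances); /* counter advances: 1 */
    return 0;
}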
@@ -526,9 +443,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
ram_bulk_stage = false;
}
} else {
int ret;
uint8_t *p;
bool send_async = true;
int cont = (block == last_sent_block) ?
RAM_SAVE_FLAG_CONTINUE : 0;
@@ -536,53 +451,25 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* In doubt, send page as normal */
bytes_sent = -1;
ret = ram_control_save_page(f, block->offset,
offset, TARGET_PAGE_SIZE, &bytes_sent);
current_addr = block->offset + offset;
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (ret != RAM_SAVE_CONTROL_DELAYED) {
if (bytes_sent > 0) {
acct_info.norm_pages++;
} else if (bytes_sent == 0) {
acct_info.dup_pages++;
}
}
} else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
if (is_zero_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, 0);
bytes_sent++;
/* Must let xbzrle know, otherwise a previous (now 0'd) cached
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
offset, cont, last_stage);
if (!last_stage) {
/* We must send exactly what's in the xbzrle cache
* even if the page wasn't xbzrle compressed, so that
* it's right next time.
*/
p = get_cached_data(XBZRLE.cache, current_addr);
/* Can't send this cached data async, since the cache page
* might get updated before it gets to the wire
*/
send_async = false;
}
}
/* XBZRLE overflow or normal page */
if (bytes_sent == -1) {
bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
if (send_async) {
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
bytes_sent += TARGET_PAGE_SIZE;
acct_info.norm_pages++;
}
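Both zero-page branches above hinge on detecting an all-zero guest page cheaply (is_zero_range() on one side of the hunk, is_zero_page() on the other). A naive, self-contained stand-in for that check is just a byte scan; the real helpers are optimized but semantically equivalent:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Naive stand-in for is_zero_range(): true iff every byte in [p, p+len) is 0. */
static bool buffer_is_all_zero(const unsigned char *p, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (p[i] != 0) {
            return false;
        }
    }
    return true;
}

int main(void)
{
    unsigned char page[4096];   /* assumed 4 KiB page */
    memset(page, 0, sizeof(page));
    printf("%d\n", buffer_is_all_zero(page, sizeof(page))); /* 1: send as compressed zero page */
    page[100] = 0xab;
    printf("%d\n", buffer_is_all_zero(page, sizeof(page))); /* 0: fall through to XBZRLE/normal */
    return 0;
}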
@@ -602,18 +489,6 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
static uint64_t bytes_transferred;
void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
uint64_t pages = size / TARGET_PAGE_SIZE;
if (zero) {
acct_info.dup_pages += pages;
} else {
acct_info.norm_pages += pages;
bytes_transferred += size;
qemu_update_position(f, size);
}
}
static ram_addr_t ram_save_remaining(void)
{
return migration_dirty_pages;
@@ -640,12 +515,6 @@ uint64_t ram_bytes_total(void)
return total;
}
void free_xbzrle_decoded_buf(void)
{
g_free(xbzrle_decoded_buf);
xbzrle_decoded_buf = NULL;
}
static void migration_end(void)
{
if (migration_bitmap) {
@@ -659,9 +528,8 @@ static void migration_end(void)
g_free(XBZRLE.cache);
g_free(XBZRLE.encoded_buf);
g_free(XBZRLE.current_buf);
g_free(XBZRLE.decoded_buf);
XBZRLE.cache = NULL;
XBZRLE.encoded_buf = NULL;
XBZRLE.current_buf = NULL;
}
}
@@ -689,8 +557,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_pages);
bitmap_set(migration_bitmap, 0, ram_pages);
migration_dirty_pages = ram_pages;
mig_throttle_on = false;
dirty_rate_high_cnt = 0;
qemu_mutex_lock_ramlist();
bytes_transferred = 0;
reset_ram_globals();
if (migrate_use_xbzrle()) {
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -700,33 +570,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
DPRINTF("Error creating cache\n");
return -1;
}
/* We prefer not to abort if there is no memory */
XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
if (!XBZRLE.encoded_buf) {
DPRINTF("Error allocating encoded_buf\n");
return -1;
}
XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
if (!XBZRLE.current_buf) {
DPRINTF("Error allocating current_buf\n");
g_free(XBZRLE.encoded_buf);
XBZRLE.encoded_buf = NULL;
return -1;
}
XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
acct_clear();
}
qemu_mutex_lock_iothread();
qemu_mutex_lock_ramlist();
bytes_transferred = 0;
reset_ram_globals();
memory_global_dirty_log_start();
migration_bitmap_sync();
qemu_mutex_unlock_iothread();
qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
@@ -737,10 +587,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
}
qemu_mutex_unlock_ramlist();
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
return 0;
@@ -759,9 +605,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
reset_ram_globals();
}
ram_control_before_iterate(f, RAM_CONTROL_ROUND);
t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
t0 = qemu_get_clock_ns(rt_clock);
i = 0;
while ((ret = qemu_file_rate_limit(f)) == 0) {
int bytes_sent;
@@ -773,14 +617,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
total_sent += bytes_sent;
acct_info.iterations++;
check_guest_throttling();
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check every few
iterations
*/
if ((i & 63) == 0) {
uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
if (t1 > MAX_WAIT) {
DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
t1, i);
@@ -792,26 +635,15 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
qemu_mutex_unlock_ramlist();
/*
* Must occur before EOS (or any QEMUFile operation)
* because of RDMA protocol.
*/
ram_control_after_iterate(f, RAM_CONTROL_ROUND);
bytes_transferred += total_sent;
/*
* Do not count these 8 bytes into total_sent, so that we can
* return 0 if no page had been dirtied.
*/
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
bytes_transferred += 8;
ret = qemu_file_get_error(f);
if (ret < 0) {
bytes_transferred += total_sent;
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
total_sent += 8;
bytes_transferred += total_sent;
return total_sent;
}
@@ -820,8 +652,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
qemu_mutex_lock_ramlist();
migration_bitmap_sync();
ram_control_before_iterate(f, RAM_CONTROL_FINISH);
/* try transferring iterative blocks of memory */
/* flush all remaining blocks regardless of rate limiting */
@@ -835,8 +665,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
bytes_transferred += bytes_sent;
}
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
migration_end();
qemu_mutex_unlock_ramlist();
@@ -852,9 +680,7 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
if (remaining_size < max_size) {
qemu_mutex_lock_iothread();
migration_bitmap_sync();
qemu_mutex_unlock_iothread();
remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
}
return remaining_size;
@@ -866,8 +692,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
unsigned int xh_len;
int xh_flags;
if (!xbzrle_decoded_buf) {
xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
if (!XBZRLE.decoded_buf) {
XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
/* extract RLE header */
@@ -884,10 +710,10 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
return -1;
}
/* load data and decode */
qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
/* decode RLE */
ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
TARGET_PAGE_SIZE);
if (ret == -1) {
fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
@@ -910,7 +736,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
uint8_t len;
if (flags & RAM_SAVE_FLAG_CONTINUE) {
if (!block) {
if (!block || block->length <= offset) {
fprintf(stderr, "Ack, bad migration stream!\n");
return NULL;
}
@@ -923,25 +749,15 @@ static inline void *host_from_stream_offset(QEMUFile *f,
id[len] = 0;
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id)))
if (!strncmp(id, block->idstr, sizeof(id)) && block->length > offset) {
return memory_region_get_ram_ptr(block->mr) + offset;
}
}
fprintf(stderr, "Can't find block %s!\n", id);
return NULL;
}
/*
* If a page (or a whole RDMA chunk) has been
* determined to be zero, then zap it.
*/
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
if (ch != 0 || !is_zero_range(host, size)) {
memset(host, ch, size);
}
}
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
ram_addr_t addr;
@@ -980,10 +796,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id))) {
if (block->length != length) {
fprintf(stderr,
"Length mismatch: %s: " RAM_ADDR_FMT
" in != " RAM_ADDR_FMT "\n", id, length,
block->length);
ret = -EINVAL;
goto done;
}
@@ -1013,7 +825,16 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
}
ch = qemu_get_byte(f);
ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
if (ch != 0 || !is_zero_page(host)) {
memset(host, ch, TARGET_PAGE_SIZE);
#ifndef _WIN32
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
#endif
}
} else if (flags & RAM_SAVE_FLAG_PAGE) {
void *host;
@@ -1033,8 +854,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
goto done;
}
} else if (flags & RAM_SAVE_FLAG_HOOK) {
ram_control_load_hook(f, flags);
}
error = qemu_file_get_error(f);
if (error) {
@@ -1058,6 +877,7 @@ SaveVMHandlers savevm_ram_handlers = {
.cancel = ram_migration_cancel,
};
#ifdef HAS_AUDIO
struct soundhw {
const char *name;
const char *descr;
@@ -1069,30 +889,96 @@ struct soundhw {
} init;
};
static struct soundhw soundhw[9];
static int soundhw_count;
static struct soundhw soundhw[] = {
#ifdef HAS_AUDIO_CHOICE
#ifdef CONFIG_PCSPK
{
"pcspk",
"PC speaker",
0,
1,
{ .init_isa = pcspk_audio_init }
},
#endif
void isa_register_soundhw(const char *name, const char *descr,
int (*init_isa)(ISABus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 1;
soundhw[soundhw_count].init.init_isa = init_isa;
soundhw_count++;
}
#ifdef CONFIG_SB16
{
"sb16",
"Creative Sound Blaster 16",
0,
1,
{ .init_isa = SB16_init }
},
#endif
void pci_register_soundhw(const char *name, const char *descr,
int (*init_pci)(PCIBus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 0;
soundhw[soundhw_count].init.init_pci = init_pci;
soundhw_count++;
}
#ifdef CONFIG_CS4231A
{
"cs4231a",
"CS4231A",
0,
1,
{ .init_isa = cs4231a_init }
},
#endif
#ifdef CONFIG_ADLIB
{
"adlib",
#ifdef HAS_YMF262
"Yamaha YMF262 (OPL3)",
#else
"Yamaha YM3812 (OPL2)",
#endif
0,
1,
{ .init_isa = Adlib_init }
},
#endif
#ifdef CONFIG_GUS
{
"gus",
"Gravis Ultrasound GF1",
0,
1,
{ .init_isa = GUS_init }
},
#endif
#ifdef CONFIG_AC97
{
"ac97",
"Intel 82801AA AC97 Audio",
0,
0,
{ .init_pci = ac97_init }
},
#endif
#ifdef CONFIG_ES1370
{
"es1370",
"ENSONIQ AudioPCI ES1370",
0,
0,
{ .init_pci = es1370_init }
},
#endif
#ifdef CONFIG_HDA
{
"hda",
"Intel HD Audio",
0,
0,
{ .init_pci = intel_hda_and_codec_init }
},
#endif
#endif /* HAS_AUDIO_CHOICE */
{ NULL, NULL, 0, 0, { NULL } }
};
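One side of this hunk uses the registration helpers isa_register_soundhw()/pci_register_soundhw() instead of a compile-time soundhw[] table: each device appends itself into a fixed-size array guarded by an assert, and the last slot stays NULL so the existing "for (c = soundhw; c->name; c++)" loops still terminate. The standalone model below reproduces just that registration pattern (names and table size are illustrative, not QEMU's):

#include <assert.h>
#include <stdio.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

struct card {
    const char *name;
    const char *descr;
    int isa;                 /* 1 = ISA device, 0 = PCI device */
};

/* Last slot stays NULL-terminated so name-scanning loops stop. */
static struct card table[9];
static int card_count;

static void register_card(const char *name, const char *descr, int isa)
{
    assert(card_count < (int)ARRAY_SIZE(table) - 1);
    table[card_count].name = name;
    table[card_count].descr = descr;
    table[card_count].isa = isa;
    card_count++;
}

int main(void)
{
    register_card("pcspk", "PC speaker", 1);
    register_card("hda", "Intel HD Audio", 0);
    for (struct card *c = table; c->name; c++) {
        printf("%-11s %s (%s)\n", c->name, c->descr, c->isa ? "ISA" : "PCI");
    }
    return 0;
}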
void select_soundhw(const char *optarg)
{
@@ -1101,16 +987,16 @@ void select_soundhw(const char *optarg)
if (is_help_option(optarg)) {
show_valid_cards:
if (soundhw_count) {
printf("Valid sound card names (comma separated):\n");
for (c = soundhw; c->name; ++c) {
printf ("%-11s %s\n", c->name, c->descr);
}
printf("\n-soundhw all will enable all of the above\n");
} else {
printf("Machine has no user-selectable audio hardware "
"(it may or may not have always-present audio hardware).\n");
#ifdef HAS_AUDIO_CHOICE
printf("Valid sound card names (comma separated):\n");
for (c = soundhw; c->name; ++c) {
printf ("%-11s %s\n", c->name, c->descr);
}
printf("\n-soundhw all will enable all of the above\n");
#else
printf("Machine has no user-selectable audio hardware "
"(it may or may not have always-present audio hardware).\n");
#endif
exit(!is_help_option(optarg));
}
else {
@@ -1158,30 +1044,32 @@ void select_soundhw(const char *optarg)
}
}
void audio_init(void)
void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
{
struct soundhw *c;
ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
for (c = soundhw; c->name; ++c) {
if (c->enabled) {
if (c->isa) {
if (!isa_bus) {
fprintf(stderr, "ISA bus not available for %s\n", c->name);
exit(1);
if (isa_bus) {
c->init.init_isa(isa_bus);
}
c->init.init_isa(isa_bus);
} else {
if (!pci_bus) {
fprintf(stderr, "PCI bus not available for %s\n", c->name);
exit(1);
if (pci_bus) {
c->init.init_pci(pci_bus);
}
c->init.init_pci(pci_bus);
}
}
}
}
#else
void select_soundhw(const char *optarg)
{
}
void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
{
}
#endif
int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
@@ -1199,28 +1087,29 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid)
if (ret != 16) {
return -1;
}
#ifdef TARGET_I386
smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
#endif
return 0;
}
void do_acpitable_option(const QemuOpts *opts)
void do_acpitable_option(const char *optarg)
{
#ifdef TARGET_I386
Error *err = NULL;
acpi_table_add(opts, &err);
if (err) {
error_report("Wrong acpi table provided: %s",
error_get_pretty(err));
error_free(err);
if (acpi_table_add(optarg) < 0) {
fprintf(stderr, "Wrong acpi table provided\n");
exit(1);
}
#endif
}
void do_smbios_option(QemuOpts *opts)
void do_smbios_option(const char *optarg)
{
#ifdef TARGET_I386
smbios_entry_add(opts);
if (smbios_entry_add(optarg) < 0) {
fprintf(stderr, "Wrong smbios provided\n");
exit(1);
}
#endif
}
@@ -1231,6 +1120,15 @@ void cpudef_init(void)
#endif
}
int audio_available(void)
{
#ifdef HAS_AUDIO
return 1;
#else
return 0;
#endif
}
int tcg_available(void)
{
return 1;
@@ -1259,56 +1157,7 @@ TargetInfo *qmp_query_target(Error **errp)
{
TargetInfo *info = g_malloc0(sizeof(*info));
info->arch = g_strdup(TARGET_NAME);
info->arch = TARGET_TYPE;
return info;
}
/* Stub function that gets run on the vcpu when it's brought out of the
VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
qemu_mutex_unlock_iothread();
g_usleep(30*1000);
qemu_mutex_lock_iothread();
}
/* To reduce the dirty rate explicitly disallow the VCPUs from spending
much time in the VM. The migration thread will try to catch up.
Workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
CPUState *cpu;
qemu_mutex_lock_iothread();
CPU_FOREACH(cpu) {
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}
qemu_mutex_unlock_iothread();
}
static void check_guest_throttling(void)
{
static int64_t t0;
int64_t t1;
if (!mig_throttle_on) {
return;
}
if (!t0) {
t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
return;
}
t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
/* If it has been more than 40 ms since the last time the guest
* was throttled then do it again.
*/
if (40 < (t1-t0)/1000000) {
mig_throttle_guest_down();
t0 = t1;
}
}

async.c

@@ -24,7 +24,6 @@
#include "qemu-common.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
/***********************************************************/
@@ -47,16 +46,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
bh->ctx = ctx;
bh->cb = cb;
bh->opaque = opaque;
qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh;
/* Make sure that the members are ready before putting bh into list */
smp_wmb();
ctx->first_bh = bh;
qemu_mutex_unlock(&ctx->bh_lock);
return bh;
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
QEMUBH *bh, **bhp, *next;
@@ -66,15 +60,9 @@ int aio_bh_poll(AioContext *ctx)
ret = 0;
for (bh = ctx->first_bh; bh; bh = next) {
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
/* Paired with write barrier in bh schedule to ensure reading for
* idle & callbacks coming after bh's scheduling.
*/
smp_rmb();
if (!bh->idle)
ret = 1;
bh->idle = 0;
@@ -86,7 +74,6 @@ int aio_bh_poll(AioContext *ctx)
/* remove deleted bhs */
if (!ctx->walking_bh) {
qemu_mutex_lock(&ctx->bh_lock);
bhp = &ctx->first_bh;
while (*bhp) {
bh = *bhp;
@@ -97,7 +84,6 @@ int aio_bh_poll(AioContext *ctx)
bhp = &bh->next;
}
}
qemu_mutex_unlock(&ctx->bh_lock);
}
return ret;
@@ -107,38 +93,24 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
{
if (bh->scheduled)
return;
bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
bh->idle = 1;
}
void qemu_bh_schedule(QEMUBH *bh)
{
if (bh->scheduled)
return;
bh->idle = 0;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
bh->idle = 0;
aio_notify(bh->ctx);
}
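The barrier comments in qemu_bh_schedule()/aio_bh_poll() above describe a publish/consume pairing: the producer finishes writing bh->idle (and anything the callback needs) before making bh->scheduled visible, and the consumer reads bh->scheduled before touching those fields. QEMU expresses this with its own smp_wmb()/smp_rmb() macros; the self-contained C11 program below models the same ordering with release/acquire atomics (not QEMU code, just the pattern; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int idle;                                  /* payload written before publishing   */
static atomic_int scheduled = ATOMIC_VAR_INIT(0); /* publication flag, like bh->scheduled */

static void *producer(void *arg)
{
    (void)arg;
    idle = 1;                                                   /* like bh->idle = 1            */
    atomic_store_explicit(&scheduled, 1, memory_order_release); /* like smp_wmb(); scheduled = 1 */
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, producer, NULL);

    /* Consumer: only read the payload once the flag is observed set,
     * mirroring the scheduled/idle checks in aio_bh_poll(). */
    while (!atomic_load_explicit(&scheduled, memory_order_acquire)) {
        /* spin */
    }
    printf("idle = %d\n", idle);   /* guaranteed to print 1 */

    pthread_join(t, NULL);
    return 0;
}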
/* This func is async.
*/
void qemu_bh_cancel(QEMUBH *bh)
{
bh->scheduled = 0;
}
/* This func is async. The bottom half will do the delete action at the final
* end.
*/
void qemu_bh_delete(QEMUBH *bh)
{
bh->scheduled = 0;
@@ -150,10 +122,7 @@ aio_ctx_prepare(GSource *source, gint *timeout)
{
AioContext *ctx = (AioContext *) source;
QEMUBH *bh;
int deadline;
/* We assume there is no timeout already supplied */
*timeout = -1;
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
if (bh->idle) {
@@ -169,14 +138,6 @@ aio_ctx_prepare(GSource *source, gint *timeout)
}
}
deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
if (deadline == 0) {
*timeout = 0;
return true;
} else {
*timeout = qemu_soonest_timeout(*timeout, deadline);
}
return false;
}
@@ -191,7 +152,7 @@ aio_ctx_check(GSource *source)
return true;
}
}
return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
return aio_pending(ctx);
}
static gboolean
@@ -211,12 +172,8 @@ aio_ctx_finalize(GSource *source)
{
AioContext *ctx = (AioContext *) source;
thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE);
timerlistgroup_deinit(&ctx->tlg);
}
static GSourceFuncs aio_source_funcs = {
@@ -232,36 +189,19 @@ GSource *aio_get_g_source(AioContext *ctx)
return &ctx->source;
}
ThreadPool *aio_get_thread_pool(AioContext *ctx)
{
if (!ctx->thread_pool) {
ctx->thread_pool = thread_pool_new(ctx);
}
return ctx->thread_pool;
}
void aio_notify(AioContext *ctx)
{
event_notifier_set(&ctx->notifier);
}
static void aio_timerlist_notify(void *opaque)
{
aio_notify(opaque);
}
AioContext *aio_context_new(void)
{
AioContext *ctx;
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)
event_notifier_test_and_clear);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
event_notifier_test_and_clear, NULL);
return ctx;
}


@@ -95,7 +95,7 @@ static struct {
}
},
.period = { .hertz = 100 },
.period = { .hertz = 250 },
.plive = 0,
.log_to_monitor = 0,
.try_poll_in = 1,
@@ -1124,11 +1124,10 @@ static int audio_is_timer_needed (void)
static void audio_reset_timer (AudioState *s)
{
if (audio_is_timer_needed ()) {
timer_mod (s->ts,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
qemu_mod_timer (s->ts, qemu_get_clock_ns (vm_clock) + 1);
}
else {
timer_del (s->ts);
qemu_del_timer (s->ts);
}
}
@@ -1835,7 +1834,7 @@ static void audio_init (void)
QLIST_INIT (&s->cap_head);
atexit (audio_atexit);
s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
s->ts = qemu_new_timer_ns (vm_clock, audio_timer, s);
if (!s->ts) {
hw_error("Could not create audio timer\n");
}
@@ -2055,6 +2054,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
sw = sw1;
}
QLIST_REMOVE (cap, entries);
g_free (cap->hw.mix_buf);
g_free (cap->buf);
g_free (cap);
}
return;


@@ -243,13 +243,38 @@ static inline int audio_ring_dist (int dst, int src, int len)
return (dst >= src) ? (dst - src) : (len - src + dst);
}
#define dolog(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
static void GCC_ATTR dolog (const char *fmt, ...)
{
va_list ap;
va_start (ap, fmt);
AUD_vlog (AUDIO_CAP, fmt, ap);
va_end (ap);
}
#ifdef DEBUG
#define ldebug(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
static void GCC_ATTR ldebug (const char *fmt, ...)
{
va_list ap;
va_start (ap, fmt);
AUD_vlog (AUDIO_CAP, fmt, ap);
va_end (ap);
}
#else
#define ldebug(fmt, ...) (void)0
#if defined NDEBUG && defined __GNUC__
#define ldebug(...)
#elif defined NDEBUG && defined _MSC_VER
#define ldebug __noop
#else
static void GCC_ATTR ldebug (const char *fmt, ...)
{
(void) fmt;
}
#endif
#endif
#undef GCC_ATTR
#define AUDIO_STRINGIFY_(n) #n
#define AUDIO_STRINGIFY(n) AUDIO_STRINGIFY_(n)


@@ -1,6 +1,7 @@
/* public domain */
#include "qemu-common.h"
#include "audio.h"
#define AUDIO_CAP "win-int"
#include <windows.h>


@@ -348,6 +348,7 @@ void mixeng_clear (struct st_sample *buf, int len)
void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol)
{
#ifdef CONFIG_MIXEMU
if (vol->mute) {
mixeng_clear (buf, len);
return;
@@ -363,4 +364,9 @@ void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol)
#endif
buf += 1;
}
#else
(void) buf;
(void) len;
(void) vol;
#endif
}


@@ -35,7 +35,7 @@
#define IN_T glue (glue (ITYPE, BSIZE), _t)
#ifdef FLOAT_MIXENG
static inline mixeng_real glue (conv_, ET) (IN_T v)
static mixeng_real inline glue (conv_, ET) (IN_T v)
{
IN_T nv = ENDIAN_CONVERT (v);
@@ -54,7 +54,7 @@ static inline mixeng_real glue (conv_, ET) (IN_T v)
#endif
}
static inline IN_T glue (clip_, ET) (mixeng_real v)
static IN_T inline glue (clip_, ET) (mixeng_real v)
{
if (v >= 0.5) {
return IN_MAX;


@@ -46,7 +46,7 @@ static int no_run_out (HWVoiceOut *hw, int live)
int64_t ticks;
int64_t bytes;
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
now = qemu_get_clock_ns (vm_clock);
ticks = now - no->old_ticks;
bytes = muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
bytes = audio_MIN (bytes, INT_MAX);
@@ -102,7 +102,7 @@ static int no_run_in (HWVoiceIn *hw)
int samples = 0;
if (dead) {
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t now = qemu_get_clock_ns (vm_clock);
int64_t ticks = now - no->old_ticks;
int64_t bytes =
muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());


@@ -25,7 +25,11 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#ifdef __OpenBSD__
#include <soundcard.h>
#else
#include <sys/soundcard.h>
#endif
#include "qemu-common.h"
#include "qemu/main-loop.h"
#include "qemu/host-utils.h"
@@ -849,10 +853,6 @@ static int oss_ctl_in (HWVoiceIn *hw, int cmd, ...)
static void *oss_audio_init (void)
{
if (access(conf.devpath_in, R_OK | W_OK) < 0 ||
access(conf.devpath_out, R_OK | W_OK) < 0) {
return NULL;
}
return &conf;
}


@@ -547,11 +547,11 @@ static int qpa_init_out (HWVoiceOut *hw, struct audsettings *as)
ss.rate = as->freq;
/*
* qemu audio tick runs at 100 Hz (by default), so processing
* data chunks worth 10 ms of sound should be a good fit.
* qemu audio tick runs at 250 Hz (by default), so processing
* data chunks worth 4 ms of sound should be a good fit.
*/
ba.tlength = pa_usec_to_bytes (10 * 1000, &ss);
ba.minreq = pa_usec_to_bytes (5 * 1000, &ss);
ba.tlength = pa_usec_to_bytes (4 * 1000, &ss);
ba.minreq = pa_usec_to_bytes (2 * 1000, &ss);
ba.maxlength = -1;
ba.prebuf = -1;


@@ -25,17 +25,8 @@
#include "audio.h"
#include "audio_int.h"
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
#define LINE_OUT_SAMPLES (480 * 4)
#else
#define LINE_OUT_SAMPLES (256 * 4)
#endif
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
#define LINE_IN_SAMPLES (480 * 4)
#else
#define LINE_IN_SAMPLES (256 * 4)
#endif
#define LINE_IN_SAMPLES 1024
#define LINE_OUT_SAMPLES 1024
typedef struct SpiceRateCtl {
int64_t start_ticks;
@@ -90,7 +81,7 @@ static void spice_audio_fini (void *opaque)
static void rate_start (SpiceRateCtl *rate)
{
memset (rate, 0, sizeof (*rate));
rate->start_ticks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
rate->start_ticks = qemu_get_clock_ns (vm_clock);
}
static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)
@@ -100,7 +91,7 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)
int64_t bytes;
int64_t samples;
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
now = qemu_get_clock_ns (vm_clock);
ticks = now - rate->start_ticks;
bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
samples = (bytes - rate->bytes_sent) >> info->shift;
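rate_get_samples() above converts elapsed virtual-clock time into a sample budget: bytes = ticks * bytes_per_second / ticks_per_sec, then a shift by the per-frame size and subtraction of what was already sent. A worked standalone example of that arithmetic for 10 ms of 44.1 kHz stereo S16 audio (values chosen for illustration; QEMU's muldiv64() performs the multiply/divide without overflowing 64 bits, which a plain multiply could with larger operands):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t ticks_per_sec    = 1000000000ULL;       /* ns-resolution clock        */
    uint64_t ticks            = 10 * 1000 * 1000ULL; /* 10 ms elapsed              */
    uint64_t bytes_per_second = 44100 * 2 * 2;       /* stereo 16-bit = 176400 B/s */
    unsigned shift            = 2;                   /* 4 bytes per frame -> >> 2  */
    uint64_t bytes_sent       = 0;

    uint64_t bytes   = ticks * bytes_per_second / ticks_per_sec; /* 1764 bytes */
    uint64_t samples = (bytes - bytes_sent) >> shift;            /* 441 frames */

    printf("bytes=%llu samples=%llu\n",
           (unsigned long long)bytes, (unsigned long long)samples);
    return 0;
}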
@@ -120,11 +111,7 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
struct audsettings settings;
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
settings.freq = spice_server_get_best_playback_rate(NULL);
#else
settings.freq = SPICE_INTERFACE_PLAYBACK_FREQ;
#endif
settings.nchannels = SPICE_INTERFACE_PLAYBACK_CHAN;
settings.fmt = AUD_FMT_S16;
settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -135,9 +122,6 @@ static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
out->sin.base.sif = &playback_sif.base;
qemu_spice_add_interface (&out->sin.base);
#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
spice_server_set_playback_rate(&out->sin, settings.freq);
#endif
return 0;
}
@@ -248,11 +232,7 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
struct audsettings settings;
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
settings.freq = spice_server_get_best_record_rate(NULL);
#else
settings.freq = SPICE_INTERFACE_RECORD_FREQ;
#endif
settings.nchannels = SPICE_INTERFACE_RECORD_CHAN;
settings.fmt = AUD_FMT_S16;
settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -263,9 +243,6 @@ static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
in->sin.base.sif = &record_sif.base;
qemu_spice_add_interface (&in->sin.base);
#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
spice_server_set_record_rate(&in->sin, settings.freq);
#endif
return 0;
}


@@ -52,7 +52,7 @@ static int wav_run_out (HWVoiceOut *hw, int live)
int rpos, decr, samples;
uint8_t *dst;
struct st_sample *src;
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t now = qemu_get_clock_ns (vm_clock);
int64_t ticks = now - wav->old_ticks;
int64_t bytes =
muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());


@@ -1,8 +1,2 @@
common-obj-y += rng.o rng-egd.o
common-obj-$(CONFIG_POSIX) += rng-random.o
common-obj-y += msmouse.o
common-obj-$(CONFIG_BRLAPI) += baum.o
$(obj)/baum.o: QEMU_CFLAGS += $(SDL_CFLAGS)
common-obj-$(CONFIG_TPM) += tpm.o


@@ -10,8 +10,8 @@
* See the COPYING file in the top-level directory.
*/
#include "sysemu/rng.h"
#include "sysemu/char.h"
#include "qemu/rng.h"
#include "char/char.h"
#include "qapi/qmp/qerror.h"
#include "hw/qdev.h" /* just for DEFINE_PROP_CHR */
@@ -24,33 +24,12 @@ typedef struct RngEgd
CharDriverState *chr;
char *chr_name;
GSList *requests;
} RngEgd;
typedef struct RngRequest
{
EntropyReceiveFunc *receive_entropy;
uint8_t *data;
void *opaque;
size_t offset;
size_t size;
} RngRequest;
static void rng_egd_request_entropy(RngBackend *b, size_t size,
EntropyReceiveFunc *receive_entropy,
void *opaque)
static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
{
RngEgd *s = RNG_EGD(b);
RngRequest *req;
req = g_malloc(sizeof(*req));
req->offset = 0;
req->size = size;
req->receive_entropy = receive_entropy;
req->opaque = opaque;
req->data = g_malloc(req->size);
size_t size = req->size;
while (size > 0) {
uint8_t header[2];
@@ -64,23 +43,15 @@ static void rng_egd_request_entropy(RngBackend *b, size_t size,
size -= len;
}
s->requests = g_slist_append(s->requests, req);
}
static void rng_egd_free_request(RngRequest *req)
{
g_free(req->data);
g_free(req);
}
static int rng_egd_chr_can_read(void *opaque)
static size_t rng_egd_chr_can_read(void *opaque)
{
RngEgd *s = RNG_EGD(opaque);
GSList *i;
int size = 0;
size_t size = 0;
for (i = s->requests; i; i = i->next) {
for (i = s->parent.requests; i; i = i->next) {
RngRequest *req = i->data;
size += req->size - req->offset;
}
@@ -88,53 +59,26 @@ static int rng_egd_chr_can_read(void *opaque)
return size;
}
static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
static void rng_egd_chr_read(void *opaque, const uint8_t *buf, size_t size)
{
RngEgd *s = RNG_EGD(opaque);
size_t buf_offset = 0;
while (size > 0 && s->requests) {
RngRequest *req = s->requests->data;
while (size > 0 && s->parent.requests) {
RngRequest *req = s->parent.requests->data;
int len = MIN(size, req->size - req->offset);
memcpy(req->data + req->offset, buf + buf_offset, len);
buf_offset += len;
memcpy(req->data + req->offset, buf, len);
req->offset += len;
size -= len;
if (req->offset == req->size) {
s->requests = g_slist_remove_link(s->requests, s->requests);
req->receive_entropy(req->opaque, req->data, req->size);
rng_egd_free_request(req);
rng_backend_finalize_request(&s->parent, req);
}
}
}
static void rng_egd_free_requests(RngEgd *s)
{
GSList *i;
for (i = s->requests; i; i = i->next) {
rng_egd_free_request(i->data);
}
g_slist_free(s->requests);
s->requests = NULL;
}
static void rng_egd_cancel_requests(RngBackend *b)
{
RngEgd *s = RNG_EGD(b);
/* We simply delete the list of pending requests. If there is data in the
* queue waiting to be read, this is okay, because there will always be
* more data than we requested originally
*/
rng_egd_free_requests(s);
}
static void rng_egd_opened(RngBackend *b, Error **errp)
{
RngEgd *s = RNG_EGD(b);
@@ -151,11 +95,6 @@ static void rng_egd_opened(RngBackend *b, Error **errp)
return;
}
if (qemu_chr_fe_claim(s->chr) != 0) {
error_set(errp, QERR_DEVICE_IN_USE, s->chr_name);
return;
}
/* FIXME we should resubmit pending requests when the CDS reconnects. */
qemu_chr_add_handlers(s->chr, rng_egd_chr_can_read, rng_egd_chr_read,
NULL, s);
@@ -169,6 +108,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
if (b->opened) {
error_set(errp, QERR_PERMISSION_DENIED);
} else {
g_free(s->chr_name);
s->chr_name = g_strdup(value);
}
}
@@ -197,12 +137,9 @@ static void rng_egd_finalize(Object *obj)
if (s->chr) {
qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL);
qemu_chr_fe_release(s->chr);
}
g_free(s->chr_name);
rng_egd_free_requests(s);
}
static void rng_egd_class_init(ObjectClass *klass, void *data)
@@ -210,7 +147,6 @@ static void rng_egd_class_init(ObjectClass *klass, void *data)
RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
rbc->request_entropy = rng_egd_request_entropy;
rbc->cancel_requests = rng_egd_cancel_requests;
rbc->opened = rng_egd_opened;
}


@@ -10,8 +10,8 @@
* See the COPYING file in the top-level directory.
*/
#include "sysemu/rng-random.h"
#include "sysemu/rng.h"
#include "qemu/rng-random.h"
#include "qemu/rng.h"
#include "qapi/qmp/qerror.h"
#include "qemu/main-loop.h"
@@ -21,10 +21,6 @@ struct RndRandom
int fd;
char *filename;
EntropyReceiveFunc *receive_func;
void *opaque;
size_t size;
};
/**
@@ -37,36 +33,35 @@ struct RndRandom
static void entropy_available(void *opaque)
{
RndRandom *s = RNG_RANDOM(opaque);
uint8_t buffer[s->size];
ssize_t len;
len = read(s->fd, buffer, s->size);
if (len < 0 && errno == EAGAIN) {
return;
while (s->parent.requests != NULL) {
RngRequest *req = s->parent.requests->data;
ssize_t len;
len = read(s->fd, req->data, req->size);
if (len < 0 && errno == EAGAIN) {
return;
}
g_assert(len != -1);
req->receive_entropy(req->opaque, req->data, len);
rng_backend_finalize_request(&s->parent, req);
}
g_assert(len != -1);
s->receive_func(s->opaque, buffer, len);
s->receive_func = NULL;
/* We've drained all requests, the fd handler can be reset. */
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
}
static void rng_random_request_entropy(RngBackend *b, size_t size,
EntropyReceiveFunc *receive_entropy,
void *opaque)
static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
{
RndRandom *s = RNG_RANDOM(b);
if (s->receive_func) {
s->receive_func(s->opaque, NULL, 0);
if (s->parent.requests == NULL) {
/* If there are no pending requests yet, we need to
* install our fd handler. */
qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
s->receive_func = receive_entropy;
s->opaque = opaque;
s->size = size;
qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
static void rng_random_opened(RngBackend *b, Error **errp)
@@ -78,8 +73,9 @@ static void rng_random_opened(RngBackend *b, Error **errp)
"filename", "a valid filename");
} else {
s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
if (s->fd == -1) {
error_setg_file_open(errp, errno, s->filename);
error_set(errp, QERR_OPEN_FILE_FAILED, s->filename);
}
}
}
@@ -123,15 +119,15 @@ static void rng_random_init(Object *obj)
NULL);
s->filename = g_strdup("/dev/random");
s->fd = -1;
}
static void rng_random_finalize(Object *obj)
{
RndRandom *s = RNG_RANDOM(obj);
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
if (s->fd != -1) {
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
qemu_close(s->fd);
}


@@ -10,27 +10,28 @@
* See the COPYING file in the top-level directory.
*/
#include "sysemu/rng.h"
#include "qemu/rng.h"
#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
void rng_backend_request_entropy(RngBackend *s, size_t size,
EntropyReceiveFunc *receive_entropy,
void *opaque)
{
RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
RngRequest *req;
if (k->request_entropy) {
k->request_entropy(s, size, receive_entropy, opaque);
}
}
req = g_malloc(sizeof(*req));
void rng_backend_cancel_requests(RngBackend *s)
{
RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
req->offset = 0;
req->size = size;
req->receive_entropy = receive_entropy;
req->opaque = opaque;
req->data = g_malloc(req->size);
if (k->cancel_requests) {
k->cancel_requests(s);
k->request_entropy(s, req);
s->requests = g_slist_append(s->requests, req);
}
}
@@ -41,9 +42,9 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
return s->opened;
}
static void rng_backend_complete(UserCreatable *uc, Error **errp)
void rng_backend_open(RngBackend *s, Error **errp)
{
object_property_set_bool(OBJECT(uc), true, "opened", errp);
object_property_set_bool(OBJECT(s), true, "opened", errp);
}
static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
@@ -69,6 +70,30 @@ static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
}
}
static void rng_backend_free_request(RngRequest *req)
{
g_free(req->data);
g_free(req);
}
static void rng_backend_free_requests(RngBackend *s)
{
GSList *i;
for (i = s->requests; i; i = i->next) {
rng_backend_free_request(i->data);
}
g_slist_free(s->requests);
s->requests = NULL;
}
void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
{
s->requests = g_slist_remove(s->requests, req);
rng_backend_free_request(req);
}
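One side of this hunk moves request bookkeeping into the common RngBackend: rng_backend_request_entropy() allocates the RngRequest, queues it on s->requests, and hands it to the backend's request_entropy hook, while rng_backend_finalize_request() unlinks and frees it once the entropy has been delivered. A self-contained miniature of that queue lifecycle (plain singly linked list instead of GSList; names are illustrative, not QEMU's):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef void receive_fn(void *opaque, uint8_t *data, size_t size);

typedef struct Req {
    size_t size, offset;
    uint8_t *data;
    receive_fn *receive;
    void *opaque;
    struct Req *next;
} Req;

static Req *requests;   /* common backend queue, like RngBackend.requests */

/* Queue a request; a real backend would also kick its entropy source here. */
static void request_entropy(size_t size, receive_fn *receive, void *opaque)
{
    Req *req = calloc(1, sizeof(*req));
    req->size = size;
    req->data = malloc(size);
    req->receive = receive;
    req->opaque = opaque;
    req->next = requests;
    requests = req;
}

/* Deliver bytes into the head request; finalize it once it is full. */
static void deliver(const uint8_t *buf, size_t len)
{
    Req *req = requests;
    if (!req) {
        return;
    }
    size_t n = len < req->size - req->offset ? len : req->size - req->offset;
    memcpy(req->data + req->offset, buf, n);
    req->offset += n;
    if (req->offset == req->size) {
        requests = req->next;   /* like rng_backend_finalize_request() */
        req->receive(req->opaque, req->data, req->size);
        free(req->data);
        free(req);
    }
}

static void print_entropy(void *opaque, uint8_t *data, size_t size)
{
    (void)opaque;
    printf("got %zu bytes, first = 0x%02x\n", size, data[0]);
}

int main(void)
{
    uint8_t fake[4] = { 0xde, 0xad, 0xbe, 0xef };
    request_entropy(sizeof(fake), print_entropy, NULL);
    deliver(fake, sizeof(fake));
    return 0;
}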
static void rng_backend_init(Object *obj)
{
object_property_add_bool(obj, "opened",
@@ -77,11 +102,11 @@ static void rng_backend_init(Object *obj)
NULL);
}
static void rng_backend_class_init(ObjectClass *oc, void *data)
static void rng_backend_finalize(Object *obj)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
RngBackend *s = RNG_BACKEND(obj);
ucc->complete = rng_backend_complete;
rng_backend_free_requests(s);
}
static const TypeInfo rng_backend_info = {
@@ -89,13 +114,9 @@ static const TypeInfo rng_backend_info = {
.parent = TYPE_OBJECT,
.instance_size = sizeof(RngBackend),
.instance_init = rng_backend_init,
.instance_finalize = rng_backend_finalize,
.class_size = sizeof(RngBackendClass),
.class_init = rng_backend_class_init,
.abstract = true,
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
static void register_types(void)


@@ -1,190 +0,0 @@
/*
* QEMU TPM Backend
*
* Copyright IBM, Corp. 2013
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
* Based on backends/rng.c by Anthony Liguori
*/
#include "sysemu/tpm_backend.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm.h"
#include "qemu/thread.h"
#include "sysemu/tpm_backend_int.h"
enum TpmType tpm_backend_get_type(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->type;
}
const char *tpm_backend_get_desc(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->desc();
}
void tpm_backend_destroy(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->destroy(s);
}
int tpm_backend_init(TPMBackend *s, TPMState *state,
TPMRecvDataCB *datacb)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->init(s, state, datacb);
}
int tpm_backend_startup_tpm(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->startup_tpm(s);
}
bool tpm_backend_had_startup_error(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->had_startup_error(s);
}
size_t tpm_backend_realloc_buffer(TPMBackend *s, TPMSizedBuffer *sb)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->realloc_buffer(sb);
}
void tpm_backend_deliver_request(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->deliver_request(s);
}
void tpm_backend_reset(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->reset(s);
}
void tpm_backend_cancel_cmd(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->cancel_cmd(s);
}
bool tpm_backend_get_tpm_established_flag(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->get_tpm_established_flag(s);
}
static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
{
TPMBackend *s = TPM_BACKEND(obj);
return s->opened;
}
void tpm_backend_open(TPMBackend *s, Error **errp)
{
object_property_set_bool(OBJECT(s), true, "opened", errp);
}
static void tpm_backend_prop_set_opened(Object *obj, bool value, Error **errp)
{
TPMBackend *s = TPM_BACKEND(obj);
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
if (value == s->opened) {
return;
}
if (!value && s->opened) {
error_set(errp, QERR_PERMISSION_DENIED);
return;
}
if (k->opened) {
k->opened(s, errp);
}
if (!error_is_set(errp)) {
s->opened = value;
}
}
static void tpm_backend_instance_init(Object *obj)
{
object_property_add_bool(obj, "opened",
tpm_backend_prop_get_opened,
tpm_backend_prop_set_opened,
NULL);
}
void tpm_backend_thread_deliver_request(TPMBackendThread *tbt)
{
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_PROCESS_CMD, NULL);
}
void tpm_backend_thread_create(TPMBackendThread *tbt,
GFunc func, gpointer user_data)
{
if (!tbt->pool) {
tbt->pool = g_thread_pool_new(func, user_data, 1, TRUE, NULL);
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_INIT, NULL);
}
}
void tpm_backend_thread_end(TPMBackendThread *tbt)
{
if (tbt->pool) {
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_END, NULL);
g_thread_pool_free(tbt->pool, FALSE, TRUE);
tbt->pool = NULL;
}
}
void tpm_backend_thread_tpm_reset(TPMBackendThread *tbt,
GFunc func, gpointer user_data)
{
if (!tbt->pool) {
tpm_backend_thread_create(tbt, func, user_data);
} else {
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_TPM_RESET,
NULL);
}
}
static const TypeInfo tpm_backend_info = {
.name = TYPE_TPM_BACKEND,
.parent = TYPE_OBJECT,
.instance_size = sizeof(TPMBackend),
.instance_init = tpm_backend_instance_init,
.class_size = sizeof(TPMBackendClass),
.abstract = true,
};
static void register_types(void)
{
type_register_static(&tpm_backend_info);
}
type_init(register_types);


@@ -29,7 +29,6 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536
@@ -44,25 +43,19 @@
#endif
typedef struct BlkMigDevState {
/* Written during setup phase. Can be read without a lock. */
BlockDriverState *bs;
int shared_base;
int64_t total_sectors;
QSIMPLEQ_ENTRY(BlkMigDevState) entry;
/* Only used by migration thread. Does not need a lock. */
int bulk_completed;
int shared_base;
int64_t cur_sector;
int64_t cur_dirty;
/* Protected by block migration lock. */
unsigned long *aio_bitmap;
int64_t completed_sectors;
BdrvDirtyBitmap *dirty_bitmap;
int64_t total_sectors;
int64_t dirty;
QSIMPLEQ_ENTRY(BlkMigDevState) entry;
unsigned long *aio_bitmap;
} BlkMigDevState;
typedef struct BlkMigBlock {
/* Only used by migration thread. */
uint8_t *buf;
BlkMigDevState *bmds;
int64_t sector;
@@ -70,77 +63,39 @@ typedef struct BlkMigBlock {
struct iovec iov;
QEMUIOVector qiov;
BlockDriverAIOCB *aiocb;
/* Protected by block migration lock. */
int ret;
QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;
typedef struct BlkMigState {
/* Written during setup phase. Can be read without a lock. */
int blk_enable;
int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
bool zero_blocks;
/* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
int submitted;
int read_done;
/* Only used by migration thread. Does not need a lock. */
int transferred;
int64_t total_sector_sum;
int prev_progress;
int bulk_completed;
/* Lock must be taken _inside_ the iothread lock. */
QemuMutex lock;
long double prev_time_offset;
} BlkMigState;
static BlkMigState block_mig_state;
static void blk_mig_lock(void)
{
qemu_mutex_lock(&block_mig_state.lock);
}
static void blk_mig_unlock(void)
{
qemu_mutex_unlock(&block_mig_state.lock);
}
/* Must run outside of the iothread lock during the bulk phase,
* or the VM will stall.
*/
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
if (block_mig_state.zero_blocks &&
buffer_is_zero(blk->buf, BLOCK_SIZE)) {
flags |= BLK_MIG_FLAG_ZERO_BLOCK;
}
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
| flags);
| BLK_MIG_FLAG_DEVICE_BLOCK);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
/* if a block is zero we need to flush here since the network
* bandwidth is now a lot higher than the storage device bandwidth.
* thus if we queue zero blocks we slow down the migration */
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
qemu_fflush(f);
return;
}
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
@@ -154,11 +109,9 @@ uint64_t blk_mig_bytes_transferred(void)
BlkMigDevState *bmds;
uint64_t sum = 0;
blk_mig_lock();
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
sum += bmds->completed_sectors;
}
blk_mig_unlock();
return sum << BDRV_SECTOR_BITS;
}
@@ -178,9 +131,6 @@ uint64_t blk_mig_bytes_total(void)
return sum << BDRV_SECTOR_BITS;
}
/* Called with migration lock held. */
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -193,8 +143,6 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
}
}
/* Called with migration lock held. */
static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
int nb_sectors, int set)
{
@@ -229,26 +177,23 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
bmds->aio_bitmap = g_malloc0(bitmap_size);
}
/* Never hold migration lock when yielding to the main loop! */
static void blk_mig_read_cb(void *opaque, int ret)
{
long double curr_time = qemu_get_clock_ns(rt_clock);
BlkMigBlock *blk = opaque;
blk_mig_lock();
blk->ret = ret;
block_mig_state.prev_time_offset = curr_time;
QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
block_mig_state.submitted--;
block_mig_state.read_done++;
assert(block_mig_state.submitted >= 0);
blk_mig_unlock();
}
/* Called with no lock taken. */
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
int64_t total_sectors = bmds->total_sectors;
@@ -258,13 +203,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
int nr_sectors;
if (bmds->shared_base) {
qemu_mutex_lock_iothread();
while (cur_sector < total_sectors &&
!bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
&nr_sectors)) {
cur_sector += nr_sectors;
}
qemu_mutex_unlock_iothread();
}
if (cur_sector >= total_sectors) {
@@ -293,38 +236,26 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
blk_mig_lock();
block_mig_state.submitted++;
blk_mig_unlock();
if (block_mig_state.submitted == 0) {
block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
}
qemu_mutex_lock_iothread();
blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
block_mig_state.submitted++;
bdrv_reset_dirty(bs, cur_sector, nr_sectors);
qemu_mutex_unlock_iothread();
bmds->cur_sector = cur_sector + nr_sectors;
return (bmds->cur_sector >= total_sectors);
}
/* Called with iothread lock taken. */
static void set_dirty_tracking(void)
static void set_dirty_tracking(int enable)
{
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
}
}
static void unset_dirty_tracking(void)
{
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
}
}
@@ -346,8 +277,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
drive_get_ref(drive_get_by_blockdev(bs));
bdrv_set_in_use(bs, 1);
bdrv_ref(bs);
block_mig_state.total_sector_sum += sectors;
@@ -370,13 +301,10 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0;
block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL);
}
/* Called with no lock taken. */
static int blk_mig_save_bulked_block(QEMUFile *f)
{
int64_t completed_sector_sum = 0;
@@ -423,8 +351,6 @@ static void blk_mig_reset_dirty_cursor(void)
}
}
/* Called with iothread lock taken. */
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
int is_async)
{
@@ -435,14 +361,10 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
int ret = -EIO;
for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
blk_mig_lock();
if (bmds_aio_inflight(bmds, sector)) {
blk_mig_unlock();
bdrv_drain_all();
} else {
blk_mig_unlock();
}
if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
if (bdrv_get_dirty(bmds->bs, sector)) {
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - sector;
@@ -460,13 +382,14 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
if (block_mig_state.submitted == 0) {
block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
}
blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
blk_mig_lock();
block_mig_state.submitted++;
bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
blk_mig_unlock();
} else {
ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
if (ret < 0) {
@@ -494,9 +417,7 @@ error:
return ret;
}
/* Called with iothread lock taken.
*
* return value:
/* return value:
* 0: too much data for max_downtime
* 1: few enough data for max_downtime
*/
@@ -515,8 +436,6 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
return ret;
}
/* Called with no locks taken. */
static int flush_blks(QEMUFile *f)
{
BlkMigBlock *blk;
@@ -526,7 +445,6 @@ static int flush_blks(QEMUFile *f)
__FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
block_mig_state.transferred);
blk_mig_lock();
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
if (qemu_file_rate_limit(f)) {
break;
@@ -535,12 +453,9 @@ static int flush_blks(QEMUFile *f)
ret = blk->ret;
break;
}
blk_send(f, blk);
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
blk_mig_unlock();
blk_send(f, blk);
blk_mig_lock();
g_free(blk->buf);
g_free(blk);
@@ -548,7 +463,6 @@ static int flush_blks(QEMUFile *f)
block_mig_state.transferred++;
assert(block_mig_state.read_done >= 0);
}
blk_mig_unlock();
DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
block_mig_state.submitted, block_mig_state.read_done,
@@ -556,22 +470,18 @@ static int flush_blks(QEMUFile *f)
return ret;
}
/* Called with iothread lock taken. */
static int64_t get_remaining_dirty(void)
{
BlkMigDevState *bmds;
int64_t dirty = 0;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
dirty += bdrv_get_dirty_count(bmds->bs);
}
return dirty << BDRV_SECTOR_BITS;
}
/* Called with iothread lock taken. */
static void blk_mig_cleanup(void)
{
BlkMigDevState *bmds;
@@ -579,13 +489,12 @@ static void blk_mig_cleanup(void)
bdrv_drain_all();
unset_dirty_tracking();
set_dirty_tracking(0);
blk_mig_lock();
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_set_in_use(bmds->bs, 0);
bdrv_unref(bmds->bs);
drive_put_ref(drive_get_by_blockdev(bmds->bs));
g_free(bmds->aio_bitmap);
g_free(bmds);
}
@@ -595,7 +504,6 @@ static void blk_mig_cleanup(void)
g_free(blk->buf);
g_free(blk);
}
blk_mig_unlock();
}
static void block_migration_cancel(void *opaque)
@@ -610,18 +518,22 @@ static int block_save_setup(QEMUFile *f, void *opaque)
DPRINTF("Enter save live setup submitted %d transferred %d\n",
block_mig_state.submitted, block_mig_state.transferred);
qemu_mutex_lock_iothread();
init_blk_migration(f);
/* start track dirty blocks */
set_dirty_tracking();
qemu_mutex_unlock_iothread();
set_dirty_tracking(1);
ret = flush_blks(f);
if (ret) {
blk_mig_cleanup();
return ret;
}
blk_mig_reset_dirty_cursor();
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
return ret;
return 0;
}
static int block_save_iterate(QEMUFile *f, void *opaque)
@@ -634,54 +546,46 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
ret = flush_blks(f);
if (ret) {
blk_mig_cleanup();
return ret;
}
blk_mig_reset_dirty_cursor();
/* control the rate of transfer */
blk_mig_lock();
while ((block_mig_state.submitted +
block_mig_state.read_done) * BLOCK_SIZE <
qemu_file_get_rate_limit(f)) {
blk_mig_unlock();
if (block_mig_state.bulk_completed == 0) {
/* first finish the bulk phase */
if (blk_mig_save_bulked_block(f) == 0) {
/* finished saving bulk on all devices */
block_mig_state.bulk_completed = 1;
}
ret = 0;
} else {
/* Always called with iothread lock taken for
* simplicity, block_save_complete also calls it.
*/
qemu_mutex_lock_iothread();
ret = blk_mig_save_dirty_block(f, 1);
qemu_mutex_unlock_iothread();
}
if (ret < 0) {
return ret;
}
blk_mig_lock();
if (ret != 0) {
/* no more dirty blocks */
break;
if (ret != 0) {
/* no more dirty blocks */
break;
}
}
}
blk_mig_unlock();
if (ret < 0) {
blk_mig_cleanup();
return ret;
}
ret = flush_blks(f);
if (ret) {
blk_mig_cleanup();
return ret;
}
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
return qemu_ftell(f) - last_ftell;
}
/* Called with iothread lock taken. */
static int block_save_complete(QEMUFile *f, void *opaque)
{
int ret;
@@ -691,6 +595,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
ret = flush_blks(f);
if (ret) {
blk_mig_cleanup();
return ret;
}
@@ -698,17 +603,16 @@ static int block_save_complete(QEMUFile *f, void *opaque)
/* we know for sure that save bulk is completed and
all async reads have completed */
blk_mig_lock();
assert(block_mig_state.submitted == 0);
blk_mig_unlock();
do {
ret = blk_mig_save_dirty_block(f, 0);
if (ret < 0) {
return ret;
}
} while (ret == 0);
blk_mig_cleanup();
if (ret < 0) {
return ret;
}
/* report completion */
qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
@@ -716,18 +620,13 @@ static int block_save_complete(QEMUFile *f, void *opaque)
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
blk_mig_cleanup();
return 0;
}
static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
qemu_mutex_lock_iothread();
blk_mig_lock();
pending = get_remaining_dirty() +
uint64_t pending = get_remaining_dirty() +
block_mig_state.submitted * BLOCK_SIZE +
block_mig_state.read_done * BLOCK_SIZE;
@@ -735,8 +634,6 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
if (pending == 0 && !block_mig_state.bulk_completed) {
pending = BLOCK_SIZE;
}
blk_mig_unlock();
qemu_mutex_unlock_iothread();
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
return pending;
@@ -789,16 +686,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
ret = bdrv_write_zeroes(bs, addr, nr_sectors,
BDRV_REQ_MAY_UNMAP);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
}
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
if (ret < 0) {
return ret;
}
@@ -852,7 +745,6 @@ void blk_mig_init(void)
{
QSIMPLEQ_INIT(&block_mig_state.bmds_list);
QSIMPLEQ_INIT(&block_mig_state.blk_list);
qemu_mutex_init(&block_mig_state.lock);
register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
&block_mig_state);
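
The throttling loop in block_save_iterate() above queues new chunks only while the data already submitted or read stays below the migration bandwidth budget reported by the QEMUFile. A minimal sketch of that condition in isolation, assuming the BLOCK_SIZE constant and the block_mig_state accounting fields used in the hunk (the helper name is hypothetical):

    /* Hypothetical helper mirroring the loop condition in block_save_iterate():
     * keep queueing blocks only while the in-flight data fits the rate limit. */
    static bool blk_mig_within_rate_budget(QEMUFile *f)
    {
        int64_t in_flight = (block_mig_state.submitted +
                             block_mig_state.read_done) * BLOCK_SIZE;

        return in_flight < qemu_file_get_rate_limit(f);
    }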

block.c: 2997 changed lines (diff suppressed because it is too large)

@@ -1,39 +1,24 @@
block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-$(CONFIG_QUORUM) += quorum.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
ifeq ($(CONFIG_POSIX),y)
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-y += nbd.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
endif
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += mirror.o
common-obj-y += backup.o
block-obj-y += dictzip.o
block-obj-y += tar.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)
iscsi.o-libs := $(LIBISCSI_LIBS)
curl.o-cflags := $(CURL_CFLAGS)
curl.o-libs := $(CURL_LIBS)
rbd.o-cflags := $(RBD_CFLAGS)
rbd.o-libs := $(RBD_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)


@@ -1,392 +0,0 @@
/*
* QEMU backup
*
* Copyright (C) 2013 Proxmox Server Solutions
*
* Authors:
* Dietmar Maurer (dietmar@proxmox.com)
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/ratelimit.h"
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
int64_t start;
int64_t end;
QLIST_ENTRY(CowRequest) list;
CoQueue wait_queue; /* coroutines blocked on this request */
} CowRequest;
typedef struct BackupBlockJob {
BlockJob common;
BlockDriverState *target;
MirrorSyncMode sync_mode;
RateLimit limit;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
HBitmap *bitmap;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
int64_t end)
{
CowRequest *req;
bool retry;
do {
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
qemu_co_queue_wait(&req->wait_queue);
retry = true;
break;
}
}
} while (retry);
}
/* Keep track of an in-flight request */
static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
int64_t start, int64_t end)
{
req->start = start;
req->end = end;
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
}
/* Forget about a completed request */
static void cow_request_end(CowRequest *req)
{
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
}
static int coroutine_fn backup_do_cow(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
bool *error_is_read)
{
BackupBlockJob *job = (BackupBlockJob *)bs->job;
CowRequest cow_request;
struct iovec iov;
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
wait_for_overlapping_requests(job, start, end);
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
if (hbitmap_get(job->bitmap, start)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
n = MIN(BACKUP_SECTORS_PER_CLUSTER,
job->common.len / BDRV_SECTOR_SIZE -
start * BACKUP_SECTORS_PER_CLUSTER);
if (!bounce_buffer) {
bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
&bounce_qiov);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
*error_is_read = true;
}
goto out;
}
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
start * BACKUP_SECTORS_PER_CLUSTER,
n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
&bounce_qiov);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
if (error_is_read) {
*error_is_read = false;
}
goto out;
}
hbitmap_set(job->bitmap, start, 1);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
*/
job->sectors_read += n;
job->common.offset += n * BDRV_SECTOR_SIZE;
}
out:
if (bounce_buffer) {
qemu_vfree(bounce_buffer);
}
cow_request_end(&cow_request);
trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
qemu_co_rwlock_unlock(&job->flush_rwlock);
return ret;
}
static int coroutine_fn backup_before_write_notify(
NotifierWithReturn *notifier,
void *opaque)
{
BdrvTrackedRequest *req = opaque;
int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
if (speed < 0) {
error_set(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static void backup_iostatus_reset(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
bdrv_iostatus_reset(s->target);
}
static const BlockJobDriver backup_job_driver = {
.instance_size = sizeof(BackupBlockJob),
.job_type = BLOCK_JOB_TYPE_BACKUP,
.set_speed = backup_set_speed,
.iostatus_reset = backup_iostatus_reset,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
return block_job_error_action(&job->common, job->common.bs,
job->on_source_error, true, error);
} else {
return block_job_error_action(&job->common, job->target,
job->on_target_error, false, error);
}
}
static void coroutine_fn backup_run(void *opaque)
{
BackupBlockJob *job = opaque;
BlockDriverState *bs = job->common.bs;
BlockDriverState *target = job->target;
BlockdevOnError on_target_error = job->on_target_error;
NotifierWithReturn before_write = {
.notify = backup_before_write_notify,
};
int64_t start, end;
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
BACKUP_SECTORS_PER_CLUSTER);
job->bitmap = hbitmap_alloc(end, 0);
bdrv_set_enable_write_cache(target, true);
bdrv_set_on_error(target, on_target_error, on_target_error);
bdrv_iostatus_enable(target);
bdrv_add_before_write_notifier(bs, &before_write);
if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
while (!block_job_is_cancelled(&job->common)) {
/* Yield until the job is cancelled. We just let our before_write
* notify callback service CoW requests. */
job->common.busy = false;
qemu_coroutine_yield();
job->common.busy = true;
}
} else {
/* Both FULL and TOP SYNC_MODE require copying. */
for (; start < end; start++) {
bool error_is_read;
if (block_job_is_cancelled(&job->common)) {
break;
}
/* we need to yield so that qemu_aio_flush() returns.
* (without, VM does not reboot)
*/
if (job->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(
&job->limit, job->sectors_read);
job->sectors_read = 0;
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
} else {
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
}
if (block_job_is_cancelled(&job->common)) {
break;
}
if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i, n;
int alloced = 0;
/* Check to see if these blocks are already in the
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
if (alloced == 1) {
break;
}
}
/* If the above loop never found any sectors that are in
* the topmost image, skip this backup. */
if (alloced == 0) {
continue;
}
}
/* FULL sync mode we copy the whole drive. */
ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
backup_error_action(job, error_is_read, -ret);
if (action == BDRV_ACTION_REPORT) {
break;
} else {
start--;
continue;
}
}
}
}
notifier_with_return_remove(&before_write);
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
bdrv_unref(target);
block_job_completed(&job->common, ret);
}
void backup_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode sync_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb, void *opaque,
Error **errp)
{
int64_t len;
assert(bs);
assert(target);
assert(cb);
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
len = bdrv_getlength(bs);
if (len < 0) {
error_setg_errno(errp, -len, "unable to get length for '%s'",
bdrv_get_device_name(bs));
return;
}
BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
cb, opaque, errp);
if (!job) {
return;
}
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
job->sync_mode = sync_mode;
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
qemu_coroutine_enter(job->common.co, job);
}
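
backup_do_cow() above operates on whole clusters: the requested sector range is widened to cluster boundaries, and each cluster is copied to the target at most once, tracked by the job's HBitmap. A minimal sketch of the index math, reusing BACKUP_SECTORS_PER_CLUSTER and DIV_ROUND_UP from this file (the helper name is hypothetical):

    /* Hypothetical helper: map a sector range onto the cluster range that
     * backup_do_cow() iterates over. */
    static void backup_cluster_range(int64_t sector_num, int nb_sectors,
                                     int64_t *start, int64_t *end)
    {
        *start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
        *end   = DIV_ROUND_UP(sector_num + nb_sectors,
                              BACKUP_SECTORS_PER_CLUSTER);
    }

With the 64 KiB clusters defined above (128 sectors each), a guest write covering sectors 100..299 maps to clusters 0..2, so at most three clusters are copied out before the write proceeds.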


@@ -168,7 +168,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_REFTABLE_LOAD] = "reftable_load",
[BLKDBG_REFTABLE_GROW] = "reftable_grow",
[BLKDBG_REFTABLE_UPDATE] = "reftable_update",
[BLKDBG_REFBLOCK_LOAD] = "refblock_load",
[BLKDBG_REFBLOCK_UPDATE] = "refblock_update",
@@ -183,17 +182,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_CLUSTER_ALLOC] = "cluster_alloc",
[BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes",
[BLKDBG_CLUSTER_FREE] = "cluster_free",
[BLKDBG_FLUSH_TO_OS] = "flush_to_os",
[BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
[BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head",
[BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head",
[BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail",
[BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail",
[BLKDBG_PWRITEV] = "pwritev",
[BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
[BLKDBG_PWRITEV_DONE] = "pwritev_done",
};
static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -279,33 +267,24 @@ static void remove_rule(BlkdebugRule *rule)
g_free(rule);
}
static int read_config(BDRVBlkdebugState *s, const char *filename,
QDict *options, Error **errp)
static int read_config(BDRVBlkdebugState *s, const char *filename)
{
FILE *f = NULL;
FILE *f;
int ret;
struct add_rule_data d;
Error *local_err = NULL;
if (filename) {
f = fopen(filename, "r");
if (f == NULL) {
error_setg_errno(errp, errno, "Could not read blkdebug config file");
return -errno;
}
ret = qemu_config_parse(f, config_groups, filename);
if (ret < 0) {
error_setg(errp, "Could not parse blkdebug config file");
ret = -EINVAL;
goto fail;
}
/* Allow usage without config file */
if (!*filename) {
return 0;
}
qemu_config_parse_qdict(options, config_groups, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
f = fopen(filename, "r");
if (f == NULL) {
return -errno;
}
ret = qemu_config_parse(f, config_groups, filename);
if (ret < 0) {
goto fail;
}
@@ -320,122 +299,48 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
fail:
qemu_opts_reset(&inject_error_opts);
qemu_opts_reset(&set_state_opts);
if (f) {
fclose(f);
}
fclose(f);
return ret;
}
/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
static void blkdebug_parse_filename(const char *filename, QDict *options,
Error **errp)
{
const char *c;
/* Parse the blkdebug: prefix */
if (!strstart(filename, "blkdebug:", &filename)) {
/* There was no prefix; therefore, all options have to be already
present in the QDict (except for the filename) */
qdict_put(options, "x-image", qstring_from_str(filename));
return;
}
/* Parse config file path */
c = strchr(filename, ':');
if (c == NULL) {
error_setg(errp, "blkdebug requires both config file and image path");
return;
}
if (c != filename) {
QString *config_path;
config_path = qstring_from_substr(filename, 0, c - filename - 1);
qdict_put(options, "config", config_path);
}
/* TODO Allow multi-level nesting and set file.filename here */
filename = c + 1;
qdict_put(options, "x-image", qstring_from_str(filename));
}
static QemuOptsList runtime_opts = {
.name = "blkdebug",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "config",
.type = QEMU_OPT_STRING,
.help = "Path to the configuration file",
},
{
.name = "x-image",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{
.name = "align",
.type = QEMU_OPT_SIZE,
.help = "Required alignment in bytes",
},
{ /* end of list */ }
},
};
static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int blkdebug_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *config;
uint64_t align;
int ret;
char *config, *c;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
/* Parse the blkdebug: prefix */
if (strncmp(filename, "blkdebug:", strlen("blkdebug:"))) {
return -EINVAL;
}
filename += strlen("blkdebug:");
/* Read rules from config file */
c = strchr(filename, ':');
if (c == NULL) {
return -EINVAL;
}
/* Read rules from config file or command line options */
config = qemu_opt_get(opts, "config");
ret = read_config(s, config, options, errp);
if (ret) {
goto out;
config = g_strdup(filename);
config[c - filename] = '\0';
ret = read_config(s, config);
g_free(config);
if (ret < 0) {
return ret;
}
filename = c + 1;
/* Set initial state */
s->state = 1;
/* Open the backing file */
assert(bs->file == NULL);
ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
flags | BDRV_O_PROTOCOL, false, &local_err);
ret = bdrv_file_open(&bs->file, filename, flags);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
return ret;
}
/* Set request alignment */
align = qemu_opt_get_size(opts, "align", bs->request_alignment);
if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
bs->request_alignment = align;
} else {
error_setg(errp, "Invalid alignment");
ret = -EINVAL;
goto fail_unref;
}
ret = 0;
goto out;
fail_unref:
bdrv_unref(bs->file);
out:
qemu_opts_del(opts);
return ret;
return 0;
}
static void error_callback_bh(void *opaque)
@@ -632,9 +537,9 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugSuspendedReq *r, *next;
BlkdebugSuspendedReq *r;
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
QLIST_FOREACH(r, &s->suspended_reqs, next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
return 0;
@@ -643,31 +548,6 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
return -ENOENT;
}
static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
const char *tag)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugSuspendedReq *r, *r_next;
BlkdebugRule *rule, *next;
int i, ret = -ENOENT;
for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
if (rule->action == ACTION_SUSPEND &&
!strcmp(rule->options.suspend.tag, tag)) {
remove_rule(rule);
ret = 0;
}
}
}
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
if (!strcmp(r->tag, tag)) {
qemu_coroutine_enter(r->co, NULL);
ret = 0;
}
}
return ret;
}
static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
@@ -688,22 +568,20 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
.instance_size = sizeof(BDRVBlkdebugState),
.format_name = "blkdebug",
.protocol_name = "blkdebug",
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_getlength = blkdebug_getlength,
.instance_size = sizeof(BDRVBlkdebugState),
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_getlength = blkdebug_getlength,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
.bdrv_debug_remove_breakpoint
= blkdebug_debug_remove_breakpoint,
.bdrv_debug_resume = blkdebug_debug_resume,
.bdrv_debug_is_suspended = blkdebug_debug_is_suspended,
};
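
Both versions of the blkdebug open path above accept filenames of the form blkdebug:path/to/config:path/to/image and split them at the colon that follows the config path. A standalone sketch of that split in plain C (the function name is hypothetical; the QDict-based variant additionally stores the two parts as the "config" and "x-image" options):

    #include <string.h>
    #include <stdlib.h>

    /* Hypothetical helper: split "blkdebug:CONFIG:IMAGE" into its two parts.
     * Returns 0 on success; the caller frees *config. */
    static int split_blkdebug_filename(const char *filename,
                                       char **config, const char **image)
    {
        const char *c;

        if (strncmp(filename, "blkdebug:", strlen("blkdebug:")) != 0) {
            return -1;
        }
        filename += strlen("blkdebug:");

        c = strchr(filename, ':');
        if (c == NULL) {
            return -1;          /* both config and image path are required */
        }

        *config = strndup(filename, c - filename);
        *image  = c + 1;
        return 0;
    }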


@@ -69,100 +69,50 @@ static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
}
/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
static void blkverify_parse_filename(const char *filename, QDict *options,
Error **errp)
static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
{
const char *c;
QString *raw_path;
BDRVBlkverifyState *s = bs->opaque;
int ret;
char *raw, *c;
/* Parse the blkverify: prefix */
if (!strstart(filename, "blkverify:", &filename)) {
/* There was no prefix; therefore, all options have to be already
present in the QDict (except for the filename) */
qdict_put(options, "x-image", qstring_from_str(filename));
return;
if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
return -EINVAL;
}
filename += strlen("blkverify:");
/* Parse the raw image filename */
c = strchr(filename, ':');
if (c == NULL) {
error_setg(errp, "blkverify requires raw copy and original image path");
return;
return -EINVAL;
}
/* TODO Implement option pass-through and set raw.filename here */
raw_path = qstring_from_substr(filename, 0, c - filename - 1);
qdict_put(options, "x-raw", raw_path);
/* TODO Allow multi-level nesting and set file.filename here */
filename = c + 1;
qdict_put(options, "x-image", qstring_from_str(filename));
}
static QemuOptsList runtime_opts = {
.name = "blkverify",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "x-raw",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{
.name = "x-image",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{ /* end of list */ }
},
};
static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVBlkverifyState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
int ret;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
/* Open the raw file */
assert(bs->file == NULL);
ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
"raw", flags | BDRV_O_PROTOCOL, false, &local_err);
raw = g_strdup(filename);
raw[c - filename] = '\0';
ret = bdrv_file_open(&bs->file, raw, flags);
g_free(raw);
if (ret < 0) {
error_propagate(errp, local_err);
goto fail;
return ret;
}
filename = c + 1;
/* Open the test file */
assert(s->test_file == NULL);
ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
"test", flags, false, &local_err);
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, flags, NULL);
if (ret < 0) {
error_propagate(errp, local_err);
bdrv_delete(s->test_file);
s->test_file = NULL;
goto fail;
return ret;
}
ret = 0;
fail:
return ret;
return 0;
}
static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
bdrv_unref(s->test_file);
bdrv_delete(s->test_file);
s->test_file = NULL;
}
@@ -173,6 +123,110 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file);
}
/**
* Check that I/O vector contents are identical
*
* @a: I/O vector
* @b: I/O vector
* @ret: Offset to first mismatching byte or -1 if match
*/
static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
{
int i;
ssize_t offset = 0;
assert(a->niov == b->niov);
for (i = 0; i < a->niov; i++) {
size_t len = 0;
uint8_t *p = (uint8_t *)a->iov[i].iov_base;
uint8_t *q = (uint8_t *)b->iov[i].iov_base;
assert(a->iov[i].iov_len == b->iov[i].iov_len);
while (len < a->iov[i].iov_len && *p++ == *q++) {
len++;
}
offset += len;
if (len != a->iov[i].iov_len) {
return offset;
}
}
return -1;
}
typedef struct {
int src_index;
struct iovec *src_iov;
void *dest_base;
} IOVectorSortElem;
static int sortelem_cmp_src_base(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
/* Don't overflow */
if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
return -1;
} else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
return 1;
} else {
return 0;
}
}
static int sortelem_cmp_src_index(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
return elem_a->src_index - elem_b->src_index;
}
/**
* Copy contents of I/O vector
*
* The relative relationships of overlapping iovecs are preserved. This is
* necessary to ensure identical semantics in the cloned I/O vector.
*/
static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
void *buf)
{
IOVectorSortElem sortelems[src->niov];
void *last_end;
int i;
/* Sort by source iovecs by base address */
for (i = 0; i < src->niov; i++) {
sortelems[i].src_index = i;
sortelems[i].src_iov = &src->iov[i];
}
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
/* Allocate buffer space taking into account overlapping iovecs */
last_end = NULL;
for (i = 0; i < src->niov; i++) {
struct iovec *cur = sortelems[i].src_iov;
ptrdiff_t rewind = 0;
/* Detect overlap */
if (last_end && last_end > cur->iov_base) {
rewind = last_end - cur->iov_base;
}
sortelems[i].dest_base = buf - rewind;
buf += cur->iov_len - MIN(rewind, cur->iov_len);
last_end = MAX(cur->iov_base + cur->iov_len, last_end);
}
/* Sort by source iovec index and build destination iovec */
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
for (i = 0; i < src->niov; i++) {
qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
}
}
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
@@ -236,7 +290,7 @@ static void blkverify_aio_cb(void *opaque, int ret)
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
if (offset != -1) {
blkverify_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -254,7 +308,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
acb->verify = blkverify_verify_readv;
acb->buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
@@ -289,20 +343,19 @@ static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
}
static BlockDriver bdrv_blkverify = {
.format_name = "blkverify",
.protocol_name = "blkverify",
.instance_size = sizeof(BDRVBlkverifyState),
.format_name = "blkverify",
.protocol_name = "blkverify",
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_getlength = blkverify_getlength,
.instance_size = sizeof(BDRVBlkverifyState),
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
.bdrv_getlength = blkverify_getlength,
.authorizations = { true, false },
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
};
static void bdrv_blkverify_init(void)


@@ -38,57 +38,42 @@
// not allocated: 0xffffffff
// always little-endian
struct bochs_header_v1 {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
uint32_t version;
uint32_t header; // size of header
union {
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 20];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
} extra;
};
// always little-endian
struct bochs_header {
char magic[32]; // "Bochs Virtual HD Image"
char type[16]; // "Redolog"
char subtype[16]; // "Undoable" / "Volatile" / "Growing"
char magic[32]; /* "Bochs Virtual HD Image" */
char type[16]; /* "Redolog" */
char subtype[16]; /* "Undoable" / "Volatile" / "Growing" */
uint32_t version;
uint32_t header; // size of header
uint32_t header; /* size of header */
uint32_t catalog; /* num of entries */
uint32_t bitmap; /* bitmap size */
uint32_t extent; /* extent size */
union {
struct {
uint32_t catalog; // num of entries
uint32_t bitmap; // bitmap size
uint32_t extent; // extent size
uint32_t reserved; // for ???
uint64_t disk; // disk size
char padding[HEADER_SIZE - 64 - 8 - 24];
} redolog;
char padding[HEADER_SIZE - 64 - 8];
struct {
uint32_t reserved; /* for ??? */
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 12];
} QEMU_PACKED redolog;
struct {
uint64_t disk; /* disk size */
char padding[HEADER_SIZE - 64 - 20 - 8];
} QEMU_PACKED redolog_v1;
char padding[HEADER_SIZE - 64 - 20];
} extra;
};
} QEMU_PACKED;
typedef struct BDRVBochsState {
CoMutex lock;
uint32_t *catalog_bitmap;
int catalog_size;
uint32_t catalog_size;
int data_offset;
uint32_t data_offset;
int bitmap_blocks;
int extent_blocks;
int extent_size;
uint32_t bitmap_blocks;
uint32_t extent_blocks;
uint32_t extent_size;
} BDRVBochsState;
static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -108,13 +93,11 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bochs_open(BlockDriverState *bs, int flags)
{
BDRVBochsState *s = bs->opaque;
int i;
uint32_t i;
struct bochs_header bochs;
struct bochs_header_v1 header_v1;
int ret;
bs->read_only = 1; // no write support yet
@@ -129,18 +112,24 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
strcmp(bochs.subtype, GROWING_TYPE) ||
((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
(le32_to_cpu(bochs.version) != HEADER_V1))) {
error_setg(errp, "Image not in Bochs format");
return -EINVAL;
return -EMEDIUMTYPE;
}
if (le32_to_cpu(bochs.version) == HEADER_V1) {
memcpy(&header_v1, &bochs, sizeof(bochs));
bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512;
} else {
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
}
/* Limit to 1M entries to avoid unbounded allocation. This is what is
* needed for the largest image that bximage can create (~8 TB). */
s->catalog_size = le32_to_cpu(bochs.catalog);
if (s->catalog_size > 0x100000) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Catalog size is too large");
return -EFBIG;
}
s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
@@ -154,10 +143,27 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512;
s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512;
s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;
s->extent_size = le32_to_cpu(bochs.extra.redolog.extent);
s->extent_size = le32_to_cpu(bochs.extent);
if (s->extent_size == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Extent size may not be zero");
return -EINVAL;
} else if (s->extent_size > 0x800000) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Extent size %" PRIu32 " is too large",
s->extent_size);
return -EINVAL;
}
if (s->catalog_size < bs->total_sectors / s->extent_size) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Catalog size is too small for this disk size");
ret = -EINVAL;
goto fail;
}
qemu_co_mutex_init(&s->lock);
return 0;
@@ -170,8 +176,8 @@ fail:
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
int64_t offset = sector_num * 512;
int64_t extent_index, extent_offset, bitmap_offset;
uint64_t offset = sector_num * 512;
uint64_t extent_index, extent_offset, bitmap_offset;
char bitmap_entry;
// seek to sector
@@ -182,8 +188,9 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
return -1; /* not allocated */
}
bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
bitmap_offset = s->data_offset +
(512 * (uint64_t) s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),

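
The seek_to_sector() change at the end of the bochs hunk widens the bitmap_offset arithmetic to 64 bits: with 32-bit catalog entries, 512 * catalog_bitmap[extent_index] * (extent_blocks + bitmap_blocks) can overflow a 32-bit intermediate before it ever reaches the 64-bit offset variable. A minimal sketch of the corrected computation, assuming the uint32_t field types from the new BDRVBochsState (the helper name is hypothetical):

    #include <stdint.h>

    /* Hypothetical stand-alone version of the offset math in seek_to_sector().
     * Casting one factor to uint64_t forces the whole multiplication to be
     * carried out in 64 bits. */
    static uint64_t bochs_bitmap_offset(uint32_t data_offset,
                                        uint32_t catalog_entry,
                                        uint32_t extent_blocks,
                                        uint32_t bitmap_blocks)
    {
        return data_offset +
               512 * (uint64_t)catalog_entry * (extent_blocks + bitmap_blocks);
    }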

@@ -26,6 +26,9 @@
#include "qemu/module.h"
#include <zlib.h>
/* Maximum compressed block size */
#define MAX_BLOCK_SIZE (64 * 1024 * 1024)
typedef struct BDRVCloopState {
CoMutex lock;
uint32_t block_size;
@@ -53,8 +56,7 @@ static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cloop_open(BlockDriverState *bs, int flags)
{
BDRVCloopState *s = bs->opaque;
uint32_t offsets_size, max_compressed_block_size = 1, i;
@@ -68,6 +70,29 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
s->block_size = be32_to_cpu(s->block_size);
if (s->block_size % 512) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size %u must be a multiple of 512",
s->block_size);
return -EINVAL;
}
if (s->block_size == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size cannot be zero");
return -EINVAL;
}
/* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
* we can accept more. Prevent ridiculous values like 4 GB - 1 since we
* need a buffer this big.
*/
if (s->block_size > MAX_BLOCK_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"block_size %u must be %u MB or less",
s->block_size,
MAX_BLOCK_SIZE / (1024 * 1024));
return -EINVAL;
}
ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
@@ -76,7 +101,25 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
s->n_blocks = be32_to_cpu(s->n_blocks);
/* read offsets */
offsets_size = s->n_blocks * sizeof(uint64_t);
if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
/* Prevent integer overflow */
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"n_blocks %u must be %zu or less",
s->n_blocks,
(UINT32_MAX - 1) / sizeof(uint64_t));
return -EINVAL;
}
offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
if (offsets_size > 512 * 1024 * 1024) {
/* Prevent ridiculous offsets_size which causes memory allocation to
* fail or overflows bdrv_pread() size. In practice the 512 MB
* offsets[] limit supports 16 TB images at 256 KB block size.
*/
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"image requires too many offsets, "
"try increasing block size");
return -EINVAL;
}
s->offsets = g_malloc(offsets_size);
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
@@ -84,13 +127,39 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
for(i=0;i<s->n_blocks;i++) {
for (i = 0; i < s->n_blocks + 1; i++) {
uint64_t size;
s->offsets[i] = be64_to_cpu(s->offsets[i]);
if (i > 0) {
uint32_t size = s->offsets[i] - s->offsets[i - 1];
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
}
if (i == 0) {
continue;
}
if (s->offsets[i] < s->offsets[i - 1]) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"offsets not monotonically increasing at "
"index %u, image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
size = s->offsets[i] - s->offsets[i - 1];
/* Compressed blocks should be smaller than the uncompressed block size
* but maybe compression performed poorly so the compressed block is
* actually bigger. Clamp down on unrealistic values to prevent
* ridiculous s->compressed_block allocation.
*/
if (size > 2 * MAX_BLOCK_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"invalid compressed block size at index %u, "
"image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
if (size > max_compressed_block_size) {
max_compressed_block_size = size;
}
}
@@ -180,9 +249,7 @@ static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
static void cloop_close(BlockDriverState *bs)
{
BDRVCloopState *s = bs->opaque;
if (s->n_blocks > 0) {
g_free(s->offsets);
}
g_free(s->offsets);
g_free(s->compressed_block);
g_free(s->uncompressed_block);
inflateEnd(&s->zstream);

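
The n_blocks check added to cloop_open() above follows the usual pattern for guarding an allocation-size multiplication: the factor is bounded before the product (n_blocks + 1) * sizeof(uint64_t) is formed, and the later 512 MB cap on offsets_size is what ultimately bounds the allocation. A minimal sketch of the first guard in isolation (the function name is hypothetical):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical helper expressing the bound enforced in cloop_open()
     * before offsets_size = (n_blocks + 1) * sizeof(uint64_t) is computed. */
    static bool cloop_offsets_count_ok(uint32_t n_blocks)
    {
        return n_blocks <= (UINT32_MAX - 1) / sizeof(uint64_t);
    }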

@@ -103,14 +103,14 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
/* Copy if allocated above the base */
ret = bdrv_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
ret = bdrv_co_is_allocated_above(top, base, sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
@@ -173,9 +173,9 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static const BlockJobDriver commit_job_driver = {
static BlockJobType commit_job_type = {
.instance_size = sizeof(CommitBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.job_type = "commit",
.set_speed = commit_set_speed,
};
@@ -198,7 +198,13 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
assert(top != bs);
/* Once we support top == active layer, remove this check */
if (top == bs) {
error_setg(errp,
"Top image as the active layer is currently unsupported");
return;
}
if (top == base) {
error_setg(errp, "Invalid files for merge: top and base are the same");
return;
@@ -232,7 +238,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
}
s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}


@@ -58,8 +58,7 @@ static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int cow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cow_open(BlockDriverState *bs, int flags)
{
BDRVCowState *s = bs->opaque;
struct cow_header_v2 cow_header;
@@ -74,8 +73,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
}
if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
error_setg(errp, "Image not in COW format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
@@ -83,7 +81,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
char version[64];
snprintf(version, sizeof(version),
"COW version %d", cow_header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "cow", version);
ret = -ENOTSUP;
goto fail;
@@ -104,45 +102,42 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
/*
* XXX(hch): right now these functions are extremely inefficient.
* We should just read the whole bitmap we'll need in one go instead.
*/
static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
{
int64_t bitnum = start, last = start + nb_sectors;
while (bitnum < last) {
if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
bitmap[bitnum / 8] = 0xFF;
bitnum += 8;
continue;
}
bitmap[bitnum/8] |= (1 << (bitnum % 8));
bitnum++;
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
int ret;
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
bitmap |= (1 << (bitnum % 8));
ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return 0;
}
#define BITS_PER_BITMAP_SECTOR (512 * 8)
/* Cannot use bitmap.c on big-endian machines. */
static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
{
return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
}
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
int ret;
static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
{
int streak_value = value ? 0xFF : 0;
int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
int bitnum = start;
while (bitnum < last) {
if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
bitnum += 8;
continue;
}
if (cow_test_bit(bitnum, bitmap) == value) {
bitnum++;
continue;
}
break;
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return MIN(bitnum, last) - start;
return !!(bitmap & (1 << (bitnum % 8)));
}
/* Return true if first block has been changed (ie. current version is
@@ -151,100 +146,40 @@ static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_s
static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
bool first = true;
int changed = 0, same = 0;
int changed;
do {
int ret;
uint8_t bitmap[BDRV_SECTOR_SIZE];
bitnum &= BITS_PER_BITMAP_SECTOR - 1;
int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
if (first) {
changed = cow_test_bit(bitnum, bitmap);
first = false;
}
same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
bitnum += sector_bits;
nb_sectors -= sector_bits;
offset += BDRV_SECTOR_SIZE;
} while (nb_sectors);
*num_same = same;
return changed;
}
static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
BDRVCowState *s = bs->opaque;
int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
if (ret < 0) {
return ret;
if (nb_sectors == 0) {
*num_same = nb_sectors;
return 0;
}
return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
changed = is_bit_set(bs, sector_num);
if (changed < 0) {
return 0; /* XXX: how to return I/O errors? */
}
for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
if (is_bit_set(bs, sector_num + *num_same) != changed)
break;
}
return changed;
}
static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
bool first = true;
int sector_bits;
int error = 0;
int i;
for ( ; nb_sectors;
bitnum += sector_bits,
nb_sectors -= sector_bits,
offset += BDRV_SECTOR_SIZE) {
int ret, set;
uint8_t bitmap[BDRV_SECTOR_SIZE];
bitnum &= BITS_PER_BITMAP_SECTOR - 1;
sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
/* Skip over any already set bits */
set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
bitnum += set;
sector_bits -= set;
nb_sectors -= set;
if (!sector_bits) {
continue;
}
if (first) {
ret = bdrv_flush(bs->file);
if (ret < 0) {
return ret;
}
first = false;
}
cow_set_bits(bitmap, bitnum, sector_bits);
ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
for (i = 0; i < nb_sectors; i++) {
error = cow_set_bit(bs, sector_num + i);
if (error) {
break;
}
}
return 0;
return error;
}
static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
@@ -254,11 +189,7 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
int ret, n;
while (nb_sectors > 0) {
ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
if (ret < 0) {
return ret;
}
if (ret) {
if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
ret = bdrv_pread(bs->file,
s->cow_sectors_offset + sector_num * 512,
buf, n * 512);
@@ -324,14 +255,12 @@ static void cow_close(BlockDriverState *bs)
{
}
static int cow_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int cow_create(const char *filename, QEMUOptionParameter *options)
{
struct cow_header_v2 cow_header;
struct stat st;
int64_t image_sectors = 0;
const char *image_filename = NULL;
Error *local_err = NULL;
int ret;
BlockDriverState *cow_bs;
@@ -345,17 +274,13 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
options++;
}
ret = bdrv_create_file(filename, options, &local_err);
ret = bdrv_create_file(filename, options);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
cow_bs = NULL;
ret = bdrv_open(&cow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
ret = bdrv_file_open(&cow_bs, filename, BDRV_O_RDWR);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -389,7 +314,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
}
exit:
bdrv_unref(cow_bs);
bdrv_delete(cow_bs);
return ret;
}
@@ -415,11 +340,10 @@ static BlockDriver bdrv_cow = {
.bdrv_open = cow_open,
.bdrv_close = cow_close,
.bdrv_create = cow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_get_block_status = cow_co_get_block_status,
.bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,
};
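
Both the streak-based and the bit-at-a-time COW code above address the same on-disk layout: one dirty bit per sector, stored in a bitmap that immediately follows the v2 header. A minimal sketch of the addressing, assuming the struct cow_header_v2 from this file (the helper name is hypothetical):

    /* Hypothetical helper: locate the dirty bit for a given sector in the
     * bitmap that follows the COW v2 header on disk. */
    static void cow_bit_location(int64_t sector_num,
                                 uint64_t *byte_offset, int *bit_in_byte)
    {
        *byte_offset = sizeof(struct cow_header_v2) + sector_num / 8;
        *bit_in_byte = sector_num % 8;
    }

Given that layout, cow_find_streak() can skip whole 0x00 or 0xFF bytes at a time, which is what makes the bulk path cheaper than issuing a bdrv_pread() per bit as the older cow_set_bit()/is_bit_set() helpers did.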


@@ -34,11 +34,6 @@
#define DPRINTF(fmt, ...) do { } while (0)
#endif
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
#define NEED_CURL_TIMER_CALLBACK
#endif
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
CURLPROTO_FTP | CURLPROTO_FTPS | \
CURLPROTO_TFTP)
@@ -82,33 +77,15 @@ typedef struct CURLState
typedef struct BDRVCURLState {
CURLM *multi;
QEMUTimer timer;
size_t len;
CURLState states[CURL_NUM_STATES];
char *url;
size_t readahead_size;
bool accept_range;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
static void curl_multi_do(void *arg);
#ifdef NEED_CURL_TIMER_CALLBACK
static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
{
BDRVCURLState *s = opaque;
DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
if (timeout_ms == -1) {
timer_del(&s->timer);
} else {
int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
timer_mod(&s->timer,
qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
}
return 0;
}
#endif
static int curl_aio_flush(void *opaque);
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
void *s, void *sp)
@@ -116,31 +93,31 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s);
break;
case CURL_POLL_OUT:
qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s);
break;
case CURL_POLL_INOUT:
qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do,
curl_aio_flush, s);
break;
case CURL_POLL_REMOVE:
qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL);
break;
}
return 0;
}
static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
static size_t curl_size_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
{
BDRVCURLState *s = opaque;
CURLState *s = ((CURLState*)opaque);
size_t realsize = size * nmemb;
const char *accept_line = "Accept-Ranges: bytes";
size_t fsize;
if (realsize >= strlen(accept_line)
&& strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
s->accept_range = true;
if(sscanf(ptr, "Content-Length: %zd", &fsize) == 1) {
s->s->len = fsize;
}
return realsize;
@@ -157,6 +134,11 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
if (!s || !s->orig_buf)
goto read_end;
if (s->buf_off >= s->buf_len) {
/* buffer full, read nothing */
return 0;
}
realsize = MIN(realsize, s->buf_len - s->buf_off);
memcpy(s->orig_buf + s->buf_off, ptr, realsize);
s->buf_off += realsize;
@@ -232,10 +214,20 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
static void curl_multi_read(BDRVCURLState *s)
static void curl_multi_do(void *arg)
{
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
int r;
int msgs_in_queue;
if (!s->multi)
return;
do {
r = curl_multi_socket_all(s->multi, &running);
} while(r == CURLM_CALL_MULTI_PERFORM);
/* Try to find done transfers, so we can free the easy
* handle again. */
do {
@@ -279,41 +271,6 @@ static void curl_multi_read(BDRVCURLState *s)
} while(msgs_in_queue);
}
static void curl_multi_do(void *arg)
{
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
int r;
if (!s->multi) {
return;
}
do {
r = curl_multi_socket_all(s->multi, &running);
} while(r == CURLM_CALL_MULTI_PERFORM);
curl_multi_read(s);
}
static void curl_multi_timeout_do(void *arg)
{
#ifdef NEED_CURL_TIMER_CALLBACK
BDRVCURLState *s = (BDRVCURLState *)arg;
int running;
if (!s->multi) {
return;
}
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
curl_multi_read(s);
#else
abort();
#endif
}
static CURLState *curl_init_state(BDRVCURLState *s)
{
CURLState *state = NULL;
@@ -383,9 +340,11 @@ static void curl_clean_state(CURLState *s)
s->in_use = 0;
}
static void curl_parse_filename(const char *filename, QDict *options,
Error **errp)
static int curl_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
double d;
#define RA_OPTSTR ":readahead="
char *file;
@@ -393,17 +352,19 @@ static void curl_parse_filename(const char *filename, QDict *options,
const char *ra_val;
int parse_state = 0;
static int inited = 0;
file = g_strdup(filename);
s->readahead_size = READ_AHEAD_SIZE;
/* Parse a trailing ":readahead=#:" param, if present. */
ra = file + strlen(file) - 1;
while (ra >= file) {
if (parse_state == 0) {
if (*ra == ':') {
if (*ra == ':')
parse_state++;
} else {
else
break;
}
} else if (parse_state == 1) {
if (*ra > '9' || *ra < '0') {
char *opt_start = ra - strlen(RA_OPTSTR) + 1;
@@ -412,71 +373,19 @@ static void curl_parse_filename(const char *filename, QDict *options,
ra_val = ra + 1;
ra -= strlen(RA_OPTSTR) - 1;
*ra = '\0';
qdict_put(options, "readahead", qstring_from_str(ra_val));
s->readahead_size = atoi(ra_val);
break;
} else {
break;
}
break;
}
}
ra--;
}
qdict_put(options, "url", qstring_from_str(file));
g_free(file);
}
static QemuOptsList runtime_opts = {
.name = "curl",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "url",
.type = QEMU_OPT_STRING,
.help = "URL to open",
},
{
.name = "readahead",
.type = QEMU_OPT_SIZE,
.help = "Readahead size",
},
{ /* end of list */ }
},
};
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *file;
double d;
static int inited = 0;
if (flags & BDRV_O_RDWR) {
error_setg(errp, "curl block device does not support writes");
return -EROFS;
}
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out_noclean;
}
s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
if ((s->readahead_size & 0x1ff) != 0) {
error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
s->readahead_size);
goto out_noclean;
}
file = qemu_opt_get(opts, "url");
if (file == NULL) {
error_setg(errp, "curl block driver requires an 'url' option");
fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
s->readahead_size);
goto out_noclean;
}
@@ -486,55 +395,38 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
}
DPRINTF("CURL: Opening %s\n", file);
s->url = g_strdup(file);
s->url = file;
state = curl_init_state(s);
if (!state)
goto out_noclean;
// Get file size
s->accept_range = false;
curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
curl_header_cb);
curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_size_cb);
if (curl_easy_perform(state->curl))
goto out;
curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb);
curl_easy_setopt(state->curl, CURLOPT_NOBODY, 0);
if (d)
s->len = (size_t)d;
else if(!s->len)
goto out;
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|| !strncasecmp(s->url, "https://", strlen("https://")))
&& !s->accept_range) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server does not support 'range' (byte ranges).");
goto out;
}
DPRINTF("CURL: Size = %zd\n", s->len);
curl_clean_state(state);
curl_easy_cleanup(state->curl);
state->curl = NULL;
aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
curl_multi_timeout_do, s);
// Now we know the file exists and its size, so let's
// initialize the multi interface!
s->multi = curl_multi_init();
curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
#ifdef NEED_CURL_TIMER_CALLBACK
curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
#endif
curl_multi_setopt( s->multi, CURLMOPT_SOCKETDATA, s);
curl_multi_setopt( s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb );
curl_multi_do(s);
qemu_opts_del(opts);
return 0;
out:
@@ -542,11 +434,25 @@ out:
curl_easy_cleanup(state->curl);
state->curl = NULL;
out_noclean:
g_free(s->url);
qemu_opts_del(opts);
g_free(file);
return -EINVAL;
}
static int curl_aio_flush(void *opaque)
{
BDRVCURLState *s = opaque;
int i, j;
for (i=0; i < CURL_NUM_STATES; i++) {
for(j=0; j < CURL_NUM_ACB; j++) {
if (s->states[i].acb[j]) {
return 1;
}
}
}
return 0;
}
static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
{
// Do we have to implement canceling? Seems to work without...
@@ -626,6 +532,12 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
acb->nb_sectors = nb_sectors;
acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
if (!acb->bh) {
DPRINTF("CURL: qemu_bh_new failed\n");
return NULL;
}
qemu_bh_schedule(acb->bh);
return &acb->common;
}
@@ -650,9 +562,6 @@ static void curl_close(BlockDriverState *bs)
}
if (s->multi)
curl_multi_cleanup(s->multi);
timer_del(&s->timer);
g_free(s->url);
}
@@ -663,68 +572,63 @@ static int64_t curl_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_http = {
.format_name = "http",
.protocol_name = "http",
.format_name = "http",
.protocol_name = "http",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_https = {
.format_name = "https",
.protocol_name = "https",
.format_name = "https",
.protocol_name = "https",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_ftp = {
.format_name = "ftp",
.protocol_name = "ftp",
.format_name = "ftp",
.protocol_name = "ftp",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_ftps = {
.format_name = "ftps",
.protocol_name = "ftps",
.format_name = "ftps",
.protocol_name = "ftps",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_tftp = {
.format_name = "tftp",
.protocol_name = "tftp",
.format_name = "tftp",
.protocol_name = "tftp",
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static void curl_block_init(void)

block/dictzip.c (new file, 572 lines)

@@ -0,0 +1,572 @@
/*
* DictZip Block driver for dictzip enabled gzip files
*
* Use the "dictzip" tool from the "dictd" package to create gzip files that
* contain the extra DictZip headers.
*
* dictzip(1) is a compression program which creates compressed files in the
* gzip format (see RFC 1952). However, unlike gzip(1), dictzip(1) compresses
* the file in pieces and stores an index to the pieces in the gzip header.
* This allows random access to the file at the granularity of the compressed
* pieces (currently about 64kB) while maintaining good compression ratios
* (within 5% of the expected ratio for dictionary data).
* dictd(8) uses files stored in this format.
*
* For details on DictZip see http://dict.org/.
*
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include <zlib.h>
// #define DEBUG
#ifdef DEBUG
#define dprintf(fmt, ...) do { printf("dzip: " fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) do { } while (0)
#endif
#define SECTOR_SIZE 512
#define Z_STREAM_COUNT 4
#define CACHE_COUNT 20
/* magic values */
#define GZ_MAGIC1 0x1f
#define GZ_MAGIC2 0x8b
#define DZ_MAGIC1 'R'
#define DZ_MAGIC2 'A'
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
#define GZ_FNAME 0x08 /* Original name */
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
#define GZ_FHCRC 0x02 /* Header CRC16 */
/* offsets */
#define GZ_ID 0 /* GZ_MAGIC (16bit) */
#define GZ_FLG 3 /* FLaGs (see above) */
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
#define GZ_SI 12 /* Subfield ID (16bit) */
#define GZ_VERSION 16 /* Version for subfield format */
#define GZ_CHUNKSIZE 18 /* Chunk size (16bit) */
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
#define GZ_RNDDATA 22 /* Random access data (16bit) */
#define GZ_99_CHUNKSIZE 18 /* Chunk size (32bit) */
#define GZ_99_CHUNKCNT 22 /* Number of chunks (32bit) */
#define GZ_99_FILESIZE 26 /* Size of unpacked file (64bit) */
#define GZ_99_RNDDATA 34 /* Random access data (32bit) */
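The header fields above are what make random access possible: dictzip_open() below turns the per-chunk length table from the gzip extra field into absolute file offsets (s->offsets[]), and every chunk covers a fixed chunk_len bytes of uncompressed data. Locating a byte therefore reduces to one division and one table lookup. A minimal sketch of that mapping, assuming chunk_len and offsets[] have already been filled in as in dictzip_open(); the helper itself is illustrative only and not part of the original file:

    /* Illustrative only: map an uncompressed position to the chunk that
     * holds it, the compressed file offset of that chunk, and the number
     * of bytes to skip after inflating the chunk. */
    static inline void dictzip_locate_sketch(uint64_t chunk_len,
                                             const uint64_t *offsets,
                                             uint64_t pos,
                                             uint64_t *chunk,
                                             uint64_t *gz_offset,
                                             uint64_t *skip)
    {
        *chunk = pos / chunk_len;       /* which compressed piece holds pos */
        *gz_offset = offsets[*chunk];   /* where that piece starts on disk */
        *skip = pos % chunk_len;        /* offset into the inflated piece */
    }

dictzip_aio_readv() below performs exactly this computation (first_chunk, gz_start, first_offset) before issuing the read of the compressed data.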
struct BDRVDictZipState;
typedef struct DictZipAIOCB {
BlockDriverAIOCB common;
struct BDRVDictZipState *s;
QEMUIOVector *qiov; /* QIOV of the original request */
QEMUIOVector *qiov_gz; /* QIOV of the gz subrequest */
QEMUBH *bh; /* BH for cache */
z_stream *zStream; /* stream to use for decoding */
int zStream_id; /* stream id of the above pointer */
size_t start; /* offset into the uncompressed file */
size_t len; /* uncompressed bytes to read */
uint8_t *gzipped; /* the gzipped data */
uint8_t *buf; /* cached result */
size_t gz_len; /* amount of gzip data */
size_t gz_start; /* uncompressed starting point of gzip data */
uint64_t offset; /* offset for "start" into the uncompressed chunk */
int chunks_len; /* amount of uncompressed data in all gzip data */
} DictZipAIOCB;
typedef struct dict_cache {
size_t start;
size_t len;
uint8_t *buf;
} DictCache;
typedef struct BDRVDictZipState {
BlockDriverState *hd;
z_stream zStream[Z_STREAM_COUNT];
DictCache cache[CACHE_COUNT];
int cache_index;
uint8_t stream_in_use;
uint64_t chunk_len;
uint32_t chunk_cnt;
uint16_t *chunks;
uint32_t *chunks32;
uint64_t *offsets;
int64_t file_len;
} BDRVDictZipState;
static int dictzip_probe(const uint8_t *buf, int buf_size, const char *filename)
{
if (buf_size < 2)
return 0;
/* We match on every gzip file */
if ((buf[0] == GZ_MAGIC1) && (buf[1] == GZ_MAGIC2))
return 100;
return 0;
}
static int start_zStream(z_stream *zStream)
{
zStream->zalloc = NULL;
zStream->zfree = NULL;
zStream->opaque = NULL;
zStream->next_in = 0;
zStream->avail_in = 0;
zStream->next_out = NULL;
zStream->avail_out = 0;
return inflateInit2( zStream, -15 );
}
static int dictzip_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVDictZipState *s = bs->opaque;
const char *err = "Unknown (read error?)";
uint8_t magic[2];
char buf[100];
uint8_t header_flags;
uint16_t chunk_len16;
uint16_t chunk_cnt16;
uint32_t chunk_len32;
uint16_t header_ver;
uint16_t tmp_short;
uint64_t offset;
int chunks_len;
int headerLength = GZ_XLEN - 1;
int rnd_offs;
int ret;
int i;
const char *fname = filename;
if (!strncmp(filename, "dzip://", 7))
fname += 7;
else if (!strncmp(filename, "dzip:", 5))
fname += 5;
ret = bdrv_file_open(&s->hd, fname, flags);
if (ret < 0)
return ret;
/* initialize zlib streams */
for (i = 0; i < Z_STREAM_COUNT; i++) {
if (start_zStream( &s->zStream[i] ) != Z_OK) {
err = s->zStream[i].msg;
goto fail;
}
}
/* gzip header */
if (bdrv_pread(s->hd, GZ_ID, &magic, sizeof(magic)) != sizeof(magic))
goto fail;
if (!((magic[0] == GZ_MAGIC1) && (magic[1] == GZ_MAGIC2))) {
err = "No gzip file";
goto fail;
}
/* dzip header */
if (bdrv_pread(s->hd, GZ_FLG, &header_flags, 1) != 1)
goto fail;
if (!(header_flags & GZ_FEXTRA)) {
err = "Not a dictzip file (wrong flags)";
goto fail;
}
/* extra length */
if (bdrv_pread(s->hd, GZ_XLEN, &tmp_short, 2) != 2)
goto fail;
headerLength += le16_to_cpu(tmp_short) + 2;
/* DictZip magic */
if (bdrv_pread(s->hd, GZ_SI, &magic, 2) != 2)
goto fail;
if (magic[0] != DZ_MAGIC1 || magic[1] != DZ_MAGIC2) {
err = "Not a dictzip file (missing extra magic)";
goto fail;
}
/* DictZip version */
if (bdrv_pread(s->hd, GZ_VERSION, &header_ver, 2) != 2)
goto fail;
header_ver = le16_to_cpu(header_ver);
switch (header_ver) {
case 1: /* Normal DictZip */
/* chunk size */
if (bdrv_pread(s->hd, GZ_CHUNKSIZE, &chunk_len16, 2) != 2)
goto fail;
s->chunk_len = le16_to_cpu(chunk_len16);
/* chunk count */
if (bdrv_pread(s->hd, GZ_CHUNKCNT, &chunk_cnt16, 2) != 2)
goto fail;
s->chunk_cnt = le16_to_cpu(chunk_cnt16);
chunks_len = sizeof(short) * s->chunk_cnt;
rnd_offs = GZ_RNDDATA;
break;
case 99: /* Special Alex pigz version */
/* chunk size */
if (bdrv_pread(s->hd, GZ_99_CHUNKSIZE, &chunk_len32, 4) != 4)
goto fail;
dprintf("chunk len [%#x] = %d\n", GZ_99_CHUNKSIZE, chunk_len32);
s->chunk_len = le32_to_cpu(chunk_len32);
/* chunk count */
if (bdrv_pread(s->hd, GZ_99_CHUNKCNT, &s->chunk_cnt, 4) != 4)
goto fail;
s->chunk_cnt = le32_to_cpu(s->chunk_cnt);
dprintf("chunk len | count = %d | %d\n", s->chunk_len, s->chunk_cnt);
/* file size */
if (bdrv_pread(s->hd, GZ_99_FILESIZE, &s->file_len, 8) != 8)
goto fail;
s->file_len = le64_to_cpu(s->file_len);
chunks_len = sizeof(int) * s->chunk_cnt;
rnd_offs = GZ_99_RNDDATA;
break;
default:
err = "Invalid DictZip version";
goto fail;
}
/* random access data */
s->chunks = g_malloc(chunks_len);
if (header_ver == 99)
s->chunks32 = (uint32_t *)s->chunks;
if (bdrv_pread(s->hd, rnd_offs, s->chunks, chunks_len) != chunks_len)
goto fail;
/* orig filename */
if (header_flags & GZ_FNAME) {
if (bdrv_pread(s->hd, headerLength + 1, buf, sizeof(buf)) != sizeof(buf))
goto fail;
buf[sizeof(buf) - 1] = '\0';
headerLength += strlen(buf) + 1;
if (strlen(buf) == sizeof(buf))
goto fail;
dprintf("filename: %s\n", buf);
}
/* comment field */
if (header_flags & GZ_COMMENT) {
if (bdrv_pread(s->hd, headerLength, buf, sizeof(buf)) != sizeof(buf))
goto fail;
buf[sizeof(buf) - 1] = '\0';
headerLength += strlen(buf) + 1;
if (strlen(buf) == sizeof(buf))
goto fail;
dprintf("comment: %s\n", buf);
}
if (header_flags & GZ_FHCRC)
headerLength += 2;
/* uncompressed file length */
if (!s->file_len) {
uint32_t file_len;
if (bdrv_pread(s->hd, bdrv_getlength(s->hd) - 4, &file_len, 4) != 4)
goto fail;
s->file_len = le32_to_cpu(file_len);
}
/* compute offsets */
s->offsets = g_malloc(sizeof( *s->offsets ) * s->chunk_cnt);
for (offset = headerLength + 1, i = 0; i < s->chunk_cnt; i++) {
s->offsets[i] = offset;
switch (header_ver) {
case 1:
offset += le16_to_cpu(s->chunks[i]);
break;
case 99:
offset += le32_to_cpu(s->chunks32[i]);
break;
}
dprintf("chunk %#x - %#x = offset %#x -> %#x\n", i * s->chunk_len, (i+1) * s->chunk_len, s->offsets[i], offset);
}
return 0;
fail:
fprintf(stderr, "DictZip: Error opening file: %s\n", err);
bdrv_delete(s->hd);
if (s->chunks)
g_free(s->chunks);
return -EINVAL;
}
/* This callback gets invoked when we have the result in cache already */
static void dictzip_cache_cb(void *opaque)
{
DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
qemu_iovec_from_buf(acb->qiov, 0, acb->buf, acb->len);
acb->common.cb(acb->common.opaque, 0);
qemu_bh_delete(acb->bh);
qemu_aio_release(acb);
}
/* This callback gets invoked by the underlying block reader when we have
* all compressed data. We uncompress in here. */
static void dictzip_read_cb(void *opaque, int ret)
{
DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
struct BDRVDictZipState *s = acb->s;
uint8_t *buf;
DictCache *cache;
int r, i;
buf = g_malloc(acb->chunks_len);
/* try to find zlib stream for decoding */
do {
for (i = 0; i < Z_STREAM_COUNT; i++) {
if (!(s->stream_in_use & (1 << i))) {
s->stream_in_use |= (1 << i);
acb->zStream_id = i;
acb->zStream = &s->zStream[i];
break;
}
}
} while(!acb->zStream);
/* sure, we could handle more streams, but this callback should be single
threaded and when it's not, we really want to know! */
assert(i == 0);
/* uncompress the chunk */
acb->zStream->next_in = acb->gzipped;
acb->zStream->avail_in = acb->gz_len;
acb->zStream->next_out = buf;
acb->zStream->avail_out = acb->chunks_len;
r = inflate( acb->zStream, Z_PARTIAL_FLUSH );
if ( (r != Z_OK) && (r != Z_STREAM_END) )
fprintf(stderr, "Error inflating: [%d] %s\n", r, acb->zStream->msg);
if ( r == Z_STREAM_END )
inflateReset(acb->zStream);
dprintf("inflating [%d] left: %d | %d bytes\n", r, acb->zStream->avail_in, acb->zStream->avail_out);
s->stream_in_use &= ~(1 << acb->zStream_id);
/* notify the caller */
qemu_iovec_from_buf(acb->qiov, 0, buf + acb->offset, acb->len);
acb->common.cb(acb->common.opaque, 0);
/* fill the cache */
cache = &s->cache[s->cache_index];
s->cache_index++;
if (s->cache_index == CACHE_COUNT)
s->cache_index = 0;
cache->len = 0;
if (cache->buf)
g_free(cache->buf);
cache->start = acb->gz_start;
cache->buf = buf;
cache->len = acb->chunks_len;
/* free occupied resources */
g_free(acb->qiov_gz);
qemu_aio_release(acb);
}
static void dictzip_aio_cancel(BlockDriverAIOCB *blockacb)
{
}
static const AIOCBInfo dictzip_aiocb_info = {
.aiocb_size = sizeof(DictZipAIOCB),
.cancel = dictzip_aio_cancel,
};
/* This is where we get a request from a caller to read something */
static BlockDriverAIOCB *dictzip_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVDictZipState *s = bs->opaque;
DictZipAIOCB *acb;
QEMUIOVector *qiov_gz;
struct iovec *iov;
uint8_t *buf;
size_t start = sector_num * SECTOR_SIZE;
size_t len = nb_sectors * SECTOR_SIZE;
size_t end = start + len;
size_t gz_start;
size_t gz_len;
int64_t gz_sector_num;
int gz_nb_sectors;
int first_chunk, last_chunk;
int first_offset;
int i;
acb = qemu_aio_get(&dictzip_aiocb_info, bs, cb, opaque);
if (!acb)
return NULL;
/* Search Cache */
for (i = 0; i < CACHE_COUNT; i++) {
if (!s->cache[i].len)
continue;
if ((start >= s->cache[i].start) &&
(end <= (s->cache[i].start + s->cache[i].len))) {
acb->buf = s->cache[i].buf + (start - s->cache[i].start);
acb->len = len;
acb->qiov = qiov;
acb->bh = qemu_bh_new(dictzip_cache_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
}
/* No cache, so let's decode */
/* We need to read these chunks */
first_chunk = start / s->chunk_len;
first_offset = start - first_chunk * s->chunk_len;
last_chunk = end / s->chunk_len;
gz_start = s->offsets[first_chunk];
gz_len = 0;
for (i = first_chunk; i <= last_chunk; i++) {
if (s->chunks32)
gz_len += le32_to_cpu(s->chunks32[i]);
else
gz_len += le16_to_cpu(s->chunks[i]);
}
gz_sector_num = gz_start / SECTOR_SIZE;
gz_nb_sectors = (gz_len / SECTOR_SIZE);
/* account for tail and heads */
while ((gz_start + gz_len) > ((gz_sector_num + gz_nb_sectors) * SECTOR_SIZE))
gz_nb_sectors++;
/* Allocate qiov, iov and buf in one chunk so we only need to free qiov */
qiov_gz = g_malloc0(sizeof(QEMUIOVector) + sizeof(struct iovec) +
(gz_nb_sectors * SECTOR_SIZE));
iov = (struct iovec *)(((char *)qiov_gz) + sizeof(QEMUIOVector));
buf = ((uint8_t *)iov) + sizeof(struct iovec); /* data buffer follows the iovec in the same allocation */
/* Kick off the read by the backing file, so we can start decompressing */
iov->iov_base = (void *)buf;
iov->iov_len = gz_nb_sectors * 512;
qemu_iovec_init_external(qiov_gz, iov, 1);
dprintf("read %d - %d => %d - %d\n", start, end, gz_start, gz_start + gz_len);
acb->s = s;
acb->qiov = qiov;
acb->qiov_gz = qiov_gz;
acb->start = start;
acb->len = len;
acb->gzipped = buf + (gz_start % SECTOR_SIZE);
acb->gz_len = gz_len;
acb->gz_start = first_chunk * s->chunk_len;
acb->offset = first_offset;
acb->chunks_len = (last_chunk - first_chunk + 1) * s->chunk_len;
return bdrv_aio_readv(s->hd, gz_sector_num, qiov_gz, gz_nb_sectors,
dictzip_read_cb, acb);
}
static void dictzip_close(BlockDriverState *bs)
{
BDRVDictZipState *s = bs->opaque;
int i;
for (i = 0; i < CACHE_COUNT; i++) {
if (!s->cache[i].len)
continue;
g_free(s->cache[i].buf);
}
for (i = 0; i < Z_STREAM_COUNT; i++) {
inflateEnd(&s->zStream[i]);
}
if (s->chunks)
g_free(s->chunks);
if (s->offsets)
g_free(s->offsets);
dprintf("Close\n");
}
static int64_t dictzip_getlength(BlockDriverState *bs)
{
BDRVDictZipState *s = bs->opaque;
dprintf("getlength -> %ld\n", s->file_len);
return s->file_len;
}
static BlockDriver bdrv_dictzip = {
.format_name = "dzip",
.protocol_name = "dzip",
.instance_size = sizeof(BDRVDictZipState),
.bdrv_file_open = dictzip_open,
.bdrv_close = dictzip_close,
.bdrv_getlength = dictzip_getlength,
.bdrv_probe = dictzip_probe,
.bdrv_aio_readv = dictzip_aio_readv,
};
static void dictzip_block_init(void)
{
bdrv_register(&bdrv_dictzip);
}
block_init(dictzip_block_init);

block/dmg.c

@@ -27,6 +27,14 @@
#include "qemu/module.h"
#include <zlib.h>
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
* or truncating when converting to 32-bit types
*/
DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */
DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
};
typedef struct BDRVDMGState {
CoMutex lock;
/* each chunk contains a certain number of sectors,
@@ -51,16 +59,9 @@ typedef struct BDRVDMGState {
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
{
int len;
if (!filename) {
return 0;
}
len = strlen(filename);
if (len > 4 && !strcmp(filename + len - 4, ".dmg")) {
return 2;
}
int len=strlen(filename);
if(len>4 && !strcmp(filename+len-4,".dmg"))
return 2;
return 0;
}
@@ -92,13 +93,43 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
return 0;
}
static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
/* Increase max chunk sizes, if necessary. This function is used to calculate
* the buffer sizes needed for compressed/uncompressed chunk I/O.
*/
static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uint32_t *max_compressed_size,
uint32_t *max_sectors_per_chunk)
{
uint32_t compressed_size = 0;
uint32_t uncompressed_sectors = 0;
switch (s->types[chunk]) {
case 0x80000005: /* zlib compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
case 1: /* copy */
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
uncompressed_sectors = s->sectorcounts[chunk];
break;
}
if (compressed_size > *max_compressed_size) {
*max_compressed_size = compressed_size;
}
if (uncompressed_sectors > *max_sectors_per_chunk) {
*max_sectors_per_chunk = uncompressed_sectors;
}
}
static int dmg_open(BlockDriverState *bs, int flags)
{
BDRVDMGState *s = bs->opaque;
uint64_t info_begin,info_end,last_in_offset,last_out_offset;
uint64_t info_begin, info_end, last_in_offset, last_out_offset;
uint32_t count, tmp;
uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
int64_t offset;
int ret;
@@ -160,37 +191,40 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (type == 0x6d697368 && count >= 244) {
int new_size, chunk_count;
size_t new_size;
uint32_t chunk_count;
offset += 4;
offset += 200;
chunk_count = (count-204)/40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size/2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
chunk_count = (count - 204) / 40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = g_realloc(s->types, new_size / 2);
s->offsets = g_realloc(s->offsets, new_size);
s->lengths = g_realloc(s->lengths, new_size);
s->sectors = g_realloc(s->sectors, new_size);
s->sectorcounts = g_realloc(s->sectorcounts, new_size);
for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
ret = read_uint32(bs, offset, &s->types[i]);
if (ret < 0) {
goto fail;
}
offset += 4;
if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
if(s->types[i]==0xffffffff) {
last_in_offset = s->offsets[i-1]+s->lengths[i-1];
last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
offset += 4;
if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
s->types[i] != 2) {
if (s->types[i] == 0xffffffff && i > 0) {
last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
last_out_offset = s->sectors[i - 1] +
s->sectorcounts[i - 1];
}
chunk_count--;
i--;
offset += 36;
continue;
}
offset += 4;
ret = read_uint64(bs, offset, &s->sectors[i]);
if (ret < 0) {
@@ -205,6 +239,14 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
offset += 8;
if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
error_report("sector count %" PRIu64 " for chunk %u is "
"larger than max (%u)",
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
ret = -EINVAL;
goto fail;
}
ret = read_uint64(bs, offset, &s->offsets[i]);
if (ret < 0) {
goto fail;
@@ -218,19 +260,25 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
offset += 8;
if(s->lengths[i]>max_compressed_size)
max_compressed_size = s->lengths[i];
if(s->sectorcounts[i]>max_sectors_per_chunk)
max_sectors_per_chunk = s->sectorcounts[i];
}
s->n_chunks+=chunk_count;
}
if (s->lengths[i] > DMG_LENGTHS_MAX) {
error_report("length %" PRIu64 " for chunk %u is larger "
"than max (%u)",
s->lengths[i], i, DMG_LENGTHS_MAX);
ret = -EINVAL;
goto fail;
}
update_max_chunk_size(s, i, &max_compressed_size,
&max_sectors_per_chunk);
}
s->n_chunks += chunk_count;
}
}
/* initialize zlib engine */
s->compressed_chunk = g_malloc(max_compressed_size+1);
s->uncompressed_chunk = g_malloc(512*max_sectors_per_chunk);
if(inflateInit(&s->zstream) != Z_OK) {
s->compressed_chunk = g_malloc(max_compressed_size + 1);
s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
}
@@ -252,83 +300,82 @@ fail:
}
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num,int sector_num)
uint32_t chunk_num, uint64_t sector_num)
{
if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num ||
s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num)
return 0;
else
return -1;
if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num ||
s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
return 0;
} else {
return -1;
}
}
static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
{
/* binary search */
uint32_t chunk1=0,chunk2=s->n_chunks,chunk3;
while(chunk1!=chunk2) {
chunk3 = (chunk1+chunk2)/2;
if(s->sectors[chunk3]>sector_num)
chunk2 = chunk3;
else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num)
return chunk3;
else
chunk1 = chunk3;
uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
while (chunk1 != chunk2) {
chunk3 = (chunk1 + chunk2) / 2;
if (s->sectors[chunk3] > sector_num) {
chunk2 = chunk3;
} else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
return chunk3;
} else {
chunk1 = chunk3;
}
}
return s->n_chunks; /* error */
}
static inline int dmg_read_chunk(BlockDriverState *bs, int sector_num)
static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
{
BDRVDMGState *s = bs->opaque;
if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
int ret;
uint32_t chunk = search_chunk(s,sector_num);
if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
int ret;
uint32_t chunk = search_chunk(s, sector_num);
if(chunk>=s->n_chunks)
return -1;
if (chunk >= s->n_chunks) {
return -1;
}
s->current_chunk = s->n_chunks;
switch(s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
int i;
s->current_chunk = s->n_chunks;
switch (s->types[chunk]) {
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
}
/* we need to buffer, because only the chunk as whole can be
* inflated. */
i=0;
do {
ret = bdrv_pread(bs->file, s->offsets[chunk] + i,
s->compressed_chunk+i, s->lengths[chunk]-i);
if(ret<0 && errno==EINTR)
ret=0;
i+=ret;
} while(ret>=0 && ret+i<s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512*s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if(ret != Z_OK)
return -1;
ret = inflate(&s->zstream, Z_FINISH);
if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk])
return -1;
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->zstream.next_in = s->compressed_chunk;
s->zstream.avail_in = s->lengths[chunk];
s->zstream.next_out = s->uncompressed_chunk;
s->zstream.avail_out = 512 * s->sectorcounts[chunk];
ret = inflateReset(&s->zstream);
if (ret != Z_OK) {
return -1;
}
ret = inflate(&s->zstream, Z_FINISH);
if (ret != Z_STREAM_END ||
s->zstream.total_out != 512 * s->sectorcounts[chunk]) {
return -1;
}
break; }
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
if (ret != s->lengths[chunk]) {
return -1;
}
break;
case 2: /* zero */
memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
break;
}
s->current_chunk = chunk;
}
return 0;
}
@@ -339,12 +386,14 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
BDRVDMGState *s = bs->opaque;
int i;
for(i=0;i<nb_sectors;i++) {
uint32_t sector_offset_in_chunk;
if(dmg_read_chunk(bs, sector_num+i) != 0)
return -1;
sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
if (dmg_read_chunk(bs, sector_num + i) != 0) {
return -1;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
memcpy(buf + i * 512,
s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
}
return 0;
}
@@ -376,12 +425,12 @@ static void dmg_close(BlockDriverState *bs)
}
static BlockDriver bdrv_dmg = {
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
.format_name = "dmg",
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
.bdrv_read = dmg_co_read,
.bdrv_close = dmg_close,
};
static void bdrv_dmg_init(void)

block/gluster.c

@@ -3,26 +3,43 @@
*
* Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
* Pipe handling mechanism in AIO implementation is derived from
* block/rbd.c. Hence,
*
* Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
* Josh Durgin <josh.durgin@dreamhost.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
typedef struct GlusterAIOCB {
BlockDriverAIOCB common;
int64_t size;
int ret;
bool *finished;
QEMUBH *bh;
Coroutine *coroutine;
} GlusterAIOCB;
typedef struct BDRVGlusterState {
struct glfs *glfs;
int fds[2];
struct glfs_fd *fd;
int qemu_aio_count;
int event_reader_pos;
GlusterAIOCB *event_acb;
} BDRVGlusterState;
#define GLUSTER_FD_READ 0
#define GLUSTER_FD_WRITE 1
typedef struct GlusterConf {
char *server;
int port;
@@ -33,13 +50,11 @@ typedef struct GlusterConf {
static void qemu_gluster_gconf_free(GlusterConf *gconf)
{
if (gconf) {
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
}
g_free(gconf->server);
g_free(gconf->volname);
g_free(gconf->image);
g_free(gconf->transport);
g_free(gconf);
}
static int parse_volume_options(GlusterConf *gconf, char *path)
@@ -117,7 +132,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
}
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
if (!strcmp(uri->scheme, "gluster")) {
gconf->transport = g_strdup("tcp");
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
gconf->transport = g_strdup("tcp");
@@ -153,7 +168,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
}
gconf->server = g_strdup(qp->p[0].value);
} else {
gconf->server = g_strdup(uri->server ? uri->server : "localhost");
gconf->server = g_strdup(uri->server);
gconf->port = uri->port;
}
@@ -165,8 +180,7 @@ out:
return ret;
}
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
Error **errp)
static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
{
struct glfs *glfs = NULL;
int ret;
@@ -174,8 +188,8 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = qemu_gluster_parseuri(gconf, filename);
if (ret < 0) {
error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
error_report("Usage: file=gluster[+transport]://[server[:port]]/"
"volname/image[?socket=...]");
errno = -ret;
goto out;
}
@@ -202,11 +216,9 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
ret = glfs_init(glfs);
if (ret) {
error_setg_errno(errp, errno,
"Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server,
gconf->port, gconf->volname, gconf->image,
gconf->transport);
error_report("Gluster connection failed for server=%s port=%d "
"volume=%s image=%s transport=%s", gconf->server, gconf->port,
gconf->volname, gconf->image, gconf->transport);
goto out;
}
return glfs;
@@ -220,101 +232,96 @@ out:
return NULL;
}
static void qemu_gluster_complete_aio(void *opaque)
static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
int ret;
bool *finished = acb->finished;
BlockDriverCompletionFunc *cb = acb->common.cb;
void *opaque = acb->common.opaque;
qemu_bh_delete(acb->bh);
acb->bh = NULL;
qemu_coroutine_enter(acb->coroutine, NULL);
}
/*
* AIO callback routine called from GlusterFS thread.
*/
static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
{
GlusterAIOCB *acb = (GlusterAIOCB *)arg;
if (!ret || ret == acb->size) {
acb->ret = 0; /* Success */
} else if (ret < 0) {
acb->ret = ret; /* Read/Write failed */
if (!acb->ret || acb->ret == acb->size) {
ret = 0; /* Success */
} else if (acb->ret < 0) {
ret = acb->ret; /* Read/Write failed */
} else {
acb->ret = -EIO; /* Partial read/write - fail it */
ret = -EIO; /* Partial read/write - fail it */
}
acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
qemu_bh_schedule(acb->bh);
s->qemu_aio_count--;
qemu_aio_release(acb);
cb(opaque, ret);
if (finished) {
*finished = true;
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{ /* end of list */ }
},
};
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
static void qemu_gluster_aio_event_reader(void *opaque)
{
assert(open_flags != NULL);
BDRVGlusterState *s = opaque;
ssize_t ret;
*open_flags |= O_BINARY;
do {
char *p = (char *)&s->event_acb;
if (bdrv_flags & BDRV_O_RDWR) {
*open_flags |= O_RDWR;
} else {
*open_flags |= O_RDONLY;
}
if ((bdrv_flags & BDRV_O_NOCACHE)) {
*open_flags |= O_DIRECT;
}
ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
sizeof(s->event_acb) - s->event_reader_pos);
if (ret > 0) {
s->event_reader_pos += ret;
if (s->event_reader_pos == sizeof(s->event_acb)) {
s->event_reader_pos = 0;
qemu_gluster_complete_aio(s->event_acb, s);
}
}
} while (ret < 0 && errno == EINTR);
}
static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
int bdrv_flags, Error **errp)
static int qemu_gluster_aio_flush_cb(void *opaque)
{
BDRVGlusterState *s = opaque;
return (s->qemu_aio_count > 0);
}
static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
int bdrv_flags)
{
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int open_flags = O_BINARY;
int ret = 0;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
}
filename = qemu_opt_get(opts, "filename");
s->glfs = qemu_gluster_init(gconf, filename, errp);
s->glfs = qemu_gluster_init(gconf, filename);
if (!s->glfs) {
ret = -errno;
goto out;
}
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
if (bdrv_flags & BDRV_O_RDWR) {
open_flags |= O_RDWR;
} else {
open_flags |= O_RDONLY;
}
if ((bdrv_flags & BDRV_O_NOCACHE)) {
open_flags |= O_DIRECT;
}
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
if (!s->fd) {
ret = -errno;
goto out;
}
ret = qemu_pipe(s->fds);
if (ret < 0) {
ret = -errno;
goto out;
}
fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
out:
qemu_opts_del(opts);
qemu_gluster_gconf_free(gconf);
if (!ret) {
return ret;
@@ -328,180 +335,24 @@ out:
return ret;
}
typedef struct BDRVGlusterReopenState {
struct glfs *glfs;
struct glfs_fd *fd;
} BDRVGlusterReopenState;
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
BDRVGlusterReopenState *reop_s;
GlusterConf *gconf = NULL;
int open_flags = 0;
assert(state != NULL);
assert(state->bs != NULL);
state->opaque = g_malloc0(sizeof(BDRVGlusterReopenState));
reop_s = state->opaque;
qemu_gluster_parse_flags(state->flags, &open_flags);
gconf = g_malloc0(sizeof(GlusterConf));
reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
}
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
goto exit;
}
exit:
/* state->opaque will be freed in either the _abort or _commit */
qemu_gluster_gconf_free(gconf);
return ret;
}
static void qemu_gluster_reopen_commit(BDRVReopenState *state)
{
BDRVGlusterReopenState *reop_s = state->opaque;
BDRVGlusterState *s = state->bs->opaque;
/* close the old */
if (s->fd) {
glfs_close(s->fd);
}
if (s->glfs) {
glfs_fini(s->glfs);
}
/* use the newly opened image / connection */
s->fd = reop_s->fd;
s->glfs = reop_s->glfs;
g_free(state->opaque);
state->opaque = NULL;
return;
}
static void qemu_gluster_reopen_abort(BDRVReopenState *state)
{
BDRVGlusterReopenState *reop_s = state->opaque;
if (reop_s == NULL) {
return;
}
if (reop_s->fd) {
glfs_close(reop_s->fd);
}
if (reop_s->glfs) {
glfs_fini(reop_s->glfs);
}
g_free(state->opaque);
state->opaque = NULL;
return;
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
BDRVGlusterState *s = bs->opaque;
off_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
}
static inline bool gluster_supports_zerofill(void)
{
return 1;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
#else
static inline bool gluster_supports_zerofill(void)
{
return 0;
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
int64_t size)
{
return 0;
}
#endif
static int qemu_gluster_create(const char *filename,
QEMUOptionParameter *options, Error **errp)
QEMUOptionParameter *options)
{
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
glfs = qemu_gluster_init(gconf, filename, errp);
glfs = qemu_gluster_init(gconf, filename);
if (!glfs) {
ret = -EINVAL;
ret = -errno;
goto out;
}
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
total_size = options->value.n / BDRV_SECTOR_SIZE;
} else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
if (!options->value.s || !strcmp(options->value.s, "off")) {
prealloc = 0;
} else if (!strcmp(options->value.s, "full") &&
gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
" or GlusterFS doesn't support zerofill API",
options->value.s);
ret = -EINVAL;
goto out;
}
}
options++;
}
@@ -511,15 +362,9 @@ static int qemu_gluster_create(const char *filename,
if (!fd) {
ret = -errno;
} else {
if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
if (prealloc && qemu_gluster_zerofill(fd, 0,
total_size * BDRV_SECTOR_SIZE)) {
ret = -errno;
}
} else {
if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
ret = -errno;
}
if (glfs_close(fd) != 0) {
ret = -errno;
}
@@ -532,18 +377,72 @@ out:
return ret;
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
{
GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
bool finished = false;
acb->finished = &finished;
while (!finished) {
qemu_aio_wait();
}
}
static const AIOCBInfo gluster_aiocb_info = {
.aiocb_size = sizeof(GlusterAIOCB),
.cancel = qemu_gluster_aio_cancel,
};
static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
{
GlusterAIOCB *acb = (GlusterAIOCB *)arg;
BlockDriverState *bs = acb->common.bs;
BDRVGlusterState *s = bs->opaque;
int retval;
acb->ret = ret;
retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
if (retval != sizeof(acb)) {
/*
* Gluster AIO callback thread failed to notify the waiting
* QEMU thread about IO completion.
*
* Complete this IO request and make the disk inaccessible for
* subsequent reads and writes.
*/
error_report("Gluster failed to notify QEMU about IO completion");
qemu_mutex_lock_iothread(); /* We are in gluster thread context */
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_release(acb);
s->qemu_aio_count--;
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
NULL);
bs->drv = NULL; /* Make the disk inaccessible */
qemu_mutex_unlock_iothread();
}
}
static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int write)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
size_t size;
off_t offset;
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
s->qemu_aio_count++;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
acb->finished = NULL;
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -554,96 +453,55 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
}
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
return &acb->common;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
int ret;
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
ret = glfs_ftruncate(s->fd, offset);
if (ret < 0) {
return -errno;
}
return 0;
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
BDRVGlusterState *s = bs->opaque;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
acb->finished = NULL;
s->qemu_aio_count++;
ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
return &acb->common;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret;
GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
ret = -errno;
goto out;
}
qemu_coroutine_yield();
ret = acb->ret;
out:
g_slice_free(GlusterAIOCB, acb);
return ret;
}
#endif
static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
@@ -675,6 +533,10 @@ static void qemu_gluster_close(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
if (s->fd) {
glfs_close(s->fd);
s->fd = NULL;
@@ -682,23 +544,12 @@ static void qemu_gluster_close(BlockDriverState *bs)
glfs_fini(s->glfs);
}
static int qemu_gluster_has_zero_init(BlockDriverState *bs)
{
/* GlusterFS volume could be backed by a block device */
return 0;
}
static QEMUOptionParameter qemu_gluster_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{
.name = BLOCK_OPT_PREALLOC,
.type = OPT_STRING,
.help = "Preallocation mode (allowed values: off, full)"
},
{ NULL }
};
@@ -706,26 +557,14 @@ static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.create_options = qemu_gluster_create_options,
};
@@ -733,26 +572,14 @@ static BlockDriver bdrv_gluster_tcp = {
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.create_options = qemu_gluster_create_options,
};
@@ -760,26 +587,14 @@ static BlockDriver bdrv_gluster_unix = {
.format_name = "gluster",
.protocol_name = "gluster+unix",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.create_options = qemu_gluster_create_options,
};
@@ -787,26 +602,14 @@ static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
.instance_size = sizeof(BDRVGlusterState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_co_readv = qemu_gluster_co_readv,
.bdrv_co_writev = qemu_gluster_co_writev,
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_co_discard = qemu_gluster_co_discard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.create_options = qemu_gluster_create_options,
};

(diff for one file suppressed because it is too large)

block/linux-aio.c

@@ -39,6 +39,7 @@ struct qemu_laiocb {
struct qemu_laio_state {
io_context_t ctx;
EventNotifier e;
int count;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -54,6 +55,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
{
int ret;
s->count--;
ret = laiocb->ret;
if (ret != -ECANCELED) {
if (ret == laiocb->nbytes) {
@@ -98,6 +101,13 @@ static void qemu_laio_completion_cb(EventNotifier *e)
}
}
static int qemu_laio_flush_cb(EventNotifier *e)
{
struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
return (s->count > 0) ? 1 : 0;
}
static void laio_cancel(BlockDriverAIOCB *blockacb)
{
struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -167,11 +177,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
goto out_free_aiocb;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
s->count++;
if (io_submit(s->ctx, 1, &iocbs) < 0)
goto out_free_aiocb;
goto out_dec_count;
return &laiocb->common;
out_dec_count:
s->count--;
out_free_aiocb:
qemu_aio_release(laiocb);
return NULL;
@@ -190,7 +203,8 @@ void *laio_init(void)
goto out_close_efd;
}
qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
qemu_laio_flush_cb);
return s;

block/mirror.c

@@ -31,8 +31,7 @@ typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *target;
BlockDriverState *base;
bool is_none_mode;
MirrorSyncMode mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
@@ -40,7 +39,6 @@ typedef struct MirrorBlockJob {
int64_t granularity;
size_t buf_size;
unsigned long *cow_bitmap;
BdrvDirtyBitmap *dirty_bitmap;
HBitmapIter hbi;
uint8_t *buf;
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
@@ -96,7 +94,6 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
}
qemu_iovec_destroy(&op->qiov);
g_slice_free(MirrorOp, op);
qemu_coroutine_enter(s->common.co, NULL);
}
@@ -148,10 +145,9 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
s->sector_num = hbitmap_iter_next(&s->hbi);
if (s->sector_num < 0) {
bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
bdrv_dirty_iter_init(source, &s->hbi);
s->sector_num = hbitmap_iter_next(&s->hbi);
trace_mirror_restart_iter(s,
bdrv_get_dirty_count(source, s->dirty_bitmap));
trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
assert(s->sector_num >= 0);
}
@@ -187,7 +183,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
do {
int added_sectors, added_chunks;
if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
if (!bdrv_get_dirty(source, next_sector) ||
test_bit(next_chunk, s->in_flight_bitmap)) {
assert(nb_sectors > 0);
break;
@@ -253,8 +249,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
/* Advance the HBitmapIter in parallel, so that we do not examine
* the same sector twice.
*/
if (next_sector > hbitmap_next_sector
&& bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
}
@@ -337,13 +332,14 @@ static void coroutine_fn mirror_run(void *opaque)
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);
if (!s->is_none_mode) {
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
BlockDriverState *base;
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
if (ret < 0) {
goto immediate_exit;
@@ -359,8 +355,8 @@ static void coroutine_fn mirror_run(void *opaque)
}
}
bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
bdrv_dirty_iter_init(bs, &s->hbi);
last_pause_ns = qemu_get_clock_ns(rt_clock);
for (;;) {
uint64_t delay_ns;
int64_t cnt;
@@ -371,14 +367,14 @@ static void coroutine_fn mirror_run(void *opaque)
goto immediate_exit;
}
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that qemu_aio_flush() returns.
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
@@ -413,7 +409,7 @@ static void coroutine_fn mirror_run(void *opaque)
should_complete = s->should_complete ||
block_job_is_cancelled(&s->common);
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
}
}
@@ -428,7 +424,7 @@ static void coroutine_fn mirror_run(void *opaque)
*/
trace_mirror_before_drain(s, cnt);
bdrv_drain_all();
cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
cnt = bdrv_get_dirty_count(bs);
}
ret = 0;
@@ -443,13 +439,13 @@ static void coroutine_fn mirror_run(void *opaque)
delay_ns = 0;
}
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
} else if (!should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
} else if (cnt == 0) {
/* The two disks are in sync. Exit and report successful
* completion.
@@ -458,7 +454,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
last_pause_ns = qemu_get_clock_ns(rt_clock);
}
immediate_exit:
@@ -475,22 +471,16 @@ immediate_exit:
qemu_vfree(s->buf);
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_set_dirty_tracking(bs, 0);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
}
bdrv_swap(s->target, s->common.bs);
if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
/* drop the bs loop chain formed by the swap: break the loop then
* trigger the unref from the top one */
BlockDriverState *p = s->base->backing_hd;
s->base->backing_hd = NULL;
bdrv_unref(p);
}
}
bdrv_unref(s->target);
bdrv_close(s->target);
bdrv_delete(s->target);
block_job_completed(&s->common, ret);
}
@@ -515,12 +505,14 @@ static void mirror_iostatus_reset(BlockJob *job)
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
Error *local_err = NULL;
int ret;
ret = bdrv_open_backing_file(s->target, NULL, &local_err);
ret = bdrv_open_backing_file(s->target);
if (ret < 0) {
error_propagate(errp, local_err);
char backing_filename[PATH_MAX];
bdrv_get_full_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
return;
}
if (!s->synced) {
@@ -532,32 +524,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
block_job_resume(job);
}
static const BlockJobDriver mirror_job_driver = {
static BlockJobType mirror_job_type = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_MIRROR,
.job_type = "mirror",
.set_speed = mirror_set_speed,
.iostatus_reset= mirror_iostatus_reset,
.complete = mirror_complete,
};
static const BlockJobDriver commit_active_job_driver = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = BLOCK_JOB_TYPE_COMMIT,
.set_speed = mirror_set_speed,
.iostatus_reset = mirror_iostatus_reset,
.complete = mirror_complete,
};
static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity,
int64_t buf_size,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp,
const BlockJobDriver *driver,
bool is_none_mode, BlockDriverState *base)
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
MirrorBlockJob *s;
@@ -582,8 +562,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
return;
}
s = block_job_create(driver, bs, speed, cb, opaque, errp);
s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}
@@ -591,12 +570,11 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
s->target = target;
s->is_none_mode = is_none_mode;
s->base = base;
s->mode = mode;
s->granularity = granularity;
s->buf_size = MAX(buf_size, granularity);
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
bdrv_set_dirty_tracking(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
@@ -604,80 +582,3 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
}
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
mirror_start_job(bs, target, speed, granularity, buf_size,
on_source_error, on_target_error, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
int64_t speed,
BlockdevOnError on_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
int64_t length, base_length;
int orig_base_flags;
int ret;
Error *local_err = NULL;
orig_base_flags = bdrv_get_flags(base);
if (bdrv_reopen(base, bs->open_flags, errp)) {
return;
}
length = bdrv_getlength(bs);
if (length < 0) {
error_setg_errno(errp, -length,
"Unable to determine length of %s", bs->filename);
goto error_restore_flags;
}
base_length = bdrv_getlength(base);
if (base_length < 0) {
error_setg_errno(errp, -base_length,
"Unable to determine length of %s", base->filename);
goto error_restore_flags;
}
if (length > base_length) {
ret = bdrv_truncate(base, length);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Top image %s is larger than base image %s, and "
"resize of base image failed",
bs->filename, base->filename);
goto error_restore_flags;
}
}
bdrv_ref(base);
mirror_start_job(bs, base, speed, 0, 0,
on_error, on_error, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto error_restore_flags;
}
return;
error_restore_flags:
/* ignore error and errp for bdrv_reopen, because we want to propagate
* the original error */
bdrv_reopen(base, orig_base_flags, NULL);
return;
}

View File

@@ -1,385 +0,0 @@
/*
* QEMU Block driver for NBD
*
* Copyright (C) 2008 Bull S.A.S.
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
* Some parts:
* Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "nbd-client.h"
#include "qemu/sockets.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
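/* Note (sketch, not part of the original diff): XOR with the same value is
 * self-inverse, so these two macros round-trip:
 *
 *   uint64_t handle = INDEX_TO_HANDLE(bs, 3);    // 3 ^ (uintptr_t)bs
 *   assert(HANDLE_TO_INDEX(bs, handle) == 3);    // (3 ^ bs) ^ bs == 3
 *
 * which is why the reply handler below can map the handle returned by the
 * server straight back to its recv_coroutine[] slot. */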
static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
{
int i;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
}
}
}
static void nbd_reply_ready(void *opaque)
{
NbdClientSession *s = opaque;
uint64_t i;
int ret;
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
ret = nbd_receive_reply(s->sock, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
}
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
return;
}
fail:
nbd_recv_coroutines_enter_all(s);
}
static void nbd_restart_write(void *opaque)
{
NbdClientSession *s = opaque;
qemu_coroutine_enter(s->send_coroutine, NULL);
}
static int nbd_co_send_request(NbdClientSession *s,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
}
rc = nbd_send_request(s->sock, request);
if (rc >= 0) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
rc = -EIO;
}
}
if (!s->is_unix) {
socket_set_cork(s->sock, 0);
}
} else {
rc = nbd_send_request(s->sock, request);
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
static void nbd_co_receive_reply(NbdClientSession *s,
struct nbd_request *request, struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
reply->error = EIO;
}
}
/* Tell the read handler to read another header. */
s->reply.handle = 0;
}
}
static void nbd_coroutine_start(NbdClientSession *s,
struct nbd_request *request)
{
int i;
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
qemu_co_mutex_lock(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i] == NULL) {
s->recv_coroutine[i] = qemu_coroutine_self();
break;
}
}
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
}
static void nbd_coroutine_end(NbdClientSession *s,
struct nbd_request *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_mutex_unlock(&s->free_sema);
}
}
static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
struct nbd_request request = { .type = NBD_CMD_READ };
struct nbd_reply reply;
ssize_t ret;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, qiov, offset);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_reply reply;
ssize_t ret;
if (!bdrv_enable_write_cache(client->bs) &&
(client->nbdflags & NBD_FLAG_SEND_FUA)) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, qiov, offset);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
/* qemu-nbd has a limit of slightly less than 1M per request. Try to
* remain aligned to 4K. */
#define NBD_MAX_SECTORS 2040
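/* Worked out (annotation, assuming 512-byte sectors as used throughout this
 * file): 2040 * 512 = 1,044,480 bytes = 255 * 4096, so each split request is
 * 4K-aligned and exactly one 4K page short of 1 MiB, staying under qemu-nbd's
 * per-request limit mentioned above. */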
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(client, sector_num,
NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset);
}
int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(client, sector_num,
NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset);
}
int nbd_client_session_co_flush(NbdClientSession *client)
{
struct nbd_request request = { .type = NBD_CMD_FLUSH };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
return 0;
}
if (client->nbdflags & NBD_FLAG_SEND_FUA) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = 0;
request.len = 0;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
int nb_sectors)
{
struct nbd_request request = { .type = NBD_CMD_TRIM };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(client, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL, 0);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
static void nbd_teardown_connection(NbdClientSession *client)
{
struct nbd_request request = {
.type = NBD_CMD_DISC,
.from = 0,
.len = 0
};
nbd_send_request(client->sock, &request);
/* finish any pending coroutines */
shutdown(client->sock, 2);
nbd_recv_coroutines_enter_all(client);
qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
closesocket(client->sock);
client->sock = -1;
}
void nbd_client_session_close(NbdClientSession *client)
{
if (!client->bs) {
return;
}
nbd_teardown_connection(client);
client->bs = NULL;
}
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
int sock, const char *export)
{
int ret;
/* NBD handshake */
logout("session init %s\n", export);
qemu_set_block(sock);
ret = nbd_receive_negotiate(sock, export,
&client->nbdflags, &client->size,
&client->blocksize);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
closesocket(sock);
return ret;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
client->bs = bs;
client->sock = sock;
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
logout("Established connection with NBD server\n");
return 0;
}

View File

@@ -1,50 +0,0 @@
#ifndef NBD_CLIENT_H
#define NBD_CLIENT_H
#include "qemu-common.h"
#include "block/nbd.h"
#include "block/block_int.h"
/* #define DEBUG_NBD */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
int sock;
uint32_t nbdflags;
off_t size;
size_t blocksize;
CoMutex send_mutex;
CoMutex free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
bool is_unix;
BlockDriverState *bs;
} NbdClientSession;
int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
int sock, const char *export_name);
void nbd_client_session_close(NbdClientSession *client);
int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
int nb_sectors);
int nbd_client_session_co_flush(NbdClientSession *client);
int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
#endif /* NBD_CLIENT_H */

View File

@@ -26,31 +26,56 @@
* THE SOFTWARE.
*/
#include "block/nbd-client.h"
#include "qemu-common.h"
#include "block/nbd.h"
#include "qemu/uri.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/sockets.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include <sys/types.h>
#include <unistd.h>
#define EN_OPTSTR ":exportname="
/* #define DEBUG_NBD */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif
#define MAX_NBD_REQUESTS 16
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
typedef struct BDRVNBDState {
NbdClientSession client;
QemuOpts *socket_opts;
int sock;
uint32_t nbdflags;
off_t size;
size_t blocksize;
CoMutex send_mutex;
CoMutex free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
int is_unix;
char *host_spec;
char *export_name; /* An NBD server may export several devices */
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
{
URI *uri;
const char *p;
QueryParams *qp = NULL;
int ret = 0;
bool is_unix;
uri = uri_parse(filename);
if (!uri) {
@@ -59,11 +84,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
/* transport */
if (!strcmp(uri->scheme, "nbd")) {
is_unix = false;
s->is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+tcp")) {
is_unix = false;
s->is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+unix")) {
is_unix = true;
s->is_unix = true;
} else {
ret = -EINVAL;
goto out;
@@ -72,44 +97,32 @@ static int nbd_parse_uri(const char *filename, QDict *options)
p = uri->path ? uri->path : "/";
p += strspn(p, "/");
if (p[0]) {
qdict_put(options, "export", qstring_from_str(p));
s->export_name = g_strdup(p);
}
qp = query_params_parse(uri->query);
if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
ret = -EINVAL;
goto out;
}
if (is_unix) {
if (s->is_unix) {
/* nbd+unix:///export?socket=path */
if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
ret = -EINVAL;
goto out;
}
qdict_put(options, "path", qstring_from_str(qp->p[0].value));
s->host_spec = g_strdup(qp->p[0].value);
} else {
QString *host;
/* nbd[+tcp]://host[:port]/export */
/* nbd[+tcp]://host:port/export */
if (!uri->server) {
ret = -EINVAL;
goto out;
}
/* strip braces from literal IPv6 address */
if (uri->server[0] == '[') {
host = qstring_from_substr(uri->server, 1,
strlen(uri->server) - 2);
} else {
host = qstring_from_str(uri->server);
}
qdict_put(options, "host", host);
if (uri->port) {
char* port_str = g_strdup_printf("%d", uri->port);
qdict_put(options, "port", qstring_from_str(port_str));
g_free(port_str);
if (!uri->port) {
uri->port = NBD_DEFAULT_PORT;
}
s->host_spec = g_strdup_printf("%s:%d", uri->server, uri->port);
}
out:
@@ -120,29 +133,16 @@ out:
return ret;
}
static void nbd_parse_filename(const char *filename, QDict *options,
Error **errp)
static int nbd_config(BDRVNBDState *s, const char *filename)
{
char *file;
char *export_name;
const char *host_spec;
const char *unixpath;
if (qdict_haskey(options, "host")
|| qdict_haskey(options, "port")
|| qdict_haskey(options, "path"))
{
error_setg(errp, "host/port/path and a file name may not be specified "
"at the same time");
return;
}
int err = -EINVAL;
if (strstr(filename, "://")) {
int ret = nbd_parse_uri(filename, options);
if (ret < 0) {
error_setg(errp, "No valid URL specified");
}
return;
return nbd_parse_uri(s, filename);
}
file = g_strdup(filename);
@@ -154,86 +154,183 @@ static void nbd_parse_filename(const char *filename, QDict *options,
}
export_name[0] = 0; /* truncate 'file' */
export_name += strlen(EN_OPTSTR);
qdict_put(options, "export", qstring_from_str(export_name));
s->export_name = g_strdup(export_name);
}
/* extract the host_spec - fail if it's not nbd:... */
if (!strstart(file, "nbd:", &host_spec)) {
error_setg(errp, "File name string for NBD must start with 'nbd:'");
goto out;
}
if (!*host_spec) {
goto out;
}
/* are we a UNIX or TCP socket? */
if (strstart(host_spec, "unix:", &unixpath)) {
qdict_put(options, "path", qstring_from_str(unixpath));
s->is_unix = true;
s->host_spec = g_strdup(unixpath);
} else {
InetSocketAddress *addr = NULL;
addr = inet_parse(host_spec, errp);
if (error_is_set(errp)) {
goto out;
}
qdict_put(options, "host", qstring_from_str(addr->host));
qdict_put(options, "port", qstring_from_str(addr->port));
qapi_free_InetSocketAddress(addr);
s->is_unix = false;
s->host_spec = g_strdup(host_spec);
}
err = 0;
out:
g_free(file);
if (err != 0) {
g_free(s->export_name);
g_free(s->host_spec);
}
return err;
}
static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
Error **errp)
static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
{
Error *local_err = NULL;
int i;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
qemu_co_mutex_lock(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i] == NULL) {
s->recv_coroutine[i] = qemu_coroutine_self();
break;
}
}
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
}
static int nbd_have_request(void *opaque)
{
BDRVNBDState *s = opaque;
return s->in_flight > 0;
}
static void nbd_reply_ready(void *opaque)
{
BDRVNBDState *s = opaque;
uint64_t i;
int ret;
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
ret = nbd_receive_reply(s->sock, &s->reply);
if (ret == -EAGAIN) {
return;
}
if (ret < 0) {
s->reply.handle = 0;
goto fail;
}
}
/* There's no need for a mutex on the receive side, because the
* handler acts as a synchronization point and ensures that only
* one coroutine is called until the reply finishes. */
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS) {
goto fail;
}
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
return;
}
s->client.is_unix = qdict_haskey(options, "path");
s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
&error_abort);
qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (!qemu_opt_get(s->socket_opts, "port")) {
qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
}
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
qdict_del(options, "export");
fail:
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
qemu_coroutine_enter(s->recv_coroutine[i], NULL);
}
}
}
static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
static void nbd_restart_write(void *opaque)
{
BDRVNBDState *s = opaque;
qemu_coroutine_enter(s->send_coroutine, NULL);
}
static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
nbd_have_request, s);
rc = nbd_send_request(s->sock, request);
if (rc >= 0 && qiov) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
rc = -EIO;
}
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
nbd_have_request, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
struct nbd_reply *reply,
QEMUIOVector *qiov, int offset)
{
int ret;
/* Wait until we're woken up by the read handler. TODO: perhaps
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
if (reply->handle != request->handle) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
reply->error = EIO;
}
}
/* Tell the read handler to read another header. */
s->reply.handle = 0;
}
}
static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_mutex_unlock(&s->free_sema);
}
}
static int nbd_establish_connection(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
int sock;
int ret;
off_t size;
size_t blocksize;
if (s->client.is_unix) {
sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
if (s->is_unix) {
sock = unix_socket_outgoing(s->host_spec);
} else {
sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
if (sock >= 0) {
socket_set_nodelay(sock);
}
sock = tcp_socket_outgoing_spec(s->host_spec);
}
/* Failed to establish connection */
@@ -242,92 +339,232 @@ static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
return -errno;
}
return sock;
/* NBD handshake */
ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
&blocksize);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
closesocket(sock);
return ret;
}
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
nbd_have_request, s);
s->sock = sock;
s->size = size;
s->blocksize = blocksize;
logout("Established connection with NBD server\n");
return 0;
}
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static void nbd_teardown_connection(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
int result, sock;
Error *local_err = NULL;
struct nbd_request request;
request.type = NBD_CMD_DISC;
request.from = 0;
request.len = 0;
nbd_send_request(s->sock, &request);
qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
closesocket(s->sock);
}
static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
{
BDRVNBDState *s = bs->opaque;
int result;
qemu_co_mutex_init(&s->send_mutex);
qemu_co_mutex_init(&s->free_sema);
/* Pop the config into our state object. Exit if invalid. */
nbd_config(s, options, &export, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
result = nbd_config(s, filename);
if (result != 0) {
return result;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
sock = nbd_establish_connection(bs, errp);
if (sock < 0) {
return sock;
}
result = nbd_establish_connection(bs);
/* NBD handshake */
result = nbd_client_session_init(&s->client, bs, sock, export);
g_free(export);
return result;
}
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
request.type = NBD_CMD_READ;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, qiov, offset);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
int offset)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
request.type = NBD_CMD_WRITE;
if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, qiov, offset);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
/* qemu-nbd has a limit of slightly less than 1M per request. Try to
* remain aligned to 4K. */
#define NBD_MAX_SECTORS 2040
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVNBDState *s = bs->opaque;
return nbd_client_session_co_readv(&s->client, sector_num,
nb_sectors, qiov);
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
}
static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVNBDState *s = bs->opaque;
return nbd_client_session_co_writev(&s->client, sector_num,
nb_sectors, qiov);
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
if (ret < 0) {
return ret;
}
offset += NBD_MAX_SECTORS * 512;
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
}
static int nbd_co_flush(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
return nbd_client_session_co_flush(&s->client);
if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
return 0;
}
request.type = NBD_CMD_FLUSH;
if (s->nbdflags & NBD_FLAG_SEND_FUA) {
request.type |= NBD_CMD_FLAG_FUA;
}
request.from = 0;
request.len = 0;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
BDRVNBDState *s = bs->opaque;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
return nbd_client_session_co_discard(&s->client, sector_num,
nb_sectors);
if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
request.type = NBD_CMD_TRIM;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
ret = nbd_co_send_request(s, &request, NULL, 0);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(s, &request, &reply, NULL, 0);
}
nbd_coroutine_end(s, &request);
return -reply.error;
}
static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
g_free(s->export_name);
g_free(s->host_spec);
qemu_opts_del(s->socket_opts);
nbd_client_session_close(&s->client);
nbd_teardown_connection(bs);
}
static int64_t nbd_getlength(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
return s->client.size;
return s->size;
}
static BlockDriver bdrv_nbd = {
.format_name = "nbd",
.protocol_name = "nbd",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
@@ -341,7 +578,6 @@ static BlockDriver bdrv_nbd_tcp = {
.format_name = "nbd",
.protocol_name = "nbd+tcp",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
@@ -355,7 +591,6 @@ static BlockDriver bdrv_nbd_unix = {
.format_name = "nbd",
.protocol_name = "nbd+unix",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,

View File

@@ -1,439 +0,0 @@
/*
* QEMU Block driver for native access to files on NFS shares
*
* Copyright (c) 2014 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "config-host.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/uri.h"
#include "sysemu/sysemu.h"
#include <nfsc/libnfs.h>
typedef struct NFSClient {
struct nfs_context *context;
struct nfsfh *fh;
int events;
bool has_zero_init;
} NFSClient;
typedef struct NFSRPC {
int ret;
int complete;
QEMUIOVector *iov;
struct stat *st;
Coroutine *co;
QEMUBH *bh;
} NFSRPC;
static void nfs_process_read(void *arg);
static void nfs_process_write(void *arg);
static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
qemu_aio_set_fd_handler(nfs_get_fd(client->context),
(ev & POLLIN) ? nfs_process_read : NULL,
(ev & POLLOUT) ? nfs_process_write : NULL,
client);
}
client->events = ev;
}
static void nfs_process_read(void *arg)
{
NFSClient *client = arg;
nfs_service(client->context, POLLIN);
nfs_set_events(client);
}
static void nfs_process_write(void *arg)
{
NFSClient *client = arg;
nfs_service(client->context, POLLOUT);
nfs_set_events(client);
}
static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
{
*task = (NFSRPC) {
.co = qemu_coroutine_self(),
};
}
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
qemu_bh_delete(task->bh);
qemu_coroutine_enter(task->co, NULL);
}
static void
nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
{
NFSRPC *task = private_data;
task->complete = 1;
task->ret = ret;
if (task->ret > 0 && task->iov) {
if (task->ret <= task->iov->size) {
qemu_iovec_from_buf(task->iov, 0, data, task->ret);
} else {
task->ret = -EIO;
}
}
if (task->ret == 0 && task->st) {
memcpy(task->st, data, sizeof(struct stat));
}
if (task->co) {
task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
qemu_bh_schedule(task->bh);
}
}
static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
{
NFSClient *client = bs->opaque;
NFSRPC task;
nfs_co_init_task(client, &task);
task.iov = iov;
if (nfs_pread_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
nfs_co_generic_cb, &task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
if (task.ret < 0) {
return task.ret;
}
/* zero pad short reads */
if (task.ret < iov->size) {
qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
}
return 0;
}
static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
{
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
nfs_co_init_task(client, &task);
buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,
sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
buf, nfs_co_generic_cb, &task) != 0) {
g_free(buf);
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
g_free(buf);
if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
return task.ret < 0 ? task.ret : -EIO;
}
return 0;
}
static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
NFSRPC task;
nfs_co_init_task(client, &task);
if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_coroutine_yield();
}
return task.ret;
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "nfs",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the NFS file",
},
{ /* end of list */ }
},
};
static void nfs_client_close(NFSClient *client)
{
if (client->context) {
if (client->fh) {
nfs_close(client->context, client->fh);
}
qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
}
static void nfs_file_close(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
nfs_client_close(client);
}
static int64_t nfs_client_open(NFSClient *client, const char *filename,
int flags, Error **errp)
{
int ret = -EINVAL, i;
struct stat st;
URI *uri;
QueryParams *qp = NULL;
char *file = NULL, *strp = NULL;
uri = uri_parse(filename);
if (!uri) {
error_setg(errp, "Invalid URL specified");
goto fail;
}
strp = strrchr(uri->path, '/');
if (strp == NULL) {
error_setg(errp, "Invalid URL specified");
goto fail;
}
file = g_strdup(strp);
*strp = 0;
client->context = nfs_init_context();
if (client->context == NULL) {
error_setg(errp, "Failed to init NFS context");
goto fail;
}
qp = query_params_parse(uri->query);
for (i = 0; i < qp->n; i++) {
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto fail;
}
if (!strncmp(qp->p[i].name, "uid", 3)) {
nfs_set_uid(client->context, atoi(qp->p[i].value));
} else if (!strncmp(qp->p[i].name, "gid", 3)) {
nfs_set_gid(client->context, atoi(qp->p[i].value));
} else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
goto fail;
}
}
ret = nfs_mount(client->context, uri->server, uri->path);
if (ret < 0) {
error_setg(errp, "Failed to mount nfs share: %s",
nfs_get_error(client->context));
goto fail;
}
if (flags & O_CREAT) {
ret = nfs_creat(client->context, file, 0600, &client->fh);
if (ret < 0) {
error_setg(errp, "Failed to create file: %s",
nfs_get_error(client->context));
goto fail;
}
} else {
ret = nfs_open(client->context, file, flags, &client->fh);
if (ret < 0) {
error_setg(errp, "Failed to open file : %s",
nfs_get_error(client->context));
goto fail;
}
}
ret = nfs_fstat(client->context, client->fh, &st);
if (ret < 0) {
error_setg(errp, "Failed to fstat file: %s",
nfs_get_error(client->context));
goto fail;
}
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
client->has_zero_init = S_ISREG(st.st_mode);
goto out;
fail:
nfs_client_close(client);
out:
if (qp) {
query_params_free(qp);
}
uri_free(uri);
g_free(file);
return ret;
}
static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp) {
NFSClient *client = bs->opaque;
int64_t ret;
QemuOpts *opts;
Error *local_err = NULL;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
return -EINVAL;
}
ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
errp);
if (ret < 0) {
return ret;
}
bs->total_sectors = ret;
return 0;
}
static int nfs_file_create(const char *url, QEMUOptionParameter *options,
Error **errp)
{
int ret = 0;
int64_t total_size = 0;
NFSClient *client = g_malloc0(sizeof(NFSClient));
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, "size")) {
total_size = options->value.n;
}
options++;
}
ret = nfs_client_open(client, url, O_CREAT, errp);
if (ret < 0) {
goto out;
}
ret = nfs_ftruncate(client->context, client->fh, total_size);
nfs_client_close(client);
out:
g_free(client);
return ret;
}
static int nfs_has_zero_init(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
return client->has_zero_init;
}
static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
NFSRPC task = {0};
struct stat st;
task.st = &st;
if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
return -ENOMEM;
}
while (!task.complete) {
nfs_set_events(client);
qemu_aio_wait();
}
return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
}
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
{
NFSClient *client = bs->opaque;
return nfs_ftruncate(client->context, client->fh, offset);
}
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
.instance_size = sizeof(NFSClient),
.bdrv_needs_filename = true,
.bdrv_has_zero_init = nfs_has_zero_init,
.bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
.bdrv_truncate = nfs_file_truncate,
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
.bdrv_create = nfs_file_create,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
.bdrv_co_flush_to_disk = nfs_co_flush,
};
static void nfs_block_init(void)
{
bdrv_register(&bdrv_nfs);
}
block_init(nfs_block_init);
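/* Usage sketch, inferred from nfs_client_open() above; the server, export and
 * parameter values are illustrative placeholders, not taken from the source:
 *
 *   qemu-img info "nfs://<server>/<export>/disk.img?uid=1000&gid=100"
 *
 * The last path component names the file inside the mounted export, and the
 * recognized query parameters (uid, gid, tcp-syncnt) map to nfs_set_uid(),
 * nfs_set_gid() and nfs_set_tcp_syncnt() in the parsing loop above. */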

View File

@@ -49,9 +49,9 @@ typedef struct BDRVParallelsState {
CoMutex lock;
uint32_t *catalog_bitmap;
int catalog_size;
unsigned int catalog_size;
int tracks;
unsigned int tracks;
} BDRVParallelsState;
static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -68,8 +68,7 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam
return 0;
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int parallels_open(BlockDriverState *bs, int flags)
{
BDRVParallelsState *s = bs->opaque;
int i;
@@ -85,16 +84,26 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
(le32_to_cpu(ph.version) != HEADER_VERSION)) {
error_setg(errp, "Image not in Parallels format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
bs->total_sectors = le32_to_cpu(ph.nb_sectors);
s->tracks = le32_to_cpu(ph.tracks);
if (s->tracks == 0) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Invalid image: Zero sectors per track");
ret = -EINVAL;
goto fail;
}
s->catalog_size = le32_to_cpu(ph.catalog_entries);
if (s->catalog_size > INT_MAX / 4) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Catalog too large");
ret = -EFBIG;
goto fail;
}
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);

View File

@@ -1,623 +0,0 @@
/*
* Block layer qmp and info dump related functions
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "block/qapi.h"
#include "block/block_int.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
{
BlockDeviceInfo *info = g_malloc0(sizeof(*info));
info->file = g_strdup(bs->filename);
info->ro = bs->read_only;
info->drv = g_strdup(bs->drv->format_name);
info->encrypted = bs->encrypted;
info->encryption_key_missing = bdrv_key_required(bs);
if (bs->node_name[0]) {
info->has_node_name = true;
info->node_name = g_strdup(bs->node_name);
}
if (bs->backing_file[0]) {
info->has_backing_file = true;
info->backing_file = g_strdup(bs->backing_file);
}
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
ThrottleConfig cfg;
throttle_get_config(&bs->throttle_state, &cfg);
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
info->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
info->has_bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max;
info->has_bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max;
info->bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max;
info->has_bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max;
info->bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max;
info->has_iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max;
info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
info->iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
}
return info;
}
/*
* Returns 0 on success, with *p_list either set to describe snapshot
* information, or NULL because there are no snapshots. Returns -errno on
* error, with *p_list untouched.
*/
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
SnapshotInfoList **p_list,
Error **errp)
{
int i, sn_count;
QEMUSnapshotInfo *sn_tab = NULL;
SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL;
SnapshotInfo *info;
sn_count = bdrv_snapshot_list(bs, &sn_tab);
if (sn_count < 0) {
const char *dev = bdrv_get_device_name(bs);
switch (sn_count) {
case -ENOMEDIUM:
error_setg(errp, "Device '%s' is not inserted", dev);
break;
case -ENOTSUP:
error_setg(errp,
"Device '%s' does not support internal snapshots",
dev);
break;
default:
error_setg_errno(errp, -sn_count,
"Can't list snapshots of device '%s'", dev);
break;
}
return sn_count;
}
for (i = 0; i < sn_count; i++) {
info = g_new0(SnapshotInfo, 1);
info->id = g_strdup(sn_tab[i].id_str);
info->name = g_strdup(sn_tab[i].name);
info->vm_state_size = sn_tab[i].vm_state_size;
info->date_sec = sn_tab[i].date_sec;
info->date_nsec = sn_tab[i].date_nsec;
info->vm_clock_sec = sn_tab[i].vm_clock_nsec / 1000000000;
info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000;
info_list = g_new0(SnapshotInfoList, 1);
info_list->value = info;
/* XXX: waiting for the qapi to support qemu-queue.h types */
if (!cur_item) {
head = cur_item = info_list;
} else {
cur_item->next = info_list;
cur_item = info_list;
}
}
g_free(sn_tab);
*p_list = head;
return 0;
}
/**
* bdrv_query_image_info:
* @bs: block device to examine
* @p_info: location to store image information
* @errp: location to store error information
*
* Store "flat" image information in @p_info.
*
* "Flat" means it does *not* query backing image information,
* i.e. (*pinfo)->has_backing_image will be set to false and
* (*pinfo)->backing_image to NULL even when the image does in fact have
* a backing image.
*
* @p_info will be set only on success. On error, store error in @errp.
*/
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp)
{
uint64_t total_sectors;
const char *backing_filename;
char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
ImageInfo *info = g_new0(ImageInfo, 1);
bdrv_get_geometry(bs, &total_sectors);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
info->virtual_size = total_sectors * 512;
info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) {
info->encrypted = true;
info->has_encrypted = true;
}
if (bdrv_get_info(bs, &bdi) >= 0) {
if (bdi.cluster_size != 0) {
info->cluster_size = bdi.cluster_size;
info->has_cluster_size = true;
}
info->dirty_flag = bdi.is_dirty;
info->has_dirty_flag = true;
}
info->format_specific = bdrv_get_specific_info(bs);
info->has_format_specific = info->format_specific != NULL;
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
info->has_backing_filename = true;
bdrv_get_full_backing_filename(bs, backing_filename2,
sizeof(backing_filename2));
if (strcmp(backing_filename, backing_filename2) != 0) {
info->full_backing_filename =
g_strdup(backing_filename2);
info->has_full_backing_filename = true;
}
if (bs->backing_format[0]) {
info->backing_filename_format = g_strdup(bs->backing_format);
info->has_backing_filename_format = true;
}
}
ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
switch (ret) {
case 0:
if (info->snapshots) {
info->has_snapshots = true;
}
break;
/* recoverable error */
case -ENOMEDIUM:
case -ENOTSUP:
error_free(err);
break;
default:
error_propagate(errp, err);
qapi_free_ImageInfo(info);
return;
}
*p_info = info;
}
/* @p_info will be set only on success. */
void bdrv_query_info(BlockDriverState *bs,
BlockInfo **p_info,
Error **errp)
{
BlockInfo *info = g_malloc0(sizeof(*info));
BlockDriverState *bs0;
ImageInfo **p_image_info;
Error *local_err = NULL;
info->device = g_strdup(bs->device_name);
info->type = g_strdup("unknown");
info->locked = bdrv_dev_is_medium_locked(bs);
info->removable = bdrv_dev_has_removable_media(bs);
if (bdrv_dev_has_removable_media(bs)) {
info->has_tray_open = true;
info->tray_open = bdrv_dev_is_tray_open(bs);
}
if (bdrv_iostatus_is_enabled(bs)) {
info->has_io_status = true;
info->io_status = bs->iostatus;
}
if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
if (bs->drv) {
info->has_inserted = true;
info->inserted = bdrv_block_device_info(bs);
bs0 = bs;
p_image_info = &info->inserted->image;
while (1) {
bdrv_query_image_info(bs0, p_image_info, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto err;
}
if (bs0->drv && bs0->backing_hd) {
bs0 = bs0->backing_hd;
(*p_image_info)->has_backing_image = true;
p_image_info = &((*p_image_info)->backing_image);
} else {
break;
}
}
}
*p_info = info;
return;
err:
qapi_free_BlockInfo(info);
}
BlockStats *bdrv_query_stats(const BlockDriverState *bs)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
if (bs->device_name[0]) {
s->has_device = true;
s->device = g_strdup(bs->device_name);
}
s->stats = g_malloc0(sizeof(*s->stats));
s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
if (bs->file) {
s->has_parent = true;
s->parent = bdrv_query_stats(bs->file);
}
if (bs->backing_hd) {
s->has_backing = true;
s->backing = bdrv_query_stats(bs->backing_hd);
}
return s;
}
BlockInfoList *qmp_query_block(Error **errp)
{
BlockInfoList *head = NULL, **p_next = &head;
BlockDriverState *bs = NULL;
Error *local_err = NULL;
while ((bs = bdrv_next(bs))) {
BlockInfoList *info = g_malloc0(sizeof(*info));
bdrv_query_info(bs, &info->value, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto err;
}
*p_next = info;
p_next = &info->next;
}
return head;
err:
qapi_free_BlockInfoList(head);
return NULL;
}
BlockStatsList *qmp_query_blockstats(Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
BlockDriverState *bs = NULL;
while ((bs = bdrv_next(bs))) {
BlockStatsList *info = g_malloc0(sizeof(*info));
info->value = bdrv_query_stats(bs);
*p_next = info;
p_next = &info->next;
}
return head;
}
#define NB_SUFFIXES 4
static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
static const char suffixes[NB_SUFFIXES] = "KMGT";
int64_t base;
int i;
if (size <= 999) {
snprintf(buf, buf_size, "%" PRId64, size);
} else {
base = 1024;
for (i = 0; i < NB_SUFFIXES; i++) {
if (size < (10 * base)) {
snprintf(buf, buf_size, "%0.1f%c",
(double)size / base,
suffixes[i]);
break;
} else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
snprintf(buf, buf_size, "%" PRId64 "%c",
((size + (base >> 1)) / base),
suffixes[i]);
break;
}
base = base * 1024;
}
}
return buf;
}
void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
QEMUSnapshotInfo *sn)
{
char buf1[128], date_buf[128], clock_buf[128];
struct tm tm;
time_t ti;
int64_t secs;
if (!sn) {
func_fprintf(f,
"%-10s%-20s%7s%20s%15s",
"ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
} else {
ti = sn->date_sec;
localtime_r(&ti, &tm);
strftime(date_buf, sizeof(date_buf),
"%Y-%m-%d %H:%M:%S", &tm);
secs = sn->vm_clock_nsec / 1000000000;
snprintf(clock_buf, sizeof(clock_buf),
"%02d:%02d:%02d.%03d",
(int)(secs / 3600),
(int)((secs / 60) % 60),
(int)(secs % 60),
(int)((sn->vm_clock_nsec / 1000000) % 1000));
func_fprintf(f,
"%-10s%-20s%7s%20s%15s",
sn->id_str, sn->name,
get_human_readable_size(buf1, sizeof(buf1),
sn->vm_state_size),
date_buf,
clock_buf);
}
}
static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
QDict *dict);
static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
QList *list);
static void dump_qobject(fprintf_function func_fprintf, void *f,
int comp_indent, QObject *obj)
{
switch (qobject_type(obj)) {
case QTYPE_QINT: {
QInt *value = qobject_to_qint(obj);
func_fprintf(f, "%" PRId64, qint_get_int(value));
break;
}
case QTYPE_QSTRING: {
QString *value = qobject_to_qstring(obj);
func_fprintf(f, "%s", qstring_get_str(value));
break;
}
case QTYPE_QDICT: {
QDict *value = qobject_to_qdict(obj);
dump_qdict(func_fprintf, f, comp_indent, value);
break;
}
case QTYPE_QLIST: {
QList *value = qobject_to_qlist(obj);
dump_qlist(func_fprintf, f, comp_indent, value);
break;
}
case QTYPE_QFLOAT: {
QFloat *value = qobject_to_qfloat(obj);
func_fprintf(f, "%g", qfloat_get_double(value));
break;
}
case QTYPE_QBOOL: {
QBool *value = qobject_to_qbool(obj);
func_fprintf(f, "%s", qbool_get_int(value) ? "true" : "false");
break;
}
case QTYPE_QERROR: {
QString *value = qerror_human((QError *)obj);
func_fprintf(f, "%s", qstring_get_str(value));
break;
}
case QTYPE_NONE:
break;
case QTYPE_MAX:
default:
abort();
}
}
static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
QList *list)
{
const QListEntry *entry;
int i = 0;
for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
qtype_code type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
func_fprintf(f, format, indentation * 4, "", i);
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
}
}
static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
QDict *dict)
{
const QDictEntry *entry;
for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
qtype_code type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
char key[strlen(entry->key) + 1];
int i;
/* replace dashes with spaces in key (variable) names */
for (i = 0; entry->key[i]; i++) {
key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
}
key[i] = 0;
func_fprintf(f, format, indentation * 4, "", key);
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
}
}
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
Error *local_err = NULL;
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
&local_err);
obj = qmp_output_get_qobject(ov);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
qmp_output_visitor_cleanup(ov);
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
ImageInfo *info)
{
char size_buf[128], dsize_buf[128];
if (!info->has_actual_size) {
snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
} else {
get_human_readable_size(dsize_buf, sizeof(dsize_buf),
info->actual_size);
}
get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size);
func_fprintf(f,
"image: %s\n"
"file format: %s\n"
"virtual size: %s (%" PRId64 " bytes)\n"
"disk size: %s\n",
info->filename, info->format, size_buf,
info->virtual_size,
dsize_buf);
if (info->has_encrypted && info->encrypted) {
func_fprintf(f, "encrypted: yes\n");
}
if (info->has_cluster_size) {
func_fprintf(f, "cluster_size: %" PRId64 "\n",
info->cluster_size);
}
if (info->has_dirty_flag && info->dirty_flag) {
func_fprintf(f, "cleanly shut down: no\n");
}
if (info->has_backing_filename) {
func_fprintf(f, "backing file: %s", info->backing_filename);
if (info->has_full_backing_filename) {
func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
}
func_fprintf(f, "\n");
if (info->has_backing_filename_format) {
func_fprintf(f, "backing file format: %s\n",
info->backing_filename_format);
}
}
if (info->has_snapshots) {
SnapshotInfoList *elem;
func_fprintf(f, "Snapshot list:\n");
bdrv_snapshot_dump(func_fprintf, f, NULL);
func_fprintf(f, "\n");
/* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but
* we convert to the block layer's native QEMUSnapshotInfo for now.
*/
for (elem = info->snapshots; elem; elem = elem->next) {
QEMUSnapshotInfo sn = {
.vm_state_size = elem->value->vm_state_size,
.date_sec = elem->value->date_sec,
.date_nsec = elem->value->date_nsec,
.vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL +
elem->value->vm_clock_nsec,
};
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
bdrv_snapshot_dump(func_fprintf, f, &sn);
func_fprintf(f, "\n");
}
}
if (info->has_format_specific) {
func_fprintf(f, "Format specific information:\n");
bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
}
}

View File

@@ -25,7 +25,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qemu/aes.h"
#include "block/aes.h"
#include "migration/migration.h"
/**************************************************************/
@@ -60,7 +60,7 @@ typedef struct BDRVQcowState {
int cluster_sectors;
int l2_bits;
int l2_size;
int l1_size;
unsigned int l1_size;
uint64_t cluster_offset_mask;
uint64_t l1_table_offset;
uint64_t *l1_table;
@@ -92,8 +92,7 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int qcow_open(BlockDriverState *bs, int flags)
{
BDRVQcowState *s = bs->opaque;
int len, i, shift, ret;
@@ -113,26 +112,41 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
be64_to_cpus(&header.l1_table_offset);
if (header.magic != QCOW_MAGIC) {
error_setg(errp, "Image not in qcow format");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
if (header.version != QCOW_VERSION) {
char version[64];
snprintf(version, sizeof(version), "QCOW version %d", header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "qcow", version);
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "qcow", version);
ret = -ENOTSUP;
goto fail;
}
if (header.size <= 1 || header.cluster_bits < 9) {
error_setg(errp, "invalid value in qcow header");
if (header.size <= 1) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Image size is too small (must be at least 2 bytes)");
ret = -EINVAL;
goto fail;
}
if (header.cluster_bits < 9 || header.cluster_bits > 16) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Cluster size must be between 512 and 64k");
ret = -EINVAL;
goto fail;
}
/* l2_bits specifies number of entries; storing a uint64_t in each entry,
* so bytes = num_entries << 3. */
if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"L2 table size must be between 512 and 64k");
ret = -EINVAL;
goto fail;
}
if (header.crypt_method > QCOW_CRYPT_AES) {
error_setg(errp, "invalid encryption method in qcow header");
ret = -EINVAL;
goto fail;
}
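
As an aside, the bounds enforced in this hunk pin the qcow cluster size to 512 bytes..64 KiB and the L2 table to the same byte range; a minimal stand-alone sketch of the arithmetic (plain C with illustrative values, not QEMU code) could look like this:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Illustrative values inside the ranges checked above. */
        int cluster_bits = 12;   /* 4 KiB clusters; allowed range is 9..16 */
        int l2_bits = 9;         /* 512 L2 entries; allowed range is 6..13 */
        uint64_t cluster_size = 1ULL << cluster_bits;
        uint64_t l2_table_bytes = (1ULL << l2_bits) << 3; /* one uint64_t per entry */

        printf("cluster: %llu bytes, L2 table: %llu bytes\n",
               (unsigned long long)cluster_size,
               (unsigned long long)l2_table_bytes);
        return 0;
    }
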
@@ -150,7 +164,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
/* read the level 1 table */
shift = s->cluster_bits + s->l2_bits;
s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
if (header.size > UINT64_MAX - (1LL << shift)) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Image too large");
ret = -EINVAL;
goto fail;
} else {
uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
if (l1_size > INT_MAX / sizeof(uint64_t)) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Image too large");
ret = -EINVAL;
goto fail;
}
s->l1_size = l1_size;
}
s->l1_table_offset = header.l1_table_offset;
s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
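
The two guards here exist to keep the later g_malloc(s->l1_size * sizeof(uint64_t)) well defined: the first rejects image sizes whose round-up would wrap a 64-bit value, the second rejects L1 tables whose byte size would overflow an int. A self-contained sketch of the same checks (hypothetical helper name and values) might be:

    #include <stdint.h>
    #include <stdio.h>
    #include <limits.h>

    /* Hypothetical stand-alone version of the guarded L1 sizing above. */
    static int compute_l1_size(uint64_t image_size, int cluster_bits, int l2_bits,
                               uint64_t *l1_size_out)
    {
        int shift = cluster_bits + l2_bits;

        /* The round-up must not wrap around 2^64. */
        if (image_size > UINT64_MAX - (1ULL << shift)) {
            return -1;
        }
        uint64_t l1_size = (image_size + (1ULL << shift) - 1) >> shift;

        /* The table's byte size (l1_size * 8) must stay within int range. */
        if (l1_size > INT_MAX / sizeof(uint64_t)) {
            return -1;
        }
        *l1_size_out = l1_size;
        return 0;
    }

    int main(void)
    {
        uint64_t l1;
        if (compute_l1_size(10ULL << 30, 12, 9, &l1) == 0) { /* 10 GiB image */
            printf("l1_size = %llu entries\n", (unsigned long long)l1);
        }
        return 0;
    }
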
@@ -399,7 +425,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
@@ -414,14 +440,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
if (!cluster_offset) {
return 0;
}
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
return (cluster_offset != 0);
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@@ -662,8 +681,7 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
static int qcow_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int qcow_create(const char *filename, QEMUOptionParameter *options)
{
int header_size, backing_filename_len, l1_size, shift, i;
QCowHeader header;
@@ -671,7 +689,6 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
int64_t total_size = 0;
const char *backing_file = NULL;
int flags = 0;
Error *local_err = NULL;
int ret;
BlockDriverState *qcow_bs;
@@ -687,17 +704,13 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
options++;
}
ret = bdrv_create_file(filename, options, &local_err);
ret = bdrv_create_file(filename, options);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
qcow_bs = NULL;
ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
ret = bdrv_file_open(&qcow_bs, filename, BDRV_O_RDWR);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -768,7 +781,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
g_free(tmp);
ret = 0;
exit:
bdrv_unref(qcow_bs);
bdrv_delete(qcow_bs);
return ret;
}
@@ -804,21 +817,8 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
uint8_t *out_buf;
uint64_t cluster_offset;
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
}
return ret;
}
if (nb_sectors != s->cluster_sectors)
return -EINVAL;
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
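
The longer variant above zero-pads a short final write up to a full cluster before handing it back to qcow_write_compressed(); a rough stand-alone sketch of that padding step (hypothetical helper, not the driver's API) is:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* Copy a short tail into a zero-filled cluster-sized buffer. */
    static uint8_t *pad_last_cluster(const uint8_t *buf, size_t buf_len,
                                     size_t cluster_size)
    {
        uint8_t *pad = calloc(1, cluster_size);    /* whole cluster, zero-filled */
        if (pad && buf_len <= cluster_size) {
            memcpy(pad, buf, buf_len);             /* data first, zeros after it */
        }
        return pad;                                /* caller frees it */
    }

    int main(void)
    {
        const uint8_t tail[3] = { 1, 2, 3 };
        uint8_t *cluster = pad_last_cluster(tail, sizeof(tail), 512);
        free(cluster);
        return 0;
    }
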
@@ -909,11 +909,10 @@ static BlockDriver bdrv_qcow = {
.bdrv_close = qcow_close,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_get_block_status = qcow_co_get_block_status,
.bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,

@@ -114,21 +114,6 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return ret;
}
if (c == s->refcount_block_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
c->entries[i].offset, s->cluster_size);
} else if (c == s->l2_table_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
c->entries[i].offset, s->cluster_size);
} else {
ret = qcow2_pre_write_overlap_check(bs, 0,
c->entries[i].offset, s->cluster_size);
}
if (ret < 0) {
return ret;
}
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
@@ -200,24 +185,6 @@ void qcow2_cache_depends_on_flush(Qcow2Cache *c)
c->depends_on_flush = true;
}
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
{
int ret, i;
ret = qcow2_cache_flush(bs, c);
if (ret < 0) {
return ret;
}
for (i = 0; i < c->size; i++) {
assert(c->entries[i].ref == 0);
c->entries[i].offset = 0;
c->entries[i].cache_hits = 0;
}
return 0;
}
static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
{
int i;

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -26,31 +26,6 @@
#include "block/block_int.h"
#include "block/qcow2.h"
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
void qcow2_free_snapshots(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
@@ -141,8 +116,14 @@ int qcow2_read_snapshots(BlockDriverState *bs)
}
offset += name_size;
sn->name[name_size] = '\0';
if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset - s->snapshots_offset <= INT_MAX);
s->snapshots_size = offset - s->snapshots_offset;
return 0;
@@ -163,7 +144,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
uint32_t nb_snapshots;
uint64_t snapshots_offset;
} QEMU_PACKED header_data;
int64_t offset, snapshots_offset;
int64_t offset, snapshots_offset = 0;
int ret;
/* compute the size of the snapshots */
@@ -175,29 +156,27 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
offset += sizeof(extra);
offset += strlen(sn->id_str);
offset += strlen(sn->name);
if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
ret = -EFBIG;
goto fail;
}
}
assert(offset <= INT_MAX);
snapshots_size = offset;
/* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
offset = snapshots_offset;
if (offset < 0) {
ret = offset;
goto fail;
return offset;
}
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
return ret;
}
/* The snapshot list position has not yet been updated, so these clusters
* must indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size);
if (ret < 0) {
goto fail;
}
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -220,7 +199,6 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
id_str_size = strlen(sn->id_str);
name_size = strlen(sn->name);
assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
@@ -272,17 +250,12 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
}
/* free the old snapshot table */
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
QCOW2_DISCARD_SNAPSHOT);
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
s->snapshots_offset = snapshots_offset;
s->snapshots_size = snapshots_size;
return 0;
fail:
if (snapshots_offset > 0) {
qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
QCOW2_DISCARD_ALWAYS);
}
return ret;
}
@@ -291,8 +264,7 @@ static void find_new_snapshot_id(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
int i;
unsigned long id, id_max = 0;
int i, id, id_max = 0;
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -300,50 +272,34 @@ static void find_new_snapshot_id(BlockDriverState *bs,
if (id > id_max)
id_max = id;
}
snprintf(id_str, id_str_size, "%lu", id_max + 1);
snprintf(id_str, id_str_size, "%d", id_max + 1);
}
static int find_snapshot_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name)
static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
{
BDRVQcowState *s = bs->opaque;
int i;
if (id && name) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id) &&
!strcmp(s->snapshots[i].name, name)) {
return i;
}
}
} else if (id) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id)) {
return i;
}
}
} else if (name) {
for (i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].name, name)) {
return i;
}
}
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].id_str, id_str))
return i;
}
return -1;
}
static int find_snapshot_by_id_or_name(BlockDriverState *bs,
const char *id_or_name)
static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
{
int ret;
BDRVQcowState *s = bs->opaque;
int i, ret;
ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
if (ret >= 0) {
ret = find_snapshot_by_id(bs, name);
if (ret >= 0)
return ret;
for(i = 0; i < s->nb_snapshots; i++) {
if (!strcmp(s->snapshots[i].name, name))
return i;
}
return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
return -1;
}
/* if no id is provided, a new one is constructed */
@@ -357,6 +313,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
return -EFBIG;
}
memset(sn, 0, sizeof(*sn));
/* Generate an ID if it wasn't passed */
@@ -365,7 +325,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
/* Check that the ID is unique */
if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
return -EEXIST;
}
@@ -394,12 +354,6 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@@ -419,6 +373,11 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
ret = bdrv_flush(bs);
if (ret < 0) {
goto fail;
}
/* Append the new snapshot to the snapshot list */
new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
@@ -433,19 +392,11 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (ret < 0) {
g_free(s->snapshots);
s->snapshots = old_snapshot_list;
s->nb_snapshots--;
goto fail;
}
g_free(old_snapshot_list);
/* The VM state isn't needed any more in the active L1 table; in fact, it
* hurts by causing expensive COW for the next snapshot. */
qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
align_offset(sn->vm_state_size, s->cluster_size)
>> BDRV_SECTOR_BITS,
QCOW2_DISCARD_NEVER);
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -520,12 +471,6 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
s->l1_table_offset, cur_l1_bytes);
if (ret < 0) {
goto fail;
}
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
@@ -582,19 +527,15 @@ fail:
return ret;
}
int qcow2_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
if (snapshot_index < 0) {
error_setg(errp, "Can't find the snapshot");
return -ENOENT;
}
sn = s->snapshots[snapshot_index];
@@ -606,8 +547,6 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
s->nb_snapshots--;
ret = qcow2_write_snapshots(bs);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to remove snapshot from snapshot list");
return ret;
}
@@ -625,17 +564,13 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
sn.l1_size, -1);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
return ret;
}
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
QCOW2_DISCARD_SNAPSHOT);
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to update snapshot status in disk");
return ret;
}
@@ -677,10 +612,7 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
return s->nb_snapshots;
}
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
{
int i, snapshot_index;
BDRVQcowState *s = bs->opaque;
@@ -692,21 +624,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
assert(bs->read_only);
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
if (snapshot_index < 0) {
error_setg(errp,
"Can't find snapshot");
return -ENOENT;
}
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
new_l1_bytes = s->l1_size * sizeof(uint64_t);
if (sn->l1_size > QCOW_MAX_L1_SIZE) {
error_report("Snapshot L1 table too large");
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
g_free(new_l1_table);
return ret;
}

File diff suppressed because it is too large.

@@ -25,7 +25,7 @@
#ifndef BLOCK_QCOW2_H
#define BLOCK_QCOW2_H
#include "qemu/aes.h"
#include "block/aes.h"
#include "block/coroutine.h"
//#define DEBUG_ALLOC
@@ -38,13 +38,26 @@
#define QCOW_CRYPT_AES 1
#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536
/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_REFTABLE_SIZE 0x800000
/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_L1_SIZE 0x2000000
/* Allow for an average of 1k per snapshot table entry, should be plenty of
* space for snapshot names and IDs */
#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
/* indicate that the refcount of the referenced cluster is exactly one. */
#define QCOW_OFLAG_COPIED (1ULL << 63)
#define QCOW_OFLAG_COPIED (1LL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
/* The cluster reads as all zeros */
#define QCOW_OFLAG_ZERO (1ULL << 0)
#define QCOW_OFLAG_ZERO (1LL << 0)
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
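
The "2 PB at 64k cluster size" figures in the comments above can be reproduced with a few lines of arithmetic; this throwaway check (illustrative only, assuming 64 KiB clusters and the 2-byte refcounts noted at REFCOUNT_SHIFT) prints 2 PiB for both limits:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size = 64 * 1024;
        uint64_t l1_entries   = 0x2000000 / sizeof(uint64_t);   /* 32 MB L1 table   */
        uint64_t l2_entries   = cluster_size / sizeof(uint64_t);
        uint64_t reft_entries = 0x800000 / sizeof(uint64_t);    /* 8 MB reftable    */
        uint64_t refs_per_blk = cluster_size / 2;                /* 2-byte refcounts */

        printf("L1 limit:       %llu bytes\n",
               (unsigned long long)(l1_entries * l2_entries * cluster_size));
        printf("refcount limit: %llu bytes\n",
               (unsigned long long)(reft_entries * refs_per_blk * cluster_size));
        return 0;
    }
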
@@ -58,21 +71,6 @@
#define DEFAULT_CLUSTER_SIZE 65536
#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
#define QCOW2_OPT_OVERLAP "overlap-check"
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table"
#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block"
#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
typedef struct QCowHeader {
uint32_t magic;
uint32_t version;
@@ -95,7 +93,33 @@ typedef struct QCowHeader {
uint32_t refcount_order;
uint32_t header_length;
} QEMU_PACKED QCowHeader;
} QCowHeader;
typedef struct QEMU_PACKED QCowSnapshotHeader {
/* header is 8 byte aligned */
uint64_t l1_table_offset;
uint32_t l1_size;
uint16_t id_str_size;
uint16_t name_size;
uint32_t date_sec;
uint32_t date_nsec;
uint64_t vm_clock_nsec;
uint32_t vm_state_size;
uint32_t extra_data_size; /* for extension */
/* extra data follows */
/* id_str follows */
/* name follows */
} QCowSnapshotHeader;
typedef struct QEMU_PACKED QCowSnapshotExtraData {
uint64_t vm_state_size_large;
uint64_t disk_size;
} QCowSnapshotExtraData;
typedef struct QCowSnapshot {
uint64_t l1_table_offset;
@@ -128,12 +152,9 @@ enum {
/* Incompatible feature bits */
enum {
QCOW2_INCOMPAT_DIRTY_BITNR = 0,
QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
| QCOW2_INCOMPAT_CORRUPT,
QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
};
/* Compatible feature bits */
@@ -144,28 +165,12 @@ enum {
QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
};
enum qcow2_discard_type {
QCOW2_DISCARD_NEVER = 0,
QCOW2_DISCARD_ALWAYS,
QCOW2_DISCARD_REQUEST,
QCOW2_DISCARD_SNAPSHOT,
QCOW2_DISCARD_OTHER,
QCOW2_DISCARD_MAX
};
typedef struct Qcow2Feature {
uint8_t type;
uint8_t bit;
char name[46];
} QEMU_PACKED Qcow2Feature;
typedef struct Qcow2DiscardRegion {
BlockDriverState *bs;
uint64_t offset;
uint64_t bytes;
QTAILQ_ENTRY(Qcow2DiscardRegion) next;
} Qcow2DiscardRegion;
typedef struct BDRVQcowState {
int cluster_bits;
int cluster_size;
@@ -191,8 +196,8 @@ typedef struct BDRVQcowState {
uint64_t *refcount_table;
uint64_t refcount_table_offset;
uint32_t refcount_table_size;
int64_t free_cluster_index;
int64_t free_byte_offset;
uint64_t free_cluster_index;
uint64_t free_byte_offset;
CoMutex lock;
@@ -202,17 +207,11 @@ typedef struct BDRVQcowState {
AES_KEY aes_decrypt_key;
uint64_t snapshots_offset;
int snapshots_size;
int nb_snapshots;
unsigned int nb_snapshots;
QCowSnapshot *snapshots;
int flags;
int qcow_version;
bool use_lazy_refcounts;
int refcount_order;
bool discard_passthrough[QCOW2_DISCARD_MAX];
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
uint64_t incompatible_features;
uint64_t compatible_features;
@@ -221,8 +220,6 @@ typedef struct BDRVQcowState {
size_t unknown_header_fields_size;
void* unknown_header_fields;
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
} BDRVQcowState;
/* XXX: use std qcow open function ? */
@@ -288,9 +285,6 @@ typedef struct QCowL2Meta
*/
Qcow2COWRegion cow_end;
/** Pointer to next L2Meta of the same write request */
struct QCowL2Meta *next;
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
@@ -301,60 +295,11 @@ enum {
QCOW2_CLUSTER_ZERO
};
typedef enum QCow2MetadataOverlap {
QCOW2_OL_MAIN_HEADER_BITNR = 0,
QCOW2_OL_ACTIVE_L1_BITNR = 1,
QCOW2_OL_ACTIVE_L2_BITNR = 2,
QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
QCOW2_OL_INACTIVE_L1_BITNR = 6,
QCOW2_OL_INACTIVE_L2_BITNR = 7,
QCOW2_OL_MAX_BITNR = 8,
QCOW2_OL_NONE = 0,
QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
/* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
* reads. */
QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
} QCow2MetadataOverlap;
/* Perform all overlap checks which can be done in constant time */
#define QCOW2_OL_CONSTANT \
(QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \
QCOW2_OL_SNAPSHOT_TABLE)
/* Perform all overlap checks which don't require disk access */
#define QCOW2_OL_CACHED \
(QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \
QCOW2_OL_INACTIVE_L1)
/* Perform all overlap checks */
#define QCOW2_OL_ALL \
(QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
{
return offset & ~(s->cluster_size - 1);
}
static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
{
return offset & (s->cluster_size - 1);
}
#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
{
@@ -367,20 +312,15 @@ static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
return (size + (1ULL << shift) - 1) >> shift;
}
static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
static inline int64_t align_offset(int64_t offset, int n)
{
offset = (offset + n - 1) & ~(n - 1);
return offset;
}
static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
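
align_offset() above relies on n being a power of two; a tiny stand-alone copy makes the rounding easy to verify (example values are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    /* Same body as align_offset() above, lifted out for a quick check. */
    static int64_t align_offset(int64_t offset, int n)
    {
        return (offset + n - 1) & ~(n - 1);
    }

    int main(void)
    {
        printf("%lld\n", (long long)align_offset(1000, 512)); /* 1024 */
        printf("%lld\n", (long long)align_offset(1024, 512)); /* 1024 */
        return 0;
    }
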
static inline int qcow2_get_cluster_type(uint64_t l2_entry)
@@ -402,17 +342,6 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
}
static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
{
return m->offset + m->cow_start.offset;
}
static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
{
return m->offset + m->cow_end.offset
+ (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
}
// FIXME Need qcow2_ prefix to global functions
/* qcow2.c functions */
@@ -420,26 +349,20 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
int addend, enum qcow2_discard_type type);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size,
enum qcow2_discard_type type);
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type);
int64_t offset, int64_t size);
void qcow2_free_any_clusters(BlockDriverState *bs,
uint64_t cluster_offset, int nb_clusters);
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend);
@@ -447,17 +370,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
@@ -468,30 +383,22 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m);
int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type);
int nb_sectors);
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
int qcow2_expand_zero_clusters(BlockDriverState *bs);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
int qcow2_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp);
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
int qcow2_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp);
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
void qcow2_free_snapshots(BlockDriverState *bs);
int qcow2_read_snapshots(BlockDriverState *bs);
@@ -506,8 +413,6 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,

@@ -353,10 +353,10 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
{
trace_qed_start_need_check_timer(s);
/* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for
/* Use vm_clock so we don't alter the image file while suspended for
* migration.
*/
timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
}
@@ -364,7 +364,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
static void qed_cancel_need_check_timer(BDRVQEDState *s)
{
trace_qed_cancel_need_check_timer(s);
timer_del(s->need_check_timer);
qemu_del_timer(s->need_check_timer);
}
static void bdrv_qed_rebind(BlockDriverState *bs)
@@ -373,8 +373,7 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
s->bs = bs;
}
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int bdrv_qed_open(BlockDriverState *bs, int flags)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -391,15 +390,14 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
qed_header_le_to_cpu(&le_header, &s->header);
if (s->header.magic != QED_MAGIC) {
error_setg(errp, "Image not in QED format");
return -EINVAL;
return -EMEDIUMTYPE;
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
char buf[64];
snprintf(buf, sizeof(buf), "%" PRIx64,
s->header.features & ~QED_FEATURE_MASK);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "QED", buf);
return -ENOTSUP;
}
@@ -496,7 +494,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
}
s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
s->need_check_timer = qemu_new_timer_ns(vm_clock,
qed_need_check_timer_cb, s);
out:
@@ -507,15 +505,6 @@ out:
return ret;
}
static int bdrv_qed_refresh_limits(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
return 0;
}
/* We have nothing to do for QED reopen, stubs just return
* success */
static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -529,7 +518,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
BDRVQEDState *s = bs->opaque;
qed_cancel_need_check_timer(s);
timer_free(s->need_check_timer);
qemu_free_timer(s->need_check_timer);
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
@@ -546,8 +535,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
static int qed_create(const char *filename, uint32_t cluster_size,
uint64_t image_size, uint32_t table_size,
const char *backing_file, const char *backing_fmt,
Error **errp)
const char *backing_file, const char *backing_fmt)
{
QEDHeader header = {
.magic = QED_MAGIC,
@@ -562,22 +550,16 @@ static int qed_create(const char *filename, uint32_t cluster_size,
QEDHeader le_header;
uint8_t *l1_table = NULL;
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
BlockDriverState *bs;
BlockDriverState *bs = NULL;
ret = bdrv_create_file(filename, NULL, &local_err);
ret = bdrv_create_file(filename, NULL);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
&local_err);
ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
}
@@ -617,12 +599,11 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
bdrv_unref(bs);
bdrv_delete(bs);
return ret;
}
static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
{
uint64_t image_size = 0;
uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
@@ -667,70 +648,54 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
}
return qed_create(filename, cluster_size, image_size, table_size,
backing_file, backing_fmt, errp);
backing_file, backing_fmt);
}
typedef struct {
BlockDriverState *bs;
Coroutine *co;
uint64_t pos;
int64_t status;
int is_allocated;
int *pnum;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDIsAllocatedCB *cb = opaque;
BDRVQEDState *s = cb->bs->opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
switch (ret) {
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
break;
case QED_CLUSTER_L2:
case QED_CLUSTER_L1:
cb->status = 0;
break;
default:
assert(ret < 0);
cb->status = ret;
break;
}
cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
}
}
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
QEDIsAllocatedCB cb = {
.bs = bs,
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.is_allocated = -1,
.pnum = pnum,
};
QEDRequest request = { .l2_table = NULL };
qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
/* Now sleep if the callback wasn't invoked immediately */
while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
while (cb.is_allocated == -1) {
cb.co = qemu_coroutine_self();
qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
return cb.status;
return cb.is_allocated;
}
static int bdrv_qed_make_empty(BlockDriverState *bs)
{
return -ENOTSUP;
}
static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
@@ -1403,8 +1368,7 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
BdrvRequestFlags flags)
int nb_sectors)
{
BlockDriverAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
@@ -1481,8 +1445,6 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
bdi->unallocated_blocks_are_zero = true;
bdi->can_write_zeroes_with_unmap = true;
return 0;
}
@@ -1564,7 +1526,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
bdrv_qed_close(bs);
memset(s, 0, sizeof(BDRVQEDState));
bdrv_qed_open(bs, NULL, bs->open_flags, NULL);
bdrv_qed_open(bs, bs->open_flags);
}
static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
@@ -1612,15 +1574,14 @@ static BlockDriver bdrv_qed = {
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
.bdrv_refresh_limits = bdrv_qed_refresh_limits,
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,

@@ -100,7 +100,7 @@ typedef struct {
/* if (features & QED_F_BACKING_FILE) */
uint32_t backing_filename_offset; /* in bytes from start of header */
uint32_t backing_filename_size; /* in bytes */
} QEMU_PACKED QEDHeader;
} QEDHeader;
typedef struct {
uint64_t offsets[0]; /* in bytes */

@@ -1,873 +0,0 @@
/*
* Quorum Block filter
*
* Copyright (C) 2012-2014 Nodalink, EURL.
*
* Author:
* Benoît Canet <benoit.canet@irqsave.net>
*
* Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
* and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>
#include "block/block_int.h"
#include "qapi/qmp/qjson.h"
#define HASH_LENGTH 32
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
char h[HASH_LENGTH]; /* SHA-256 hash */
int64_t l; /* simpler 64 bits hash */
} QuorumVoteValue;
/* A vote item */
typedef struct QuorumVoteItem {
int index;
QLIST_ENTRY(QuorumVoteItem) next;
} QuorumVoteItem;
/* this structure is a vote version. A version is the set of votes sharing the
* same vote value.
* The set of votes will be tracked with the items field and its cardinality is
* vote_count.
*/
typedef struct QuorumVoteVersion {
QuorumVoteValue value;
int index;
int vote_count;
QLIST_HEAD(, QuorumVoteItem) items;
QLIST_ENTRY(QuorumVoteVersion) next;
} QuorumVoteVersion;
/* this structure holds a group of vote versions together */
typedef struct QuorumVotes {
QLIST_HEAD(, QuorumVoteVersion) vote_list;
bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
} QuorumVotes;
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
BlockDriverState **bs; /* children BlockDriverStates */
int num_children; /* children count */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
*/
bool is_blkverify; /* true if the driver is in blkverify mode
* Writes are mirrored on two children devices.
* On reads the two children devices' contents are
* compared and if a difference is spotted its
* location is printed and the code aborts.
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
/* Quorum will create one instance of the following structure per operation it
* performs on its children.
* So for each read/write operation coming from the upper layer there will be
* $children_count QuorumChildRequest.
*/
typedef struct QuorumChildRequest {
BlockDriverAIOCB *aiocb;
QEMUIOVector qiov;
uint8_t *buf;
int ret;
QuorumAIOCB *parent;
} QuorumChildRequest;
/* Quorum will use the following structure to track progress of each read/write
* operation received by the upper layer.
* This structure hold pointers to the QuorumChildRequest structures instances
* used to do operations on each children and track overall progress.
*/
struct QuorumAIOCB {
BlockDriverAIOCB common;
/* Request metadata */
uint64_t sector_num;
int nb_sectors;
QEMUIOVector *qiov; /* calling IOV */
QuorumChildRequest *qcrs; /* individual child requests */
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
QuorumVotes votes;
bool is_read;
int vote_ret;
};
static void quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
BDRVQuorumState *s = acb->common.bs->opaque;
int i;
/* cancel all callbacks */
for (i = 0; i < s->num_children; i++) {
bdrv_aio_cancel(acb->qcrs[i].aiocb);
}
g_free(acb->qcrs);
qemu_aio_release(acb);
}
static AIOCBInfo quorum_aiocb_info = {
.aiocb_size = sizeof(QuorumAIOCB),
.cancel = quorum_aio_cancel,
};
static void quorum_aio_finalize(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
int i, ret = 0;
if (acb->vote_ret) {
ret = acb->vote_ret;
}
acb->common.cb(acb->common.opaque, ret);
if (acb->is_read) {
for (i = 0; i < s->num_children; i++) {
qemu_vfree(acb->qcrs[i].buf);
qemu_iovec_destroy(&acb->qcrs[i].qiov);
}
}
g_free(acb->qcrs);
qemu_aio_release(acb);
}
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
{
return !memcmp(a->h, b->h, HASH_LENGTH);
}
static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
{
return a->l == b->l;
}
static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t sector_num,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
int i;
acb->common.bs->opaque = s;
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
acb->qiov = qiov;
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
acb->vote_ret = 0;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = NULL;
acb->qcrs[i].ret = 0;
acb->qcrs[i].parent = acb;
}
return acb;
}
static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
{
QObject *data;
assert(node_name);
data = qobject_from_jsonf("{ 'node-name': %s"
", 'sector-num': %" PRId64
", 'sectors-count': %d }",
node_name, acb->sector_num, acb->nb_sectors);
if (ret < 0) {
QDict *dict = qobject_to_qdict(data);
qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
}
monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
qobject_decref(data);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
QObject *data;
const char *reference = acb->common.bs->device_name[0] ?
acb->common.bs->device_name :
acb->common.bs->node_name;
data = qobject_from_jsonf("{ 'reference': %s"
", 'sector-num': %" PRId64
", 'sectors-count': %d }",
reference, acb->sector_num, acb->nb_sectors);
monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
qobject_decref(data);
}
static int quorum_vote_error(QuorumAIOCB *acb);
static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
if (acb->success_count < s->threshold) {
acb->vote_ret = quorum_vote_error(acb);
quorum_report_failure(acb);
return true;
}
return false;
}
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
sacb->ret = ret;
acb->count++;
if (ret == 0) {
acb->success_count++;
} else {
quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
}
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
return;
}
/* Do the vote on read */
if (acb->is_read) {
quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
quorum_aio_finalize(acb);
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
QuorumAIOCB *acb,
QuorumVoteValue *value)
{
QuorumVoteVersion *version;
QuorumVoteItem *item;
QLIST_FOREACH(version, &acb->votes.vote_list, next) {
if (acb->votes.compare(&version->value, value)) {
continue;
}
QLIST_FOREACH(item, &version->items, next) {
quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
}
}
}
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
assert(dest->niov == source->niov);
assert(dest->size == source->size);
for (i = 0; i < source->niov; i++) {
assert(dest->iov[i].iov_len == source->iov[i].iov_len);
memcpy(dest->iov[i].iov_base,
source->iov[i].iov_base,
source->iov[i].iov_len);
}
}
static void quorum_count_vote(QuorumVotes *votes,
QuorumVoteValue *value,
int index)
{
QuorumVoteVersion *v = NULL, *version = NULL;
QuorumVoteItem *item;
/* look if we have something with this hash */
QLIST_FOREACH(v, &votes->vote_list, next) {
if (votes->compare(&v->value, value)) {
version = v;
break;
}
}
/* It's a version not yet in the list add it */
if (!version) {
version = g_new0(QuorumVoteVersion, 1);
QLIST_INIT(&version->items);
memcpy(&version->value, value, sizeof(version->value));
version->index = index;
version->vote_count = 0;
QLIST_INSERT_HEAD(&votes->vote_list, version, next);
}
version->vote_count++;
item = g_new0(QuorumVoteItem, 1);
item->index = index;
QLIST_INSERT_HEAD(&version->items, item, next);
}
static void quorum_free_vote_list(QuorumVotes *votes)
{
QuorumVoteVersion *version, *next_version;
QuorumVoteItem *item, *next_item;
QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
QLIST_REMOVE(version, next);
QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
QLIST_REMOVE(item, next);
g_free(item);
}
g_free(version);
}
}
static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
{
int j, ret;
gnutls_hash_hd_t dig;
QEMUIOVector *qiov = &acb->qcrs[i].qiov;
ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
if (ret < 0) {
return ret;
}
for (j = 0; j < qiov->niov; j++) {
ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
if (ret < 0) {
break;
}
}
gnutls_hash_deinit(dig, (void *) hash);
return ret;
}
static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
{
int max = 0;
QuorumVoteVersion *candidate, *winner = NULL;
QLIST_FOREACH(candidate, &votes->vote_list, next) {
if (candidate->vote_count > max) {
max = candidate->vote_count;
winner = candidate;
}
}
return winner;
}
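
The winner is simply the value reported by the most children; a toy stand-alone rendering of that selection with three children and a threshold of 2 (an int64_t stands in for the real SHA-256 vote hashes) could be:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t replies[3] = { 0xAAAA, 0xAAAA, 0xBBBB };  /* per-child read results */
        int threshold = 2, best_count = 0;
        int64_t winner = 0;

        for (int i = 0; i < 3; i++) {
            int count = 0;
            for (int j = 0; j < 3; j++) {
                count += (replies[j] == replies[i]);
            }
            if (count > best_count) {
                best_count = count;
                winner = replies[i];
            }
        }

        if (best_count >= threshold) {
            printf("quorum reached on %llx with %d votes\n",
                   (unsigned long long)winner, best_count);
        } else {
            printf("quorum failed\n");
        }
        return 0;
    }
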
/* qemu_iovec_compare is handy for blkverify mode because it returns the first
* differing byte location. Yet it is handcoded to compare vectors one byte
* after another so it does not benefit from the libc SIMD optimizations.
* quorum_iovec_compare is written for speed and should be used in the non
* blkverify mode of quorum.
*/
static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
{
int i;
int result;
assert(a->niov == b->niov);
for (i = 0; i < a->niov; i++) {
assert(a->iov[i].iov_len == b->iov[i].iov_len);
result = memcmp(a->iov[i].iov_base,
b->iov[i].iov_base,
a->iov[i].iov_len);
if (result) {
return false;
}
}
return true;
}
static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
acb->sector_num, acb->nb_sectors);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
exit(1);
}
static bool quorum_compare(QuorumAIOCB *acb,
QEMUIOVector *a,
QEMUIOVector *b)
{
BDRVQuorumState *s = acb->common.bs->opaque;
ssize_t offset;
/* This driver will replace blkverify in this particular case */
if (s->is_blkverify) {
offset = qemu_iovec_compare(a, b);
if (offset != -1) {
quorum_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num +
(uint64_t)(offset / BDRV_SECTOR_SIZE));
}
return true;
}
return quorum_iovec_compare(a, b);
}
/* Do a vote to get the error code */
static int quorum_vote_error(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
int i, ret = 0;
bool error = false;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
ret = acb->qcrs[i].ret;
if (ret) {
error = true;
result_value.l = ret;
quorum_count_vote(&error_votes, &result_value, i);
}
}
if (error) {
winner = quorum_get_vote_winner(&error_votes);
ret = winner->value.l;
}
quorum_free_vote_list(&error_votes);
return ret;
}
static void quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
return;
}
/* get the index of the first successful read */
for (i = 0; i < s->num_children; i++) {
if (!acb->qcrs[i].ret) {
break;
}
}
assert(i < s->num_children);
/* compare this read with all other successful reads stopping at quorum
* failure
*/
for (j = i + 1; j < s->num_children; j++) {
if (acb->qcrs[j].ret) {
continue;
}
quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov);
if (!quorum) {
break;
}
}
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
return;
}
/* compute hashes for each successful read, also store indexes */
for (i = 0; i < s->num_children; i++) {
if (acb->qcrs[i].ret) {
continue;
}
ret = quorum_compute_hash(acb, i, &hash);
/* if ever the hash computation failed */
if (ret < 0) {
acb->vote_ret = ret;
goto free_exit;
}
quorum_count_vote(&acb->votes, &hash, i);
}
/* vote to select the most represented version */
winner = quorum_get_vote_winner(&acb->votes);
/* if the winner count is smaller than threshold the read fails */
if (winner->vote_count < s->threshold) {
quorum_report_failure(acb);
acb->vote_ret = -EIO;
goto free_exit;
}
/* we have a winner: copy it */
quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov);
/* some versions are bad print them */
quorum_report_bad_versions(s, acb, &winner->value);
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
nb_sectors, cb, opaque);
int i;
acb->is_read = true;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
}
static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
cb, opaque);
int i;
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
return &acb->common;
}
static int64_t quorum_getlength(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int64_t result;
int i;
/* check that all file have the same length */
result = bdrv_getlength(s->bs[0]);
if (result < 0) {
return result;
}
for (i = 1; i < s->num_children; i++) {
int64_t value = bdrv_getlength(s->bs[i]);
if (value < 0) {
return value;
}
if (value != result) {
return -EIO;
}
}
return result;
}
static void quorum_invalidate_cache(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
bdrv_invalidate_cache(s->bs[i]);
}
}
static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
int i;
int result = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->bs[i]);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
}
winner = quorum_get_vote_winner(&error_votes);
result = winner->value.l;
quorum_free_vote_list(&error_votes);
return result;
}
static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
candidate);
if (perm) {
return true;
}
}
return false;
}
static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
{
if (threshold < 1) {
error_set(errp, QERR_INVALID_PARAMETER_VALUE,
"vote-threshold", "value >= 1");
return -ERANGE;
}
if (threshold > num_children) {
error_setg(errp, "threshold may not exceed children count");
return -ERANGE;
}
return 0;
}
static QemuOptsList quorum_runtime_opts = {
.name = "quorum",
.head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
.desc = {
{
.name = QUORUM_OPT_VOTE_THRESHOLD,
.type = QEMU_OPT_NUMBER,
.help = "The number of vote needed for reaching quorum",
},
{
.name = QUORUM_OPT_BLKVERIFY,
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
{ /* end of list */ }
},
};
static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQuorumState *s = bs->opaque;
Error *local_err = NULL;
QemuOpts *opts;
bool *opened;
QDict *sub = NULL;
QList *list = NULL;
const QListEntry *lentry;
int i;
int ret = 0;
qdict_flatten(options);
qdict_extract_subqdict(options, &sub, "children.");
qdict_array_split(sub, &list);
if (qdict_size(sub)) {
error_setg(&local_err, "Invalid option children.%s",
qdict_first(sub)->key);
ret = -EINVAL;
goto exit;
}
/* count how many different children are present */
s->num_children = qlist_size(list);
if (s->num_children < 2) {
error_setg(&local_err,
"Number of provided children must be greater than 1");
ret = -EINVAL;
goto exit;
}
opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
ret = -EINVAL;
goto exit;
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
/* and validate it against s->num_children */
ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
if (ret < 0) {
goto exit;
}
/* is the driver in blkverify mode */
if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
s->num_children == 2 && s->threshold == 2) {
s->is_blkverify = true;
} else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
fprintf(stderr, "blkverify mode is set by setting blkverify=on "
"and using two files with vote_threshold=2\n");
}
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
for (i = 0, lentry = qlist_first(list); lentry;
lentry = qlist_next(lentry), i++) {
QDict *d;
QString *string;
switch (qobject_type(lentry->value))
{
/* List of options */
case QTYPE_QDICT:
d = qobject_to_qdict(lentry->value);
QINCREF(d);
ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL,
&local_err);
break;
/* QMP reference */
case QTYPE_QSTRING:
string = qobject_to_qstring(lentry->value);
ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
flags, NULL, &local_err);
break;
default:
error_setg(&local_err, "Specification of child block device %i "
"is invalid", i);
ret = -EINVAL;
}
if (ret < 0) {
goto close_exit;
}
opened[i] = true;
}
g_free(opened);
goto exit;
close_exit:
/* cleanup on error */
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
continue;
}
bdrv_unref(s->bs[i]);
}
g_free(s->bs);
g_free(opened);
exit:
/* propagate error */
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
}
QDECREF(list);
QDECREF(sub);
return ret;
}
static void quorum_close(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
bdrv_unref(s->bs[i]);
}
g_free(s->bs);
}
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
.authorizations = { true, true },
.bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_getlength = quorum_getlength,
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
.bdrv_invalidate_cache = quorum_invalidate_cache,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
static void bdrv_quorum_init(void)
{
bdrv_register(&bdrv_quorum);
}
block_init(bdrv_quorum_init);

@@ -21,10 +21,9 @@
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008
#define QEMU_AIO_DISCARD 0x0010
#define QEMU_AIO_WRITE_ZEROES 0x0020
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
QEMU_AIO_DISCARD)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000

@@ -127,8 +127,6 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
size_t buf_align;
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -141,11 +139,12 @@ typedef struct BDRVRawState {
void *aio_ctx;
#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
bool is_xfs : 1;
#endif
bool has_discard : 1;
#ifdef CONFIG_FIEMAP
bool skip_fiemap;
#endif
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -215,76 +214,6 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
char *buf;
unsigned int sector_size;
/* For /dev/sg devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
if (bs->sg || !(s->open_flags & O_DIRECT)) {
bs->request_alignment = 1;
s->buf_align = 1;
return;
}
/* Try a few ioctls to get the right size */
bs->request_alignment = 0;
s->buf_align = 0;
#ifdef BLKSSZGET
if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DKIOCGETBLOCKSIZE
if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DIOCGSECTORSIZE
if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
bs->request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
}
}
#endif
/* If we could not get the sizes so far, we can only guess them */
if (!s->buf_align) {
size_t align;
buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
s->buf_align = align;
break;
}
}
qemu_vfree(buf);
}
if (!bs->request_alignment) {
size_t align;
buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
if (pread(s->fd, buf, align, 0) >= 0) {
bs->request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
}
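The probing loop above depends on O_DIRECT rejecting reads that are smaller than (or not aligned to) the device's logical block size, so the first pread() that succeeds reveals the required alignment. A minimal standalone sketch of the same idea (illustrative only, not part of this diff; probe_alignment and its max_blocksize parameter are assumptions):

#include <stdlib.h>
#include <unistd.h>

/* Return the smallest read size an O_DIRECT fd accepts, or 0 if nothing
 * up to max_blocksize works. */
static size_t probe_alignment(int fd, size_t max_blocksize)
{
    void *buf;
    size_t align;

    if (posix_memalign(&buf, max_blocksize, max_blocksize) != 0) {
        return 0;
    }
    for (align = 512; align <= max_blocksize; align <<= 1) {
        if (pread(fd, buf, align, 0) >= 0) {
            free(buf);
            return align;
        }
    }
    free(buf);
    return 0;
}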
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -336,54 +265,15 @@ error:
}
#endif
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{ /* end of list */ }
},
};
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags, Error **errp)
static int raw_open_common(BlockDriverState *bs, const char *filename,
int bdrv_flags, int open_flags)
{
BDRVRawState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int fd, ret;
struct stat st;
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
filename = qemu_opt_get(opts, "filename");
ret = raw_normalize_devicepath(&filename);
if (ret != 0) {
error_setg_errno(errp, -ret, "Could not normalize device path");
goto fail;
return ret;
}
s->open_flags = open_flags;
@@ -393,76 +283,35 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
fd = qemu_open(filename, s->open_flags, 0644);
if (fd < 0) {
ret = -errno;
if (ret == -EROFS) {
if (ret == -EROFS)
ret = -EACCES;
}
goto fail;
return ret;
}
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
qemu_close(fd);
ret = -errno;
error_setg_errno(errp, -ret, "Could not set AIO state");
goto fail;
return -errno;
}
#endif
s->has_discard = true;
s->has_write_zeroes = true;
if (fstat(s->fd, &st) < 0) {
error_setg_errno(errp, errno, "Could not stat file");
goto fail;
}
if (S_ISREG(st.st_mode)) {
s->discard_zeroes = true;
}
if (S_ISBLK(st.st_mode)) {
#ifdef BLKDISCARDZEROES
unsigned int arg;
if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
s->discard_zeroes = true;
}
#endif
#ifdef __linux__
/* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
* not rely on the contents of discarded blocks unless using O_DIRECT.
* Same for BLKZEROOUT.
*/
if (!(bs->open_flags & BDRV_O_NOCACHE)) {
s->discard_zeroes = false;
s->has_write_zeroes = false;
}
#endif
}
s->has_discard = 1;
#ifdef CONFIG_XFS
if (platform_test_xfs_fd(s->fd)) {
s->is_xfs = true;
s->is_xfs = 1;
}
#endif
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
return 0;
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_FILE;
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
return raw_open_common(bs, filename, flags, 0);
}
static int raw_reopen_prepare(BDRVReopenState *state,
@@ -487,7 +336,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
* won't override aio_ctx if aio_ctx is non-NULL */
if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
error_setg(errp, "Could not set AIO state");
return -1;
}
#endif
@@ -539,13 +387,13 @@ static int raw_reopen_prepare(BDRVReopenState *state,
assert(!(raw_s->open_flags & O_CREAT));
raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
if (raw_s->fd == -1) {
error_setg_errno(errp, errno, "Could not reopen file");
ret = -1;
}
}
return ret;
}
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -581,15 +429,23 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
static int raw_refresh_limits(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
raw_probe_alignment(bs);
bs->bl.opt_mem_alignment = s->buf_align;
return 0;
}
/* XXX: use host sector size if necessary with:
#ifdef DIOCGSECTORSIZE
{
unsigned int sectorsize = 512;
if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
sectorsize > bufsize)
bufsize = sectorsize;
}
#endif
#ifdef CONFIG_COCOA
uint32_t blockSize = 512;
if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
bufsize = blockSize;
}
#endif
*/
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -780,23 +636,6 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
}
#ifdef CONFIG_XFS
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = offset;
fl.l_len = bytes;
if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
return -errno;
}
return 0;
}
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
@@ -815,49 +654,13 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
}
#endif
static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
if (s->has_write_zeroes == 0) {
return -ENOTSUP;
}
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKZEROOUT
do {
uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
return 0;
}
} while (errno == EINTR);
ret = -errno;
#endif
} else {
#ifdef CONFIG_XFS
if (s->is_xfs) {
return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
}
#endif
}
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_write_zeroes = false;
ret = -ENOTSUP;
}
return ret;
}
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
BDRVRawState *s = aiocb->bs->opaque;
if (!s->has_discard) {
return -ENOTSUP;
if (s->has_discard == 0) {
return 0;
}
if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
@@ -892,8 +695,8 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
ret == -ENOTTY) {
s->has_discard = false;
ret = -ENOTSUP;
s->has_discard = 0;
ret = 0;
}
return ret;
}
@@ -935,9 +738,6 @@ static int aio_worker(void *arg)
case QEMU_AIO_DISCARD:
ret = handle_aiocb_discard(aiocb);
break;
case QEMU_AIO_WRITE_ZEROES:
ret = handle_aiocb_write_zeroes(aiocb);
break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -948,35 +748,11 @@ static int aio_worker(void *arg)
return ret;
}
static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int type)
{
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
acb->aio_fildes = fd;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit_co(sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
@@ -990,8 +766,7 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
@@ -1234,15 +1009,12 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int result = 0;
int64_t total_size = 0;
strstart(filename, "file:", &filename);
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -1255,20 +1027,90 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
0644);
if (fd < 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not create file");
} else {
if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
}
if (qemu_close(fd) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not close the new file");
}
}
return result;
}
static int try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
off_t *hole, int nb_sectors, int *pnum)
{
#ifdef CONFIG_FIEMAP
BDRVRawState *s = bs->opaque;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
if (s->skip_fiemap) {
return 1;
}
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = FIEMAP_FLAG_SYNC;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
s->skip_fiemap = true;
return 1;
}
if (f.fm.fm_mapped_extents == 0) {
/* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
off_t length = lseek(s->fd, 0, SEEK_END);
*hole = f.fm.fm_start;
*data = MIN(f.fm.fm_start + f.fm.fm_length, length);
} else {
*data = f.fe.fe_logical;
*hole = f.fe.fe_logical + f.fe.fe_length;
}
return 0;
#else
return 1;
#endif
}
static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
off_t *hole, int *pnum)
{
#if defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
*hole = lseek(s->fd, start, SEEK_HOLE);
if (*hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
return 1;
}
if (*hole > start) {
*data = start;
} else {
/* On a hole. We need another syscall to find its end. */
*data = lseek(s->fd, start, SEEK_DATA);
if (*data == -1) {
*data = lseek(s->fd, 0, SEEK_END);
}
}
return 0;
#else
return 1;
#endif
}
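For reference, the lseek() extensions used above behave as follows: SEEK_HOLE seeks to the first hole at or after the given offset, SEEK_DATA to the first data byte, and both fail with ENXIO past end of file. A small illustrative sketch (not part of this diff; offset_is_data is an assumed name) of classifying a single offset with SEEK_HOLE, on platforms that define it:

#include <stdbool.h>
#include <unistd.h>

/* True if the byte at 'start' lies in a data extent rather than a hole. */
static bool offset_is_data(int fd, off_t start)
{
    off_t hole = lseek(fd, start, SEEK_HOLE);

    /* A hole that only begins after 'start' means 'start' itself is data;
     * -1 (for instance ENXIO past EOF) is treated as "not data" here. */
    return hole != (off_t)-1 && hole > start;
}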
/*
* Returns true iff the specified sector is present in the disk image. Drivers
* not implementing the functionality are assumed to not support backing files,
@@ -1284,12 +1126,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
off_t start, data, hole;
int64_t ret;
off_t start, data = 0, hole = 0;
int ret;
ret = fd_open(bs);
if (ret < 0) {
@@ -1297,82 +1139,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
}
start = sector_num * BDRV_SECTOR_SIZE;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
#ifdef CONFIG_FIEMAP
BDRVRawState *s = bs->opaque;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = 0;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
*pnum = nb_sectors;
return ret;
}
if (f.fm.fm_mapped_extents == 0) {
/* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
off_t length = lseek(s->fd, 0, SEEK_END);
hole = f.fm.fm_start;
data = MIN(f.fm.fm_start + f.fm.fm_length, length);
} else {
data = f.fe.fe_logical;
hole = f.fe.fe_logical + f.fe.fe_length;
if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
ret |= BDRV_BLOCK_ZERO;
ret = try_seek_hole(bs, start, &data, &hole, pnum);
if (ret) {
ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
if (ret) {
/* Assume everything is allocated. */
data = 0;
hole = start + nb_sectors * BDRV_SECTOR_SIZE;
}
}
#elif defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
hole = lseek(s->fd, start, SEEK_HOLE);
if (hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
/* Most likely EINVAL. Assume everything is allocated. */
*pnum = nb_sectors;
return ret;
}
if (hole > start) {
data = start;
} else {
/* On a hole. We need another syscall to find its end. */
data = lseek(s->fd, start, SEEK_DATA);
if (data == -1) {
data = lseek(s->fd, 0, SEEK_END);
}
}
#else
data = 0;
hole = start + nb_sectors * BDRV_SECTOR_SIZE;
#endif
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */
*pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
return 1;
} else {
/* On a hole, compute sectors to the beginning of the next extent. */
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
ret &= ~BDRV_BLOCK_DATA;
ret |= BDRV_BLOCK_ZERO;
return 0;
}
return ret;
}
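As a worked illustration of the final clamping (the numbers are chosen only for the example): with start = 0, data = 0 and hole = 1 MiB, a query for nb_sectors = 4096 takes the data branch and sets *pnum = MIN(4096, 1048576 / 512) = 2048, so the caller learns that the first 2048 sectors are allocated and has to issue a further query for the remainder.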
static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
@@ -1385,31 +1171,6 @@ static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD);
}
static int coroutine_fn raw_co_write_zeroes(
BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRawState *s = bs->opaque;
bdi->unallocated_blocks_are_zero = s->discard_zeroes;
bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
return 0;
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@@ -1423,28 +1184,22 @@ static BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe = NULL, /* no probe for protocols */
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1561,13 +1316,10 @@ static int check_hdev_writable(BDRVRawState *s)
return 0;
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
const char *filename = qdict_get_str(options, "filename");
#if defined(__APPLE__) && defined(__MACH__)
if (strstart(filename, "/dev/cdrom", NULL)) {
@@ -1589,7 +1341,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
qemu_close(fd);
}
filename = bsdPath;
qdict_put(options, "filename", qstring_from_str(filename));
}
if ( mediaIterator )
@@ -1609,11 +1360,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
#endif
ret = raw_open_common(bs, options, flags, 0, &local_err);
ret = raw_open_common(bs, filename, flags, 0);
if (ret < 0) {
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
@@ -1621,7 +1369,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
ret = check_hdev_writable(s);
if (ret < 0) {
raw_close(bs);
error_setg_errno(errp, -ret, "The device is not writable");
return ret;
}
}
@@ -1692,7 +1439,6 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData *acb;
ThreadPool *pool;
if (fd_open(bs) < 0)
return NULL;
@@ -1704,8 +1450,7 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
acb->aio_offset = 0;
acb->aio_ioctl_buf = buf;
acb->aio_ioctl_cmd = req;
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1740,28 +1485,7 @@ static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
rc = fd_open(bs);
if (rc < 0) {
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
}
static int hdev_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int hdev_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int ret = 0;
@@ -1777,33 +1501,29 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options,
}
fd = qemu_open(filename, O_WRONLY | O_BINARY);
if (fd < 0) {
ret = -errno;
error_setg_errno(errp, -ret, "Could not open device");
return ret;
}
if (fd < 0)
return -errno;
if (fstat(fd, &stat_buf) < 0) {
if (fstat(fd, &stat_buf) < 0)
ret = -errno;
error_setg_errno(errp, -ret, "Could not stat device");
} else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
error_setg(errp,
"The given file is neither a block nor a character device");
else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
ret = -ENODEV;
} else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
error_setg(errp, "Device is too small");
else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
ret = -ENOSPC;
}
qemu_close(fd);
return ret;
}
static int hdev_has_zero_init(BlockDriverState *bs)
{
return 0;
}
static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = hdev_probe_device,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
@@ -1812,17 +1532,15 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_co_write_zeroes = hdev_co_write_zeroes,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1834,23 +1552,17 @@ static BlockDriver bdrv_host_device = {
};
#ifdef __linux__
static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
if (ret) {
if (local_err) {
error_propagate(errp, local_err);
}
ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
if (ret)
return ret;
}
/* close fd so that we can reopen it as needed */
qemu_close(s->fd);
@@ -1937,7 +1649,6 @@ static BlockDriver bdrv_host_floppy = {
.format_name = "host_floppy",
.protocol_name = "host_floppy",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = floppy_probe_device,
.bdrv_file_open = floppy_open,
.bdrv_close = raw_close,
@@ -1946,15 +1657,14 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -1964,21 +1674,14 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_eject = floppy_eject,
};
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
return raw_open_common(bs, filename, flags, O_NONBLOCK);
}
static int cdrom_probe_device(const char *filename)
@@ -2048,7 +1751,6 @@ static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
.protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = cdrom_probe_device,
.bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
@@ -2057,15 +1759,14 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
@@ -2081,22 +1782,16 @@ static BlockDriver bdrv_host_cdrom = {
#endif /* __linux__ */
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
s->type = FTYPE_CD;
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret) {
if (local_err) {
error_propagate(errp, local_err);
}
ret = raw_open_common(bs, filename, flags, 0);
if (ret)
return ret;
}
/* make sure the door isn't locked at this time */
ioctl(s->fd, CDIOCALLOW);
@@ -2178,7 +1873,6 @@ static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
.protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = cdrom_probe_device,
.bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
@@ -2187,15 +1881,14 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,


@@ -85,7 +85,6 @@ static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
ret_count = 0;
}
if (ret_count != len) {
offset += ret_count;
break;
}
offset += len;
@@ -145,7 +144,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
BlockDriverCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
ThreadPool *pool;
acb->bs = bs;
acb->hfile = hfile;
@@ -159,8 +157,7 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
}
int qemu_ftruncate64(int fd, int64_t length)
@@ -202,35 +199,6 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
DWORD sectorsPerCluster, freeClusters, totalClusters, count;
DISK_GEOMETRY_EX dg;
BOOL status;
if (s->type == FTYPE_CD) {
bs->request_alignment = 2048;
return;
}
if (s->type == FTYPE_HARDDISK) {
status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
NULL, 0, &dg, sizeof(dg), &count, NULL);
if (status != 0) {
bs->request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* try GetDiskFreeSpace too */
}
if (s->drive_path[0]) {
GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
bs->request_alignment = dg.Geometry.BytesPerSector;
}
}
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -251,104 +219,43 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
}
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
/* The filename does not have to be prefixed by the protocol name, since
* "file" is the default protocol; therefore, the return value of this
* function call can be ignored. */
strstart(filename, "file:", &filename);
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{ /* end of list */ }
},
};
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags;
DWORD overlapped;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int ret;
s->type = FTYPE_FILE;
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
filename = qemu_opt_get(opts, "filename");
raw_parse_flags(flags, &access_flags, &overlapped);
if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
aio = win32_aio_init();
if (aio == NULL) {
error_setg(errp, "Could not initialize AIO");
ret = -EINVAL;
goto fail;
return -EINVAL;
}
}
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
} else if (filename[0] == '\\' && filename[1] == '\\') {
s->drive_path[0] = 0;
} else {
/* Relative path. */
char buf[MAX_PATH];
GetCurrentDirectory(MAX_PATH, buf);
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
}
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
if (err == ERROR_ACCESS_DENIED) {
ret = -EACCES;
} else {
ret = -EINVAL;
}
goto fail;
if (err == ERROR_ACCESS_DENIED)
return -EACCES;
return -EINVAL;
}
if (flags & BDRV_O_NATIVE_AIO) {
ret = win32_aio_attach(aio, s->hfile);
int ret = win32_aio_attach(aio, s->hfile);
if (ret < 0) {
CloseHandle(s->hfile);
error_setg_errno(errp, -ret, "Could not enable AIO");
goto fail;
return ret;
}
s->aio = aio;
}
raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
return 0;
}
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
@@ -475,14 +382,11 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return st.st_size;
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int64_t total_size = 0;
strstart(filename, "file:", &filename);
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
@@ -493,10 +397,8 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (fd < 0) {
error_setg_errno(errp, errno, "Could not create file");
if (fd < 0)
return -EIO;
}
set_sparse(fd);
ftruncate(fd, total_size * 512);
qemu_close(fd);
@@ -516,12 +418,9 @@ static BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@@ -593,35 +492,16 @@ static int hdev_probe_device(const char *filename)
return 0;
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags, create_flags;
int ret = 0;
DWORD overlapped;
char device_name[64];
Error *local_err = NULL;
const char *filename;
QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
&error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto done;
}
filename = qemu_opt_get(opts, "filename");
if (strstart(filename, "/dev/cdrom", NULL)) {
if (find_cdrom(device_name, sizeof(device_name)) < 0) {
error_setg(errp, "Could not open CD-ROM drive");
ret = -ENOENT;
goto done;
}
if (find_cdrom(device_name, sizeof(device_name)) < 0)
return -ENOENT;
filename = device_name;
} else {
/* transform drive letters into device name */
@@ -644,36 +524,32 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
if (err == ERROR_ACCESS_DENIED) {
ret = -EACCES;
} else {
ret = -EINVAL;
}
error_setg_errno(errp, -ret, "Could not open device");
goto done;
if (err == ERROR_ACCESS_DENIED)
return -EACCES;
return -1;
}
return 0;
}
done:
qemu_opts_del(opts);
return ret;
static int hdev_has_zero_init(BlockDriverState *bs)
{
return 0;
}
static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_probe_device = hdev_probe_device,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
.bdrv_getlength = raw_getlength,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
};

block/raw.c (new file, 155 lines)

@@ -0,0 +1,155 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
static int raw_open(BlockDriverState *bs, int flags)
{
bs->sg = bs->file->sg;
return 0;
}
/* We have nothing to do for raw reopen, stubs just return
* success */
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
return 0;
}
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
}
static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
}
static void raw_close(BlockDriverState *bs)
{
}
static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file, offset);
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
return 1; /* everything can be opened as raw image */
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
}
static int raw_is_inserted(BlockDriverState *bs)
{
return bdrv_is_inserted(bs->file);
}
static int raw_media_changed(BlockDriverState *bs)
{
return bdrv_media_changed(bs->file);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_eject(bs->file, eject_flag);
}
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_lock_medium(bs->file, locked);
}
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
return bdrv_ioctl(bs->file, req, buf);
}
static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
}
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
return bdrv_create_file(filename, options);
}
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ NULL }
};
static int raw_has_zero_init(BlockDriverState *bs)
{
return bdrv_has_zero_init(bs->file);
}
static BlockDriver bdrv_raw = {
.format_name = "raw",
/* It's really 0, but we need to make g_malloc() happy */
.instance_size = 1,
.bdrv_open = raw_open,
.bdrv_close = raw_close,
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,
.bdrv_media_changed = raw_media_changed,
.bdrv_eject = raw_eject,
.bdrv_lock_medium = raw_lock_medium,
.bdrv_ioctl = raw_ioctl,
.bdrv_aio_ioctl = raw_aio_ioctl,
.bdrv_create = raw_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = raw_has_zero_init,
};
static void bdrv_raw_init(void)
{
bdrv_register(&bdrv_raw);
}
block_init(bdrv_raw_init);


@@ -1,206 +0,0 @@
/* BlockDriver implementation for "raw"
*
* Copyright (C) 2010, 2013, Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
* Author:
* Laszlo Ersek <lersek@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "block/block_int.h"
#include "qemu/option.h"
static QEMUOptionParameter raw_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ 0 }
};
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
return 0;
}
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
}
static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
}
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
*pnum = nb_sectors;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
{
return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
return bdrv_co_discard(bs->file, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
return bdrv_get_info(bs->file, bdi);
}
static int raw_refresh_limits(BlockDriverState *bs)
{
bs->bl = bs->file->bl;
return 0;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
return bdrv_truncate(bs->file, offset);
}
static int raw_is_inserted(BlockDriverState *bs)
{
return bdrv_is_inserted(bs->file);
}
static int raw_media_changed(BlockDriverState *bs)
{
return bdrv_media_changed(bs->file);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_eject(bs->file, eject_flag);
}
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_lock_medium(bs->file, locked);
}
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
return bdrv_ioctl(bs->file, req, buf);
}
static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
}
static int raw_has_zero_init(BlockDriverState *bs)
{
return bdrv_has_zero_init(bs->file);
}
static int raw_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
{
Error *local_err = NULL;
int ret;
ret = bdrv_create_file(filename, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->sg;
return 0;
}
static void raw_close(BlockDriverState *bs)
{
}
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
/* smallest possible positive score so that raw is used if and only if no
* other block driver works
*/
return 1;
}
static BlockDriver bdrv_raw = {
.format_name = "raw",
.bdrv_probe = &raw_probe,
.bdrv_reopen_prepare = &raw_reopen_prepare,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev = &raw_co_writev,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_get_info = &raw_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
.bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_options = &raw_create_options[0],
.bdrv_has_zero_init = &raw_has_zero_init
};
static void bdrv_raw_init(void)
{
bdrv_register(&bdrv_raw);
}
block_init(bdrv_raw_init);


@@ -95,13 +95,19 @@ typedef struct RADOSCB {
#define RBD_FD_WRITE 1
typedef struct BDRVRBDState {
int fds[2];
rados_t cluster;
rados_ioctx_t io_ctx;
rbd_image_t image;
char name[RBD_MAX_IMAGE_NAME_SIZE];
int qemu_aio_count;
char *snap;
int event_reader_pos;
RADOSCB *event_rcb;
} BDRVRBDState;
static void rbd_aio_bh_cb(void *opaque);
static int qemu_rbd_next_tok(char *dst, int dst_len,
char *src, char delim,
const char *name,
@@ -282,8 +288,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
return ret;
}
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options)
{
int64_t bytes = 0;
int64_t objsize;
@@ -364,8 +369,9 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
}
/*
* This aio completion is being called from rbd_finish_bh() and runs in qemu
* BH context.
* This aio completion is being called from qemu_rbd_aio_event_reader()
* and runs in qemu context. It schedules a bh, but just in case the aio
* was not cancelled before.
*/
static void qemu_rbd_complete_aio(RADOSCB *rcb)
{
@@ -395,37 +401,47 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
acb->ret = r;
}
}
/* Note that acb->bh can be NULL in case where the aio was cancelled */
acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
qemu_bh_schedule(acb->bh);
g_free(rcb);
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
acb->status = 0;
if (!acb->cancelled) {
qemu_aio_release(acb);
}
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "rbd",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
{ /* end of list */ }
},
};
/*
* aio fd read handler. It runs in the qemu context and calls the
* completion handling of completed rados aio operations.
*/
static void qemu_rbd_aio_event_reader(void *opaque)
{
BDRVRBDState *s = opaque;
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
ssize_t ret;
do {
char *p = (char *)&s->event_rcb;
/* now read the rcb pointer that was sent from a non qemu thread */
ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
sizeof(s->event_rcb) - s->event_reader_pos);
if (ret > 0) {
s->event_reader_pos += ret;
if (s->event_reader_pos == sizeof(s->event_rcb)) {
s->event_reader_pos = 0;
qemu_rbd_complete_aio(s->event_rcb);
s->qemu_aio_count--;
}
}
} while (ret < 0 && errno == EINTR);
}
static int qemu_rbd_aio_flush_cb(void *opaque)
{
BDRVRBDState *s = opaque;
return (s->qemu_aio_count > 0);
}
static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRBDState *s = bs->opaque;
char pool[RBD_MAX_POOL_NAME_SIZE];
@@ -433,35 +449,20 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int r;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
qerror_report_err(local_err);
error_free(local_err);
qemu_opts_del(opts);
return -EINVAL;
}
filename = qemu_opt_get(opts, "filename");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
s->name, sizeof(s->name),
conf, sizeof(conf)) < 0) {
r = -EINVAL;
goto failed_opts;
return -EINVAL;
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
error_report("error initializing");
goto failed_opts;
return r;
}
s->snap = NULL;
@@ -515,16 +516,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = (s->snap != NULL);
qemu_opts_del(opts);
s->event_reader_pos = 0;
r = qemu_pipe(s->fds);
if (r < 0) {
error_report("error opening eventfd");
goto failed;
}
fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
NULL, qemu_rbd_aio_flush_cb, s);
return 0;
failed:
rbd_close(s->image);
failed_open:
rados_ioctx_destroy(s->io_ctx);
failed_shutdown:
rados_shutdown(s->cluster);
g_free(s->snap);
failed_opts:
qemu_opts_del(opts);
return r;
}
@@ -532,6 +544,10 @@ static void qemu_rbd_close(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
close(s->fds[0]);
close(s->fds[1]);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL);
rbd_close(s->image);
rados_ioctx_destroy(s->io_ctx);
g_free(s->snap);
@@ -559,11 +575,34 @@ static const AIOCBInfo rbd_aiocb_info = {
.cancel = qemu_rbd_aio_cancel,
};
static void rbd_finish_bh(void *opaque)
static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
{
RADOSCB *rcb = opaque;
qemu_bh_delete(rcb->acb->bh);
qemu_rbd_complete_aio(rcb);
int ret = 0;
while (1) {
fd_set wfd;
int fd = s->fds[RBD_FD_WRITE];
/* send the op pointer to the qemu thread that is responsible
for the aio/op completion. Must do it in a qemu thread context */
ret = write(fd, (void *)&rcb, sizeof(rcb));
if (ret >= 0) {
break;
}
if (errno == EINTR) {
continue;
}
if (errno != EAGAIN) {
break;
}
FD_ZERO(&wfd);
FD_SET(fd, &wfd);
do {
ret = select(fd + 1, NULL, &wfd, NULL, NULL);
} while (ret < 0 && errno == EINTR);
}
return ret;
}
/*
@@ -571,18 +610,40 @@ static void rbd_finish_bh(void *opaque)
*
* Note: this function is being called from a non qemu thread so
* we need to be careful about what we do here. Generally we only
* schedule a BH, and do the rest of the io completion handling
* from rbd_finish_bh() which runs in a qemu context.
* write to the block notification pipe, and do the rest of the
* io completion handling from qemu_rbd_aio_event_reader() which
* runs in a qemu context.
*/
static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
{
RBDAIOCB *acb = rcb->acb;
int ret;
rcb->ret = rbd_aio_get_return_value(c);
rbd_aio_release(c);
ret = qemu_rbd_send_pipe(rcb->s, rcb);
if (ret < 0) {
error_report("failed writing to acb->s->fds");
g_free(rcb);
}
}
acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
qemu_bh_schedule(acb->bh);
/* Callback when all queued rbd_aio requests are complete */
static void rbd_aio_bh_cb(void *opaque)
{
RBDAIOCB *acb = opaque;
if (acb->cmd == RBD_AIO_READ) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
qemu_bh_delete(acb->bh);
acb->bh = NULL;
acb->status = 0;
if (!acb->cancelled) {
qemu_aio_release(acb);
}
}
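Taken together, the pipe-based completion path visible in this hunk works as follows: the librbd callback rbd_finish_aiocb() runs in a non-QEMU thread and only pushes the RADOSCB pointer into the notification pipe via qemu_rbd_send_pipe(); qemu_rbd_aio_event_reader() reassembles that pointer in the QEMU thread and hands it to qemu_rbd_complete_aio(), which schedules rbd_aio_bh_cb() as a bottom half; the bottom half then copies the bounce buffer for reads, invokes the guest completion callback and releases the ACB unless the request was cancelled.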
static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -648,6 +709,8 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
off = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
s->qemu_aio_count++; /* All the RADOSCB */
rcb = g_malloc(sizeof(RADOSCB));
rcb->done = 0;
rcb->acb = acb;
@@ -684,6 +747,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
failed:
g_free(rcb);
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
@@ -807,31 +871,12 @@ static int qemu_rbd_snap_create(BlockDriverState *bs,
}
static int qemu_rbd_snap_remove(BlockDriverState *bs,
const char *snapshot_id,
const char *snapshot_name,
Error **errp)
const char *snapshot_name)
{
BDRVRBDState *s = bs->opaque;
int r;
if (!snapshot_name) {
error_setg(errp, "rbd need a valid snapshot name");
return -EINVAL;
}
/* If snapshot_id is specified, it must be equal to name, see
qemu_rbd_snap_list() */
if (snapshot_id && strcmp(snapshot_id, snapshot_name)) {
error_setg(errp,
"rbd do not support snapshot id, it should be NULL or "
"equal to snapshot name");
return -EINVAL;
}
r = rbd_snap_remove(s->image, snapshot_name);
if (r < 0) {
error_setg_errno(errp, -r, "Failed to remove the snapshot");
}
return r;
}
@@ -857,7 +902,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
do {
snaps = g_malloc(sizeof(*snaps) * max_snaps);
snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
if (snap_count <= 0) {
if (snap_count < 0) {
g_free(snaps);
}
} while (snap_count == -ERANGE);
@@ -881,7 +926,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
sn_info->vm_clock_nsec = 0;
}
rbd_snap_list_end(snaps);
g_free(snaps);
done:
*psn_tab = sn_tab;
@@ -917,11 +961,9 @@ static QEMUOptionParameter qemu_rbd_create_options[] = {
static BlockDriver bdrv_rbd = {
.format_name = "rbd",
.instance_size = sizeof(BDRVRBDState),
.bdrv_needs_filename = true,
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_options = qemu_rbd_create_options,
.bdrv_getlength = qemu_rbd_getlength,

(File diff suppressed because it is too large.)


@@ -1,353 +0,0 @@
/*
* Block layer snapshot related functions
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "block/snapshot.h"
#include "block/block_int.h"
QemuOptsList internal_snapshot_opts = {
.name = "snapshot",
.head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head),
.desc = {
{
.name = SNAPSHOT_OPT_ID,
.type = QEMU_OPT_STRING,
.help = "snapshot id"
},{
.name = SNAPSHOT_OPT_NAME,
.type = QEMU_OPT_STRING,
.help = "snapshot name"
},{
/* end of list */
}
},
};
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name)
{
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i, ret;
ret = -ENOENT;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
return ret;
}
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
*sn_info = *sn;
ret = 0;
break;
}
}
g_free(sn_tab);
return ret;
}
/**
* Look up an internal snapshot by @id and @name.
* @bs: block device to search
* @id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @sn_info: location to store information on the snapshot found
* @errp: location to store error, will be set only for exception
*
* This function will traverse snapshot list in @bs to search the matching
* one, @id and @name are the matching condition:
* If both @id and @name are specified, find the first one with id @id and
* name @name.
* If only @id is specified, find the first one with id @id.
* If only @name is specified, find the first one with name @name.
* if none is specified, abort().
*
* Returns: true when a snapshot is found and @sn_info will be filled, false
* when error or not found. If all operation succeed but no matching one is
* found, @errp will NOT be set.
*/
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name,
QEMUSnapshotInfo *sn_info,
Error **errp)
{
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i;
bool ret = false;
assert(id || name);
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
error_setg_errno(errp, -nb_sns, "Failed to get a snapshot list");
return false;
} else if (nb_sns == 0) {
return false;
}
if (id && name) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, id) && !strcmp(sn->name, name)) {
*sn_info = *sn;
ret = true;
break;
}
}
} else if (id) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, id)) {
*sn_info = *sn;
ret = true;
break;
}
}
} else if (name) {
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->name, name)) {
*sn_info = *sn;
ret = true;
break;
}
}
}
g_free(sn_tab);
return ret;
}
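A hypothetical caller of the lookup helper above (illustrative only; the snapshot name and the reporting code around the call are made up for this example):

QEMUSnapshotInfo sn;
Error *local_err = NULL;

if (bdrv_snapshot_find_by_id_and_name(bs, NULL, "before-upgrade",
                                      &sn, &local_err)) {
    /* sn now holds the snapshot's id, name and timestamps */
} else if (local_err) {
    qerror_report_err(local_err);
    error_free(local_err);
} else {
    /* no error was set: the snapshot simply does not exist */
}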
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) {
return bdrv_can_snapshot(bs->file);
}
return 0;
}
return 1;
}
int bdrv_snapshot_create(BlockDriverState *bs,
QEMUSnapshotInfo *sn_info)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_create) {
return drv->bdrv_snapshot_create(bs, sn_info);
}
if (bs->file) {
return bdrv_snapshot_create(bs->file, sn_info);
}
return -ENOTSUP;
}
int bdrv_snapshot_goto(BlockDriverState *bs,
const char *snapshot_id)
{
BlockDriver *drv = bs->drv;
int ret, open_ret;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_goto) {
return drv->bdrv_snapshot_goto(bs, snapshot_id);
}
if (bs->file) {
drv->bdrv_close(bs);
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
if (open_ret < 0) {
bdrv_unref(bs->file);
bs->drv = NULL;
return open_ret;
}
return ret;
}
return -ENOTSUP;
}
/**
* Delete an internal snapshot by @snapshot_id and @name.
* @bs: block device used in the operation
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
*
* If both @snapshot_id and @name are specified, delete the first one with
* id @snapshot_id and name @name.
* If only @snapshot_id is specified, delete the first one with id
* @snapshot_id.
* If only @name is specified, delete the first one with name @name.
* if none is specified, return -EINVAL.
*
* Returns: 0 on success, -errno on failure. If @bs is not inserted, return
* -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs
* does not support internal snapshot deletion, return -ENOTSUP. If @bs does
* not support parameter @snapshot_id or @name, or one of them is not correctly
* specified, return -EINVAL. If @bs can't find one matching @id and @name,
* return -ENOENT. If @errp != NULL, it will always be filled with error
* message on failure.
*/
int bdrv_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
BlockDriver *drv = bs->drv;
if (!drv) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
}
if (bs->file) {
return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
}
error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
drv->format_name, bdrv_get_device_name(bs),
"internal snapshot deletion");
return -ENOTSUP;
}
void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
const char *id_or_name,
Error **errp)
{
int ret;
Error *local_err = NULL;
ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
local_err = NULL;
ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
}
if (ret < 0) {
error_propagate(errp, local_err);
}
}
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_list) {
return drv->bdrv_snapshot_list(bs, psn_info);
}
if (bs->file) {
return bdrv_snapshot_list(bs->file, psn_info);
}
return -ENOTSUP;
}
/**
* Temporarily load an internal snapshot by @snapshot_id and @name.
* @bs: block device used in the operation
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
*
* If both @snapshot_id and @name are specified, load the first one with
* id @snapshot_id and name @name.
* If only @snapshot_id is specified, load the first one with id
* @snapshot_id.
* If only @name is specified, load the first one with name @name.
 * If neither is specified, return -EINVAL.
*
 * Returns: 0 on success, -errno on failure. If @bs is not inserted, return
 * -ENOMEDIUM. If @bs is not read-only, return -EINVAL. If @bs does not support
 * internal snapshots, return -ENOTSUP. If no snapshot matching @snapshot_id and
 * @name can be found, return -ENOENT. If @errp != NULL, it will always be
 * filled with an error message on failure.
*/
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
BlockDriver *drv = bs->drv;
if (!drv) {
error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
if (!bs->read_only) {
error_setg(errp, "Device is not readonly");
return -EINVAL;
}
if (drv->bdrv_snapshot_load_tmp) {
return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
}
error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
drv->format_name, bdrv_get_device_name(bs),
"temporarily load internal snapshot");
return -ENOTSUP;
}
int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
const char *id_or_name,
Error **errp)
{
int ret;
Error *local_err = NULL;
ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
local_err = NULL;
ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
}
if (local_err) {
error_propagate(errp, local_err);
}
return ret;
}
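/* Illustrative only: loading a temporary internal snapshot on a read-only
 * device, accepting either a snapshot ID or a snapshot name.  The wrapper
 * below is hypothetical caller code, not part of this tree. */
static int example_load_tmp_snapshot(BlockDriverState *bs, const char *id_or_name)
{
    Error *err = NULL;
    int ret;

    ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, id_or_name, &err);
    if (ret < 0) {
        fprintf(stderr, "%s\n", error_get_pretty(err));
        error_free(err);
    }
    return ret;
}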

File diff suppressed because it is too large

block/stream.c

@@ -57,11 +57,6 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
BlockDriverState *intermediate;
intermediate = top->backing_hd;
/* Must assign before bdrv_delete() to prevent traversing dangling pointer
* while we delete backing image instances.
*/
top->backing_hd = base;
while (intermediate) {
BlockDriverState *unused;
@@ -73,10 +68,9 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
unused = intermediate;
intermediate = intermediate->backing_hd;
unused->backing_hd = NULL;
bdrv_unref(unused);
bdrv_delete(unused);
}
bdrv_refresh_limits(top);
top->backing_hd = base;
}
static void coroutine_fn stream_run(void *opaque)
@@ -90,11 +84,6 @@ static void coroutine_fn stream_run(void *opaque)
int n = 0;
void *buf;
if (!bs->backing_hd) {
block_job_completed(&s->common, 0);
return;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
block_job_completed(&s->common, s->common.len);
@@ -121,22 +110,21 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
copy = false;
ret = bdrv_is_allocated(bs, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
ret = bdrv_co_is_allocated(bs, sector_num,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
if (ret == 1) {
/* Allocated in the top, no need to copy. */
} else if (ret >= 0) {
copy = false;
} else {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
ret = bdrv_is_allocated_above(bs->backing_hd, base,
sector_num, n, &n);
ret = bdrv_co_is_allocated_above(bs->backing_hd, base,
sector_num, n, &n);
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
@@ -146,7 +134,7 @@ wait:
copy = (ret == 1);
}
trace_stream_one_iteration(s, sector_num, n, ret);
if (copy) {
if (ret >= 0 && copy) {
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, n);
if (delay_ns > 0) {
@@ -210,9 +198,9 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static const BlockJobDriver stream_job_driver = {
static BlockJobType stream_job_type = {
.instance_size = sizeof(StreamBlockJob),
.job_type = BLOCK_JOB_TYPE_STREAM,
.job_type = "stream",
.set_speed = stream_set_speed,
};
@@ -231,7 +219,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
if (!s) {
return;
}

block/tar.c (new file, 365 lines)

@@ -0,0 +1,365 @@
/*
* Tar block driver
*
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
// #define DEBUG
#ifdef DEBUG
#define dprintf(fmt, ...) do { printf("tar: " fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) do { } while (0)
#endif
#define SECTOR_SIZE 512
#define POSIX_TAR_MAGIC "ustar"
#define OFFS_LENGTH 0x7c
#define OFFS_TYPE 0x9c
#define OFFS_MAGIC 0x101
#define OFFS_S_SP 0x182
#define OFFS_S_EXT 0x1e2
#define OFFS_S_LENGTH 0x1e3
#define OFFS_SX_EXT 0x1f8
typedef struct SparseCache {
uint64_t start;
uint64_t end;
} SparseCache;
typedef struct BDRVTarState {
BlockDriverState *hd;
size_t file_sec;
uint64_t file_len;
SparseCache *sparse;
int sparse_num;
uint64_t last_end;
char longfile[2048];
} BDRVTarState;
static int tar_probe(const uint8_t *buf, int buf_size, const char *filename)
{
if (buf_size < OFFS_MAGIC + 5)
return 0;
/* we only support newer tar */
if (!strncmp((char*)buf + OFFS_MAGIC, POSIX_TAR_MAGIC, 5))
return 100;
return 0;
}
static int str_ends(char *str, const char *end)
{
int end_len = strlen(end);
int str_len = strlen(str);
if (str_len < end_len)
return 0;
return !strncmp(str + str_len - end_len, end, end_len);
}
static int is_target_file(BlockDriverState *bs, char *filename,
char *header)
{
int retval = 0;
if (str_ends(filename, ".raw"))
retval = 1;
if (str_ends(filename, ".qcow"))
retval = 1;
if (str_ends(filename, ".qcow2"))
retval = 1;
if (str_ends(filename, ".vmdk"))
retval = 1;
if (retval &&
(header[OFFS_TYPE] != '0') &&
(header[OFFS_TYPE] != 'S')) {
retval = 0;
}
dprintf("does filename %s match? %s\n", filename, retval ? "yes" : "no");
/* make sure we're not using this name again */
filename[0] = '\0';
return retval;
}
static uint64_t tar2u64(char *ptr)
{
uint64_t retval;
char oldend = ptr[12];
ptr[12] = '\0';
if (*ptr & 0x80) {
/* XXX we only support files up to 64 bit length */
retval = be64_to_cpu(*(uint64_t *)(ptr+4));
dprintf("Convert %lx -> %#lx\n", *(uint64_t*)(ptr+4), retval);
} else {
retval = strtol(ptr, NULL, 8);
dprintf("Convert %s -> %#lx\n", ptr, retval);
}
ptr[12] = oldend;
return retval;
}
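/* Worked example (values are hypothetical): the 12-byte ASCII field
 * "00000001750\0" is octal and decodes to 1000 (0x3e8) bytes, while a field
 * whose first byte has the top bit set, e.g.
 * { 0x80, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x10, 0x00 }, is GNU base-256:
 * bytes 4..11 are read as a big-endian 64-bit value, giving 0x1000 here. */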
static void tar_sparse(BDRVTarState *s, uint64_t offs, uint64_t len)
{
SparseCache *sparse;
if (!len)
return;
if (!(offs - s->last_end)) {
s->last_end += len;
return;
}
if (s->last_end > offs)
return;
dprintf("Last chunk until %lx new chunk at %lx\n", s->last_end, offs);
s->sparse = g_realloc(s->sparse, (s->sparse_num + 1) * sizeof(SparseCache));
sparse = &s->sparse[s->sparse_num];
sparse->start = s->last_end;
sparse->end = offs;
s->last_end = offs + len;
s->sparse_num++;
dprintf("Sparse at %lx end=%lx\n", sparse->start,
sparse->end);
}
static int tar_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVTarState *s = bs->opaque;
char header[SECTOR_SIZE];
char *real_file = header;
char *magic;
const char *fname = filename;
size_t header_offs = 0;
int ret;
if (!strncmp(filename, "tar://", 6))
fname += 6;
else if (!strncmp(filename, "tar:", 4))
fname += 4;
ret = bdrv_file_open(&s->hd, fname, flags);
if (ret < 0)
return ret;
/* Search the file for an image */
do {
/* tar header */
if (bdrv_pread(s->hd, header_offs, header, SECTOR_SIZE) != SECTOR_SIZE)
goto fail;
if ((header_offs > 1) && !header[0]) {
fprintf(stderr, "Tar: No image file found in archive\n");
goto fail;
}
magic = &header[OFFS_MAGIC];
if (strncmp(magic, POSIX_TAR_MAGIC, 5)) {
fprintf(stderr, "Tar: Invalid magic: %s\n", magic);
goto fail;
}
dprintf("file type: %c\n", header[OFFS_TYPE]);
/* file length */
s->file_len = (tar2u64(&header[OFFS_LENGTH]) + (SECTOR_SIZE - 1)) &
~(SECTOR_SIZE - 1);
s->file_sec = (header_offs / SECTOR_SIZE) + 1;
header_offs += s->file_len + SECTOR_SIZE;
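        /* A GNU 'L' entry carries the real (long) file name of the *next*
         * header in its data block; remember it for the following iteration. */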
if (header[OFFS_TYPE] == 'L') {
bdrv_pread(s->hd, header_offs - s->file_len, s->longfile,
sizeof(s->longfile));
s->longfile[sizeof(s->longfile)-1] = '\0';
real_file = header;
} else if (s->longfile[0]) {
real_file = s->longfile;
} else {
real_file = header;
}
} while(!is_target_file(bs, real_file, header));
/* We found an image! */
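    /* GNU sparse entry ('S'): collect the data/hole map from the header (and
     * any extended sparse headers) so reads can later be redirected around
     * the holes. */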
if (header[OFFS_TYPE] == 'S') {
uint8_t isextended;
int i;
for (i = OFFS_S_SP; i < (OFFS_S_SP + (4 * 24)); i += 24)
tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
s->file_len = tar2u64(&header[OFFS_S_LENGTH]);
isextended = header[OFFS_S_EXT];
while (isextended) {
if (bdrv_pread(s->hd, s->file_sec * SECTOR_SIZE, header,
SECTOR_SIZE) != SECTOR_SIZE)
goto fail;
for (i = 0; i < (21 * 24); i += 24)
tar_sparse(s, tar2u64(&header[i]), tar2u64(&header[i+12]));
isextended = header[OFFS_SX_EXT];
s->file_sec++;
}
tar_sparse(s, s->file_len, 1);
}
return 0;
fail:
fprintf(stderr, "Tar: Error opening file\n");
bdrv_delete(s->hd);
return -EINVAL;
}
typedef struct TarAIOCB {
BlockDriverAIOCB common;
QEMUBH *bh;
} TarAIOCB;
/* This callback gets invoked when we have pure sparseness */
static void tar_sparse_cb(void *opaque)
{
TarAIOCB *acb = (TarAIOCB *)opaque;
acb->common.cb(acb->common.opaque, 0);
qemu_bh_delete(acb->bh);
qemu_aio_release(acb);
}
static void tar_aio_cancel(BlockDriverAIOCB *blockacb)
{
}
static AIOCBInfo tar_aiocb_info = {
.aiocb_size = sizeof(TarAIOCB),
.cancel = tar_aio_cancel,
};
/* This is where we get a request from a caller to read something */
static BlockDriverAIOCB *tar_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVTarState *s = bs->opaque;
SparseCache *sparse;
int64_t sec_file = sector_num + s->file_sec;
int64_t start = sector_num * SECTOR_SIZE;
int64_t end = start + (nb_sectors * SECTOR_SIZE);
int i;
TarAIOCB *acb;
for (i = 0; i < s->sparse_num; i++) {
sparse = &s->sparse[i];
if (sparse->start > end) {
/* We expect the sparse cache entries to be sorted by increasing start offset */
break;
} else if ((sparse->start < start) && (sparse->end <= start)) {
/* sparse before our offset */
sec_file -= (sparse->end - sparse->start) / SECTOR_SIZE;
} else if ((sparse->start <= start) && (sparse->end >= end)) {
/* all our sectors are sparse */
char *buf = g_malloc0(nb_sectors * SECTOR_SIZE);
acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
g_free(buf);
acb->bh = qemu_bh_new(tar_sparse_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
} else if (((sparse->start >= start) && (sparse->start < end)) ||
((sparse->end >= start) && (sparse->end < end))) {
/* we're semi-sparse (worst case) */
/* let's go synchronous and read all sectors individually */
char *buf = g_malloc(nb_sectors * SECTOR_SIZE);
uint64_t offs;
for (offs = 0; offs < (nb_sectors * SECTOR_SIZE);
offs += SECTOR_SIZE) {
bdrv_pread(bs, (sector_num * SECTOR_SIZE) + offs,
buf + offs, SECTOR_SIZE);
}
qemu_iovec_from_buf(qiov, 0, buf, nb_sectors * SECTOR_SIZE);
acb = qemu_aio_get(&tar_aiocb_info, bs, cb, opaque);
acb->bh = qemu_bh_new(tar_sparse_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
}
return bdrv_aio_readv(s->hd, sec_file, qiov, nb_sectors,
cb, opaque);
}
static void tar_close(BlockDriverState *bs)
{
dprintf("Close\n");
}
static int64_t tar_getlength(BlockDriverState *bs)
{
BDRVTarState *s = bs->opaque;
dprintf("getlength -> %ld\n", s->file_len);
return s->file_len;
}
static BlockDriver bdrv_tar = {
.format_name = "tar",
.protocol_name = "tar",
.instance_size = sizeof(BDRVTarState),
.bdrv_file_open = tar_open,
.bdrv_close = tar_close,
.bdrv_getlength = tar_getlength,
.bdrv_probe = tar_probe,
.bdrv_aio_readv = tar_aio_readv,
};
static void tar_block_init(void)
{
bdrv_register(&bdrv_tar);
}
block_init(tar_block_init);
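/* Illustrative usage (hypothetical paths, not from this series): the driver
 * registers the "tar" protocol, so a disk image packed inside an archive can
 * be attached read-only, e.g.
 *
 *     qemu-system-x86_64 -drive file=tar:/isos/appliance.tar,readonly=on
 *
 * Only members ending in .raw, .qcow, .qcow2 or .vmdk are considered, per
 * is_target_file(), and tar_probe() lets qemu-img pick the format by magic. */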

block/vdi.c

@@ -120,6 +120,11 @@ typedef unsigned char uuid_t[16];
#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
/* max blocks in image is (0xffffffff / 4) */
#define VDI_BLOCKS_IN_IMAGE_MAX 0x3fffffff
#define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
(uint64_t)DEFAULT_CLUSTER_SIZE)
#if !defined(CONFIG_UUID)
static inline void uuid_generate(uuid_t out)
{
@@ -165,7 +170,7 @@ typedef struct {
uuid_t uuid_link;
uuid_t uuid_parent;
uint64_t unused2[7];
} QEMU_PACKED VdiHeader;
} VdiHeader;
typedef struct {
/* The block map entries are little endian (even in memory). */
@@ -331,7 +336,6 @@ static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
logout("\n");
bdi->cluster_size = s->block_size;
bdi->vm_state_offset = 0;
bdi->unallocated_blocks_are_zero = true;
return 0;
}
@@ -365,8 +369,7 @@ static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
return result;
}
static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vdi_open(BlockDriverState *bs, int flags)
{
BDRVVdiState *s = bs->opaque;
VdiHeader header;
@@ -385,6 +388,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
vdi_header_print(&header);
#endif
if (header.disk_size > VDI_DISK_SIZE_MAX) {
logout("Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")\n",
header.disk_size, VDI_DISK_SIZE_MAX);
ret = -ENOTSUP;
goto fail;
}
if (header.disk_size % SECTOR_SIZE != 0) {
/* 'VBoxManage convertfromraw' can create images with odd disk sizes.
We accept them but round the disk size to the next multiple of
@@ -395,50 +406,49 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
}
if (header.signature != VDI_SIGNATURE) {
error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature);
ret = -EINVAL;
logout("bad vdi signature %08x\n", header.signature);
ret = -EMEDIUMTYPE;
goto fail;
} else if (header.version != VDI_VERSION_1_1) {
error_setg(errp, "unsupported VDI image (version %u.%u)",
header.version >> 16, header.version & 0xffff);
logout("unsupported version %u.%u\n",
header.version >> 16, header.version & 0xffff);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_bmap % SECTOR_SIZE != 0) {
/* We only support block maps which start on a sector boundary. */
error_setg(errp, "unsupported VDI image (unaligned block map offset "
"0x%x)", header.offset_bmap);
logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_data % SECTOR_SIZE != 0) {
/* We only support data blocks which start on a sector boundary. */
error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)",
header.offset_data);
logout("unsupported data offset 0x%x B\n", header.offset_data);
ret = -ENOTSUP;
goto fail;
} else if (header.sector_size != SECTOR_SIZE) {
error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
header.sector_size, SECTOR_SIZE);
logout("unsupported sector size %u B\n", header.sector_size);
ret = -ENOTSUP;
goto fail;
} else if (header.block_size != 1 * MiB) {
error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
header.block_size, 1 * MiB);
} else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
logout("unsupported VDI image (block size %u is not %u)\n",
header.block_size, DEFAULT_CLUSTER_SIZE);
ret = -ENOTSUP;
goto fail;
} else if (header.disk_size >
(uint64_t)header.blocks_in_image * header.block_size) {
error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", "
"image bitmap has room for %" PRIu64 ")",
header.disk_size,
(uint64_t)header.blocks_in_image * header.block_size);
logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_link)) {
error_setg(errp, "unsupported VDI image (non-NULL link UUID)");
logout("link uuid != 0, unsupported\n");
ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_parent)) {
error_setg(errp, "unsupported VDI image (non-NULL parent UUID)");
logout("parent uuid != 0, unsupported\n");
ret = -ENOTSUP;
goto fail;
} else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) {
logout("unsupported VDI image (too many blocks %u, max is %u)\n",
header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX);
ret = -ENOTSUP;
goto fail;
}
@@ -479,7 +489,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
return 0;
}
static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
@@ -488,23 +498,12 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
size_t sector_in_block = sector_num % s->block_sectors;
int n_sectors = s->block_sectors - sector_in_block;
uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
uint64_t offset;
int result;
logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
*pnum = n_sectors;
result = VDI_IS_ALLOCATED(bmap_entry);
if (!result) {
return 0;
}
offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
sector_in_block * SECTOR_SIZE;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
return VDI_IS_ALLOCATED(bmap_entry);
}
static int vdi_co_read(BlockDriverState *bs,
@@ -653,8 +652,7 @@ static int vdi_co_write(BlockDriverState *bs,
return ret;
}
static int vdi_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int vdi_create(const char *filename, QEMUOptionParameter *options)
{
int fd;
int result = 0;
@@ -689,11 +687,20 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
options++;
}
if (bytes > VDI_DISK_SIZE_MAX) {
result = -ENOTSUP;
logout("Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")\n",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
fd = qemu_open(filename,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
0644);
if (fd < 0) {
return -errno;
result = -errno;
goto exit;
}
/* We need enough blocks to store the given disk size,
@@ -754,6 +761,7 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
result = -errno;
}
exit:
return result;
}
@@ -800,8 +808,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_read = vdi_co_read,

block/vhdx-endian.c

@@ -1,216 +0,0 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=34750
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/vhdx.h"
#include <uuid/uuid.h>
/*
* All the VHDX formats on disk are little endian - the following
* are helper import/export functions to correctly convert
* endianness from disk read to native cpu format, and back again.
*/
/* VHDX File Header */
void vhdx_header_le_import(VHDXHeader *h)
{
assert(h != NULL);
le32_to_cpus(&h->signature);
le32_to_cpus(&h->checksum);
le64_to_cpus(&h->sequence_number);
leguid_to_cpus(&h->file_write_guid);
leguid_to_cpus(&h->data_write_guid);
leguid_to_cpus(&h->log_guid);
le16_to_cpus(&h->log_version);
le16_to_cpus(&h->version);
le32_to_cpus(&h->log_length);
le64_to_cpus(&h->log_offset);
}
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
{
assert(orig_h != NULL);
assert(new_h != NULL);
new_h->signature = cpu_to_le32(orig_h->signature);
new_h->checksum = cpu_to_le32(orig_h->checksum);
new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
new_h->file_write_guid = orig_h->file_write_guid;
new_h->data_write_guid = orig_h->data_write_guid;
new_h->log_guid = orig_h->log_guid;
cpu_to_leguids(&new_h->file_write_guid);
cpu_to_leguids(&new_h->data_write_guid);
cpu_to_leguids(&new_h->log_guid);
new_h->log_version = cpu_to_le16(orig_h->log_version);
new_h->version = cpu_to_le16(orig_h->version);
new_h->log_length = cpu_to_le32(orig_h->log_length);
new_h->log_offset = cpu_to_le64(orig_h->log_offset);
}
/* VHDX Log Headers */
void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
{
assert(d != NULL);
le32_to_cpus(&d->signature);
le32_to_cpus(&d->trailing_bytes);
le64_to_cpus(&d->leading_bytes);
le64_to_cpus(&d->file_offset);
le64_to_cpus(&d->sequence_number);
}
void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
{
assert(d != NULL);
cpu_to_le32s(&d->signature);
cpu_to_le32s(&d->trailing_bytes);
cpu_to_le64s(&d->leading_bytes);
cpu_to_le64s(&d->file_offset);
cpu_to_le64s(&d->sequence_number);
}
void vhdx_log_data_le_export(VHDXLogDataSector *d)
{
assert(d != NULL);
cpu_to_le32s(&d->data_signature);
cpu_to_le32s(&d->sequence_high);
cpu_to_le32s(&d->sequence_low);
}
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
{
assert(hdr != NULL);
le32_to_cpus(&hdr->signature);
le32_to_cpus(&hdr->checksum);
le32_to_cpus(&hdr->entry_length);
le32_to_cpus(&hdr->tail);
le64_to_cpus(&hdr->sequence_number);
le32_to_cpus(&hdr->descriptor_count);
leguid_to_cpus(&hdr->log_guid);
le64_to_cpus(&hdr->flushed_file_offset);
le64_to_cpus(&hdr->last_file_offset);
}
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le32s(&hdr->signature);
cpu_to_le32s(&hdr->checksum);
cpu_to_le32s(&hdr->entry_length);
cpu_to_le32s(&hdr->tail);
cpu_to_le64s(&hdr->sequence_number);
cpu_to_le32s(&hdr->descriptor_count);
cpu_to_leguids(&hdr->log_guid);
cpu_to_le64s(&hdr->flushed_file_offset);
cpu_to_le64s(&hdr->last_file_offset);
}
/* Region table entries */
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr)
{
assert(hdr != NULL);
le32_to_cpus(&hdr->signature);
le32_to_cpus(&hdr->checksum);
le32_to_cpus(&hdr->entry_count);
}
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le32s(&hdr->signature);
cpu_to_le32s(&hdr->checksum);
cpu_to_le32s(&hdr->entry_count);
}
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e)
{
assert(e != NULL);
leguid_to_cpus(&e->guid);
le64_to_cpus(&e->file_offset);
le32_to_cpus(&e->length);
le32_to_cpus(&e->data_bits);
}
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e)
{
assert(e != NULL);
cpu_to_leguids(&e->guid);
cpu_to_le64s(&e->file_offset);
cpu_to_le32s(&e->length);
cpu_to_le32s(&e->data_bits);
}
/* Metadata headers & table */
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr)
{
assert(hdr != NULL);
le64_to_cpus(&hdr->signature);
le16_to_cpus(&hdr->entry_count);
}
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr)
{
assert(hdr != NULL);
cpu_to_le64s(&hdr->signature);
cpu_to_le16s(&hdr->entry_count);
}
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e)
{
assert(e != NULL);
leguid_to_cpus(&e->item_id);
le32_to_cpus(&e->offset);
le32_to_cpus(&e->length);
le32_to_cpus(&e->data_bits);
}
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e)
{
assert(e != NULL);
cpu_to_leguids(&e->item_id);
cpu_to_le32s(&e->offset);
cpu_to_le32s(&e->length);
cpu_to_le32s(&e->data_bits);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

block/vhdx.h

@@ -1,450 +0,0 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=34750
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
#define KiB (1 * 1024)
#define MiB (KiB * 1024)
#define GiB (MiB * 1024)
#define TiB ((uint64_t) GiB * 1024)
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
* each block is 64KB:
*
* _____________________________________________________________________________
 * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
 * |----------|---------------|------------|--------------|--------------------|
 * |          |               |            |              |                    |
* 0.........64KB...........128KB........192KB..........256KB................1MB
*/
#define VHDX_HEADER_BLOCK_SIZE (64 * 1024)
#define VHDX_FILE_ID_OFFSET 0
#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1)
#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2)
#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3)
#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4)
#define VHDX_HEADER_SECTION_END (1 * MiB)
/*
* A note on the use of MS-GUID fields. For more details on the GUID,
* please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
*
* The VHDX specification only states that these are MS GUIDs, and which
* bytes are data1-data4. It makes no mention of what algorithm should be used
* to generate the GUID, nor what standard. However, looking at the specified
* known GUID fields, it appears the GUIDs are:
* Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4)
* Random algorithm (noted by 0x4XXX for .data3)
*/
/* ---- HEADER SECTION STRUCTURES ---- */
/* These structures are ones that are defined in the VHDX specification
* document */
#define VHDX_FILE_SIGNATURE 0x656C696678646876 /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
uint64_t signature; /* "vhdxfile" in ASCII */
uint16_t creator[256]; /* optional; utf-16 string to identify
the vhdx file creator. Diagnostic
only */
} VHDXFileIdentifier;
/* the guid is a 16 byte unique ID - the definition for this used by
* Microsoft is not just 16 bytes though - it is a structure that is defined,
* so we need to follow it here so that endianness does not trip us up */
typedef struct QEMU_PACKED MSGUID {
uint32_t data1;
uint16_t data2;
uint16_t data3;
uint8_t data4[8];
} MSGUID;
#define guid_eq(a, b) \
(memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
#define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk
is only 582 bytes, for purposes of crc
the header is the first 4KB of the 64KB
block */
/* The full header is 4KB, although the actual header data is much smaller.
* But for the checksum calculation, it is over the entire 4KB structure,
* not just the defined portion of it */
#define VHDX_HEADER_SIGNATURE 0x64616568
typedef struct QEMU_PACKED VHDXHeader {
uint32_t signature; /* "head" in ASCII */
uint32_t checksum; /* CRC-32C hash of the whole header */
uint64_t sequence_number; /* Seq number of this header. Each
VHDX file has 2 of these headers,
and only the header with the highest
sequence number is valid */
MSGUID file_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
file */
MSGUID data_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
visible data. Visible data is
defined as:
- system & user metadata
- raw block data
- disk size
- any change that will
cause the virtual disk
sector read to differ
This does not need to change if
blocks are re-arranged */
MSGUID log_guid; /* 128 bit unique identifier. If zero,
there is no valid log. If non-zero,
log entries with this guid are
valid. */
uint16_t log_version; /* version of the log format. Must be
set to zero */
uint16_t version; /* version of the vhdx file. Currently,
only supported version is "1" */
uint32_t log_length; /* length of the log. Must be multiple
of 1MB */
uint64_t log_offset; /* byte offset in the file of the log.
Must also be a multiple of 1MB */
} VHDXHeader;
/* Header for the region table block */
#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
uint32_t signature; /* "regi" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_count; /* number of valid entries */
uint32_t reserved;
} VHDXRegionTableHeader;
/* Individual region table entry. There may be a maximum of 2047 of these
*
* There are two known region table properties. Both are required.
* BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08
* Metadata: 8B7CA20647904B9AB8FE575F050F886E
*/
#define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand
this entry in order to open
file */
typedef struct QEMU_PACKED VHDXRegionTableEntry {
MSGUID guid; /* 128-bit unique identifier */
uint64_t file_offset; /* offset of the object in the file.
Must be multiple of 1MB */
uint32_t length; /* length, in bytes, of the object */
uint32_t data_bits;
} VHDXRegionTableEntry;
/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_MIN_SIZE (1024 * 1024)
#define VHDX_LOG_SECTOR_SIZE 4096
#define VHDX_LOG_HDR_SIZE 64
#define VHDX_LOG_SIGNATURE 0x65676f6c
typedef struct QEMU_PACKED VHDXLogEntryHeader {
uint32_t signature; /* "loge" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_length; /* length in bytes, multiple of 1MB */
uint32_t tail; /* byte offset of first log entry of a
seq, where this entry is the last
entry */
uint64_t sequence_number; /* incremented with each log entry.
May not be zero. */
uint32_t descriptor_count; /* number of descriptors in this log
entry, must be >= 0 */
uint32_t reserved;
MSGUID log_guid; /* value of the log_guid from
vhdx_header. If not found in
vhdx_header, it is invalid */
uint64_t flushed_file_offset; /* see spec for full details - this
should be vhdx file size in bytes */
uint64_t last_file_offset; /* size in bytes that all allocated
file structures fit into */
} VHDXLogEntryHeader;
#define VHDX_LOG_DESC_SIZE 32
#define VHDX_LOG_DESC_SIGNATURE 0x63736564
#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
typedef struct QEMU_PACKED VHDXLogDescriptor {
uint32_t signature; /* "zero" or "desc" in ASCII */
union {
uint32_t reserved; /* zero desc */
uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the
data sector */
};
union {
uint64_t zero_length; /* zero desc: length of the section to
zero */
uint64_t leading_bytes; /* data desc: bytes 0-7 of the data
sector */
};
uint64_t file_offset; /* file offset to write zeros - multiple
of 4kB */
uint64_t sequence_number; /* must match same field in
vhdx_log_entry_header */
} VHDXLogDescriptor;
#define VHDX_LOG_DATA_SIGNATURE 0x61746164
typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t data_signature; /* "data" in ASCII */
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive).
see the data descriptor field for the
other missing bytes */
uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
/* block states - different state values depending on whether it is a
* payload block, or a sector block. */
#define PAYLOAD_BLOCK_NOT_PRESENT 0
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
#define PAYLOAD_BLOCK_UNMAPPED 5
#define PAYLOAD_BLOCK_FULLY_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
#define SB_BLOCK_NOT_PRESENT 0
#define SB_BLOCK_PRESENT 6
/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23)
/* Upper 44 bits are the file offset in 1 MB units, lower 3 bits are the
   state, the remaining bits are reserved */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
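/* Illustrative only (helper name is hypothetical, not part of the header):
 * decoding a BAT entry with the masks above.  Because the offset unit (1 MB)
 * matches the 20 masked-off low bits, masking yields the payload offset in
 * bytes directly. */
static inline void example_vhdx_bat_entry_decode(VHDXBatEntry entry,
                                                 uint64_t *file_offset,
                                                 int *state)
{
    *file_offset = entry & VHDX_BAT_FILE_OFF_MASK;
    *state = entry & VHDX_BAT_STATE_BIT_MASK;  /* e.g. PAYLOAD_BLOCK_FULLY_PRESENT */
}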
/* ---- METADATA REGION STRUCTURES ---- */
#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
(VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656D /* "metadata" in ASCII */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
uint64_t signature; /* "metadata" in ASCII */
uint16_t reserved;
uint16_t entry_count; /* number of table entries, <= 2047 */
uint32_t reserved2[5];
} VHDXMetadataTableHeader;
#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set,
otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parser must understand this
entry to open the file */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
MSGUID item_id; /* 128-bit identifier for metadata */
uint32_t offset; /* byte offset of the metadata. At
least 64kB. Relative to start of
metadata region */
/* note: if length = 0, so is offset */
uint32_t length; /* length of metadata. <= 1MB. */
uint32_t data_bits; /* least-significant 3 bits are flags,
the rest are reserved (see above) */
uint32_t reserved2;
} VHDXMetadataTableEntry;
#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to
be BLOCK_NOT_PRESENT.
If set indicates a fixed
size VHDX file */
#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
#define VHDX_BLOCK_SIZE_MIN (1 * MiB)
#define VHDX_BLOCK_SIZE_MAX (256 * MiB)
typedef struct QEMU_PACKED VHDXFileParameters {
uint32_t block_size; /* size of each payload block, always
power of 2, <= 256MB and >= 1MB. */
uint32_t data_bits; /* least-significant 2 bits are flags,
the rest are reserved (see above) */
} VHDXFileParameters;
#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB)
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
Must be multiple of the sector size,
max of 64TB */
} VHDXVirtualDiskSize;
typedef struct QEMU_PACKED VHDXPage83Data {
MSGUID page_83_data; /* unique id for scsi devices that
support page 0x83 */
} VHDXPage83Data;
typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
uint32_t logical_sector_size; /* virtual disk sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskLogicalSectorSize;
typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
uint32_t physical_sector_size; /* physical sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskPhysicalSectorSize;
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
MSGUID locator_type; /* type of the parent virtual disk. */
uint16_t reserved;
uint16_t key_value_count; /* number of key/value pairs for this
locator */
} VHDXParentLocatorHeader;
/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
typedef struct QEMU_PACKED VHDXParentLocatorEntry {
uint32_t key_offset; /* offset in metadata for key, > 0 */
uint32_t value_offset; /* offset in metadata for value, >0 */
uint16_t key_length; /* length of entry key, > 0 */
uint16_t value_length; /* length of entry value, > 0 */
} VHDXParentLocatorEntry;
/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
typedef struct VHDXMetadataEntries {
VHDXMetadataTableEntry file_parameters_entry;
VHDXMetadataTableEntry virtual_disk_size_entry;
VHDXMetadataTableEntry page83_data_entry;
VHDXMetadataTableEntry logical_sector_size_entry;
VHDXMetadataTableEntry phys_sector_size_entry;
VHDXMetadataTableEntry parent_locator_entry;
uint16_t present;
} VHDXMetadataEntries;
typedef struct VHDXLogEntries {
uint64_t offset;
uint64_t length;
uint32_t write;
uint32_t read;
VHDXLogEntryHeader *hdr;
void *desc_buffer;
uint64_t sequence;
uint32_t tail;
} VHDXLogEntries;
typedef struct VHDXRegionEntry {
uint64_t start;
uint64_t end;
QLIST_ENTRY(VHDXRegionEntry) entries;
} VHDXRegionEntry;
typedef struct BDRVVHDXState {
CoMutex lock;
int curr_header;
VHDXHeader *headers[2];
VHDXRegionTableHeader rt;
VHDXRegionTableEntry bat_rt; /* region table for the BAT */
VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
VHDXMetadataTableHeader metadata_hdr;
VHDXMetadataEntries metadata_entries;
VHDXFileParameters params;
uint32_t block_size;
uint32_t block_size_bits;
uint32_t sectors_per_block;
uint32_t sectors_per_block_bits;
uint64_t virtual_disk_size;
uint32_t logical_sector_size;
uint32_t physical_sector_size;
uint64_t chunk_ratio;
uint32_t chunk_ratio_bits;
uint32_t logical_sector_size_bits;
uint32_t bat_entries;
VHDXBatEntry *bat;
uint64_t bat_offset;
bool first_visible_write;
MSGUID session_guid;
VHDXLogEntries log;
VHDXParentLocatorHeader parent_header;
VHDXParentLocatorEntry *parent_entries;
Error *migration_blocker;
bool log_replayed_on_open;
QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
} BDRVVHDXState;
void vhdx_guid_generate(MSGUID *guid);
int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
MSGUID *log_guid);
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset);
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
Error **errp);
int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
void *data, uint32_t length, uint64_t offset);
static inline void leguid_to_cpus(MSGUID *guid)
{
le32_to_cpus(&guid->data1);
le16_to_cpus(&guid->data2);
le16_to_cpus(&guid->data3);
}
static inline void cpu_to_leguids(MSGUID *guid)
{
cpu_to_le32s(&guid->data1);
cpu_to_le16s(&guid->data2);
cpu_to_le16s(&guid->data3);
}
void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
#endif

File diff suppressed because it is too large

block/vpc.c

@@ -45,8 +45,10 @@ enum vhd_type {
// Seconds since Jan 1, 2000 0:00:00 (UTC)
#define VHD_TIMESTAMP_BASE 946684800
#define VHD_MAX_SECTORS (65535LL * 255 * 255)
// always big-endian
typedef struct vhd_footer {
struct vhd_footer {
char creator[8]; // "conectix"
uint32_t features;
uint32_t version;
@@ -79,9 +81,9 @@ typedef struct vhd_footer {
uint8_t uuid[16];
uint8_t in_saved_state;
} QEMU_PACKED VHDFooter;
};
typedef struct vhd_dyndisk_header {
struct vhd_dyndisk_header {
char magic[8]; // "cxsparse"
// Offset of next header structure, 0xFFFFFFFF if none
@@ -111,7 +113,7 @@ typedef struct vhd_dyndisk_header {
uint32_t reserved;
uint64_t data_offset;
} parent_locator[8];
} QEMU_PACKED VHDDynDiskHeader;
};
typedef struct BDRVVPCState {
CoMutex lock;
@@ -155,15 +157,15 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vpc_open(BlockDriverState *bs, int flags)
{
BDRVVPCState *s = bs->opaque;
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
struct vhd_footer* footer;
struct vhd_dyndisk_header* dyndisk_header;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
int disk_type = VHD_DYNAMIC;
int ret;
@@ -172,7 +174,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
footer = (VHDFooter *) s->footer_buf;
footer = (struct vhd_footer*) s->footer_buf;
if (strncmp(footer->creator, "conectix", 8)) {
int64_t offset = bdrv_getlength(bs->file);
if (offset < 0) {
@@ -190,8 +192,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (strncmp(footer->creator, "conectix", 8)) {
error_setg(errp, "invalid VPC image");
ret = -EINVAL;
ret = -EMEDIUMTYPE;
goto fail;
}
disk_type = VHD_FIXED;
@@ -212,17 +213,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
/* images created with disk2vhd report a far higher virtual size
* than expected with the cyls * heads * sectors_per_cyl formula.
* use the footer->size instead if the image was created with
* disk2vhd.
*/
if (!strncmp(footer->creator_app, "d2v", 4)) {
bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
}
/* Allow a maximum disk size of approximately 2 TB */
if (bs->total_sectors >= 65535LL * 255 * 255) {
if (bs->total_sectors >= VHD_MAX_SECTORS) {
ret = -EFBIG;
goto fail;
}
@@ -234,7 +226,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
dyndisk_header = (VHDDynDiskHeader *) buf;
dyndisk_header = (struct vhd_dyndisk_header *) buf;
if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
ret = -EINVAL;
@@ -242,10 +234,32 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
s->block_size = be32_to_cpu(dyndisk_header->block_size);
if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"Invalid block size %" PRIu32, s->block_size);
ret = -EINVAL;
goto fail;
}
s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
s->pagetable = g_malloc(s->max_table_entries * 4);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
ret = -EINVAL;
goto fail;
}
if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
ret = -EINVAL;
goto fail;
}
s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
@@ -270,13 +284,6 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
}
if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
error_setg(errp, "block-vpc: free_data_block_offset points after "
"the end of file. The image has been truncated.");
ret = -EINVAL;
goto fail;
}
s->last_bitmap_offset = (int64_t) -1;
#ifdef CACHE
@@ -298,7 +305,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail:
g_free(s->pagetable);
qemu_vfree(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif
@@ -456,19 +463,6 @@ fail:
return -1;
}
static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (cpu_to_be32(footer->type) != VHD_FIXED) {
bdi->cluster_size = s->block_size;
}
bdi->unallocated_blocks_are_zero = true;
return 0;
}
static int vpc_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
@@ -476,7 +470,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
int ret;
int64_t offset;
int64_t sectors, sectors_per_block;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_read(bs->file, sector_num, buf, nb_sectors);
@@ -525,7 +519,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
int64_t offset;
int64_t sectors, sectors_per_block;
int ret;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_write(bs->file, sector_num, buf, nb_sectors);
@@ -627,8 +621,8 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
(VHDDynDiskHeader *) buf;
struct vhd_dyndisk_header* dyndisk_header =
(struct vhd_dyndisk_header*) buf;
size_t block_size, num_bat_entries;
int i;
int ret = -EIO;
@@ -714,11 +708,10 @@ static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
return ret;
}
static int vpc_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
static int vpc_create(const char *filename, QEMUOptionParameter *options)
{
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
struct vhd_footer *footer = (struct vhd_footer *) buf;
QEMUOptionParameter *disk_type_param;
int fd, i;
uint16_t cyls = 0;
@@ -818,22 +811,10 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options,
return ret;
}
static int vpc_has_zero_init(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_has_zero_init(bs->file);
} else {
return 1;
}
}
static void vpc_close(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
g_free(s->pagetable);
qemu_vfree(s->pagetable);
#ifdef CACHE
g_free(s->pageentry_u8);
#endif
@@ -862,19 +843,16 @@ static BlockDriver bdrv_vpc = {
.format_name = "vpc",
.instance_size = sizeof(BDRVVPCState),
.bdrv_probe = vpc_probe,
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
.bdrv_probe = vpc_probe,
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
.bdrv_get_info = vpc_get_info,
.create_options = vpc_create_options,
.bdrv_has_zero_init = vpc_has_zero_init,
.create_options = vpc_create_options,
};
static void bdrv_vpc_init(void)

block/vvfat.c

@@ -1,4 +1,4 @@
/* vim:set shiftwidth=4 ts=4: */
/* vim:set shiftwidth=4 ts=8: */
/*
* QEMU Block driver for virtual VFAT (shadows a local directory)
*
@@ -28,8 +28,6 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@@ -266,7 +264,8 @@ typedef struct mbr_t {
} QEMU_PACKED mbr_t;
typedef struct direntry_t {
uint8_t name[8 + 3];
uint8_t name[8];
uint8_t extension[3];
uint8_t attributes;
uint8_t reserved[2];
uint16_t ctime;
@@ -517,9 +516,11 @@ static inline uint8_t fat_chksum(const direntry_t* entry)
uint8_t chksum=0;
int i;
for (i = 0; i < ARRAY_SIZE(entry->name); i++) {
chksum = (((chksum & 0xfe) >> 1) |
((chksum & 0x01) ? 0x80 : 0)) + entry->name[i];
for(i=0;i<11;i++) {
unsigned char c;
c = (i < 8) ? entry->name[i] : entry->extension[i-8];
chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
}
return chksum;
@@ -614,7 +615,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
if(is_dot) {
entry=array_get_next(&(s->directory));
memset(entry->name, 0x20, sizeof(entry->name));
memset(entry->name,0x20,11);
memcpy(entry->name,filename,strlen(filename));
return entry;
}
@@ -629,14 +630,12 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
i = 8;
entry=array_get_next(&(s->directory));
memset(entry->name, 0x20, sizeof(entry->name));
memset(entry->name,0x20,11);
memcpy(entry->name, filename, i);
if (j > 0) {
for (i = 0; i < 3 && filename[j + 1 + i]; i++) {
entry->name[8 + i] = filename[j + 1 + i];
}
}
if(j > 0)
for (i = 0; i < 3 && filename[j+1+i]; i++)
entry->extension[i] = filename[j+1+i];
/* upcase & remove unwanted characters */
for(i=10;i>=0;i--) {
@@ -860,7 +859,8 @@ static int init_directories(BDRVVVFATState* s,
{
direntry_t* entry=array_get_next(&(s->directory));
entry->attributes=0x28; /* archive | volume label */
memcpy(entry->name, "QEMU VVFAT ", sizeof(entry->name));
memcpy(entry->name,"QEMU VVF",8);
memcpy(entry->extension,"AT ",3);
}
/* Now build FAT, and write back information into directory */
@@ -988,91 +988,10 @@ static void vvfat_rebind(BlockDriverState *bs)
s->bs = bs;
}
static QemuOptsList runtime_opts = {
.name = "vvfat",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "dir",
.type = QEMU_OPT_STRING,
.help = "Host directory to map to the vvfat device",
},
{
.name = "fat-type",
.type = QEMU_OPT_NUMBER,
.help = "FAT type (12, 16 or 32)",
},
{
.name = "floppy",
.type = QEMU_OPT_BOOL,
.help = "Create a floppy rather than a hard disk image",
},
{
.name = "rw",
.type = QEMU_OPT_BOOL,
.help = "Make the image writable",
},
{ /* end of list */ }
},
};
static void vvfat_parse_filename(const char *filename, QDict *options,
Error **errp)
{
int fat_type = 0;
bool floppy = false;
bool rw = false;
int i;
if (!strstart(filename, "fat:", NULL)) {
error_setg(errp, "File name string must start with 'fat:'");
return;
}
/* Parse options */
if (strstr(filename, ":32:")) {
fat_type = 32;
} else if (strstr(filename, ":16:")) {
fat_type = 16;
} else if (strstr(filename, ":12:")) {
fat_type = 12;
}
if (strstr(filename, ":floppy:")) {
floppy = true;
}
if (strstr(filename, ":rw:")) {
rw = true;
}
/* Get the directory name without options */
i = strrchr(filename, ':') - filename;
assert(i >= 3);
if (filename[i - 2] == ':' && qemu_isalpha(filename[i - 1])) {
/* workaround for DOS drive names */
filename += i - 1;
} else {
filename += i + 1;
}
/* Fill in the options QDict */
qdict_put(options, "dir", qstring_from_str(filename));
qdict_put(options, "fat-type", qint_from_int(fat_type));
qdict_put(options, "floppy", qbool_from_int(floppy));
qdict_put(options, "rw", qbool_from_int(rw));
}
static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags)
{
BDRVVVFATState *s = bs->opaque;
int cyls, heads, secs;
bool floppy;
const char *dirname;
QemuOpts *opts;
Error *local_err = NULL;
int ret;
int i, cyls, heads, secs;
#ifdef DEBUG
vvv = s;
@@ -1083,62 +1002,6 @@ DLOG(if (stderr == NULL) {
setbuf(stderr, NULL);
})
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
dirname = qemu_opt_get(opts, "dir");
if (!dirname) {
error_setg(errp, "vvfat block driver requires a 'dir' option");
ret = -EINVAL;
goto fail;
}
s->fat_type = qemu_opt_get_number(opts, "fat-type", 0);
floppy = qemu_opt_get_bool(opts, "floppy", false);
if (floppy) {
/* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */
if (!s->fat_type) {
s->fat_type = 12;
secs = 36;
s->sectors_per_cluster = 2;
} else {
secs = s->fat_type == 12 ? 18 : 36;
s->sectors_per_cluster = 1;
}
s->first_sectors_number = 1;
cyls = 80;
heads = 2;
} else {
/* 32MB or 504MB disk*/
if (!s->fat_type) {
s->fat_type = 16;
}
cyls = s->fat_type == 12 ? 64 : 1024;
heads = 16;
secs = 63;
}
switch (s->fat_type) {
case 32:
fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. "
"You are welcome to do so!\n");
break;
case 16:
case 12:
break;
default:
error_setg(errp, "Valid FAT types are only 12, 16 and 32");
ret = -EINVAL;
goto fail;
}
s->bs = bs;
/* LATER TODO: if FAT32, adjust */
@@ -1154,24 +1017,63 @@ DLOG(if (stderr == NULL) {
s->fat2 = NULL;
s->downcase_short_names = 1;
if (!strstart(dirname, "fat:", NULL))
return -1;
if (strstr(dirname, ":32:")) {
fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n");
s->fat_type = 32;
} else if (strstr(dirname, ":16:")) {
s->fat_type = 16;
} else if (strstr(dirname, ":12:")) {
s->fat_type = 12;
}
if (strstr(dirname, ":floppy:")) {
/* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */
if (!s->fat_type) {
s->fat_type = 12;
secs = 36;
s->sectors_per_cluster=2;
} else {
secs = s->fat_type == 12 ? 18 : 36;
s->sectors_per_cluster=1;
}
s->first_sectors_number = 1;
cyls = 80;
heads = 2;
} else {
/* 32MB or 504MB disk*/
if (!s->fat_type) {
s->fat_type = 16;
}
cyls = s->fat_type == 12 ? 64 : 1024;
heads = 16;
secs = 63;
}
fprintf(stderr, "vvfat %s chs %d,%d,%d\n",
dirname, cyls, heads, secs);
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
ret = enable_write_target(s);
if (ret < 0) {
goto fail;
}
bs->read_only = 0;
if (strstr(dirname, ":rw:")) {
if (enable_write_target(s))
return -1;
bs->read_only = 0;
}
i = strrchr(dirname, ':') - dirname;
assert(i >= 3);
if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1]))
/* workaround for DOS drive names */
dirname += i-1;
else
dirname += i+1;
bs->total_sectors = cyls * heads * secs;
if (init_directories(s, dirname, heads, secs)) {
ret = -EIO;
goto fail;
return -1;
}
s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;
@@ -1191,10 +1093,7 @@ DLOG(if (stderr == NULL) {
migrate_add_blocker(s->migration_blocker);
}
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
return 0;
}
static inline void vvfat_close_current_file(BDRVVVFATState *s)
@@ -1586,20 +1485,17 @@ static int parse_short_name(BDRVVVFATState* s,
lfn->name[i] = direntry->name[i];
}
for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) {
}
for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--);
if (j >= 0) {
lfn->name[i++] = '.';
lfn->name[i + j + 1] = '\0';
for (;j >= 0; j--) {
uint8_t c = direntry->name[8 + j];
if (c <= ' ' || c > 0x7f) {
return -2;
} else if (s->downcase_short_names) {
lfn->name[i + j] = qemu_tolower(c);
} else {
lfn->name[i + j] = c;
}
if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f)
return -2;
else if (s->downcase_short_names)
lfn->name[i + j] = qemu_tolower(direntry->extension[j]);
else
lfn->name[i + j] = direntry->extension[j];
}
} else
lfn->name[i + j + 1] = '\0';
@@ -2873,17 +2769,16 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int* n)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
if (*n > nb_sectors) {
*n = nb_sectors;
} else if (*n < 0) {
return 0;
}
return BDRV_BLOCK_DATA;
if (*n > nb_sectors)
*n = nb_sectors;
else if (*n < 0)
return 0;
return 1;
}
static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
@@ -2894,7 +2789,7 @@ static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
bdrv_unref(s->qcow);
bdrv_delete(s->qcow);
g_free(s->qcow_filename);
}
@@ -2908,7 +2803,6 @@ static int enable_write_target(BDRVVVFATState *s)
{
BlockDriver *bdrv_qcow;
QEMUOptionParameter *options;
Error *local_err = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
s->used_clusters = calloc(size, 1);
@@ -2918,7 +2812,9 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) {
goto err;
g_free(s->qcow_filename);
s->qcow_filename = NULL;
return ret;
}
bdrv_qcow = bdrv_find_format("qcow");
@@ -2926,38 +2822,30 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
ret = bdrv_create(bdrv_qcow, s->qcow_filename, options, &local_err);
if (ret < 0) {
qerror_report_err(local_err);
error_free(local_err);
goto err;
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
return -1;
s->qcow = bdrv_new("");
if (s->qcow == NULL) {
return -1;
}
s->qcow = NULL;
ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow,
&local_err);
ret = bdrv_open(s->qcow, s->qcow_filename,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
qerror_report_err(local_err);
error_free(local_err);
goto err;
return ret;
}
#ifndef _WIN32
unlink(s->qcow_filename);
#endif
s->bs->backing_hd = bdrv_new("");
s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
s->bs->backing_hd->drv = &vvfat_write_target;
s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
*(void**)s->bs->backing_hd->opaque = s;
return 0;
err:
g_free(s->qcow_filename);
s->qcow_filename = NULL;
return ret;
}
static void vvfat_close(BlockDriverState *bs)
@@ -2977,18 +2865,15 @@ static void vvfat_close(BlockDriverState *bs)
}
static BlockDriver bdrv_vvfat = {
.format_name = "vvfat",
.protocol_name = "fat",
.instance_size = sizeof(BDRVVVFATState),
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
.bdrv_close = vvfat_close,
.bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_co_get_block_status = vvfat_co_get_block_status,
.format_name = "vvfat",
.instance_size = sizeof(BDRVVVFATState),
.bdrv_file_open = vvfat_open,
.bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_close = vvfat_close,
.bdrv_co_is_allocated = vvfat_co_is_allocated,
.protocol_name = "fat",
};
static void bdrv_vvfat_init(void)


@@ -25,6 +25,7 @@
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "raw-aio.h"
#include "qemu/event_notifier.h"
@@ -105,6 +106,13 @@ static void win32_aio_completion_cb(EventNotifier *e)
}
}
static int win32_aio_flush_cb(EventNotifier *e)
{
QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
return (s->count > 0) ? 1 : 0;
}
static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
{
QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
@@ -194,7 +202,8 @@ QEMUWin32AIOState *win32_aio_init(void)
goto out_close_efd;
}
qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
win32_aio_flush_cb);
return s;


@@ -10,7 +10,7 @@
*/
#include "sysemu/blockdev.h"
#include "hw/block/block.h"
#include "hw/block-common.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
@@ -69,6 +69,12 @@ static void nbd_close_notifier(Notifier *n, void *data)
g_free(cn);
}
static void nbd_server_put_ref(NBDExport *exp)
{
BlockDriverState *bs = nbd_export_get_blockdev(exp);
drive_put_ref(drive_get_by_blockdev(bs));
}
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
@@ -99,9 +105,11 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
writable = false;
}
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
nbd_server_put_ref);
nbd_export_set_name(exp, device);
drive_get_ref(drive_get_by_blockdev(bs));
n = g_malloc0(sizeof(NBDCloseNotifier));
n->n.notify = nbd_close_notifier;
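(Editorial note, not part of the diff: the change above moves the drive reference drop into a close callback handed to nbd_export_new(), so the export releases the reference itself when it is torn down. A minimal sketch of that ownership pattern, with hypothetical names:)

#include <stdlib.h>

/* Sketch only: an object created with a caller-supplied put_ref callback,
 * invoked exactly once when the object is closed. */
typedef struct Export Export;
struct Export {
    void (*put_ref)(Export *exp);  /* releases the reference taken at creation */
    void *blockdev;                /* opaque handle the callback may use */
};

static Export *export_new(void *blockdev, void (*put_ref)(Export *exp))
{
    Export *exp = calloc(1, sizeof(*exp));
    if (exp) {
        exp->blockdev = blockdev;
        exp->put_ref = put_ref;
    }
    return exp;
}

static void export_close(Export *exp)
{
    if (exp->put_ref) {
        exp->put_ref(exp);         /* drop the creator's reference */
    }
    free(exp);
}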

blockdev.c (1958): file diff suppressed because it is too large.


@@ -35,7 +35,7 @@
#include "qmp-commands.h"
#include "qemu/timer.h"
void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
int64_t speed, BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
@@ -45,11 +45,10 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
return NULL;
}
bdrv_ref(bs);
bdrv_set_in_use(bs, 1);
job = g_malloc0(driver->instance_size);
job->driver = driver;
job = g_malloc0(job_type->instance_size);
job->job_type = job_type;
job->bs = bs;
job->cb = cb;
job->opaque = opaque;
@@ -61,7 +60,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
Error *local_err = NULL;
block_job_set_speed(job, speed, &local_err);
if (local_err) {
if (error_is_set(&local_err)) {
bs->job = NULL;
g_free(job);
bdrv_set_in_use(bs, 0);
@@ -87,12 +86,12 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
Error *local_err = NULL;
if (!job->driver->set_speed) {
if (!job->job_type->set_speed) {
error_set(errp, QERR_NOT_SUPPORTED);
return;
}
job->driver->set_speed(job, speed, &local_err);
if (local_err) {
job->job_type->set_speed(job, speed, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
return;
}
@@ -102,12 +101,12 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
void block_job_complete(BlockJob *job, Error **errp)
{
if (job->paused || job->cancelled || !job->driver->complete) {
if (job->paused || job->cancelled || !job->job_type->complete) {
error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
return;
}
job->driver->complete(job, errp);
job->job_type->complete(job, errp);
}
void block_job_pause(BlockJob *job)
@@ -143,8 +142,8 @@ bool block_job_is_cancelled(BlockJob *job)
void block_job_iostatus_reset(BlockJob *job)
{
job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
if (job->driver->iostatus_reset) {
job->driver->iostatus_reset(job);
if (job->job_type->iostatus_reset) {
job->job_type->iostatus_reset(job);
}
}
@@ -188,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
}
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
{
assert(job->busy);
@@ -201,7 +200,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
if (block_job_is_paused(job)) {
qemu_coroutine_yield();
} else {
co_sleep_ns(type, ns);
co_sleep_ns(clock, ns);
}
job->busy = true;
}
@@ -209,7 +208,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
BlockJobInfo *block_job_query(BlockJob *job)
{
BlockJobInfo *info = g_new0(BlockJobInfo, 1);
info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
info->type = g_strdup(job->job_type->job_type);
info->device = g_strdup(bdrv_get_device_name(job->bs));
info->len = job->len;
info->busy = job->busy;
@@ -236,7 +235,7 @@ QObject *qobject_from_block_job(BlockJob *job)
"'len': %" PRId64 ","
"'offset': %" PRId64 ","
"'speed': %" PRId64 " }",
BlockJobType_lookup[job->driver->job_type],
job->job_type->job_type,
bdrv_get_device_name(job->bs),
job->len,
job->offset,

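(Editorial note, not part of the diff: both sides of the blockjob hunks above implement the same pattern, a per-job-type descriptor carrying the instance size and optional callbacks that a generic constructor consumes; only the names differ, BlockJobDriver/job->driver in the newer code versus BlockJobType/job->job_type in the older code. A minimal sketch with hypothetical names:)

#include <stdlib.h>
#include <stdint.h>

typedef struct Job Job;

/* Descriptor for one job type: size of the concrete job struct plus
 * optional callbacks the generic code may invoke. */
typedef struct JobDriver {
    size_t instance_size;                        /* must be >= sizeof(Job) */
    void (*set_speed)(Job *job, int64_t speed);  /* optional */
} JobDriver;

struct Job {
    const JobDriver *driver;  /* the descriptor this job was created from */
    int64_t speed;
};

/* Generic constructor: allocate the concrete job, wire up its descriptor,
 * and apply the initial speed through the descriptor's callback if present. */
static Job *job_create(const JobDriver *driver, int64_t speed)
{
    Job *job = calloc(1, driver->instance_size);
    if (!job) {
        return NULL;
    }
    job->driver = driver;
    if (speed != 0 && driver->set_speed) {
        driver->set_speed(job, speed);
    }
    return job;
}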
Some files were not shown because too many files have changed in this diff.