Update version for v2.1.3 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
vl.c: fix regression when reading machine type from config file
2015-01-21 19:16:38 -06:00 · 2015-01-14 17:08:44 -06:00 · 2015-01-14 17:08:44 -06:00 · 2015-01-14 17:08:44 -06:00 · 2015-01-14 17:08:44 -06:00 · 2015-01-14 17:08:43 -06:00
2036 changed files with 50632 additions and 160883 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -11,10 +11,6 @@
 /trace/generated-tracers.dtrace
 /trace/generated-events.h
 /trace/generated-events.c
-/trace/generated-helpers-wrappers.h
-/trace/generated-helpers.h
-/trace/generated-helpers.c
-/trace/generated-tcg-tracers.h
 /trace/generated-ust-provider.h
 /trace/generated-ust.c
 /libcacard/trace/generated-tracers.c
@@ -37,8 +33,14 @@
 /qemu-tech.html
 /qemu-doc.info
 /qemu-tech.info
+/qemu.1
+/qemu.pod
+/qemu-img.1
+/qemu-img.pod
 /qemu-img
 /qemu-nbd
+/qemu-nbd.8
+/qemu-nbd.pod
 /qemu-options.def
 /qemu-options.texi
 /qemu-img-cmds.texi
@@ -50,7 +52,8 @@
 /qmp-commands.txt
 /vscclient
 /fsdev/virtfs-proxy-helper
-*.[1-9]
+/fsdev/virtfs-proxy-helper.1
+/fsdev/virtfs-proxy-helper.pod
 *.a
 *.aux
 *.cp
@@ -63,7 +66,6 @@
 *.ky
 *.log
 *.pdf
-*.pod
 *.cps
 *.fns
 *.kys
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ notifications:
    on_failure: always
 env:
  global:
-    - TEST_CMD=""
+    - TEST_CMD="make check"
    - EXTRA_CONFIG=""
    # Development packages, EXTRA_PKGS saved for additional builds
    - CORE_PKGS="libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev"
@@ -20,51 +20,31 @@ env:
    - GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
    - EXTRA_PKGS=""
  matrix:
-    # Group major targets together with their linux-user counterparts
    - TARGETS=alpha-softmmu,alpha-linux-user
-    - TARGETS=arm-softmmu,arm-linux-user,armeb-linux-user,aarch64-softmmu,aarch64-linux-user
-    - TARGETS=cris-softmmu,cris-linux-user
-    - TARGETS=i386-softmmu,i386-linux-user,x86_64-softmmu,x86_64-linux-user
-    - TARGETS=m68k-softmmu,m68k-linux-user
-    - TARGETS=microblaze-softmmu,microblazeel-softmmu,microblaze-linux-user,microblazeel-linux-user
+    - TARGETS=arm-softmmu,arm-linux-user
+    - TARGETS=aarch64-softmmu,aarch64-linux-user
+    - TARGETS=cris-softmmu
+    - TARGETS=i386-softmmu,x86_64-softmmu
+    - TARGETS=lm32-softmmu
+    - TARGETS=m68k-softmmu
+    - TARGETS=microblaze-softmmu,microblazeel-softmmu
    - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
-    - TARGETS=mips-linux-user,mips64-linux-user,mips64el-linux-user,mipsel-linux-user,mipsn32-linux-user,mipsn32el-linux-user
-    - TARGETS=or32-softmmu,or32-linux-user
-    - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,ppc-linux-user,ppc64-linux-user,ppc64abi32-linux-user,ppc64le-linux-user
-    - TARGETS=s390x-softmmu,s390x-linux-user
-    - TARGETS=sh4-softmmu,sh4eb-softmmu,sh4-linux-user sh4eb-linux-user
-    - TARGETS=sparc-softmmu,sparc64-softmmu,sparc-linux-user,sparc32plus-linux-user,sparc64-linux-user
-    - TARGETS=unicore32-softmmu,unicore32-linux-user
-    # Group remaining softmmu only targets into one build
-    - TARGETS=lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
-git:
-  # we want to do this ourselves
-  submodules: false
+    - TARGETS=moxie-softmmu
+    - TARGETS=or32-softmmu,
+    - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
+    - TARGETS=s390x-softmmu
+    - TARGETS=sh4-softmmu,sh4eb-softmmu
+    - TARGETS=sparc-softmmu,sparc64-softmmu
+    - TARGETS=unicore32-softmmu
+    - TARGETS=xtensa-softmmu,xtensaeb-softmmu
 before_install:
-  - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
  - git submodule update --init --recursive
  - sudo apt-get update -qq
  - sudo apt-get install -qq ${CORE_PKGS} ${NET_PKGS} ${GUI_PKGS} ${EXTRA_PKGS}
-before_script:
-  - ./configure --target-list=${TARGETS} --enable-debug-tcg ${EXTRA_CONFIG}
-script:
-  - make -j2 && ${TEST_CMD}
+script: "./configure --target-list=${TARGETS} ${EXTRA_CONFIG} && make && ${TEST_CMD}"
 matrix:
  # We manually include a number of additional build for non-standard bits
  include:
-    # Make check target (we only do this once)
-    - env:
-        - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,
-                  i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,
-                  microblazeel-softmmu,mips-softmmu,mips64-softmmu,
-                  mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,
-                  ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,
-                  sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,
-                  unicore32-softmmu,unicore32-linux-user,
-                  lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,
-                  xtensaeb-softmmu
-          TEST_CMD="make check"
-      compiler: gcc
    # Debug related options
    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-debug"
@@ -93,11 +73,9 @@ matrix:
      compiler: gcc
    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-trace-backends=ftrace"
+           TEST_CMD=""
      compiler: gcc
    - env: TARGETS=i386-softmmu,x86_64-softmmu
          EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
          EXTRA_CONFIG="--enable-trace-backends=ust"
      compiler: gcc
-    - env: TARGETS=i386-softmmu,x86_64-softmmu
-           EXTRA_CONFIG="--enable-modules"
-      compiler: gcc
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -91,17 +91,3 @@ Mixed declarations (interleaving statements and declarations within blocks)
 are not allowed; declarations should be at the beginning of blocks.  In other
 words, the code should not generate warnings if using GCC's
 -Wdeclaration-after-statement option.
-
-6. Conditional statements
-
-When comparing a variable for (in)equality with a constant, list the
-constant on the right, as in:
-
-if (a == 1) {
-    /* Reads like: "If a equals 1" */
-    do_something();
-}
-
-Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
-Besides, good compilers already warn users when '==' is mis-typed as '=',
-even when the constant is on the right.
--- a/LICENSE
+++ b/LICENSE
@@ -11,7 +11,7 @@ option) any later version.

 As of July 2013, contributions under version 2 of the GNU General Public
 License (and no later version) are only accepted for the following files
-or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*.
+or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.

 3) The Tiny Code Generator (TCG) is released under the BSD license
   (see license headers in files).
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -50,33 +50,22 @@ Descriptions of section entries:

 General Project Administration
 ------------------------------
-M: Peter Maydell <peter.maydell@linaro.org>
+M: Anthony Liguori <aliguori@amazon.com>

 Responsible Disclosure, Reporting Security Issues
 ------------------------------
 W: http://wiki.qemu.org/SecurityProcess
 M: Michael S. Tsirkin <mst@redhat.com>
+M: Anthony Liguori <aliguori@amazon.com>
 L: secalert@redhat.com

 Guest CPU cores (TCG):
 ----------------------
-Overall
-L: qemu-devel@nongnu.org
-S: Odd fixes
-F: cpu-exec.c
-F: cputlb.c
-F: softmmu_template.h
-F: translate-all.c
-F: include/exec/cpu_ldst.h
-F: include/exec/cpu_ldst_template.h
-F: include/exec/helper*.h
-
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: target-alpha/
 F: hw/alpha/
-F: tests/tcg/alpha/

 ARM
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -90,19 +79,13 @@ M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
 F: target-cris/
 F: hw/cris/
-F: tests/tcg/cris/

 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
 F: target-lm32/
-F: disas/lm32.c
 F: hw/lm32/
-F: hw/*/lm32_*
-F: hw/*/milkymist-*
-F: include/hw/char/lm32_juart.h
-F: include/hw/lm32/
-F: tests/tcg/lm32/
+F: hw/char/lm32_*

 M68K
 S: Orphan
@@ -117,11 +100,9 @@ F: hw/microblaze/

 MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
-M: Leon Alrae <leon.alrae@imgtec.com>
-S: Maintained
+S: Odd Fixes
 F: target-mips/
 F: hw/mips/
-F: tests/tcg/mips/

 Moxie
 M: Anthony Green <green@moxielogic.com>
@@ -133,7 +114,6 @@ M: Jia Liu <proljc@gmail.com>
 S: Maintained
 F: target-openrisc/
 F: hw/openrisc/
-F: tests/tcg/openrisc/

 PowerPC
 M: Alexander Graf <agraf@suse.de>
@@ -157,7 +137,6 @@ F: hw/sh4/

 SPARC
 M: Blue Swirl <blauwirbel@gmail.com>
-M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 S: Maintained
 F: target-sparc/
 F: hw/sparc/
@@ -170,10 +149,8 @@ F: target-unicore32/
 F: hw/unicore32/

 X86
-M: Paolo Bonzini <pbonzini@redhat.com>
-M: Richard Henderson <rth@twiddle.net>
-M: Eduardo Habkost <ehabkost@redhat.com>
-S: Maintained
+M: qemu-devel@nongnu.org
+S: Odd Fixes
 F: target-i386/
 F: hw/i386/

@@ -183,13 +160,6 @@ W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
 F: target-xtensa/
 F: hw/xtensa/
-F: tests/tcg/xtensa/
-
-TriCore
-M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
-S: Maintained
-F: target-tricore/
-F: hw/tricore/

 Guest CPU Cores (KVM):
 ----------------------
@@ -222,12 +192,9 @@ M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target-s390x/kvm.c
-F: hw/intc/s390_flic.c
-F: hw/intc/s390_flic_kvm.c
-F: include/hw/s390x/s390_flic.h
+F: hw/intc/s390_flic.[hc]

 X86
-M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
@@ -293,7 +260,7 @@ F: include/hw/arm/digic.h
 F: hw/*/digic*

 Gumstix
-L: qemu-devel@nongnu.org
+M: qemu-devel@nongnu.org
 S: Orphan
 F: hw/arm/gumstix.c

@@ -309,7 +276,7 @@ S: Maintained
 F: hw/arm/integratorcp.c

 Mainstone
-L: qemu-devel@nongnu.org
+M: qemu-devel@nongnu.org
 S: Orphan
 F: hw/arm/mainstone.c

@@ -415,7 +382,7 @@ S: Maintained
 F: hw/mips/mips_malta.c

 Mipssim
-L: qemu-devel@nongnu.org
+M: qemu-devel@nongnu.org
 S: Orphan
 F: hw/mips/mips_mipssim.c

@@ -486,8 +453,7 @@ F: hw/ppc/prep.c
 F: hw/pci-host/prep.[hc]
 F: hw/isa/pc87312.[hc]

-sPAPR (pseries)
-M: David Gibson <david@gibson.dropbear.id.au>
+sPAPR
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Supported
@@ -519,13 +485,11 @@ SPARC Machines
 --------------
 Sun4m
 M: Blue Swirl <blauwirbel@gmail.com>
-M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 S: Maintained
 F: hw/sparc/sun4m.c

 Sun4u
 M: Blue Swirl <blauwirbel@gmail.com>
-M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 S: Maintained
 F: hw/sparc64/sun4u.c

@@ -541,7 +505,6 @@ S390 Virtio
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: hw/s390x/s390-*.c
-X: hw/s390x/*pci*.[hc]

 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -552,9 +515,6 @@ F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
 F: hw/s390x/sclp*.[hc]
 F: hw/s390x/ipl*.[hc]
-F: hw/s390x/*pci*.[hc]
-F: include/hw/s390x/
-F: pc-bios/s390-ccw/
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr

 UniCore32 Machines
@@ -568,6 +528,7 @@ F: hw/unicore32/
 X86 Machines
 ------------
 PC
+M: Anthony Liguori <aliguori@amazon.com>
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: include/hw/i386/
@@ -591,41 +552,21 @@ Xtensa Machines
 sim
 M: Max Filippov <jcmvbkbc@gmail.com>
 S: Maintained
-F: hw/xtensa/sim.c
+F: hw/xtensa/xtensa_sim.c

-XTFPGA (LX60, LX200, ML605, KC705)
+Avnet LX60
 M: Max Filippov <jcmvbkbc@gmail.com>
 S: Maintained
-F: hw/xtensa/xtfpga.c
-F: hw/net/opencores_eth.c
+F: hw/xtensa/xtensa_lx60.c

 Devices
 -------
-EDU
-M: Jiri Slaby <jslaby@suse.cz>
-S: Maintained
-F: hw/misc/edu.c
-
 IDE
-M: John Snow <jsnow@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
+M: Kevin Wolf <kwolf@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
+S: Odd Fixes
 F: include/hw/ide.h
 F: hw/ide/
-F: hw/block/block.c
-F: hw/block/cdrom.c
-F: hw/block/hd-geometry.c
-F: tests/ide-test.c
-F: tests/ahci-test.c
-T: git git://github.com/jnsnow/qemu.git ide
-
-Floppy
-M: John Snow <jsnow@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: hw/block/fdc.c
-F: include/hw/block/fdc.h
-T: git git://github.com/jnsnow/qemu.git ide

 OMAP
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -673,18 +614,12 @@ USB
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/usb/*
-F: tests/usb-*-test.c
-
-USB (serial adapter)
-M: Gerd Hoffmann <kraxel@redhat.com>
-M: Samuel Thibault <samuel.thibault@ens-lyon.org>
-S: Maintained
-F: hw/usb/dev-serial.c
+F: tests/usb-hcd-ehci-test.c

 VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
 S: Supported
-F: hw/vfio/*
+F: hw/misc/vfio.c

 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
@@ -692,6 +627,7 @@ S: Supported
 F: hw/*/*vhost*

 virtio
+M: Anthony Liguori <aliguori@amazon.com>
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: hw/*/virtio*
@@ -705,13 +641,10 @@ F: tests/virtio-9p-test.c
 T: git git://github.com/kvaneesh/QEMU.git

 virtio-blk
+M: Kevin Wolf <kwolf@redhat.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/virtio-blk.c
-F: hw/block/dataplane/*
-F: hw/virtio/dataplane/*
-T: git git://github.com/stefanha/qemu.git block

 virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -725,25 +658,15 @@ M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
-F: include/hw/virtio/virtio-serial.h
-
-virtio-rng
-M: Amit Shah <amit.shah@redhat.com>
-S: Supported
-F: hw/virtio/virtio-rng.c
-F: include/hw/virtio/virtio-rng.h
-F: backends/rng*.c

 nvme
 M: Keith Busch <keith.busch@intel.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/nvme*
 F: tests/nvme-test.c

 megasas
 M: Hannes Reinecke <hare@suse.de>
-L: qemu-block@nongnu.org
 S: Supported
 F: hw/scsi/megasas.c
 F: hw/scsi/mfi.h
@@ -755,18 +678,6 @@ S: Maintained
 F: hw/*/xilinx_*
 F: include/hw/xilinx.h

-Vmware
-M: Dmitry Fleytman <dmitry@daynix.com>
-S: Maintained
-F: hw/net/vmxnet*
-F: hw/scsi/vmw_pvscsi*
-
-Rocker
-M: Scott Feldman <sfeldma@gmail.com>
-M: Jiri Pirko <jiri@resnulli.us>
-S: Maintained
-F: hw/net/rocker/
-
 Subsystems
 ----------
 Audio
@@ -779,65 +690,22 @@ F: tests/ac97-test.c
 F: tests/es1370-test.c
 F: tests/intel-hda-test.c

-Block layer core
+Block
 M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
 F: block*
 F: block/
 F: hw/block/
-F: include/block/
 F: qemu-img*
 F: qemu-io*
-F: tests/qemu-iotests/
 T: git git://repo.or.cz/qemu/kevin.git block
-
-Block I/O path
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: async.c
-F: aio-*.c
-F: block/io.c
-F: migration/block*
 T: git git://github.com/stefanha/qemu.git block

-Block Jobs
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: blockjob.c
-F: include/block/blockjob.h
-F: block/backup.c
-F: block/commit.c
-F: block/stream.h
-F: block/mirror.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
-Block QAPI, monitor, command line
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
-F: blockdev.c
-F: block/qapi.c
-F: qapi/block*.json
-T: git git://repo.or.cz/qemu/armbru.git block-next
-
 Character Devices
-M: Paolo Bonzini <pbonzini@redhat.com>
+M: Anthony Liguori <aliguori@amazon.com>
 S: Maintained
 F: qemu-char.c
-F: backends/msmouse.c
-F: backends/testdev.c
-
-Character Devices (Braille)
-M: Samuel Thibault <samuel.thibault@ens-lyon.org>
-S: Maintained
-F: backends/baum.c
-
-Coverity model
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
-F: scripts/coverity-model.c

 CPU
 M: Andreas Färber <afaerber@suse.de>
@@ -859,7 +727,7 @@ S: Maintained
 F: device_tree.[ch]

 GDB stub
-L: qemu-devel@nongnu.org
+M: qemu-devel@nongnu.org
 S: Odd Fixes
 F: gdbstub*
 F: gdb-xml/
@@ -883,6 +751,7 @@ F: audio/spiceaudio.c
 F: hw/display/qxl*

 Graphics
+M: Anthony Liguori <aliguori@amazon.com>
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Odd Fixes
 F: ui/
@@ -894,11 +763,8 @@ S: Odd Fixes
 F: ui/cocoa.m

 Main loop
-M: Paolo Bonzini <pbonzini@redhat.com>
-S: Maintained
-F: cpus.c
-F: main-loop.c
-F: qemu-timer.c
+M: Anthony Liguori <aliguori@amazon.com>
+S: Supported
 F: vl.c

 Human Monitor (HMP)
@@ -910,8 +776,8 @@ F: hmp-commands.hx
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 Network device layer
+M: Anthony Liguori <aliguori@amazon.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
-M: Jason Wang <jasowang@redhat.com>
 S: Maintained
 F: net/
 T: git git://github.com/stefanha/qemu.git net
@@ -932,43 +798,23 @@ F: nbd.*
 F: qemu-nbd.c
 T: git git://github.com/bonzini/qemu.git nbd-next

-NUMA
-M: Eduardo Habkost <ehabkost@redhat.com>
-S: Maintained
-F: numa.c
-F: include/sysemu/numa.h
-K: numa|NUMA
-K: srat|SRAT
-T: git git://github.com/ehabkost/qemu.git numa
-
 QAPI
-M: Markus Armbruster <armbru@redhat.com>
+M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
-S: Supported
+S: Maintained
 F: qapi/
-F: tests/qapi-schema/
-T: git git://repo.or.cz/qemu/armbru.git qapi-next
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 QAPI Schema
 M: Eric Blake <eblake@redhat.com>
+M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Markus Armbruster <armbru@redhat.com>
 S: Supported
 F: qapi-schema.json
-T: git git://repo.or.cz/qemu/armbru.git qapi-next
-
-QObject
-M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Maintained
-F: qobject/
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

-QEMU Guest Agent
-M: Michael Roth <mdroth@linux.vnet.ibm.com>
-S: Maintained
-F: qga/
-T: git git://github.com/mdroth/qemu.git qga
-
 QOM
+M: Anthony Liguori <aliguori@amazon.com>
 M: Andreas Färber <afaerber@suse.de>
 S: Supported
 T: git git://github.com/afaerber/qemu-cpu.git qom-next
@@ -979,14 +825,13 @@ X: qom/cpu.c
 F: tests/qom-test.c

 QMP
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
+M: Luiz Capitulino <lcapitulino@redhat.com>
+S: Maintained
 F: qmp.c
 F: monitor.c
 F: qmp-commands.hx
-F: docs/qmp/
-F: scripts/qmp/
-T: git git://repo.or.cz/qemu/armbru.git qapi-next
+F: QMP/
+T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 SLIRP
 M: Jan Kiszka <jan.kiszka@siemens.com>
@@ -1008,17 +853,6 @@ M: Blue Swirl <blauwirbel@gmail.com>
 S: Odd Fixes
 F: scripts/checkpatch.pl

-Migration
-M: Juan Quintela <quintela@redhat.com>
-M: Amit Shah <amit.shah@redhat.com>
-S: Maintained
-F: include/migration/
-F: migration/
-F: savevm.c
-F: arch_init.c
-F: scripts/vmstate-static-checker.py
-F: tests/vmstate-static-checker-data/
-
 Seccomp
 M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
 S: Supported
@@ -1027,12 +861,6 @@ F: include/sysemu/seccomp.h

 Usermode Emulation
 ------------------
-Overall
-M: Riku Voipio <riku.voipio@iki.fi>
-S: Maintained
-F: thunk.c
-F: user-exec.c
-
 BSD user
 M: Blue Swirl <blauwirbel@gmail.com>
 S: Maintained
@@ -1046,6 +874,7 @@ F: linux-user/
 Tiny Code Generator (TCG)
 -------------------------
 Common code
+M: qemu-devel@nongnu.org
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: tcg/
@@ -1062,7 +891,7 @@ S: Maintained
 F: tcg/arm/

 i386 target
-L: qemu-devel@nongnu.org
+M: qemu-devel@nongnu.org
 S: Maintained
 F: tcg/i386/

@@ -1130,38 +959,28 @@ Block drivers
 -------------
 VMDK
 M: Fam Zheng <famz@redhat.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: block/vmdk.c

 RBD
 M: Josh Durgin <josh.durgin@inktank.com>
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: block/rbd.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block

 Sheepdog
-M: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
+M: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
 M: Liu Yuan <namei.unix@gmail.com>
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
 L: sheepdog@lists.wpkg.org
 S: Supported
 F: block/sheepdog.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block

 VHDX
 M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: block/vhdx*
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block

 VDI
 M: Stefan Weil <sw@weilnetz.de>
-L: qemu-block@nongnu.org
 S: Maintained
 F: block/vdi.c

@@ -1169,144 +988,15 @@ iSCSI
 M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
 M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Lieven <pl@kamp.de>
-L: qemu-block@nongnu.org
 S: Supported
 F: block/iscsi.c

 NFS
-M: Jeff Cody <jcody@redhat.com>
 M: Peter Lieven <pl@kamp.de>
-L: qemu-block@nongnu.org
 S: Maintained
 F: block/nfs.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block

 SSH
 M: Richard W.M. Jones <rjones@redhat.com>
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
 S: Supported
 F: block/ssh.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
-ARCHIPELAGO
-M: Chrysostomos Nanakos <chris@include.gr>
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
-S: Maintained
-F: block/archipelago.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
-CURL
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/curl.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
-GLUSTER
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/gluster.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
-Null Block Driver
-M: Fam Zheng <famz@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/null.c
-
-Bootdevice
-M: Gonglei <arei.gonglei@huawei.com>
-S: Maintained
-F: bootdevice.c
-
-Quorum
-M: Alberto Garcia <berto@igalia.com>
-S: Supported
-F: block/quorum.c
-L: qemu-block@nongnu.org
-
-blkverify
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/blkverify.c
-
-bochs
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/bochs.c
-
-cloop
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/cloop.c
-
-dmg
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/dmg.c
-
-parallels
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/parallels.c
-
-qed
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/qed.c
-
-raw
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/linux-aio.c
-F: block/raw-aio.h
-F: block/raw-posix.c
-F: block/raw-win32.c
-F: block/raw_bsd.c
-F: block/win32-aio.c
-
-qcow2
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/qcow2*
-
-qcow
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/qcow.c
-
-blkdebug
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/blkdebug.c
-
-vpc
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/vpc.c
-
-vvfat
-M: Kevin Wolf <kwolf@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: block/vvfat.c
-
-Image format fuzzer
-M: Stefan Hajnoczi <stefanha@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: tests/image-fuzzer/
--- a/Makefile
+++ b/Makefile
@@ -57,12 +57,6 @@ GENERATED_HEADERS += trace/generated-tracers-dtrace.h
 endif
 GENERATED_SOURCES += trace/generated-tracers.c

-GENERATED_HEADERS += trace/generated-tcg-tracers.h
-
-GENERATED_HEADERS += trace/generated-helpers-wrappers.h
-GENERATED_HEADERS += trace/generated-helpers.h
-GENERATED_SOURCES += trace/generated-helpers.c
-
 ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
 GENERATED_HEADERS += trace/generated-ust-provider.h
 GENERATED_SOURCES += trace/generated-ust.c
@@ -84,9 +78,6 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
 DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qmp-commands.txt
-ifdef CONFIG_LINUX
-DOCS+=kvm_stat.1
-endif
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -112,9 +103,8 @@ endif
 -include $(SUBDIR_DEVICES_MAK_DEP)

 %/config-devices.mak: default-configs/%.mak
-	$(call quiet-command, \
-            $(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $< $*-config-devices.mak.d $@ > $@.tmp, "  GEN   $@.tmp")
-	$(call quiet-command, if test -f $@; then \
+	$(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $@ $<, "  GEN   $@")
+	@if test -f $@; then \
 	  if cmp -s $@.old $@; then \
 	    mv $@.tmp $@; \
 	    cp -p $@ $@.old; \
@@ -130,7 +120,7 @@ endif
 	 else \
 	  mv $@.tmp $@; \
 	  cp -p $@ $@.old; \
-	 fi, "  GEN  $@");
+	 fi

 defconfig:
 	rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK)
@@ -201,9 +191,9 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"  RC    version.o")
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
+$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"  RC    version.lo")

 Makefile: $(version-obj-y) $(version-lobj-y)
@@ -212,7 +202,7 @@ Makefile: $(version-obj-y) $(version-lobj-y)
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y)
+libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o qapi-event.o

 block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
 util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
@@ -243,17 +233,17 @@ qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
+		$(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
 		"  GEN   $@")
 qga/qapi-generated/qga-qapi-visit.c qga/qapi-generated/qga-qapi-visit.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
+		$(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
 		"  GEN   $@")
 qga/qapi-generated/qga-qmp-commands.h qga/qapi-generated/qga-qmp-marshal.c :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
+		$(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
 		"  GEN   $@")

 qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
@@ -263,22 +253,22 @@ qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
 qapi-types.c qapi-types.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
-		$(gen-out-type) -o "." -b $<, \
+		$(gen-out-type) -o "." -b -i $<, \
 		"  GEN   $@")
 qapi-visit.c qapi-visit.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
-		$(gen-out-type) -o "." -b $<, \
+		$(gen-out-type) -o "." -b -i $<, \
 		"  GEN   $@")
 qapi-event.c qapi-event.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-event.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-event.py \
-		$(gen-out-type) -o "." $<, \
+		$(gen-out-type) -o "." -b -i $<, \
 		"  GEN   $@")
 qmp-commands.h qmp-marshal.c :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
-		$(gen-out-type) -o "." -m $<, \
+		$(gen-out-type) -o "." -m -i $<, \
 		"  GEN   $@")

 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
@@ -296,7 +286,6 @@ clean:
 	rm -f fsdev/*.pod
 	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
-	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
 	@# May not be present in GENERATED_HEADERS
 	rm -f trace/generated-tracers-dtrace.dtrace*
 	rm -f trace/generated-tracers-dtrace.h*
@@ -318,8 +307,8 @@ qemu-%.tar.bz2:

 distclean: clean
 	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
-	rm -f config-all-devices.mak config-all-disas.mak config.status
-	rm -f po/*.mo tests/qemu-iotests/common.env
+	rm -f config-all-devices.mak config-all-disas.mak
+	rm -f po/*.mo
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
 	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
@@ -332,8 +321,8 @@ distclean: clean
 	rm -rf $$d || exit 1 ; \
        done
 	rm -Rf .sdk
-	if test -f pixman/config.log; then $(MAKE) -C pixman distclean; fi
-	if test -f dtc/version_gen.h; then $(MAKE) $(DTC_MAKE_ARGS) clean; fi
+	if test -f pixman/config.log; then make -C pixman distclean; fi
+	if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi

 KEYMAPS=da     en-gb  et  fr     fr-ch  is  lt  modifiers  no  pt-br  sv \
 ar      de     en-us  fi  fr-be  hr     it  lv  nl         pl  ru     th \
@@ -389,8 +378,13 @@ ifneq (,$(findstring qemu-ga,$(TOOLS)))
 endif
 endif

+install-confdir:
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"

-install: all $(if $(BUILD_DOCS),install-doc) \
+install-sysconfig: install-datadir install-confdir
+	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
+
+install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
 install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(call install-prog,$(TOOLS),$(DESTDIR)$(bindir))
@@ -418,7 +412,6 @@ endif
 	set -e; for x in $(KEYMAPS); do \
 		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
 	done
-	$(INSTALL_DATA) $(SRC_PATH)/trace-events "$(DESTDIR)$(qemu_datadir)/trace-events"
 	for d in $(TARGET_DIRS); do \
 	$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
        done
@@ -437,22 +430,6 @@ cscope:
 	find "$(SRC_PATH)" -name "*.[chsS]" -print | sed 's,^\./,,' > ./cscope.files
 	cscope -b

-# opengl shader programs
-ui/shader/%-vert.h: $(SRC_PATH)/ui/shader/%.vert $(SRC_PATH)/scripts/shaderinclude.pl
-	@mkdir -p $(dir $@)
-	$(call quiet-command,\
-		perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
-		"  VERT  $@")
-
-ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclude.pl
-	@mkdir -p $(dir $@)
-	$(call quiet-command,\
-		perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
-		"  FRAG  $@")
-
-ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
-	ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h
-
 # documentation
 MAKEINFO=makeinfo
 MAKEINFOFLAGS=--no-headers --no-split --number-sections
@@ -506,12 +483,6 @@ qemu-nbd.8: qemu-nbd.texi
 	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
 	  "  GEN   $@")

-kvm_stat.1: scripts/kvm/kvm_stat.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< kvm_stat.pod && \
-	  $(POD2MAN) --section=1 --center=" " --release=" " kvm_stat.pod > $@, \
-	  "  GEN   $@")
-
 dvi: qemu-doc.dvi qemu-tech.dvi
 html: qemu-doc.html qemu-tech.html
 info: qemu-doc.info qemu-tech.info
@@ -544,7 +515,7 @@ installer: $(INSTALLER)
 INSTDIR=/tmp/qemu-nsis

 $(INSTALLER): $(SRC_PATH)/qemu.nsi
-	$(MAKE) install prefix=${INSTDIR}
+	make install prefix=${INSTDIR}
 ifdef SIGNCODE
 	(cd ${INSTDIR}; \
         for i in *.exe; do \
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,7 +1,7 @@
 #######################################################################
 # Common libraries for tools and emulators
 stub-obj-y = stubs/
-util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
+util-obj-y = util/ qobject/ qapi/ trace/

 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img
@@ -12,6 +12,7 @@ block-obj-y += main-loop.o iohandler.o qemu-timer.o
 block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
+block-obj-y += qapi-types.o qapi-visit.o qapi-event.o
 block-obj-y += qemu-io-cmds.o

 block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
@@ -48,16 +49,20 @@ common-obj-$(CONFIG_POSIX) += os-posix.o

 common-obj-$(CONFIG_LINUX) += fsdev/

-common-obj-y += migration/
+common-obj-y += migration.o migration-tcp.o
+common-obj-y += vmstate.o
+common-obj-y += qemu-file.o
+common-obj-$(CONFIG_RDMA) += migration-rdma.o
 common-obj-y += qemu-char.o #aio.o
-common-obj-y += page_cache.o
-common-obj-y += qjson.o
+common-obj-y += block-migration.o
+common-obj-y += page_cache.o xbzrle.o
+
+common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

 common-obj-y += audio/
 common-obj-y += hw/
-common-obj-y += accel.o

 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
@@ -76,8 +81,6 @@ common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o

 common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)

-common-obj-$(CONFIG_FDT) += device_tree.o
-
 ######################################################################
 # qapi

@@ -85,6 +88,11 @@ common-obj-y += qmp-marshal.o
 common-obj-y += qmp.o hmp.o
 endif

+######################################################################
+# some qapi visitors are used by both system and user emulation:
+
+common-obj-y += qapi-visit.o qapi-types.o
+
 #######################################################################
 # Target-independent parts used in system and user emulation
 common-obj-y += qemu-log.o
@@ -98,15 +106,10 @@ common-obj-y += disas/
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
 version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo

-######################################################################
-# tracing
-util-obj-y +=  trace/
-target-obj-y += trace/
-
 ######################################################################
 # guest agent

 # FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
 # by libqemuutil.a.  These should be moved to a separate .json schema.
-qga-obj-y = qga/
+qga-obj-y = qga/ qapi-types.o qapi-visit.o
 qga-vss-dll-obj-y = qga/
--- a/Makefile.target
+++ b/Makefile.target
@@ -1,7 +1,5 @@
 # -*- Mode: makefile -*-

-BUILD_DIR?=$(CURDIR)/..
-
 include ../config-host.mak
 include config-target.mak
 include config-devices.mak
@@ -40,7 +38,7 @@ config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

 ifdef CONFIG_TRACE_SYSTEMTAP
-stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp $(QEMU_PROG)-simpletrace.stp
+stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp

 ifdef CONFIG_USER_ONLY
 TARGET_TYPE=user
@@ -66,13 +64,6 @@ $(QEMU_PROG).stp: $(SRC_PATH)/trace-events
 		--target-type=$(TARGET_TYPE) \
 		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG).stp")

-$(QEMU_PROG)-simpletrace.stp: $(SRC_PATH)/trace-events
-	$(call quiet-command,$(TRACETOOL) \
-		--format=simpletrace-stap \
-		--backends=$(TRACE_BACKENDS) \
-		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
-		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG)-simpletrace.stp")
-
 else
 stap:
 endif
@@ -85,7 +76,7 @@ all: $(PROGS) stap
 #########################################################
 # cpu emulator library
 obj-y = exec.o translate-all.o cpu-exec.o
-obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
+obj-y += tcg/tcg.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tci.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
@@ -129,13 +120,14 @@ endif #CONFIG_BSD_USER
 # System emulator target
 ifdef CONFIG_SOFTMMU
 obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
-obj-y += qtest.o bootdevice.o
+obj-y += qtest.o
 obj-y += hw/
+obj-$(CONFIG_FDT) += device_tree.o
 obj-$(CONFIG_KVM) += kvm-all.o
 obj-y += memory.o savevm.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
-LIBS := $(libs_softmmu) $(LIBS)
+LIBS+=$(libs_softmmu)

 # xen support
 obj-$(CONFIG_XEN) += xen-common.o
@@ -160,31 +152,20 @@ endif # CONFIG_SOFTMMU
 dummy := $(call unnest-vars,,obj-y)
 all-obj-y := $(obj-y)

-target-obj-y :=
 block-obj-y :=
 common-obj-y :=
 include $(SRC_PATH)/Makefile.objs
-dummy := $(call unnest-vars,,target-obj-y)
-target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
               common-obj-y \
               common-obj-m)
-target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
-all-obj-y += $(target-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)

-$(QEMU_PROG_BUILD): config-devices.mak
-
 # build either PROG or PROGW
 $(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
-	$(call LINK, $(filter-out %.mak, $^))
-ifdef CONFIG_DARWIN
-	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"  REZ   $(TARGET_DIR)$@")
-	$(call quiet-command,SetFile -a C $@,"  SETFILE $(TARGET_DIR)$@")
-endif
+	$(call LINK,$^)

 gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 	$(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"  GEN   $(TARGET_DIR)$@")
@@ -210,7 +191,6 @@ endif
 ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
 	$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
-	$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
 endif

 GENERATED_HEADERS += config-target.h
--- a/2
+++ b/2
@@ -1 +1 @@
-2.3.50
+2.1.3
--- a/accel.c
+++ b/accel.c
@@ -1,157 +0,0 @@
-/*
- * QEMU System Emulator, accelerator interfaces
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "sysemu/accel.h"
-#include "hw/boards.h"
-#include "qemu-common.h"
-#include "sysemu/arch_init.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/kvm.h"
-#include "sysemu/qtest.h"
-#include "hw/xen/xen.h"
-#include "qom/object.h"
-#include "hw/boards.h"
-
-int tcg_tb_size;
-static bool tcg_allowed = true;
-
-static int tcg_init(MachineState *ms)
-{
-    tcg_exec_init(tcg_tb_size * 1024 * 1024);
-    return 0;
-}
-
-static const TypeInfo accel_type = {
-    .name = TYPE_ACCEL,
-    .parent = TYPE_OBJECT,
-    .class_size = sizeof(AccelClass),
-    .instance_size = sizeof(AccelState),
-};
-
-/* Lookup AccelClass from opt_name. Returns NULL if not found */
-static AccelClass *accel_find(const char *opt_name)
-{
-    char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name);
-    AccelClass *ac = ACCEL_CLASS(object_class_by_name(class_name));
-    g_free(class_name);
-    return ac;
-}
-
-static int accel_init_machine(AccelClass *acc, MachineState *ms)
-{
-    ObjectClass *oc = OBJECT_CLASS(acc);
-    const char *cname = object_class_get_name(oc);
-    AccelState *accel = ACCEL(object_new(cname));
-    int ret;
-    ms->accelerator = accel;
-    *(acc->allowed) = true;
-    ret = acc->init_machine(ms);
-    if (ret < 0) {
-        ms->accelerator = NULL;
-        *(acc->allowed) = false;
-        object_unref(OBJECT(accel));
-    }
-    return ret;
-}
-
-int configure_accelerator(MachineState *ms)
-{
-    const char *p;
-    char buf[10];
-    int ret;
-    bool accel_initialised = false;
-    bool init_failed = false;
-    AccelClass *acc = NULL;
-
-    p = qemu_opt_get(qemu_get_machine_opts(), "accel");
-    if (p == NULL) {
-        /* Use the default "accelerator", tcg */
-        p = "tcg";
-    }
-
-    while (!accel_initialised && *p != '\0') {
-        if (*p == ':') {
-            p++;
-        }
-        p = get_opt_name(buf, sizeof(buf), p, ':');
-        acc = accel_find(buf);
-        if (!acc) {
-            fprintf(stderr, "\"%s\" accelerator not found.\n", buf);
-            continue;
-        }
-        if (acc->available && !acc->available()) {
-            printf("%s not supported for this target\n",
-                   acc->name);
-            continue;
-        }
-        ret = accel_init_machine(acc, ms);
-        if (ret < 0) {
-            init_failed = true;
-            fprintf(stderr, "failed to initialize %s: %s\n",
-                    acc->name,
-                    strerror(-ret));
-        } else {
-            accel_initialised = true;
-        }
-    }
-
-    if (!accel_initialised) {
-        if (!init_failed) {
-            fprintf(stderr, "No accelerator found!\n");
-        }
-        exit(1);
-    }
-
-    if (init_failed) {
-        fprintf(stderr, "Back to %s accelerator.\n", acc->name);
-    }
-
-    return !accel_initialised;
-}
-
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "tcg";
-    ac->init_machine = tcg_init;
-    ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
-    .name = TYPE_TCG_ACCEL,
-    .parent = TYPE_ACCEL,
-    .class_init = tcg_accel_class_init,
-};
-
-static void register_accel_types(void)
-{
-    type_register_static(&accel_type);
-    type_register_static(&tcg_accel_type);
-}
-
-type_init(register_accel_types);
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -24,6 +24,7 @@ struct AioHandler
    IOHandler *io_read;
    IOHandler *io_write;
    int deleted;
+    int pollfds_idx;
    void *opaque;
    QLIST_ENTRY(AioHandler) node;
 };
@@ -72,7 +73,7 @@ void aio_set_fd_handler(AioContext *ctx,
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
+            node = g_malloc0(sizeof(AioHandler));
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

@@ -82,6 +83,7 @@ void aio_set_fd_handler(AioContext *ctx,
        node->io_read = io_read;
        node->io_write = io_write;
        node->opaque = opaque;
+        node->pollfds_idx = -1;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -98,11 +100,6 @@ void aio_set_event_notifier(AioContext *ctx,
                       (IOHandler *)io_read, NULL, notifier);
 }

-bool aio_prepare(AioContext *ctx)
-{
-    return false;
-}
-
 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
@@ -122,20 +119,11 @@ bool aio_pending(AioContext *ctx)
    return false;
 }

-bool aio_dispatch(AioContext *ctx)
+static bool aio_dispatch(AioContext *ctx)
 {
    AioHandler *node;
    bool progress = false;

-    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        progress = true;
-    }
-
    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
@@ -184,69 +172,34 @@ bool aio_dispatch(AioContext *ctx)
    return progress;
 }

-/* These thread-local variables are used only in a small part of aio_poll
- * around the call to the poll() system call.  In particular they are not
- * used while aio_poll is performing callbacks, which makes it much easier
- * to think about reentrancy!
- *
- * Stack-allocated arrays would be perfect but they have size limitations;
- * heap allocation is expensive enough that we want to reuse arrays across
- * calls to aio_poll().  And because poll() has to be called without holding
- * any lock, the arrays cannot be stored in AioContext.  Thread-local data
- * has none of the disadvantages of these three options.
- */
-static __thread GPollFD *pollfds;
-static __thread AioHandler **nodes;
-static __thread unsigned npfd, nalloc;
-static __thread Notifier pollfds_cleanup_notifier;
-
-static void pollfds_cleanup(Notifier *n, void *unused)
-{
-    g_assert(npfd == 0);
-    g_free(pollfds);
-    g_free(nodes);
-    nalloc = 0;
-}
-
-static void add_pollfd(AioHandler *node)
-{
-    if (npfd == nalloc) {
-        if (nalloc == 0) {
-            pollfds_cleanup_notifier.notify = pollfds_cleanup;
-            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
-            nalloc = 8;
-        } else {
-            g_assert(nalloc <= INT_MAX);
-            nalloc *= 2;
-        }
-        pollfds = g_renew(GPollFD, pollfds, nalloc);
-        nodes = g_renew(AioHandler *, nodes, nalloc);
-    }
-    nodes[npfd] = node;
-    pollfds[npfd] = (GPollFD) {
-        .fd = node->pfd.fd,
-        .events = node->pfd.events,
-    };
-    npfd++;
-}
-
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
    bool was_dispatching;
-    int i, ret;
+    int ret;
    bool progress;
-    int64_t timeout;

-    aio_context_acquire(ctx);
    was_dispatching = ctx->dispatching;
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns.
+     * be re-evaluated before the next blocking poll().  This happens
+     * in two cases:
+     *
+     * 1) when aio_poll is called with blocking == false
+     *
+     * 2) when we are called after poll().  If we are called before
+     *    poll(), bottom halves will not be re-evaluated and we need
+     *    aio_notify() if blocking == true.
+     *
+     * The first aio_dispatch() only does something when AioContext is
+     * running as a GSource, and in that case aio_poll is used only
+     * with blocking == false, so this optimization is already quite
+     * effective.  However, the code is ugly and should be restructured
+     * to have a single aio_dispatch() call.  To do this, we need to
+     * reorganize aio_poll into a prepare/poll/dispatch model like
+     * glib's.
     *
     * If we're in a nested event loop, ctx->dispatching might be true.
     * In that case we can restore it just before returning, but we
@@ -254,46 +207,68 @@ bool aio_poll(AioContext *ctx, bool blocking)
     */
    aio_set_dispatching(ctx, !blocking);

+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /* Re-evaluate condition (1) above.  */
+    aio_set_dispatching(ctx, !blocking);
+    if (aio_dispatch(ctx)) {
+        progress = true;
+    }
+
+    if (progress && !blocking) {
+        goto out;
+    }
+
    ctx->walking_handlers++;

-    assert(npfd == 0);
+    g_array_set_size(ctx->pollfds, 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        node->pollfds_idx = -1;
        if (!node->deleted && node->pfd.events) {
-            add_pollfd(node);
+            GPollFD pfd = {
+                .fd = node->pfd.fd,
+                .events = node->pfd.events,
+            };
+            node->pollfds_idx = ctx->pollfds->len;
+            g_array_append_val(ctx->pollfds, pfd);
        }
    }

-    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+    ctx->walking_handlers--;

    /* wait until next event */
-    if (timeout) {
-        aio_context_release(ctx);
-    }
-    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
-    if (timeout) {
-        aio_context_acquire(ctx);
-    }
+    ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
+                         ctx->pollfds->len,
+                         blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
-        for (i = 0; i < npfd; i++) {
-            nodes[i]->pfd.revents = pollfds[i].revents;
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            if (node->pollfds_idx != -1) {
+                GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
+                                              node->pollfds_idx);
+                node->pfd.revents = pfd->revents;
+            }
        }
    }

-    npfd = 0;
-    ctx->walking_handlers--;
-
    /* Run dispatch even if there were no readable fds to run timers */
    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

+out:
    aio_set_dispatching(ctx, was_dispatching);
-    aio_context_release(ctx);
-
    return progress;
 }
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -22,80 +22,12 @@

 struct AioHandler {
    EventNotifier *e;
-    IOHandler *io_read;
-    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
-    void *opaque;
    QLIST_ENTRY(AioHandler) node;
 };

-void aio_set_fd_handler(AioContext *ctx,
-                        int fd,
-                        IOHandler *io_read,
-                        IOHandler *io_write,
-                        void *opaque)
-{
-    /* fd is a SOCKET in our case */
-    AioHandler *node;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.fd == fd && !node->deleted) {
-            break;
-        }
-    }
-
-    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
-        }
-    } else {
-        HANDLE event;
-
-        if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
-            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
-        }
-
-        node->pfd.events = 0;
-        if (node->io_read) {
-            node->pfd.events |= G_IO_IN;
-        }
-        if (node->io_write) {
-            node->pfd.events |= G_IO_OUT;
-        }
-
-        node->e = &ctx->notifier;
-
-        /* Update handler with latest information */
-        node->opaque = opaque;
-        node->io_read = io_read;
-        node->io_write = io_write;
-
-        event = event_notifier_get_handle(&ctx->notifier);
-        WSAEventSelect(node->pfd.fd, event,
-                       FD_READ | FD_ACCEPT | FD_CLOSE |
-                       FD_CONNECT | FD_WRITE | FD_OOB);
-    }
-
-    aio_notify(ctx);
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            EventNotifierHandler *io_notify)
@@ -129,7 +61,7 @@ void aio_set_event_notifier(AioContext *ctx,
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
+            node = g_malloc0(sizeof(AioHandler));
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
@@ -144,43 +76,6 @@ void aio_set_event_notifier(AioContext *ctx,
    aio_notify(ctx);
 }

-bool aio_prepare(AioContext *ctx)
-{
-    static struct timeval tv0;
-    AioHandler *node;
-    bool have_select_revents = false;
-    fd_set rfds, wfds;
-
-    /* fill fd sets */
-    FD_ZERO(&rfds);
-    FD_ZERO(&wfds);
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->io_read) {
-            FD_SET ((SOCKET)node->pfd.fd, &rfds);
-        }
-        if (node->io_write) {
-            FD_SET ((SOCKET)node->pfd.fd, &wfds);
-        }
-    }
-
-    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-            node->pfd.revents = 0;
-            if (FD_ISSET(node->pfd.fd, &rfds)) {
-                node->pfd.revents |= G_IO_IN;
-                have_select_revents = true;
-            }
-
-            if (FD_ISSET(node->pfd.fd, &wfds)) {
-                node->pfd.revents |= G_IO_OUT;
-                have_select_revents = true;
-            }
-        }
-    }
-
-    return have_select_revents;
-}
-
 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
@@ -189,37 +84,47 @@ bool aio_pending(AioContext *ctx)
        if (node->pfd.revents && node->io_notify) {
            return true;
        }
-
-        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            return true;
-        }
-        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            return true;
-        }
    }

    return false;
 }

-static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
+bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    bool progress = false;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool progress;
+    int count;
+    int timeout;
+
+    progress = false;

    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /* Run timers */
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
-        int revents = node->pfd.revents;

        ctx->walking_handlers++;

-        if (!node->deleted &&
-            (revents || event_notifier_get_handle(node->e) == event) &&
-            node->io_notify) {
+        if (node->pfd.revents && node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

@@ -229,28 +134,6 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }

-        if (!node->deleted &&
-            (node->io_read || node->io_write)) {
-            node->pfd.revents = 0;
-            if ((revents & G_IO_IN) && node->io_read) {
-                node->io_read(node->opaque);
-                progress = true;
-            }
-            if ((revents & G_IO_OUT) && node->io_write) {
-                node->io_write(node->opaque);
-                progress = true;
-            }
-
-            /* if the next select() will return an event, we have progressed */
-            if (event == event_notifier_get_handle(&ctx->notifier)) {
-                WSANETWORKEVENTS ev;
-                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
-                if (ev.lNetworkEvents) {
-                    progress = true;
-                }
-            }
-        }
-
        tmp = node;
        node = QLIST_NEXT(node, node);

@@ -262,48 +145,10 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
        }
    }

-    return progress;
-}
-
-bool aio_dispatch(AioContext *ctx)
-{
-    bool progress;
-
-    progress = aio_bh_poll(ctx);
-    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
-    progress |= timerlistgroup_run_timers(&ctx->tlg);
-    return progress;
-}
-
-bool aio_poll(AioContext *ctx, bool blocking)
-{
-    AioHandler *node;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    bool was_dispatching, progress, have_select_revents, first;
-    int count;
-    int timeout;
-
-    aio_context_acquire(ctx);
-    have_select_revents = aio_prepare(ctx);
-    if (have_select_revents) {
-        blocking = false;
+    if (progress && !blocking) {
+        return true;
    }

-    was_dispatching = ctx->dispatching;
-    progress = false;
-
-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns.
-     *
-     * If we're in a nested event loop, ctx->dispatching might be true.
-     * In that case we can restore it just before returning, but we
-     * have to clear it now.
-     */
-    aio_set_dispatching(ctx, !blocking);
-
    ctx->walking_handlers++;

    /* fill fd sets */
@@ -315,47 +160,64 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    ctx->walking_handlers--;
-    first = true;

    /* wait until next event */
    while (count > 0) {
-        HANDLE event;
        int ret;

-        timeout = blocking
-            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
-        if (timeout) {
-            aio_context_release(ctx);
-        }
+        timeout = blocking ?
+            qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
-        if (timeout) {
-            aio_context_acquire(ctx);
-        }
-        aio_set_dispatching(ctx, true);
-
-        if (first && aio_bh_poll(ctx)) {
-            progress = true;
-        }
-        first = false;

        /* if we have any signaled events, dispatch event */
-        event = NULL;
-        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
-            event = events[ret - WAIT_OBJECT_0];
-            events[ret - WAIT_OBJECT_0] = events[--count];
-        } else if (!have_select_revents) {
+        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
            break;
        }

-        have_select_revents = false;
        blocking = false;

-        progress |= aio_dispatch_handlers(ctx, event);
+        /* we have to walk very carefully in case
+         * aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;
+
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
+                node->io_notify) {
+                node->io_notify(node->e);
+
+                /* aio_notify() does not count as progress */
+                if (node->e != &ctx->notifier) {
+                    progress = true;
+                }
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+
+        /* Try again, but only call each handler once.  */
+        events[ret - WAIT_OBJECT_0] = events[--count];
    }

-    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    if (blocking) {
+        /* Run the timers a second time. We do this because otherwise aio_wait
+         * will not note progress - and will stop a drain early - if we have
+         * a timer that was not ready to run entering g_poll but is ready
+         * after g_poll. This will only do anything if a timer has expired.
+         */
+        progress |= timerlistgroup_run_timers(&ctx->tlg);
+    }

-    aio_set_dispatching(ctx, was_dispatching);
-    aio_context_release(ctx);
    return progress;
 }
--- a/arch_init.c
+++ b/arch_init.c
--- a/async.c
+++ b/async.c
@@ -44,12 +44,10 @@ struct QEMUBH {
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
    QEMUBH *bh;
-    bh = g_new(QEMUBH, 1);
-    *bh = (QEMUBH){
-        .ctx = ctx,
-        .cb = cb,
-        .opaque = opaque,
-    };
+    bh = g_malloc0(sizeof(QEMUBH));
+    bh->ctx = ctx;
+    bh->cb = cb;
+    bh->opaque = opaque;
    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
@@ -72,13 +70,12 @@ int aio_bh_poll(AioContext *ctx)
        /* Make sure that fetching bh happens before accessing its members */
        smp_read_barrier_depends();
        next = bh->next;
-        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
-         * implicit memory barrier ensures that the callback sees all writes
-         * done by the scheduling thread.  It also ensures that the scheduling
-         * thread sees the zero before bh->cb has run, and thus will call
-         * aio_notify again if necessary.
-         */
-        if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
+        if (!bh->deleted && bh->scheduled) {
+            bh->scheduled = 0;
+            /* Paired with write barrier in bh schedule to ensure reading for
+             * idle & callbacks coming after bh's scheduling.
+             */
+            smp_rmb();
            if (!bh->idle)
                ret = 1;
            bh->idle = 0;
@@ -109,28 +106,33 @@ int aio_bh_poll(AioContext *ctx)

 void qemu_bh_schedule_idle(QEMUBH *bh)
 {
+    if (bh->scheduled)
+        return;
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
-    atomic_mb_set(&bh->scheduled, 1);
+    smp_wmb();
+    bh->scheduled = 1;
 }

 void qemu_bh_schedule(QEMUBH *bh)
 {
    AioContext *ctx;

+    if (bh->scheduled)
+        return;
    ctx = bh->ctx;
    bh->idle = 0;
-    /* The memory barrier implicit in atomic_xchg makes sure that:
+    /* Make sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
-    if (atomic_xchg(&bh->scheduled, 1) == 0) {
-        aio_notify(ctx);
-    }
+    smp_mb();
+    bh->scheduled = 1;
+    aio_notify(ctx);
 }


@@ -150,48 +152,39 @@ void qemu_bh_delete(QEMUBH *bh)
    bh->deleted = 1;
 }

-int64_t
-aio_compute_timeout(AioContext *ctx)
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
 {
-    int64_t deadline;
-    int timeout = -1;
+    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;
+    int deadline;

+    /* We assume there is no timeout already supplied */
+    *timeout = -1;
    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (!bh->deleted && bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
-                timeout = 10000000;
+                *timeout = 10;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
-                return 0;
+                *timeout = 0;
+                return true;
            }
        }
    }

-    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
+    deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
    if (deadline == 0) {
-        return 0;
-    } else {
-        return qemu_soonest_timeout(timeout, deadline);
-    }
-}
-
-static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
-{
-    AioContext *ctx = (AioContext *) source;
-
-    /* We assume there is no timeout already supplied */
-    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
-
-    if (aio_prepare(ctx)) {
        *timeout = 0;
+        return true;
+    } else {
+        *timeout = qemu_soonest_timeout(*timeout, deadline);
    }

-    return *timeout == 0;
+    return false;
 }

 static gboolean
@@ -216,7 +209,7 @@ aio_ctx_dispatch(GSource     *source,
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
-    aio_dispatch(ctx);
+    aio_poll(ctx, false);
    return true;
 }

@@ -230,6 +223,7 @@ aio_ctx_finalize(GSource     *source)
    event_notifier_cleanup(&ctx->notifier);
    rfifolock_destroy(&ctx->lock);
    qemu_mutex_destroy(&ctx->bh_lock);
+    g_array_free(ctx->pollfds, TRUE);
    timerlistgroup_deinit(&ctx->tlg);
 }

@@ -280,24 +274,24 @@ static void aio_timerlist_notify(void *opaque)
    aio_notify(opaque);
 }

-AioContext *aio_context_new(Error **errp)
+static void aio_rfifolock_cb(void *opaque)
+{
+    /* Kick owner thread in case they are blocked in aio_poll() */
+    aio_notify(opaque);
+}
+
+AioContext *aio_context_new(void)
 {
-    int ret;
    AioContext *ctx;
    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
-    ret = event_notifier_init(&ctx->notifier, false);
-    if (ret < 0) {
-        g_source_destroy(&ctx->source);
-        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
-        return NULL;
-    }
-    g_source_set_can_recurse(&ctx->source, true);
-    aio_set_event_notifier(ctx, &ctx->notifier,
-                           (EventNotifierHandler *)
-                           event_notifier_test_and_clear);
+    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
    ctx->thread_pool = NULL;
    qemu_mutex_init(&ctx->bh_lock);
-    rfifolock_init(&ctx->lock, NULL, NULL);
+    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
+    event_notifier_init(&ctx->notifier, false);
+    aio_set_event_notifier(ctx, &ctx->notifier, 
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    return ctx;
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -1,7 +1,7 @@
 common-obj-y += rng.o rng-egd.o
 common-obj-$(CONFIG_POSIX) += rng-random.o

-common-obj-y += msmouse.o testdev.o
+common-obj-y += msmouse.o
 common-obj-$(CONFIG_BRLAPI) += baum.o
 baum.o-cflags := $(SDL_CFLAGS)

--- a/backends/baum.c
+++ b/backends/baum.c
@@ -629,7 +629,7 @@ fail_handle:

 static void register_types(void)
 {
-    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL);
+    register_char_driver_qapi("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL);
 }

 type_init(register_types);
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -43,7 +43,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
        return;
    }
    if (!fb->mem_path) {
-        error_setg(errp, "mem-path property not set");
+        error_setg(errp, "mem_path property not set");
        return;
    }
 #ifndef CONFIG_LINUX
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -27,7 +27,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)

    path = object_get_canonical_path_component(OBJECT(backend));
    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
-                           backend->size, errp);
+                           backend->size);
    g_free(path);
 }

--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -257,6 +257,15 @@ static void host_memory_backend_init(Object *obj)
                        host_memory_backend_set_policy, NULL, NULL, NULL);
 }

+static void host_memory_backend_finalize(Object *obj)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+
+    if (memory_region_size(&backend->mr)) {
+        memory_region_destroy(&backend->mr);
+    }
+}
+
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
@@ -335,26 +344,12 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
    }
 }

-static bool
-host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
-{
-    MemoryRegion *mr;
-
-    mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp);
-    if (memory_region_is_mapped(mr)) {
-        return false;
-    } else {
-        return true;
-    }
-}
-
 static void
 host_memory_backend_class_init(ObjectClass *oc, void *data)
 {
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
-    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
 }

 static const TypeInfo host_memory_backend_info = {
@@ -365,6 +360,7 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
+    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -79,7 +79,7 @@ CharDriverState *qemu_chr_open_msmouse(void)

 static void register_types(void)
 {
-    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL);
+    register_char_driver_qapi("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL);
 }

 type_init(register_types);
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -88,7 +88,11 @@ static char *rng_random_get_filename(Object *obj, Error **errp)
 {
    RndRandom *s = RNG_RANDOM(obj);

-    return g_strdup(s->filename);
+    if (s->filename) {
+        return g_strdup(s->filename);
+    }
+
+    return NULL;
 }

 static void rng_random_set_filename(Object *obj, const char *filename,
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -1,131 +0,0 @@
-/*
- * QEMU Char Device for testsuite control
- *
- * Copyright (c) 2014 Red Hat, Inc.
- *
- * Author: Paolo Bonzini <pbonzini@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "sysemu/char.h"
-
-#define BUF_SIZE 32
-
-typedef struct {
-    CharDriverState *chr;
-    uint8_t in_buf[32];
-    int in_buf_used;
-} TestdevCharState;
-
-/* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevCharState *testdev)
-{
-    const uint8_t *cur = testdev->in_buf;
-    int len = testdev->in_buf_used;
-    uint8_t c;
-    int arg;
-
-#define EAT(c) do { \
-    if (!len--) {   \
-        return 0;   \
-    }               \
-    c = *cur++;     \
-} while (0)
-
-    EAT(c);
-
-    while (isspace(c)) {
-        EAT(c);
-    }
-
-    arg = 0;
-    while (isdigit(c)) {
-        arg = arg * 10 + c - '0';
-        EAT(c);
-    }
-
-    while (isspace(c)) {
-        EAT(c);
-    }
-
-    switch (c) {
-    case 'q':
-        exit((arg << 1) | 1);
-        break;
-    default:
-        break;
-    }
-    return cur - testdev->in_buf;
-}
-
-/* The other end is writing some data.  Store it and try to interpret */
-static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    TestdevCharState *testdev = chr->opaque;
-    int tocopy, eaten, orig_len = len;
-
-    while (len) {
-        /* Complete our buffer as much as possible */
-        tocopy = MIN(len, BUF_SIZE - testdev->in_buf_used);
-
-        memcpy(testdev->in_buf + testdev->in_buf_used, buf, tocopy);
-        testdev->in_buf_used += tocopy;
-        buf += tocopy;
-        len -= tocopy;
-
-        /* Interpret it as much as possible */
-        while (testdev->in_buf_used > 0 &&
-               (eaten = testdev_eat_packet(testdev)) > 0) {
-            memmove(testdev->in_buf, testdev->in_buf + eaten,
-                    testdev->in_buf_used - eaten);
-            testdev->in_buf_used -= eaten;
-        }
-    }
-    return orig_len;
-}
-
-static void testdev_close(struct CharDriverState *chr)
-{
-    TestdevCharState *testdev = chr->opaque;
-
-    g_free(testdev);
-}
-
-CharDriverState *chr_testdev_init(void)
-{
-    TestdevCharState *testdev;
-    CharDriverState *chr;
-
-    testdev = g_malloc0(sizeof(TestdevCharState));
-    testdev->chr = chr = g_malloc0(sizeof(CharDriverState));
-
-    chr->opaque = testdev;
-    chr->chr_write = testdev_write;
-    chr->chr_close = testdev_close;
-
-    return chr;
-}
-
-static void register_types(void)
-{
-    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL);
-}
-
-type_init(register_types);
--- a/backends/tpm.c
+++ b/backends/tpm.c
@@ -36,7 +36,7 @@ void tpm_backend_destroy(TPMBackend *s)
 {
    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);

-    k->ops->destroy(s);
+    return k->ops->destroy(s);
 }

 int tpm_backend_init(TPMBackend *s, TPMState *state,
@@ -96,20 +96,6 @@ bool tpm_backend_get_tpm_established_flag(TPMBackend *s)
    return k->ops->get_tpm_established_flag(s);
 }

-int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty)
-{
-    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
-
-    return k->ops->reset_tpm_established_flag(s, locty);
-}
-
-TPMVersion tpm_backend_get_tpm_version(TPMBackend *s)
-{
-    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
-
-    return k->ops->get_tpm_version(s);
-}
-
 static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
 {
    TPMBackend *s = TPM_BACKEND(obj);
@@ -179,6 +165,17 @@ void tpm_backend_thread_end(TPMBackendThread *tbt)
    }
 }

+void tpm_backend_thread_tpm_reset(TPMBackendThread *tbt,
+                                  GFunc func, gpointer user_data)
+{
+    if (!tbt->pool) {
+        tpm_backend_thread_create(tbt, func, user_data);
+    } else {
+        g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_TPM_RESET,
+                           NULL);
+    }
+}
+
 static const TypeInfo tpm_backend_info = {
    .name = TYPE_TPM_BACKEND,
    .parent = TYPE_OBJECT,
--- a/balloon.c
+++ b/balloon.c
@@ -36,21 +36,6 @@ static QEMUBalloonEvent *balloon_event_fn;
 static QEMUBalloonStatus *balloon_stat_fn;
 static void *balloon_opaque;

-static bool have_balloon(Error **errp)
-{
-    if (kvm_enabled() && !kvm_has_sync_mmu()) {
-        error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
-                  "Using KVM without synchronous MMU, balloon unavailable");
-        return false;
-    }
-    if (!balloon_event_fn) {
-        error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
-                  "No balloon device has been activated");
-        return false;
-    }
-    return true;
-}
-
 int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
                             QEMUBalloonStatus *stat_func, void *opaque)
 {
@@ -58,6 +43,7 @@ int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
        /* We're already registered one balloon handler.  How many can
         * a guest really have?
         */
+        error_report("Another balloon device already registered");
        return -1;
    }
    balloon_event_fn = event_func;
@@ -76,30 +62,58 @@ void qemu_remove_balloon_handler(void *opaque)
    balloon_opaque = NULL;
 }

+static int qemu_balloon(ram_addr_t target)
+{
+    if (!balloon_event_fn) {
+        return 0;
+    }
+    trace_balloon_event(balloon_opaque, target);
+    balloon_event_fn(balloon_opaque, target);
+    return 1;
+}
+
+static int qemu_balloon_status(BalloonInfo *info)
+{
+    if (!balloon_stat_fn) {
+        return 0;
+    }
+    balloon_stat_fn(balloon_opaque, info);
+    return 1;
+}
+
 BalloonInfo *qmp_query_balloon(Error **errp)
 {
    BalloonInfo *info;

-    if (!have_balloon(errp)) {
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
        return NULL;
    }

    info = g_malloc0(sizeof(*info));
-    balloon_stat_fn(balloon_opaque, info);
+
+    if (qemu_balloon_status(info) == 0) {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
+        qapi_free_BalloonInfo(info);
+        return NULL;
+    }
+
    return info;
 }

-void qmp_balloon(int64_t target, Error **errp)
+void qmp_balloon(int64_t value, Error **errp)
 {
-    if (!have_balloon(errp)) {
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
        return;
    }

-    if (target <= 0) {
+    if (value <= 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
        return;
    }
-
-    trace_balloon_event(balloon_opaque, target);
-    balloon_event_fn(balloon_opaque, target);
+    
+    if (qemu_balloon(value) == 0) {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
+    }
 }
--- a/block-migration.c
+++ b/block-migration.c
@@ -14,16 +14,13 @@
 */

 #include "qemu-common.h"
-#include "block/block.h"
-#include "qemu/error-report.h"
-#include "qemu/main-loop.h"
+#include "block/block_int.h"
 #include "hw/hw.h"
 #include "qemu/queue.h"
 #include "qemu/timer.h"
 #include "migration/block.h"
 #include "migration/migration.h"
 #include "sysemu/blockdev.h"
-#include "sysemu/block-backend.h"
 #include <assert.h>

 #define BLOCK_SIZE                       (1 << 20)
@@ -73,7 +70,7 @@ typedef struct BlkMigBlock {
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
-    BlockAIOCB *aiocb;
+    BlockDriverAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
@@ -133,9 +130,9 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
                     | flags);

    /* device name */
-    len = strlen(bdrv_get_device_name(blk->bmds->bs));
+    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
-    qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
+    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    /* if a block is zero we need to flush here since the network
     * bandwidth is now a lot higher than the storage device bandwidth.
@@ -189,7 +186,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
 {
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

-    if (sector < bdrv_nb_sectors(bmds->bs)) {
+    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
@@ -226,7 +223,8 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

-    bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
@@ -286,7 +284,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
        nr_sectors = total_sectors - cur_sector;
    }

-    blk = g_new(BlkMigBlock, 1);
+    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
@@ -304,7 +302,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

-    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
+    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
@@ -320,7 +318,7 @@ static int set_dirty_tracking(void)

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
-                                                      NULL, NULL);
+                                                      NULL);
        if (!bmds->dirty_bitmap) {
            ret = -errno;
            goto fail;
@@ -346,31 +344,18 @@ static void unset_dirty_tracking(void)
    }
 }

-static void init_blk_migration(QEMUFile *f)
+static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
 {
-    BlockDriverState *bs;
    BlkMigDevState *bmds;
    int64_t sectors;

-    block_mig_state.submitted = 0;
-    block_mig_state.read_done = 0;
-    block_mig_state.transferred = 0;
-    block_mig_state.total_sector_sum = 0;
-    block_mig_state.prev_progress = -1;
-    block_mig_state.bulk_completed = 0;
-    block_mig_state.zero_blocks = migrate_zero_blocks();
-
-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-        if (bdrv_is_read_only(bs)) {
-            continue;
-        }
-
-        sectors = bdrv_nb_sectors(bs);
+    if (!bdrv_is_read_only(bs)) {
+        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

-        bmds = g_new0(BlkMigDevState, 1);
+        bmds = g_malloc0(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
@@ -385,15 +370,28 @@ static void init_blk_migration(QEMUFile *f)

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
-                    bdrv_get_device_name(bs));
+                    bs->device_name);
        } else {
-            DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
+            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
 }

+static void init_blk_migration(QEMUFile *f)
+{
+    block_mig_state.submitted = 0;
+    block_mig_state.read_done = 0;
+    block_mig_state.transferred = 0;
+    block_mig_state.total_sector_sum = 0;
+    block_mig_state.prev_progress = -1;
+    block_mig_state.bulk_completed = 0;
+    block_mig_state.zero_blocks = migrate_zero_blocks();
+
+    bdrv_iterate(init_blk_migration_it, NULL);
+}
+
 /* Called with no lock taken.  */

 static int blk_mig_save_bulked_block(QEMUFile *f)
@@ -468,7 +466,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
-            blk = g_new(BlkMigBlock, 1);
+            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
@@ -497,7 +495,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                g_free(blk);
            }

-            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
+            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -583,7 +581,7 @@ static int64_t get_remaining_dirty(void)
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
+        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
    }

    return dirty << BDRV_SECTOR_BITS;
@@ -783,7 +781,6 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
-    BlockBackend *blk;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
@@ -801,17 +798,16 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

-            blk = blk_by_name(device_name);
-            if (!blk) {
+            bs = bdrv_find(device_name);
+            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }
-            bs = blk_bs(blk);

            if (bs != bs_prev) {
                bs_prev = bs;
-                total_sectors = bdrv_nb_sectors(bs);
+                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,29 +1,28 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += block-backend.o snapshot.o qapi.o
+block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o

+ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
-block-obj-y += write-threshold.o
+endif

 common-obj-y += stream.o
 common-obj-y += commit.o
+common-obj-y += mirror.o
 common-obj-y += backup.o

 iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
@@ -36,8 +35,5 @@ gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
-archipelago.o-libs := $(ARCHIPELAGO_LIBS)
-block-obj-m        += dmg.o
-dmg.o-libs         := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
 linux-aio.o-libs   := -laio
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -1,63 +0,0 @@
-/*
- * QEMU System Emulator block accounting
- *
- * Copyright (c) 2011 Christoph Hellwig
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/accounting.h"
-#include "block/block_int.h"
-#include "qemu/timer.h"
-
-void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
-                      int64_t bytes, enum BlockAcctType type)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    cookie->bytes = bytes;
-    cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    cookie->type = type;
-}
-
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->nr_bytes[cookie->type] += cookie->bytes;
-    stats->nr_ops[cookie->type]++;
-    stats->total_time_ns[cookie->type] +=
-        qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns;
-}
-
-
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-                               unsigned int nb_sectors)
-{
-    if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
-        stats->wr_highest_sector = sector_num + nb_sectors - 1;
-    }
-}
-
-void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
-                      int num_requests)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-    stats->merged[type] += num_requests;
-}
--- a/block/archipelago.c
+++ b/block/archipelago.c
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,8 +37,6 @@ typedef struct CowRequest {
 typedef struct BackupBlockJob {
    BlockJob common;
    BlockDriverState *target;
-    /* bitmap for sync=dirty-bitmap */
-    BdrvDirtyBitmap *sync_bitmap;
    MirrorSyncMode sync_mode;
    RateLimit limit;
    BlockdevOnError on_source_error;
@@ -229,110 +227,9 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
    }
 }

-typedef struct {
-    int ret;
-} BackupCompleteData;
-
-static void backup_complete(BlockJob *job, void *opaque)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    BackupCompleteData *data = opaque;
-
-    bdrv_unref(s->target);
-
-    block_job_completed(job, data->ret);
-    g_free(data);
-}
-
-static bool coroutine_fn yield_and_check(BackupBlockJob *job)
-{
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    /* we need to yield so that bdrv_drain_all() returns.
-     * (without, VM does not reboot)
-     */
-    if (job->common.speed) {
-        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->sectors_read);
-        job->sectors_read = 0;
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-    } else {
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-    }
-
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    return false;
-}
-
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
-{
-    bool error_is_read;
-    int ret = 0;
-    int clusters_per_iter;
-    uint32_t granularity;
-    int64_t sector;
-    int64_t cluster;
-    int64_t end;
-    int64_t last_cluster = -1;
-    BlockDriverState *bs = job->common.bs;
-    HBitmapIter hbi;
-
-    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
-    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
-
-    /* Find the next dirty sector(s) */
-    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
-        cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
-
-        /* Fake progress updates for any clusters we skipped */
-        if (cluster != last_cluster + 1) {
-            job->common.offset += ((cluster - last_cluster - 1) *
-                                   BACKUP_CLUSTER_SIZE);
-        }
-
-        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
-            do {
-                if (yield_and_check(job)) {
-                    return ret;
-                }
-                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
-                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
-                if ((ret < 0) &&
-                    backup_error_action(job, error_is_read, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    return ret;
-                }
-            } while (ret < 0);
-        }
-
-        /* If the bitmap granularity is smaller than the backup granularity,
-         * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < BACKUP_CLUSTER_SIZE) {
-            bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
-        }
-
-        last_cluster = cluster - 1;
-    }
-
-    /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
-    if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
-    }
-
-    return ret;
-}
-
 static void coroutine_fn backup_run(void *opaque)
 {
    BackupBlockJob *job = opaque;
-    BackupCompleteData *data;
    BlockDriverState *bs = job->common.bs;
    BlockDriverState *target = job->target;
    BlockdevOnError on_target_error = job->on_target_error;
@@ -346,7 +243,8 @@ static void coroutine_fn backup_run(void *opaque)
    qemu_co_rwlock_init(&job->flush_rwlock);

    start = 0;
-    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
+                       BACKUP_SECTORS_PER_CLUSTER);

    job->bitmap = hbitmap_alloc(end, 0);

@@ -364,13 +262,28 @@ static void coroutine_fn backup_run(void *opaque)
            qemu_coroutine_yield();
            job->common.busy = true;
        }
-    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
-        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (; start < end; start++) {
            bool error_is_read;
-            if (yield_and_check(job)) {
+
+            if (block_job_is_cancelled(&job->common)) {
+                break;
+            }
+
+            /* we need to yield so that qemu_aio_flush() returns.
+             * (without, VM does not reboot)
+             */
+            if (job->common.speed) {
+                uint64_t delay_ns = ratelimit_calculate_delay(
+                        &job->limit, job->sectors_read);
+                job->sectors_read = 0;
+                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+            } else {
+                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+            }
+
+            if (block_job_is_cancelled(&job->common)) {
                break;
            }

@@ -428,34 +341,19 @@ static void coroutine_fn backup_run(void *opaque)
    qemu_co_rwlock_wrlock(&job->flush_rwlock);
    qemu_co_rwlock_unlock(&job->flush_rwlock);

-    if (job->sync_bitmap) {
-        BdrvDirtyBitmap *bm;
-        if (ret < 0) {
-            /* Merge the successor back into the parent, delete nothing. */
-            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
-            assert(bm);
-        } else {
-            /* Everything is fine, delete this bitmap and install the backup. */
-            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
-            assert(bm);
-        }
-    }
    hbitmap_free(job->bitmap);

    bdrv_iostatus_disable(target);
-    bdrv_op_unblock_all(target, job->common.blocker);
+    bdrv_unref(target);

-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&job->common, backup_complete, data);
+    block_job_completed(&job->common, ret);
 }

 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode sync_mode,
-                  BdrvDirtyBitmap *sync_bitmap,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  BlockCompletionFunc *cb, void *opaque,
+                  BlockDriverCompletionFunc *cb, void *opaque,
                  Error **errp)
 {
    int64_t len;
@@ -464,11 +362,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    assert(target);
    assert(cb);

-    if (bs == target) {
-        error_setg(errp, "Source and target cannot be the same");
-        return;
-    }
-
    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
@@ -476,73 +369,24 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }

-    if (!bdrv_is_inserted(bs)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(bs));
-        return;
-    }
-
-    if (!bdrv_is_inserted(target)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(target));
-        return;
-    }
-
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return;
-    }
-
-    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
-        return;
-    }
-
-    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
-        if (!sync_bitmap) {
-            error_setg(errp, "must provide a valid bitmap name for "
-                             "\"dirty-bitmap\" sync mode");
-            return;
-        }
-
-        /* Create a new bitmap, and freeze/disable this one. */
-        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
-            return;
-        }
-    } else if (sync_bitmap) {
-        error_setg(errp,
-                   "a sync_bitmap was provided to backup_run, "
-                   "but received an incompatible sync_mode (%s)",
-                   MirrorSyncMode_lookup[sync_mode]);
-        return;
-    }
-
    len = bdrv_getlength(bs);
    if (len < 0) {
        error_setg_errno(errp, -len, "unable to get length for '%s'",
                         bdrv_get_device_name(bs));
-        goto error;
+        return;
    }

    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
                                           cb, opaque, errp);
    if (!job) {
-        goto error;
+        return;
    }

-    bdrv_op_block_all(target, job->common.blocker);
-
    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->target = target;
    job->sync_mode = sync_mode;
-    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
-                       sync_bitmap : NULL;
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run);
    qemu_coroutine_enter(job->common.co, job);
-    return;
-
- error:
-    if (sync_bitmap) {
-        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
-    }
 }
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -26,10 +26,6 @@
 #include "qemu/config-file.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"

 typedef struct BDRVBlkdebugState {
    int state;
@@ -41,7 +37,7 @@ typedef struct BDRVBlkdebugState {
 } BDRVBlkdebugState;

 typedef struct BlkdebugAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
 } BlkdebugAIOCB;
@@ -52,8 +48,11 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;

+static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
+
 static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
+    .aiocb_size = sizeof(BlkdebugAIOCB),
+    .cancel     = blkdebug_aio_cancel,
 };

 enum {
@@ -195,8 +194,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
    [BLKDBG_PWRITEV]                        = "pwritev",
    [BLKDBG_PWRITEV_ZERO]                   = "pwritev_zero",
    [BLKDBG_PWRITEV_DONE]                   = "pwritev_done",
-
-    [BLKDBG_EMPTY_IMAGE_PREPARE]            = "empty_image_prepare",
 };

 static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -218,7 +215,7 @@ struct add_rule_data {
    int action;
 };

-static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
+static int add_rule(QemuOpts *opts, void *opaque)
 {
    struct add_rule_data *d = opaque;
    BDRVBlkdebugState *s = d->s;
@@ -228,11 +225,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)

    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
-    if (!event_name) {
-        error_setg(errp, "Missing event name for rule");
-        return -1;
-    } else if (get_event_by_name(event_name, &event) < 0) {
-        error_setg(errp, "Invalid event name \"%s\"", event_name);
+    if (!event_name || get_event_by_name(event_name, &event) < 0) {
        return -1;
    }

@@ -318,20 +311,10 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,

    d.s = s;
    d.action = ACTION_INJECT_ERROR;
-    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
+    qemu_opts_foreach(&inject_error_opts, add_rule, &d, 0);

    d.action = ACTION_SET_STATE;
-    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
+    qemu_opts_foreach(&set_state_opts, add_rule, &d, 0);

    ret = 0;
 fail:
@@ -460,24 +443,32 @@ static void error_callback_bh(void *opaque)
    struct BlkdebugAIOCB *acb = opaque;
    qemu_bh_delete(acb->bh);
    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
 }

-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
+    if (acb->bh) {
+        qemu_bh_delete(acb->bh);
+        acb->bh = NULL;
+    }
+    qemu_aio_release(acb);
+}
+
+static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
+    BlockDriverCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
    struct BlkdebugAIOCB *acb;
    QEMUBH *bh;
-    bool immediately = rule->options.inject.immediately;

    if (rule->options.inject.once) {
-        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
-        remove_rule(rule);
+        QSIMPLEQ_INIT(&s->active_rules);
    }

-    if (immediately) {
+    if (rule->options.inject.immediately) {
        return NULL;
    }

@@ -491,9 +482,9 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -513,9 +504,9 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }

-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -535,8 +526,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }

-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -719,60 +710,6 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file);
 }

-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
-{
-    return bdrv_truncate(bs->file, offset);
-}
-
-static void blkdebug_refresh_filename(BlockDriverState *bs)
-{
-    QDict *opts;
-    const QDictEntry *e;
-    bool force_json = false;
-
-    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
-        if (strcmp(qdict_entry_key(e), "config") &&
-            strcmp(qdict_entry_key(e), "x-image") &&
-            strcmp(qdict_entry_key(e), "image") &&
-            strncmp(qdict_entry_key(e), "image.", strlen("image.")))
-        {
-            force_json = true;
-            break;
-        }
-    }
-
-    if (force_json && !bs->file->full_open_options) {
-        /* The config file cannot be recreated, so creating a plain filename
-         * is impossible */
-        return;
-    }
-
-    if (!force_json && bs->file->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s",
-                 qdict_get_try_str(bs->options, "config") ?: "",
-                 bs->file->exact_filename);
-    }
-
-    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
-
-    QINCREF(bs->file->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
-
-    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
-        if (strcmp(qdict_entry_key(e), "x-image") &&
-            strcmp(qdict_entry_key(e), "image") &&
-            strncmp(qdict_entry_key(e), "image.", strlen("image.")))
-        {
-            qobject_incref(qdict_entry_value(e));
-            qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
-        }
-    }
-
-    bs->full_open_options = opts;
-}
-
 static BlockDriver bdrv_blkdebug = {
    .format_name            = "blkdebug",
    .protocol_name          = "blkdebug",
@@ -782,8 +719,6 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
    .bdrv_getlength         = blkdebug_getlength,
-    .bdrv_truncate          = blkdebug_truncate,
-    .bdrv_refresh_filename  = blkdebug_refresh_filename,

    .bdrv_aio_readv         = blkdebug_aio_readv,
    .bdrv_aio_writev        = blkdebug_aio_writev,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -10,8 +10,6 @@
 #include <stdarg.h>
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
 #include "block/block_int.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"

 typedef struct {
    BlockDriverState *test_file;
@@ -19,7 +17,7 @@ typedef struct {

 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
 struct BlkverifyAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;

    /* Request metadata */
@@ -29,6 +27,7 @@ struct BlkverifyAIOCB {

    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */
+    bool *finished;             /* completion signal for cancel */

    QEMUIOVector *qiov;         /* user I/O vector */
    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
@@ -37,8 +36,22 @@ struct BlkverifyAIOCB {
    void (*verify)(BlkverifyAIOCB *acb);
 };

+static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
+    bool finished = false;
+
+    /* Wait until request completes, invokes its callback, and frees itself */
+    acb->finished = &finished;
+    while (!finished) {
+        aio_poll(aio_context, true);
+    }
+}
+
 static const AIOCBInfo blkverify_aiocb_info = {
    .aiocb_size         = sizeof(BlkverifyAIOCB),
+    .cancel             = blkverify_aio_cancel,
 };

 static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
@@ -143,7 +156,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
-    qemu_opts_del(opts);
    return ret;
 }

@@ -165,7 +177,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
-                                         BlockCompletionFunc *cb,
+                                         BlockDriverCompletionFunc *cb,
                                         void *opaque)
 {
    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
@@ -179,6 +191,7 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
    acb->qiov = qiov;
    acb->buf = NULL;
    acb->verify = NULL;
+    acb->finished = NULL;
    return acb;
 }

@@ -192,7 +205,10 @@ static void blkverify_aio_bh(void *opaque)
        qemu_vfree(acb->buf);
    }
    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    if (acb->finished) {
+        *acb->finished = true;
+    }
+    qemu_aio_release(acb);
 }

 static void blkverify_aio_cb(void *opaque, int ret)
@@ -229,9 +245,9 @@ static void blkverify_verify_readv(BlkverifyAIOCB *acb)
    }
 }

-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
@@ -249,9 +265,9 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
@@ -264,9 +280,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;

@@ -304,32 +320,6 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
    bdrv_attach_aio_context(s->test_file, new_context);
 }

-static void blkverify_refresh_filename(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    /* bs->file has already been refreshed */
-    bdrv_refresh_filename(s->test_file);
-
-    if (bs->file->full_open_options && s->test_file->full_open_options) {
-        QDict *opts = qdict_new();
-        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
-
-        QINCREF(bs->file->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
-        QINCREF(s->test_file->full_open_options);
-        qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
-
-        bs->full_open_options = opts;
-    }
-
-    if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkverify:%s:%s",
-                 bs->file->exact_filename, s->test_file->exact_filename);
-    }
-}
-
 static BlockDriver bdrv_blkverify = {
    .format_name                      = "blkverify",
    .protocol_name                    = "blkverify",
@@ -339,7 +329,6 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_file_open                   = blkverify_open,
    .bdrv_close                       = blkverify_close,
    .bdrv_getlength                   = blkverify_getlength,
-    .bdrv_refresh_filename            = blkverify_refresh_filename,

    .bdrv_aio_readv                   = blkverify_aio_readv,
    .bdrv_aio_writev                  = blkverify_aio_writev,
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1,915 +0,0 @@
-/*
- * QEMU Block backends
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * Authors:
- *  Markus Armbruster <armbru@redhat.com>,
- *
- * This work is licensed under the terms of the GNU LGPL, version 2.1
- * or later.  See the COPYING.LIB file in the top-level directory.
- */
-
-#include "sysemu/block-backend.h"
-#include "block/block_int.h"
-#include "sysemu/blockdev.h"
-#include "qapi-event.h"
-
-/* Number of coroutines to reserve per attached device model */
-#define COROUTINE_POOL_RESERVATION 64
-
-struct BlockBackend {
-    char *name;
-    int refcnt;
-    BlockDriverState *bs;
-    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
-    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
-
-    void *dev;                  /* attached device model, if any */
-    /* TODO change to DeviceState when all users are qdevified */
-    const BlockDevOps *dev_ops;
-    void *dev_opaque;
-};
-
-typedef struct BlockBackendAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    int ret;
-} BlockBackendAIOCB;
-
-static const AIOCBInfo block_backend_aiocb_info = {
-    .aiocb_size = sizeof(BlockBackendAIOCB),
-};
-
-static void drive_info_del(DriveInfo *dinfo);
-
-/* All the BlockBackends (except for hidden ones) */
-static QTAILQ_HEAD(, BlockBackend) blk_backends =
-    QTAILQ_HEAD_INITIALIZER(blk_backends);
-
-/*
- * Create a new BlockBackend with @name, with a reference count of one.
- * @name must not be null or empty.
- * Fail if a BlockBackend with this name already exists.
- * Store an error through @errp on failure, unless it's null.
- * Return the new BlockBackend on success, null on failure.
- */
-BlockBackend *blk_new(const char *name, Error **errp)
-{
-    BlockBackend *blk;
-
-    assert(name && name[0]);
-    if (!id_wellformed(name)) {
-        error_setg(errp, "Invalid device name");
-        return NULL;
-    }
-    if (blk_by_name(name)) {
-        error_setg(errp, "Device with id '%s' already exists", name);
-        return NULL;
-    }
-    if (bdrv_find_node(name)) {
-        error_setg(errp,
-                   "Device name '%s' conflicts with an existing node name",
-                   name);
-        return NULL;
-    }
-
-    blk = g_new0(BlockBackend, 1);
-    blk->name = g_strdup(name);
-    blk->refcnt = 1;
-    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
-    return blk;
-}
-
-/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(const char *name, Error **errp)
-{
-    BlockBackend *blk;
-    BlockDriverState *bs;
-
-    blk = blk_new(name, errp);
-    if (!blk) {
-        return NULL;
-    }
-
-    bs = bdrv_new_root();
-    blk->bs = bs;
-    bs->blk = blk;
-    return blk;
-}
-
-/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
- *
- * Just as with bdrv_open(), after having called this function the reference to
- * @options belongs to the block layer (even on failure).
- *
- * TODO: Remove @filename and @flags; it should be possible to specify a whole
- * BDS tree just by specifying the @options QDict (or @reference,
- * alternatively). At the time of adding this function, this is not possible,
- * though, so callers of this function have to be able to specify @filename and
- * @flags.
- */
-BlockBackend *blk_new_open(const char *name, const char *filename,
-                           const char *reference, QDict *options, int flags,
-                           Error **errp)
-{
-    BlockBackend *blk;
-    int ret;
-
-    blk = blk_new_with_bs(name, errp);
-    if (!blk) {
-        QDECREF(options);
-        return NULL;
-    }
-
-    ret = bdrv_open(&blk->bs, filename, reference, options, flags, NULL, errp);
-    if (ret < 0) {
-        blk_unref(blk);
-        return NULL;
-    }
-
-    return blk;
-}
-
-static void blk_delete(BlockBackend *blk)
-{
-    assert(!blk->refcnt);
-    assert(!blk->dev);
-    if (blk->bs) {
-        assert(blk->bs->blk == blk);
-        blk->bs->blk = NULL;
-        bdrv_unref(blk->bs);
-        blk->bs = NULL;
-    }
-    /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
-    if (blk->name[0]) {
-        QTAILQ_REMOVE(&blk_backends, blk, link);
-    }
-    g_free(blk->name);
-    drive_info_del(blk->legacy_dinfo);
-    g_free(blk);
-}
-
-static void drive_info_del(DriveInfo *dinfo)
-{
-    if (!dinfo) {
-        return;
-    }
-    qemu_opts_del(dinfo->opts);
-    g_free(dinfo->serial);
-    g_free(dinfo);
-}
-
-/*
- * Increment @blk's reference count.
- * @blk must not be null.
- */
-void blk_ref(BlockBackend *blk)
-{
-    blk->refcnt++;
-}
-
-/*
- * Decrement @blk's reference count.
- * If this drops it to zero, destroy @blk.
- * For convenience, do nothing if @blk is null.
- */
-void blk_unref(BlockBackend *blk)
-{
-    if (blk) {
-        assert(blk->refcnt > 0);
-        if (!--blk->refcnt) {
-            blk_delete(blk);
-        }
-    }
-}
-
-/*
- * Return the BlockBackend after @blk.
- * If @blk is null, return the first one.
- * Else, return @blk's next sibling, which may be null.
- *
- * To iterate over all BlockBackends, do
- * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
- *     ...
- * }
- */
-BlockBackend *blk_next(BlockBackend *blk)
-{
-    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
-}
-
-/*
- * Return @blk's name, a non-null string.
- * Wart: the name is empty iff @blk has been hidden with
- * blk_hide_on_behalf_of_hmp_drive_del().
- */
-const char *blk_name(BlockBackend *blk)
-{
-    return blk->name;
-}
-
-/*
- * Return the BlockBackend with name @name if it exists, else null.
- * @name must not be null.
- */
-BlockBackend *blk_by_name(const char *name)
-{
-    BlockBackend *blk;
-
-    assert(name);
-    QTAILQ_FOREACH(blk, &blk_backends, link) {
-        if (!strcmp(name, blk->name)) {
-            return blk;
-        }
-    }
-    return NULL;
-}
-
-/*
- * Return the BlockDriverState attached to @blk if any, else null.
- */
-BlockDriverState *blk_bs(BlockBackend *blk)
-{
-    return blk->bs;
-}
-
-/*
- * Return @blk's DriveInfo if any, else null.
- */
-DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
-{
-    return blk->legacy_dinfo;
-}
-
-/*
- * Set @blk's DriveInfo to @dinfo, and return it.
- * @blk must not have a DriveInfo set already.
- * No other BlockBackend may have the same DriveInfo set.
- */
-DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
-{
-    assert(!blk->legacy_dinfo);
-    return blk->legacy_dinfo = dinfo;
-}
-
-/*
- * Return the BlockBackend with DriveInfo @dinfo.
- * It must exist.
- */
-BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
-{
-    BlockBackend *blk;
-
-    QTAILQ_FOREACH(blk, &blk_backends, link) {
-        if (blk->legacy_dinfo == dinfo) {
-            return blk;
-        }
-    }
-    abort();
-}
-
-/*
- * Hide @blk.
- * @blk must not have been hidden already.
- * Make attached BlockDriverState, if any, anonymous.
- * Once hidden, @blk is invisible to all functions that don't receive
- * it as argument.  For example, blk_by_name() won't return it.
- * Strictly for use by do_drive_del().
- * TODO get rid of it!
- */
-void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
-{
-    QTAILQ_REMOVE(&blk_backends, blk, link);
-    blk->name[0] = 0;
-    if (blk->bs) {
-        bdrv_make_anon(blk->bs);
-    }
-}
-
-/*
- * Attach device model @dev to @blk.
- * Return 0 on success, -EBUSY when a device model is attached already.
- */
-int blk_attach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    if (blk->dev) {
-        return -EBUSY;
-    }
-    blk_ref(blk);
-    blk->dev = dev;
-    bdrv_iostatus_reset(blk->bs);
-    return 0;
-}
-
-/*
- * Attach device model @dev to @blk.
- * @blk must not have a device model attached already.
- * TODO qdevified devices don't use this, remove when devices are qdevified
- */
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
-{
-    if (blk_attach_dev(blk, dev) < 0) {
-        abort();
-    }
-}
-
-/*
- * Detach device model @dev from @blk.
- * @dev must be currently attached to @blk.
- */
-void blk_detach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    assert(blk->dev == dev);
-    blk->dev = NULL;
-    blk->dev_ops = NULL;
-    blk->dev_opaque = NULL;
-    bdrv_set_guest_block_size(blk->bs, 512);
-    blk_unref(blk);
-}
-
-/*
- * Return the device model attached to @blk if any, else null.
- */
-void *blk_get_attached_dev(BlockBackend *blk)
-/* TODO change to return DeviceState * when all users are qdevified */
-{
-    return blk->dev;
-}
-
-/*
- * Set @blk's device model callbacks to @ops.
- * @opaque is the opaque argument to pass to the callbacks.
- * This is for use by device models.
- */
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
-                     void *opaque)
-{
-    blk->dev_ops = ops;
-    blk->dev_opaque = opaque;
-}
-
-/*
- * Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
- * Also send DEVICE_TRAY_MOVED events as appropriate.
- */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
-{
-    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
-        bool tray_was_closed = !blk_dev_is_tray_open(blk);
-
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
-        if (tray_was_closed) {
-            /* tray open */
-            qapi_event_send_device_tray_moved(blk_name(blk),
-                                              true, &error_abort);
-        }
-        if (load) {
-            /* tray close */
-            qapi_event_send_device_tray_moved(blk_name(blk),
-                                              false, &error_abort);
-        }
-    }
-}
-
-/*
- * Does @blk's attached device model have removable media?
- * %true if no device model is attached.
- */
-bool blk_dev_has_removable_media(BlockBackend *blk)
-{
-    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
-}
-
-/*
- * Notify @blk's attached device model of a media eject request.
- * If @force is true, the medium is about to be yanked out forcefully.
- */
-void blk_dev_eject_request(BlockBackend *blk, bool force)
-{
-    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
-        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
-    }
-}
-
-/*
- * Does @blk's attached device model have a tray, and is it open?
- */
-bool blk_dev_is_tray_open(BlockBackend *blk)
-{
-    if (blk->dev_ops && blk->dev_ops->is_tray_open) {
-        return blk->dev_ops->is_tray_open(blk->dev_opaque);
-    }
-    return false;
-}
-
-/*
- * Does @blk's attached device model have the medium locked?
- * %false if the device model has no such lock.
- */
-bool blk_dev_is_medium_locked(BlockBackend *blk)
-{
-    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
-        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
-    }
-    return false;
-}
-
-/*
- * Notify @blk's attached device model of a backend size change.
- */
-void blk_dev_resize_cb(BlockBackend *blk)
-{
-    if (blk->dev_ops && blk->dev_ops->resize_cb) {
-        blk->dev_ops->resize_cb(blk->dev_opaque);
-    }
-}
-
-void blk_iostatus_enable(BlockBackend *blk)
-{
-    bdrv_iostatus_enable(blk->bs);
-}
-
-static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
-                                  size_t size)
-{
-    int64_t len;
-
-    if (size > INT_MAX) {
-        return -EIO;
-    }
-
-    if (!blk_is_inserted(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    len = blk_getlength(blk);
-    if (len < 0) {
-        return len;
-    }
-
-    if (offset < 0) {
-        return -EIO;
-    }
-
-    if (offset > len || len - offset < size) {
-        return -EIO;
-    }
-
-    return 0;
-}
-
-static int blk_check_request(BlockBackend *blk, int64_t sector_num,
-                             int nb_sectors)
-{
-    if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
-                                  nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-             int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
-}
-
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-                         int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
-}
-
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
-              int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
-}
-
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                     int nb_sectors, BdrvRequestFlags flags)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
-}
-
-static void error_callback_bh(void *opaque)
-{
-    struct BlockBackendAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
-                                     void *opaque, int ret)
-{
-    struct BlockBackendAIOCB *acb;
-    QEMUBH *bh;
-
-    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
-    acb->ret = ret;
-
-    bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
-
-    return &acb->common;
-}
-
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                 int nb_sectors, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
-                                 cb, opaque);
-}
-
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
-{
-    int ret = blk_check_byte_request(blk, offset, count);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_pread(blk->bs, offset, buf, count);
-}
-
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
-{
-    int ret = blk_check_byte_request(blk, offset, count);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_pwrite(blk->bs, offset, buf, count);
-}
-
-int64_t blk_getlength(BlockBackend *blk)
-{
-    return bdrv_getlength(blk->bs);
-}
-
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
-{
-    bdrv_get_geometry(blk->bs, nb_sectors_ptr);
-}
-
-int64_t blk_nb_sectors(BlockBackend *blk)
-{
-    return bdrv_nb_sectors(blk->bs);
-}
-
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
-                          QEMUIOVector *iov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
-}
-
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
-                           QEMUIOVector *iov, int nb_sectors,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
-}
-
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    return bdrv_aio_flush(blk->bs, cb, opaque);
-}
-
-BlockAIOCB *blk_aio_discard(BlockBackend *blk,
-                            int64_t sector_num, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
-}
-
-void blk_aio_cancel(BlockAIOCB *acb)
-{
-    bdrv_aio_cancel(acb);
-}
-
-void blk_aio_cancel_async(BlockAIOCB *acb)
-{
-    bdrv_aio_cancel_async(acb);
-}
-
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
-    int i, ret;
-
-    for (i = 0; i < num_reqs; i++) {
-        ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
-}
-
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
-{
-    return bdrv_ioctl(blk->bs, req, buf);
-}
-
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
-}
-
-int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_discard(blk->bs, sector_num, nb_sectors);
-}
-
-int blk_co_flush(BlockBackend *blk)
-{
-    return bdrv_co_flush(blk->bs);
-}
-
-int blk_flush(BlockBackend *blk)
-{
-    return bdrv_flush(blk->bs);
-}
-
-int blk_flush_all(void)
-{
-    return bdrv_flush_all();
-}
-
-void blk_drain_all(void)
-{
-    bdrv_drain_all();
-}
-
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
-{
-    return bdrv_get_on_error(blk->bs, is_read);
-}
-
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
-                                      int error)
-{
-    return bdrv_get_error_action(blk->bs, is_read, error);
-}
-
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
-                      bool is_read, int error)
-{
-    bdrv_error_action(blk->bs, action, is_read, error);
-}
-
-int blk_is_read_only(BlockBackend *blk)
-{
-    return bdrv_is_read_only(blk->bs);
-}
-
-int blk_is_sg(BlockBackend *blk)
-{
-    return bdrv_is_sg(blk->bs);
-}
-
-int blk_enable_write_cache(BlockBackend *blk)
-{
-    return bdrv_enable_write_cache(blk->bs);
-}
-
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
-{
-    bdrv_set_enable_write_cache(blk->bs, wce);
-}
-
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
-{
-    bdrv_invalidate_cache(blk->bs, errp);
-}
-
-int blk_is_inserted(BlockBackend *blk)
-{
-    return bdrv_is_inserted(blk->bs);
-}
-
-void blk_lock_medium(BlockBackend *blk, bool locked)
-{
-    bdrv_lock_medium(blk->bs, locked);
-}
-
-void blk_eject(BlockBackend *blk, bool eject_flag)
-{
-    bdrv_eject(blk->bs, eject_flag);
-}
-
-int blk_get_flags(BlockBackend *blk)
-{
-    return bdrv_get_flags(blk->bs);
-}
-
-int blk_get_max_transfer_length(BlockBackend *blk)
-{
-    return blk->bs->bl.max_transfer_length;
-}
-
-void blk_set_guest_block_size(BlockBackend *blk, int align)
-{
-    bdrv_set_guest_block_size(blk->bs, align);
-}
-
-void *blk_blockalign(BlockBackend *blk, size_t size)
-{
-    return qemu_blockalign(blk ? blk->bs : NULL, size);
-}
-
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    return bdrv_op_is_blocked(blk->bs, op, errp);
-}
-
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
-{
-    bdrv_op_unblock(blk->bs, op, reason);
-}
-
-void blk_op_block_all(BlockBackend *blk, Error *reason)
-{
-    bdrv_op_block_all(blk->bs, reason);
-}
-
-void blk_op_unblock_all(BlockBackend *blk, Error *reason)
-{
-    bdrv_op_unblock_all(blk->bs, reason);
-}
-
-AioContext *blk_get_aio_context(BlockBackend *blk)
-{
-    return bdrv_get_aio_context(blk->bs);
-}
-
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
-{
-    bdrv_set_aio_context(blk->bs, new_context);
-}
-
-void blk_add_aio_context_notifier(BlockBackend *blk,
-        void (*attached_aio_context)(AioContext *new_context, void *opaque),
-        void (*detach_aio_context)(void *opaque), void *opaque)
-{
-    bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
-                                  detach_aio_context, opaque);
-}
-
-void blk_remove_aio_context_notifier(BlockBackend *blk,
-                                     void (*attached_aio_context)(AioContext *,
-                                                                  void *),
-                                     void (*detach_aio_context)(void *),
-                                     void *opaque)
-{
-    bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
-                                     detach_aio_context, opaque);
-}
-
-void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
-{
-    bdrv_add_close_notifier(blk->bs, notify);
-}
-
-void blk_io_plug(BlockBackend *blk)
-{
-    bdrv_io_plug(blk->bs);
-}
-
-void blk_io_unplug(BlockBackend *blk)
-{
-    bdrv_io_unplug(blk->bs);
-}
-
-BlockAcctStats *blk_get_stats(BlockBackend *blk)
-{
-    return bdrv_get_stats(blk->bs);
-}
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
-                  BlockCompletionFunc *cb, void *opaque)
-{
-    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
-}
-
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                     int nb_sectors, BdrvRequestFlags flags)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
-}
-
-int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
-                         const uint8_t *buf, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors);
-}
-
-int blk_truncate(BlockBackend *blk, int64_t offset)
-{
-    return bdrv_truncate(blk->bs, offset);
-}
-
-int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_discard(blk->bs, sector_num, nb_sectors);
-}
-
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
-                     int64_t pos, int size)
-{
-    return bdrv_save_vmstate(blk->bs, buf, pos, size);
-}
-
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
-{
-    return bdrv_load_vmstate(blk->bs, buf, pos, size);
-}
-
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
-{
-    return bdrv_probe_blocksizes(blk->bs, bsz);
-}
-
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
-{
-    return bdrv_probe_geometry(blk->bs, geo);
-}
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -131,11 +131,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -EFBIG;
    }

-    s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size);
-    if (s->catalog_size && s->catalog_bitmap == NULL) {
-        error_setg(errp, "Could not allocate memory for catalog");
-        return -ENOMEM;
-    }
+    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
                     s->catalog_size * 4);
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -116,12 +116,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
                   "try increasing block size");
        return -EINVAL;
    }
-
-    s->offsets = g_try_malloc(offsets_size);
-    if (s->offsets == NULL) {
-        error_setg(errp, "Could not allocate offsets table");
-        return -ENOMEM;
-    }
+    s->offsets = g_malloc(offsets_size);

    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
    if (ret < 0) {
@@ -163,20 +158,8 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* initialize zlib engine */
-    s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
-    if (s->compressed_block == NULL) {
-        error_setg(errp, "Could not allocate compressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    s->uncompressed_block = g_try_malloc(s->block_size);
-    if (s->uncompressed_block == NULL) {
-        error_setg(errp, "Could not allocate uncompressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    s->compressed_block = g_malloc(max_compressed_block_size + 1);
+    s->uncompressed_block = g_malloc(s->block_size);
    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
--- a/block/commit.c
+++ b/block/commit.c
@@ -60,50 +60,17 @@ static int coroutine_fn commit_populate(BlockDriverState *bs,
    return 0;
 }

-typedef struct {
-    int ret;
-} CommitCompleteData;
-
-static void commit_complete(BlockJob *job, void *opaque)
+static void coroutine_fn commit_run(void *opaque)
 {
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
-    CommitCompleteData *data = opaque;
+    CommitBlockJob *s = opaque;
    BlockDriverState *active = s->active;
    BlockDriverState *top = s->top;
    BlockDriverState *base = s->base;
    BlockDriverState *overlay_bs;
-    int ret = data->ret;
-
-    if (!block_job_is_cancelled(&s->common) && ret == 0) {
-        /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
-    }
-
-    /* restore base open flags here if appropriate (e.g., change the base back
-     * to r/o). These reopens do not need to be atomic, since we won't abort
-     * even on failure here */
-    if (s->base_flags != bdrv_get_flags(base)) {
-        bdrv_reopen(base, s->base_flags, NULL);
-    }
-    overlay_bs = bdrv_find_overlay(active, top);
-    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
-        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
-    }
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, ret);
-    g_free(data);
-}
-
-static void coroutine_fn commit_run(void *opaque)
-{
-    CommitBlockJob *s = opaque;
-    CommitCompleteData *data;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
    int64_t sector_num, end;
    int ret = 0;
    int n = 0;
-    void *buf = NULL;
+    void *buf;
    int bytes_written = 0;
    int64_t base_len;

@@ -111,18 +78,18 @@ static void coroutine_fn commit_run(void *opaque)


    if (s->common.len < 0) {
-        goto out;
+        goto exit_restore_reopen;
    }

    ret = base_len = bdrv_getlength(base);
    if (base_len < 0) {
-        goto out;
+        goto exit_restore_reopen;
    }

    if (base_len < s->common.len) {
        ret = bdrv_truncate(base, s->common.len);
        if (ret) {
-            goto out;
+            goto exit_restore_reopen;
        }
    }

@@ -161,7 +128,7 @@ wait:
            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
-                goto out;
+                goto exit_free_buf;
            } else {
                n = 0;
                continue;
@@ -173,12 +140,27 @@ wait:

    ret = 0;

-out:
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+    }
+
+exit_free_buf:
    qemu_vfree(buf);

-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, commit_complete, data);
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    overlay_bs = bdrv_find_overlay(active, top);
+    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+    g_free(s->backing_file_str);
+    block_job_completed(&s->common, ret);
 }

 static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -200,7 +182,7 @@ static const BlockJobDriver commit_job_driver = {

 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockCompletionFunc *cb,
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                  void *opaque, const char *backing_file_str, Error **errp)
 {
    CommitBlockJob *s;
--- a/block/cow.c
+++ b/block/cow.c
@@ -0,0 +1,432 @@
+/*
+ * Block driver for the COW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+
+/**************************************************************/
+/* COW block driver using file system holes */
+
+/* user mode linux compatible COW file */
+#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
+#define COW_VERSION 2
+
+struct cow_header_v2 {
+    uint32_t magic;
+    uint32_t version;
+    char backing_file[1024];
+    int32_t mtime;
+    uint64_t size;
+    uint32_t sectorsize;
+};
+
+typedef struct BDRVCowState {
+    CoMutex lock;
+    int64_t cow_sectors_offset;
+} BDRVCowState;
+
+static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct cow_header_v2 *cow_header = (const void *)buf;
+
+    if (buf_size >= sizeof(struct cow_header_v2) &&
+        be32_to_cpu(cow_header->magic) == COW_MAGIC &&
+        be32_to_cpu(cow_header->version) == COW_VERSION)
+        return 100;
+    else
+        return 0;
+}
+
+static int cow_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    BDRVCowState *s = bs->opaque;
+    struct cow_header_v2 cow_header;
+    int bitmap_size;
+    int64_t size;
+    int ret;
+
+    /* see if it is a cow image */
+    ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
+        error_setg(errp, "Image not in COW format");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    if (be32_to_cpu(cow_header.version) != COW_VERSION) {
+        char version[64];
+        snprintf(version, sizeof(version),
+               "COW version %" PRIu32, cow_header.version);
+        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "cow", version);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    /* cow image found */
+    size = be64_to_cpu(cow_header.size);
+    bs->total_sectors = size / 512;
+
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+            cow_header.backing_file);
+
+    bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
+    s->cow_sectors_offset = (bitmap_size + 511) & ~511;
+    qemu_co_mutex_init(&s->lock);
+    return 0;
+ fail:
+    return ret;
+}
+
+static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
+{
+    int64_t bitnum = start, last = start + nb_sectors;
+    while (bitnum < last) {
+        if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
+            bitmap[bitnum / 8] = 0xFF;
+            bitnum += 8;
+            continue;
+        }
+        bitmap[bitnum/8] |= (1 << (bitnum % 8));
+        bitnum++;
+    }
+}
+
+#define BITS_PER_BITMAP_SECTOR (512 * 8)
+
+/* Cannot use bitmap.c on big-endian machines.  */
+static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
+{
+    return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
+}
+
+static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
+{
+    int streak_value = value ? 0xFF : 0;
+    int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
+    int bitnum = start;
+    while (bitnum < last) {
+        if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
+            bitnum += 8;
+            continue;
+        }
+        if (cow_test_bit(bitnum, bitmap) == value) {
+            bitnum++;
+            continue;
+        }
+        break;
+    }
+    return MIN(bitnum, last) - start;
+}
+
+/* Return true if first block has been changed (ie. current version is
+ * in COW file).  Set the number of continuous blocks for which that
+ * is true. */
+static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *num_same)
+{
+    int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
+    uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
+    bool first = true;
+    int changed = 0, same = 0;
+
+    do {
+        int ret;
+        uint8_t bitmap[BDRV_SECTOR_SIZE];
+
+        bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+        int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
+
+        ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (first) {
+            changed = cow_test_bit(bitnum, bitmap);
+            first = false;
+        }
+
+        same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
+
+        bitnum += sector_bits;
+        nb_sectors -= sector_bits;
+        offset += BDRV_SECTOR_SIZE;
+    } while (nb_sectors);
+
+    *num_same = same;
+    return changed;
+}
+
+static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *num_same)
+{
+    BDRVCowState *s = bs->opaque;
+    int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+    int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
+    if (ret < 0) {
+        return ret;
+    }
+    return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
+}
+
+static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
+        int nb_sectors)
+{
+    int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
+    uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
+    bool first = true;
+    int sector_bits;
+
+    for ( ; nb_sectors;
+            bitnum += sector_bits,
+            nb_sectors -= sector_bits,
+            offset += BDRV_SECTOR_SIZE) {
+        int ret, set;
+        uint8_t bitmap[BDRV_SECTOR_SIZE];
+
+        bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+        sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
+
+        ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* Skip over any already set bits */
+        set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
+        bitnum += set;
+        sector_bits -= set;
+        nb_sectors -= set;
+        if (!sector_bits) {
+            continue;
+        }
+
+        if (first) {
+            ret = bdrv_flush(bs->file);
+            if (ret < 0) {
+                return ret;
+            }
+            first = false;
+        }
+
+        cow_set_bits(bitmap, bitnum, sector_bits);
+
+        ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
+                                 uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *s = bs->opaque;
+    int ret, n;
+
+    while (nb_sectors > 0) {
+        ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret) {
+            ret = bdrv_pread(bs->file,
+                        s->cow_sectors_offset + sector_num * 512,
+                        buf, n * 512);
+            if (ret < 0) {
+                return ret;
+            }
+        } else {
+            if (bs->backing_hd) {
+                /* read from the base image */
+                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                if (ret < 0) {
+                    return ret;
+                }
+            } else {
+                memset(buf, 0, n * 512);
+            }
+        }
+        nb_sectors -= n;
+        sector_num += n;
+        buf += n * 512;
+    }
+    return 0;
+}
+
+static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num,
+                                    uint8_t *buf, int nb_sectors)
+{
+    int ret;
+    BDRVCowState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = cow_read(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int cow_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *s = bs->opaque;
+    int ret;
+
+    ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
+                      buf, nb_sectors * 512);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return cow_update_bitmap(bs, sector_num, nb_sectors);
+}
+
+static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num,
+                                     const uint8_t *buf, int nb_sectors)
+{
+    int ret;
+    BDRVCowState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = cow_write(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static void cow_close(BlockDriverState *bs)
+{
+}
+
+static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
+{
+    struct cow_header_v2 cow_header;
+    struct stat st;
+    int64_t image_sectors = 0;
+    char *image_filename = NULL;
+    Error *local_err = NULL;
+    int ret;
+    BlockDriverState *cow_bs = NULL;
+
+    /* Read out options */
+    image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+    image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+
+    ret = bdrv_create_file(filename, opts, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    ret = bdrv_open(&cow_bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    memset(&cow_header, 0, sizeof(cow_header));
+    cow_header.magic = cpu_to_be32(COW_MAGIC);
+    cow_header.version = cpu_to_be32(COW_VERSION);
+    if (image_filename) {
+        /* Note: if no file, we put a dummy mtime */
+        cow_header.mtime = cpu_to_be32(0);
+
+        if (stat(image_filename, &st) != 0) {
+            goto mtime_fail;
+        }
+        cow_header.mtime = cpu_to_be32(st.st_mtime);
+    mtime_fail:
+        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
+                image_filename);
+    }
+    cow_header.sectorsize = cpu_to_be32(512);
+    cow_header.size = cpu_to_be64(image_sectors * 512);
+    ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* resize to include at least all the bitmap */
+    ret = bdrv_truncate(cow_bs,
+        sizeof(cow_header) + ((image_sectors + 7) >> 3));
+    if (ret < 0) {
+        goto exit;
+    }
+
+exit:
+    g_free(image_filename);
+    if (cow_bs) {
+        bdrv_unref(cow_bs);
+    }
+    return ret;
+}
+
+static QemuOptsList cow_create_opts = {
+    .name = "cow-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(cow_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = BLOCK_OPT_BACKING_FILE,
+            .type = QEMU_OPT_STRING,
+            .help = "File name of a base image"
+        },
+        { /* end of list */ }
+    }
+};
+
+static BlockDriver bdrv_cow = {
+    .format_name    = "cow",
+    .instance_size  = sizeof(BDRVCowState),
+
+    .bdrv_probe     = cow_probe,
+    .bdrv_open      = cow_open,
+    .bdrv_close     = cow_close,
+    .bdrv_create    = cow_create,
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .supports_backing       = true,
+
+    .bdrv_read              = cow_co_read,
+    .bdrv_write             = cow_co_write,
+    .bdrv_co_get_block_status   = cow_co_get_block_status,
+
+    .create_opts    = &cow_create_opts,
+};
+
+static void bdrv_cow_init(void)
+{
+    bdrv_register(&bdrv_cow);
+}
+
+block_init(bdrv_cow_init);
--- a/block/curl.c
+++ b/block/curl.c
@@ -26,7 +26,7 @@
 #include "qapi/qmp/qbool.h"
 #include <curl/curl.h>

-// #define DEBUG_CURL
+// #define DEBUG
 // #define DEBUG_VERBOSE

 #ifdef DEBUG_CURL
@@ -63,8 +63,6 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
-#define CURL_TIMEOUT_DEFAULT 5
-#define CURL_TIMEOUT_MAX 10000

 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
@@ -73,13 +71,11 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
-#define CURL_BLOCK_OPT_TIMEOUT "timeout"
-#define CURL_BLOCK_OPT_COOKIE    "cookie"

 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    QEMUIOVector *qiov;

@@ -113,8 +109,6 @@ typedef struct BDRVCURLState {
    char *url;
    size_t readahead_size;
    bool sslverify;
-    uint64_t timeout;
-    char *cookie;
    bool accept_range;
    AioContext *aio_context;
 } BDRVCURLState;
@@ -213,7 +207,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
            acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_unref(acb);
+            qemu_aio_release(acb);
            s->acb[i] = NULL;
        }
    }
@@ -305,7 +299,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                    }

                    acb->common.cb(acb->common.opaque, -EIO);
-                    qemu_aio_unref(acb);
+                    qemu_aio_release(acb);
                    state->acb[i] = NULL;
                }
            }
@@ -358,7 +352,7 @@ static void curl_multi_timeout_do(void *arg)
 #endif
 }

-static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
+static CURLState *curl_init_state(BDRVCURLState *s)
 {
    CURLState *state = NULL;
    int i, j;
@@ -376,7 +370,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
            break;
        }
        if (!state) {
-            aio_poll(bdrv_get_aio_context(bs), true);
+            aio_poll(state->s->aio_context, true);
        }
    } while(!state);

@@ -388,10 +382,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
                         (long) s->sslverify);
-        if (s->cookie) {
-            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
-        }
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
+        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
        curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
                         (void *)curl_read_cb);
        curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
@@ -498,16 +489,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_BOOL,
            .help = "Verify SSL certificate"
        },
-        {
-            .name = CURL_BLOCK_OPT_TIMEOUT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Curl timeout"
-        },
-        {
-            .name = CURL_BLOCK_OPT_COOKIE,
-            .type = QEMU_OPT_STRING,
-            .help = "Pass the cookie or list of cookies with each request"
-        },
        { /* end of list */ }
    },
 };
@@ -520,7 +501,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *file;
-    const char *cookie;
    double d;

    static int inited = 0;
@@ -545,18 +525,8 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

-    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
-                                     CURL_TIMEOUT_DEFAULT);
-    if (s->timeout > CURL_TIMEOUT_MAX) {
-        error_setg(errp, "timeout parameter is too large or negative");
-        goto out_noclean;
-    }
-
    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);

-    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    s->cookie = g_strdup(cookie);
-
    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
    if (file == NULL) {
        error_setg(errp, "curl block driver requires an 'url' option");
@@ -571,7 +541,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    DPRINTF("CURL: Opening %s\n", file);
    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
-    state = curl_init_state(bs, s);
+    state = curl_init_state(s);
    if (!state)
        goto out_noclean;

@@ -612,14 +582,19 @@ out:
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
-    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
    return -EINVAL;
 }

+static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    // Do we have to implement canceling? Seems to work without...
+}
+
 static const AIOCBInfo curl_aiocb_info = {
    .aiocb_size         = sizeof(CURLAIOCB),
+    .cancel             = curl_aio_cancel,
 };


@@ -641,7 +616,7 @@ static void curl_readv_bh_cb(void *p)
    // we can just call the callback and be done.
    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
-            qemu_aio_unref(acb);
+            qemu_aio_release(acb);
            // fall through
        case FIND_RET_WAIT:
            return;
@@ -650,10 +625,10 @@ static void curl_readv_bh_cb(void *p)
    }

    // No cache found, so let's start a new request
-    state = curl_init_state(acb->common.bs, s);
+    state = curl_init_state(s);
    if (!state) {
        acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return;
    }

@@ -665,13 +640,7 @@ static void curl_readv_bh_cb(void *p)
    state->buf_start = start;
    state->buf_len = acb->end + s->readahead_size;
    end = MIN(start + state->buf_len, s->len) - 1;
-    state->orig_buf = g_try_malloc(state->buf_len);
-    if (state->buf_len && state->orig_buf == NULL) {
-        curl_clean_state(state);
-        acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_unref(acb);
-        return;
-    }
+    state->orig_buf = g_malloc(state->buf_len);
    state->acb[0] = acb;

    snprintf(state->range, 127, "%zd-%zd", start, end);
@@ -685,9 +654,9 @@ static void curl_readv_bh_cb(void *p)
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 }

-static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    CURLAIOCB *acb;

@@ -709,7 +678,6 @@ static void curl_close(BlockDriverState *bs)
    DPRINTF("CURL: Close\n");
    curl_detach_aio_context(bs);

-    g_free(s->cookie);
    g_free(s->url);
 }

--- a/block/dmg.c
+++ b/block/dmg.c
@@ -26,10 +26,6 @@
 #include "qemu/bswap.h"
 #include "qemu/module.h"
 #include <zlib.h>
-#ifdef CONFIG_BZIP2
-#include <bzlib.h>
-#endif
-#include <glib.h>

 enum {
    /* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -59,9 +55,6 @@ typedef struct BDRVDMGState {
    uint8_t *compressed_chunk;
    uint8_t *uncompressed_chunk;
    z_stream zstream;
-#ifdef CONFIG_BZIP2
-    bz_stream bzstream;
-#endif
 } BDRVDMGState;

 static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -107,16 +100,6 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    return 0;
 }

-static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset)
-{
-    return be64_to_cpu(*(uint64_t *)&buffer[offset]);
-}
-
-static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset)
-{
-    return be32_to_cpu(*(uint32_t *)&buffer[offset]);
-}
-
 /* Increase max chunk sizes, if necessary.  This function is used to calculate
 * the buffer sizes needed for compressed/uncompressed chunk I/O.
 */
@@ -129,7 +112,6 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,

    switch (s->types[chunk]) {
    case 0x80000005: /* zlib compressed */
-    case 0x80000006: /* bzip2 compressed */
        compressed_size = s->lengths[chunk];
        uncompressed_sectors = s->sectorcounts[chunk];
        break;
@@ -137,9 +119,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
        uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
        break;
    case 2: /* zero */
-        /* as the all-zeroes block may be large, it is treated specially: the
-         * sector is not copied from a large buffer, a simple memset is used
-         * instead. Therefore uncompressed_sectors does not need to be set. */
+        uncompressed_sectors = s->sectorcounts[chunk];
        break;
    }

@@ -151,377 +131,161 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
    }
 }

-static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
-{
-    int64_t length;
-    int64_t offset = 0;
-    uint8_t buffer[515];
-    int i, ret;
-
-    /* bdrv_getlength returns a multiple of block size (512), rounded up. Since
-     * dmg images can have odd sizes, try to look for the "koly" magic which
-     * marks the begin of the UDIF trailer (512 bytes). This magic can be found
-     * in the last 511 bytes of the second-last sector or the first 4 bytes of
-     * the last sector (search space: 515 bytes) */
-    length = bdrv_getlength(file_bs);
-    if (length < 0) {
-        error_setg_errno(errp, -length,
-            "Failed to get file size while reading UDIF trailer");
-        return length;
-    } else if (length < 512) {
-        error_setg(errp, "dmg file must be at least 512 bytes long");
-        return -EINVAL;
-    }
-    if (length > 511 + 512) {
-        offset = length - 511 - 512;
-    }
-    length = length < 515 ? length : 515;
-    ret = bdrv_pread(file_bs, offset, buffer, length);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
-        return ret;
-    }
-    for (i = 0; i < length - 3; i++) {
-        if (buffer[i] == 'k' && buffer[i+1] == 'o' &&
-            buffer[i+2] == 'l' && buffer[i+3] == 'y') {
-            return offset + i;
-        }
-    }
-    error_setg(errp, "Could not locate UDIF trailer in dmg file");
-    return -EINVAL;
-}
-
-/* used when building the sector table */
-typedef struct DmgHeaderState {
-    /* used internally by dmg_read_mish_block to remember offsets of blocks
-     * across calls */
-    uint64_t data_fork_offset;
-    /* exported for dmg_open */
-    uint32_t max_compressed_size;
-    uint32_t max_sectors_per_chunk;
-} DmgHeaderState;
-
-static bool dmg_is_known_block_type(uint32_t entry_type)
-{
-    switch (entry_type) {
-    case 0x00000001:    /* uncompressed */
-    case 0x00000002:    /* zeroes */
-    case 0x80000005:    /* zlib */
-#ifdef CONFIG_BZIP2
-    case 0x80000006:    /* bzip2 */
-#endif
-        return true;
-    default:
-        return false;
-    }
-}
-
-static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
-                               uint8_t *buffer, uint32_t count)
-{
-    uint32_t type, i;
-    int ret;
-    size_t new_size;
-    uint32_t chunk_count;
-    int64_t offset = 0;
-    uint64_t data_offset;
-    uint64_t in_offset = ds->data_fork_offset;
-    uint64_t out_offset;
-
-    type = buff_read_uint32(buffer, offset);
-    /* skip data that is not a valid MISH block (invalid magic or too small) */
-    if (type != 0x6d697368 || count < 244) {
-        /* assume success for now */
-        return 0;
-    }
-
-    /* chunk offsets are relative to this sector number */
-    out_offset = buff_read_uint64(buffer, offset + 8);
-
-    /* location in data fork for (compressed) blob (in bytes) */
-    data_offset = buff_read_uint64(buffer, offset + 0x18);
-    in_offset += data_offset;
-
-    /* move to begin of chunk entries */
-    offset += 204;
-
-    chunk_count = (count - 204) / 40;
-    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
-    s->types = g_realloc(s->types, new_size / 2);
-    s->offsets = g_realloc(s->offsets, new_size);
-    s->lengths = g_realloc(s->lengths, new_size);
-    s->sectors = g_realloc(s->sectors, new_size);
-    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
-
-    for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
-        s->types[i] = buff_read_uint32(buffer, offset);
-        if (!dmg_is_known_block_type(s->types[i])) {
-            chunk_count--;
-            i--;
-            offset += 40;
-            continue;
-        }
-
-        /* sector number */
-        s->sectors[i] = buff_read_uint64(buffer, offset + 8);
-        s->sectors[i] += out_offset;
-
-        /* sector count */
-        s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
-
-        /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
-         * therefore be unbounded. */
-        if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
-            error_report("sector count %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        /* offset in (compressed) data fork */
-        s->offsets[i] = buff_read_uint64(buffer, offset + 0x18);
-        s->offsets[i] += in_offset;
-
-        /* length in (compressed) data fork */
-        s->lengths[i] = buff_read_uint64(buffer, offset + 0x20);
-
-        if (s->lengths[i] > DMG_LENGTHS_MAX) {
-            error_report("length %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->lengths[i], i, DMG_LENGTHS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        update_max_chunk_size(s, i, &ds->max_compressed_size,
-                              &ds->max_sectors_per_chunk);
-        offset += 40;
-    }
-    s->n_chunks += chunk_count;
-    return 0;
-
-fail:
-    return ret;
-}
-
-static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
-                                  uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint32_t count, rsrc_data_offset;
-    uint8_t *buffer = NULL;
-    uint64_t info_end;
-    uint64_t offset;
-
-    /* read offset from begin of resource fork (info_begin) to resource data */
-    ret = read_uint32(bs, info_begin, &rsrc_data_offset);
-    if (ret < 0) {
-        goto fail;
-    } else if (rsrc_data_offset > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* read length of resource data */
-    ret = read_uint32(bs, info_begin + 8, &count);
-    if (ret < 0) {
-        goto fail;
-    } else if (count == 0 || rsrc_data_offset + count > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* begin of resource data (consisting of one or more resources) */
-    offset = info_begin + rsrc_data_offset;
-
-    /* end of resource data (there is possibly a following resource map
-     * which will be ignored). */
-    info_end = offset + count;
-
-    /* read offsets (mish blocks) from one or more resources in resource data */
-    while (offset < info_end) {
-        /* size of following resource */
-        ret = read_uint32(bs, offset, &count);
-        if (ret < 0) {
-            goto fail;
-        } else if (count == 0 || count > info_end - offset) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        offset += 4;
-
-        buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file, offset, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        ret = dmg_read_mish_block(s, ds, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-        /* advance offset by size of resource */
-        offset += count;
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
-static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
-                              uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint8_t *buffer = NULL;
-    char *data_begin, *data_end;
-
-    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
-     * safe upper cap on the data length. A test sample had a XML length of
-     * about 1 MiB. */
-    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    buffer = g_malloc(info_length + 1);
-    buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
-    if (ret != info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
-     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
-     * and line feeds. */
-    data_end = (char *)buffer;
-    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
-        guchar *mish;
-        gsize out_len = 0;
-
-        data_begin += 6;
-        data_end = strstr(data_begin, "</data>");
-        /* malformed XML? */
-        if (data_end == NULL) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        *data_end++ = '\0';
-        mish = g_base64_decode(data_begin, &out_len);
-        ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len);
-        g_free(mish);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
 static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVDMGState *s = bs->opaque;
-    DmgHeaderState ds;
-    uint64_t rsrc_fork_offset, rsrc_fork_length;
-    uint64_t plist_xml_offset, plist_xml_length;
+    uint64_t info_begin, info_end, last_in_offset, last_out_offset;
+    uint32_t count, tmp;
+    uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
    int64_t offset;
    int ret;

    bs->read_only = 1;
    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
-    /* used by dmg_read_mish_block to keep track of the current I/O position */
-    ds.data_fork_offset = 0;
-    ds.max_compressed_size = 1;
-    ds.max_sectors_per_chunk = 1;

-    /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file, errp);
+    /* read offset of info blocks */
+    offset = bdrv_getlength(bs->file);
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
+    offset -= 0x1d8;

-    /* offset of data fork (DataForkOffset) */
-    ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
+    ret = read_uint64(bs, offset, &info_begin);
    if (ret < 0) {
        goto fail;
-    } else if (ds.data_fork_offset > offset) {
+    } else if (info_begin == 0) {
        ret = -EINVAL;
        goto fail;
    }

-    /* offset of resource fork (RsrcForkOffset) */
-    ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
+    ret = read_uint32(bs, info_begin, &tmp);
    if (ret < 0) {
        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (rsrc_fork_offset >= offset ||
-        rsrc_fork_length > offset - rsrc_fork_offset) {
+    } else if (tmp != 0x100) {
        ret = -EINVAL;
        goto fail;
    }
-    /* offset of property list (XMLOffset) */
-    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
+
+    ret = read_uint32(bs, info_begin + 4, &count);
    if (ret < 0) {
        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (plist_xml_offset >= offset ||
-        plist_xml_length > offset - plist_xml_offset) {
+    } else if (count == 0) {
        ret = -EINVAL;
        goto fail;
    }
-    ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (bs->total_sectors < 0) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (rsrc_fork_length != 0) {
-        ret = dmg_read_resource_fork(bs, &ds,
-                                     rsrc_fork_offset, rsrc_fork_length);
+    info_end = info_begin + count;
+
+    offset = info_begin + 0x100;
+
+    /* read offsets */
+    last_in_offset = last_out_offset = 0;
+    while (offset < info_end) {
+        uint32_t type;
+
+        ret = read_uint32(bs, offset, &count);
+        if (ret < 0) {
+            goto fail;
+        } else if (count == 0) {
+            ret = -EINVAL;
+            goto fail;
+        }
+        offset += 4;
+
+        ret = read_uint32(bs, offset, &type);
        if (ret < 0) {
            goto fail;
        }
-    } else if (plist_xml_length != 0) {
-        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
-        if (ret < 0) {
-            goto fail;
+
+        if (type == 0x6d697368 && count >= 244) {
+            size_t new_size;
+            uint32_t chunk_count;
+
+            offset += 4;
+            offset += 200;
+
+            chunk_count = (count - 204) / 40;
+            new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+            s->types = g_realloc(s->types, new_size / 2);
+            s->offsets = g_realloc(s->offsets, new_size);
+            s->lengths = g_realloc(s->lengths, new_size);
+            s->sectors = g_realloc(s->sectors, new_size);
+            s->sectorcounts = g_realloc(s->sectorcounts, new_size);
+
+            for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
+                ret = read_uint32(bs, offset, &s->types[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 4;
+                if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
+                    s->types[i] != 2) {
+                    if (s->types[i] == 0xffffffff && i > 0) {
+                        last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
+                        last_out_offset = s->sectors[i - 1] +
+                                          s->sectorcounts[i - 1];
+                    }
+                    chunk_count--;
+                    i--;
+                    offset += 36;
+                    continue;
+                }
+                offset += 4;
+
+                ret = read_uint64(bs, offset, &s->sectors[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                s->sectors[i] += last_out_offset;
+                offset += 8;
+
+                ret = read_uint64(bs, offset, &s->sectorcounts[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 8;
+
+                if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
+                    error_report("sector count %" PRIu64 " for chunk %" PRIu32
+                                 " is larger than max (%u)",
+                                 s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
+                ret = read_uint64(bs, offset, &s->offsets[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                s->offsets[i] += last_in_offset;
+                offset += 8;
+
+                ret = read_uint64(bs, offset, &s->lengths[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 8;
+
+                if (s->lengths[i] > DMG_LENGTHS_MAX) {
+                    error_report("length %" PRIu64 " for chunk %" PRIu32
+                                 " is larger than max (%u)",
+                                 s->lengths[i], i, DMG_LENGTHS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
+                update_max_chunk_size(s, i, &max_compressed_size,
+                                      &max_sectors_per_chunk);
+            }
+            s->n_chunks += chunk_count;
        }
-    } else {
-        ret = -EINVAL;
-        goto fail;
    }

    /* initialize zlib engine */
-    s->compressed_chunk = qemu_try_blockalign(bs->file,
-                                              ds.max_compressed_size + 1);
-    s->uncompressed_chunk = qemu_try_blockalign(bs->file,
-                                                512 * ds.max_sectors_per_chunk);
-    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    s->compressed_chunk = g_malloc(max_compressed_size + 1);
+    s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
@@ -538,8 +302,8 @@ fail:
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
+    g_free(s->compressed_chunk);
+    g_free(s->uncompressed_chunk);
    return ret;
 }

@@ -578,16 +342,13 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
    if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
        int ret;
        uint32_t chunk = search_chunk(s, sector_num);
-#ifdef CONFIG_BZIP2
-        uint64_t total_out;
-#endif

        if (chunk >= s->n_chunks) {
            return -1;
        }

        s->current_chunk = s->n_chunks;
-        switch (s->types[chunk]) { /* block entry type */
+        switch (s->types[chunk]) {
        case 0x80000005: { /* zlib compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
@@ -611,34 +372,6 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
                return -1;
            }
            break; }
-#ifdef CONFIG_BZIP2
-        case 0x80000006: /* bzip2 compressed */
-            /* we need to buffer, because only the chunk as whole can be
-             * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
-                return -1;
-            }
-
-            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
-            if (ret != BZ_OK) {
-                return -1;
-            }
-            s->bzstream.next_in = (char *)s->compressed_chunk;
-            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
-            s->bzstream.next_out = (char *)s->uncompressed_chunk;
-            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
-            ret = BZ2_bzDecompress(&s->bzstream);
-            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
-                        s->bzstream.total_out_lo32;
-            BZ2_bzDecompressEnd(&s->bzstream);
-            if (ret != BZ_STREAM_END ||
-                total_out != 512 * s->sectorcounts[chunk]) {
-                return -1;
-            }
-            break;
-#endif /* CONFIG_BZIP2 */
        case 1: /* copy */
            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->uncompressed_chunk, s->lengths[chunk]);
@@ -647,8 +380,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
            }
            break;
        case 2: /* zero */
-            /* see dmg_read, it is treated specially. No buffer needs to be
-             * pre-filled, the zeroes can be set directly. */
+            memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
            break;
        }
        s->current_chunk = chunk;
@@ -667,13 +399,6 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
        if (dmg_read_chunk(bs, sector_num + i) != 0) {
            return -1;
        }
-        /* Special case: current chunk is all zeroes. Do not perform a memcpy as
-         * s->uncompressed_chunk may be too small to cover the large all-zeroes
-         * section. dmg_read_chunk is called to find s->current_chunk */
-        if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
-            memset(buf + i * 512, 0, 512);
-            continue;
-        }
        sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
        memcpy(buf + i * 512,
               s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
@@ -701,8 +426,8 @@ static void dmg_close(BlockDriverState *bs)
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
+    g_free(s->compressed_chunk);
+    g_free(s->uncompressed_chunk);

    inflateEnd(&s->zstream);
 }
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -291,7 +291,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
    BDRVGlusterState *s = bs->opaque;
    int open_flags = 0;
    int ret = 0;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
@@ -351,12 +351,12 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
    assert(state != NULL);
    assert(state->bs != NULL);

-    state->opaque = g_new0(BDRVGlusterReopenState, 1);
+    state->opaque = g_malloc0(sizeof(BDRVGlusterReopenState));
    reop_s = state->opaque;

    qemu_gluster_parse_flags(state->flags, &open_flags);

-    gconf = g_new0(GlusterConf, 1);
+    gconf = g_malloc0(sizeof(GlusterConf));

    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
    if (reop_s->glfs == NULL) {
@@ -486,7 +486,7 @@ static int qemu_gluster_create(const char *filename,
    int prealloc = 0;
    int64_t total_size = 0;
    char *tmp = NULL;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));

    glfs = qemu_gluster_init(gconf, filename, errp);
    if (!glfs) {
@@ -494,8 +494,8 @@ static int qemu_gluster_create(const char *filename,
        goto out;
    }

-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size =
+        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;

    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    if (!tmp || !strcmp(tmp, "off")) {
@@ -516,8 +516,9 @@ static int qemu_gluster_create(const char *filename,
    if (!fd) {
        ret = -errno;
    } else {
-        if (!glfs_ftruncate(fd, total_size)) {
-            if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
+        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0,
+                    total_size * BDRV_SECTOR_SIZE)) {
                ret = -errno;
            }
        } else {
--- a/block/io.c
+++ b/block/io.c
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2,7 +2,7 @@
 * QEMU Block driver for iSCSI images
 *
 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -34,6 +34,7 @@
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
 #include "block/block_int.h"
+#include "trace.h"
 #include "block/scsi.h"
 #include "qemu/iov.h"
 #include "sysemu/sysemu.h"
@@ -56,19 +57,15 @@ typedef struct IscsiLun {
    uint64_t num_blocks;
    int events;
    QEMUTimer *nop_timer;
-    QEMUTimer *event_timer;
+    uint8_t lbpme;
+    uint8_t lbprz;
+    uint8_t has_write_same;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
    unsigned long *allocationmap;
    int cluster_sectors;
    bool use_16_for_rw;
-    bool write_protected;
-    bool lbpme;
-    bool lbprz;
-    bool dpofua;
-    bool has_write_same;
-    bool force_next_flush;
 } IscsiLun;

 typedef struct IscsiTask {
@@ -81,17 +78,17 @@ typedef struct IscsiTask {
    QEMUBH *bh;
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;
-    bool force_next_flush;
 } IscsiTask;

 typedef struct IscsiAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUIOVector *qiov;
    QEMUBH *bh;
    IscsiLun *iscsilun;
    struct scsi_task *task;
    uint8_t *buf;
    int status;
+    int canceled;
    int64_t sector_num;
    int nb_sectors;
 #ifdef __linux__
@@ -99,11 +96,10 @@ typedef struct IscsiAIOCB {
 #endif
 } IscsiAIOCB;

-#define EVENT_INTERVAL 250
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
-static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};

 /* this threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
@@ -124,14 +120,16 @@ iscsi_bh_cb(void *p)
    g_free(acb->buf);
    acb->buf = NULL;

-    acb->common.cb(acb->common.opaque, acb->status);
+    if (acb->canceled == 0) {
+        acb->common.cb(acb->common.opaque, acb->status);
+    }

    if (acb->task != NULL) {
        scsi_free_scsi_task(acb->task);
        acb->task = NULL;
    }

-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
 }

 static void
@@ -186,13 +184,10 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                iTask->do_retry = 1;
                goto out;
            }
-            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
-             * in libiscsi 1.10.0. Hardcode this value here to avoid
-             * the need to bump the libiscsi requirement to 1.10.0 */
-            if (status == SCSI_STATUS_BUSY || status == 0x28) {
+            if (status == SCSI_STATUS_BUSY) {
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
-                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
+                error_report("iSCSI Busy (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
@@ -205,8 +200,6 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
            }
        }
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
-    } else {
-        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
    }

 out:
@@ -238,7 +231,7 @@ iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
 }

 static void
-iscsi_aio_cancel(BlockAIOCB *blockacb)
+iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
    IscsiLun *iscsilun = acb->iscsilun;
@@ -247,15 +240,20 @@ iscsi_aio_cancel(BlockAIOCB *blockacb)
        return;
    }

+    acb->canceled = 1;
+
    /* send a task mgmt call to the target to cancel the task on the target */
    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
                                     iscsi_abort_task_cb, acb);

+    while (acb->status == -EINPROGRESS) {
+        aio_poll(iscsilun->aio_context, true);
+    }
 }

 static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
-    .cancel_async       = iscsi_aio_cancel,
+    .cancel             = iscsi_aio_cancel,
 };


@@ -266,30 +264,21 @@ static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
    struct iscsi_context *iscsi = iscsilun->iscsi;
-    int ev = iscsi_which_events(iscsi);
+    int ev;

+    /* We always register a read handler.  */
+    ev = POLLIN;
+    ev |= iscsi_which_events(iscsi);
    if (ev != iscsilun->events) {
        aio_set_fd_handler(iscsilun->aio_context,
                           iscsi_get_fd(iscsi),
-                           (ev & POLLIN) ? iscsi_process_read : NULL,
+                           iscsi_process_read,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
                           iscsilun);
-        iscsilun->events = ev;
+
    }

-    /* newer versions of libiscsi may return zero events. In this
-     * case start a timer to ensure we are able to return to service
-     * once this situation changes. */
-    if (!ev) {
-        timer_mod(iscsilun->event_timer,
-                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
-    }
-}
-
-static void iscsi_timed_set_events(void *opaque)
-{
-    IscsiLun *iscsilun = opaque;
-    iscsi_set_events(iscsilun);
+    iscsilun->events = ev;
 }

 static void
@@ -336,13 +325,6 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
    return 1;
 }

-static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
-{
-    return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
-                                                       iscsilun),
-                                       iscsilun->cluster_sectors));
-}
-
 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
                                    int nb_sectors)
 {
@@ -377,33 +359,24 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
-    int fua;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
-    }
-
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
-    fua = iscsilun->dpofua && !bs->enable_write_cache;
-    iTask.force_next_flush = !fua;
    if (iscsilun->use_16_for_rw) {
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, fua, 0, 0,
+                                        iscsilun->block_size, 0, 0, 0, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, fua, 0, 0,
+                                        iscsilun->block_size, 0, 0, 0, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
@@ -471,7 +444,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
    *pnum = nb_sectors;

    /* LUN does not support logical block provisioning */
-    if (!iscsilun->lbpme) {
+    if (iscsilun->lbpme == 0) {
        goto out;
    }

@@ -558,12 +531,6 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
        return -EINVAL;
    }

-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
-    }
-
    if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int64_t ret;
@@ -631,12 +598,8 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
        return 0;
    }

-    if (!iscsilun->force_next_flush) {
-        return 0;
-    }
-    iscsilun->force_next_flush = false;
-
    iscsi_co_init_iscsitask(iscsilun, &iTask);
+
 retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -675,6 +638,10 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
    g_free(acb->buf);
    acb->buf = NULL;

+    if (acb->canceled != 0) {
+        return;
+    }
+
    acb->status = 0;
    if (status < 0) {
        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
@@ -702,9 +669,9 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
    iscsi_schedule_bh(acb);
 }

-static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
+static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = iscsilun->iscsi;
@@ -716,6 +683,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);

    acb->iscsilun = iscsilun;
+    acb->canceled    = 0;
    acb->bh          = NULL;
    acb->status      = -EINPROGRESS;
    acb->buf         = NULL;
@@ -725,7 +693,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    if (acb->task == NULL) {
        error_report("iSCSI: Failed to allocate task for scsi command. %s",
                     iscsi_get_error(iscsi));
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return NULL;
    }
    memset(acb->task, 0, sizeof(struct scsi_task));
@@ -763,7 +731,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                 (data.size > 0) ? &data : NULL,
                                 acb) != 0) {
        scsi_free_scsi_task(acb->task);
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return NULL;
    }

@@ -925,14 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
    nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);

    if (iscsilun->zeroblock == NULL) {
-        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
-        if (iscsilun->zeroblock == NULL) {
-            return -ENOMEM;
-        }
+        iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
    }

    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    iTask.force_next_flush = true;
 retry:
    if (use_16_for_ws) {
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -1137,8 +1101,8 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
                } else {
                    iscsilun->block_size = rc16->block_length;
                    iscsilun->num_blocks = rc16->returned_lba + 1;
-                    iscsilun->lbpme = !!rc16->lbpme;
-                    iscsilun->lbprz = !!rc16->lbprz;
+                    iscsilun->lbpme = rc16->lbpme;
+                    iscsilun->lbprz = rc16->lbprz;
                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
                }
            }
@@ -1241,11 +1205,6 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
        timer_free(iscsilun->nop_timer);
        iscsilun->nop_timer = NULL;
    }
-    if (iscsilun->event_timer) {
-        timer_del(iscsilun->event_timer);
-        timer_free(iscsilun->event_timer);
-        iscsilun->event_timer = NULL;
-    }
 }

 static void iscsi_attach_aio_context(BlockDriverState *bs,
@@ -1262,51 +1221,15 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
                                        iscsi_nop_timed_event, iscsilun);
    timer_mod(iscsilun->nop_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-
-    /* Prepare a timer for a delayed call to iscsi_set_events */
-    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
-                                          QEMU_CLOCK_REALTIME, SCALE_MS,
-                                          iscsi_timed_set_events, iscsilun);
-}
-
-static void iscsi_modesense_sync(IscsiLun *iscsilun)
-{
-    struct scsi_task *task;
-    struct scsi_mode_sense *ms = NULL;
-    iscsilun->write_protected = false;
-    iscsilun->dpofua = false;
-
-    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
-                                 1, SCSI_MODESENSE_PC_CURRENT,
-                                 0x3F, 0, 255);
-    if (task == NULL) {
-        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
-                     iscsi_get_error(iscsilun->iscsi));
-        goto out;
-    }
-
-    if (task->status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
-        goto out;
-    }
-    ms = scsi_datain_unmarshall(task);
-    if (!ms) {
-        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
-                     iscsi_get_error(iscsilun->iscsi));
-        goto out;
-    }
-    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
-    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
-
-out:
-    if (task) {
-        scsi_free_scsi_task(task);
-    }
 }

 /*
 * We support iscsi url's on the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
+ *
+ * Note: flags are currently not used by iscsi_open.  If this function
+ * is changed such that flags are used, please examine iscsi_reopen_prepare()
+ * to see if needs to be changed as well.
 */
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
@@ -1321,7 +1244,14 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
-    int i, ret = 0;
+    int i, ret;
+
+    if ((BDRV_SECTOR_SIZE % 512) != 0) {
+        error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
+                   "BDRV_SECTOR_SIZE(%lld) is not a multiple "
+                   "of 512", BDRV_SECTOR_SIZE);
+        return -EINVAL;
+    }

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1357,7 +1287,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    if (iscsi_url->user[0] != '\0') {
+    if (iscsi_url->user != NULL) {
        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
                                              iscsi_url->passwd);
        if (ret != 0) {
@@ -1413,16 +1343,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    scsi_free_scsi_task(task);
    task = NULL;

-    iscsi_modesense_sync(iscsilun);
-
-    /* Check the write protect flag of the LUN if we want to write */
-    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
-        iscsilun->write_protected) {
-        error_setg(errp, "Cannot open a write protected LUN as read-write");
-        ret = -EACCES;
-        goto out;
-    }
-
    iscsi_readcapacity_sync(iscsilun, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -1492,11 +1412,10 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
-        if (iscsilun->lbprz) {
-            iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
-            if (iscsilun->allocationmap == NULL) {
-                ret = -ENOMEM;
-            }
+        if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+            iscsilun->allocationmap =
+                bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+                                        iscsilun->cluster_sectors));
        }
    }

@@ -1512,9 +1431,6 @@ out:

    if (ret) {
        if (iscsi != NULL) {
-            if (iscsi_is_logged_in(iscsi)) {
-                iscsi_logout_sync(iscsi);
-            }
            iscsi_destroy_context(iscsi);
        }
        memset(iscsilun, 0, sizeof(IscsiLun));
@@ -1528,66 +1444,46 @@ static void iscsi_close(BlockDriverState *bs)
    struct iscsi_context *iscsi = iscsilun->iscsi;

    iscsi_detach_aio_context(bs);
-    if (iscsi_is_logged_in(iscsi)) {
-        iscsi_logout_sync(iscsi);
-    }
    iscsi_destroy_context(iscsi);
    g_free(iscsilun->zeroblock);
    g_free(iscsilun->allocationmap);
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

-static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
-    return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
-}
-
 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+    IscsiLun *iscsilun = bs->opaque;
+
    /* We don't actually refresh here, but just return data queried in
     * iscsi_open(): iscsi targets don't change their limits. */
-
-    IscsiLun *iscsilun = bs->opaque;
-    uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
-
-    if (iscsilun->bl.max_xfer_len) {
-        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
-    }
-
-    bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
-
    if (iscsilun->lbp.lbpu) {
        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard =
-                sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
+            bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
+                                                 iscsilun);
        }
-        bs->bl.discard_alignment =
-            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+        bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+                                                   iscsilun);
    }

    if (iscsilun->bl.max_ws_len < 0xffffffff) {
-        bs->bl.max_write_zeroes =
-            sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
+        bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
+                                                  iscsilun);
    }
    if (iscsilun->lbp.lbpws) {
-        bs->bl.write_zeroes_alignment =
-            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+        bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+                                                        iscsilun);
    }
-    bs->bl.opt_transfer_length =
-        sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
+    bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
+                                                 iscsilun);
 }

-/* Note that this will not re-establish a connection with an iSCSI target - it
- * is effectively a NOP.  */
+/* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
+ * prepare.  Note that this will not re-establish a connection with an iSCSI
+ * target - it is effectively a NOP.  */
 static int iscsi_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
 {
-    IscsiLun *iscsilun = state->bs->opaque;
-
-    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
-        error_setg(errp, "Cannot open a write protected LUN as read-write");
-        return -EACCES;
-    }
+    /* NOP */
    return 0;
 }

@@ -1612,7 +1508,10 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)

    if (iscsilun->allocationmap != NULL) {
        g_free(iscsilun->allocationmap);
-        iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
+        iscsilun->allocationmap =
+            bitmap_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
+                                                    iscsilun),
+                                    iscsilun->cluster_sectors));
    }

    return 0;
@@ -1626,12 +1525,12 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    IscsiLun *iscsilun = NULL;
    QDict *bs_options;

-    bs = bdrv_new();
+    bs = bdrv_new("", &error_abort);

    /* Read out options */
-    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                              BDRV_SECTOR_SIZE);
-    bs->opaque = g_new0(struct IscsiLun, 1);
+    total_size =
+        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+    bs->opaque = g_malloc0(sizeof(struct IscsiLun));
    iscsilun = bs->opaque;

    bs_options = qdict_new();
@@ -1666,7 +1565,7 @@ out:
 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    IscsiLun *iscsilun = bs->opaque;
-    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
+    bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
    bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
    return 0;
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -28,21 +28,21 @@
 #define MAX_QUEUED_IO  128

 struct qemu_laiocb {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    struct qemu_laio_state *ctx;
    struct iocb iocb;
    ssize_t ret;
    size_t nbytes;
    QEMUIOVector *qiov;
    bool is_read;
-    QSIMPLEQ_ENTRY(qemu_laiocb) next;
+    QLIST_ENTRY(qemu_laiocb) node;
 };

 typedef struct {
+    struct iocb *iocbs[MAX_QUEUED_IO];
    int plugged;
-    unsigned int n;
-    bool blocked;
-    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
+    unsigned int size;
+    unsigned int idx;
 } LaioQueue;

 struct qemu_laio_state {
@@ -51,16 +51,8 @@ struct qemu_laio_state {

    /* io queue for submit at batch */
    LaioQueue io_q;
-
-    /* I/O completion processing */
-    QEMUBH *completion_bh;
-    struct io_event events[MAX_EVENTS];
-    int event_idx;
-    int event_max;
 };

-static void ioq_submit(struct qemu_laio_state *s);
-
 static inline ssize_t io_event_ret(struct io_event *ev)
 {
    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
@@ -87,132 +79,120 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
                ret = -EINVAL;
            }
        }
-    }
-    laiocb->common.cb(laiocb->common.opaque, ret);

-    qemu_aio_unref(laiocb);
-}
-
-/* The completion BH fetches completed I/O requests and invokes their
- * callbacks.
- *
- * The function is somewhat tricky because it supports nested event loops, for
- * example when a request callback invokes aio_poll().  In order to do this,
- * the completion events array and index are kept in qemu_laio_state.  The BH
- * reschedules itself as long as there are completions pending so it will
- * either be called again in a nested event loop or will be called after all
- * events have been completed.  When there are no events left to complete, the
- * BH returns without rescheduling.
- */
-static void qemu_laio_completion_bh(void *opaque)
-{
-    struct qemu_laio_state *s = opaque;
-
-    /* Fetch more completion events when empty */
-    if (s->event_idx == s->event_max) {
-        do {
-            struct timespec ts = { 0 };
-            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
-                                        s->events, &ts);
-        } while (s->event_max == -EINTR);
-
-        s->event_idx = 0;
-        if (s->event_max <= 0) {
-            s->event_max = 0;
-            return; /* no more events */
-        }
+        laiocb->common.cb(laiocb->common.opaque, ret);
    }

-    /* Reschedule so nested event loops see currently pending completions */
-    qemu_bh_schedule(s->completion_bh);
-
-    /* Process completion events */
-    while (s->event_idx < s->event_max) {
-        struct iocb *iocb = s->events[s->event_idx].obj;
-        struct qemu_laiocb *laiocb =
-                container_of(iocb, struct qemu_laiocb, iocb);
-
-        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
-        s->event_idx++;
-
-        qemu_laio_process_completion(s, laiocb);
-    }
-
-    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
-    }
+    qemu_aio_release(laiocb);
 }

 static void qemu_laio_completion_cb(EventNotifier *e)
 {
    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);

-    if (event_notifier_test_and_clear(&s->e)) {
-        qemu_bh_schedule(s->completion_bh);
+    while (event_notifier_test_and_clear(&s->e)) {
+        struct io_event events[MAX_EVENTS];
+        struct timespec ts = { 0 };
+        int nevents, i;
+
+        do {
+            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
+        } while (nevents == -EINTR);
+
+        for (i = 0; i < nevents; i++) {
+            struct iocb *iocb = events[i].obj;
+            struct qemu_laiocb *laiocb =
+                    container_of(iocb, struct qemu_laiocb, iocb);
+
+            laiocb->ret = io_event_ret(&events[i]);
+            qemu_laio_process_completion(s, laiocb);
+        }
    }
 }

-static void laio_cancel(BlockAIOCB *blockacb)
+static void laio_cancel(BlockDriverAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
    int ret;

-    if (laiocb->ret != -EINPROGRESS) {
+    if (laiocb->ret != -EINPROGRESS)
        return;
-    }
+
+    /*
+     * Note that as of Linux 2.6.31 neither the block device code nor any
+     * filesystem implements cancellation of AIO request.
+     * Thus the polling loop below is the normal code path.
+     */
    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
+    if (ret == 0) {
+        laiocb->ret = -ECANCELED;
        return;
    }

-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
+    /*
+     * We have to wait for the iocb to finish.
+     *
+     * The only way to get the iocb status update is by polling the io context.
+     * We might be able to do this slightly more optimal by removing the
+     * O_NONBLOCK flag.
+     */
+    while (laiocb->ret == -EINPROGRESS) {
+        qemu_laio_completion_cb(&laiocb->ctx->e);
+    }
 }

 static const AIOCBInfo laio_aiocb_info = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
+    .cancel             = laio_cancel,
 };

 static void ioq_init(LaioQueue *io_q)
 {
-    QSIMPLEQ_INIT(&io_q->pending);
+    io_q->size = MAX_QUEUED_IO;
+    io_q->idx = 0;
    io_q->plugged = 0;
-    io_q->n = 0;
-    io_q->blocked = false;
 }

-static void ioq_submit(struct qemu_laio_state *s)
+static int ioq_submit(struct qemu_laio_state *s)
 {
-    int ret, len;
-    struct qemu_laiocb *aiocb;
-    struct iocb *iocbs[MAX_QUEUED_IO];
-    QSIMPLEQ_HEAD(, qemu_laiocb) completed;
+    int ret, i = 0;
+    int len = s->io_q.idx;

    do {
-        len = 0;
-        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
-            iocbs[len++] = &aiocb->iocb;
-            if (len == MAX_QUEUED_IO) {
-                break;
-            }
-        }
+        ret = io_submit(s->ctx, len, s->io_q.iocbs);
+    } while (i++ < 3 && ret == -EAGAIN);

-        ret = io_submit(s->ctx, len, iocbs);
-        if (ret == -EAGAIN) {
-            break;
-        }
-        if (ret < 0) {
-            abort();
-        }
+    /* empty io queue */
+    s->io_q.idx = 0;

-        s->io_q.n -= ret;
-        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
-        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
-    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.n > 0);
+    if (ret < 0) {
+        i = 0;
+    } else {
+        i = ret;
+    }
+
+    for (; i < len; i++) {
+        struct qemu_laiocb *laiocb =
+            container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
+
+        laiocb->ret = (ret < 0) ? ret : -EIO;
+        qemu_laio_process_completion(s, laiocb);
+    }
+    return ret;
+}
+
+static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
+{
+    unsigned int idx = s->io_q.idx;
+
+    s->io_q.iocbs[idx++] = iocb;
+    s->io_q.idx = idx;
+
+    /* submit immediately if queue is full */
+    if (idx == s->io_q.size) {
+        ioq_submit(s);
+    }
 }

 void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
@@ -222,24 +202,27 @@ void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
    s->io_q.plugged++;
 }

-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
 {
    struct qemu_laio_state *s = aio_ctx;
+    int ret = 0;

    assert(s->io_q.plugged > 0 || !unplug);

    if (unplug && --s->io_q.plugged > 0) {
-        return;
+        return 0;
    }

-    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
+    if (s->io_q.idx > 0) {
+        ret = ioq_submit(s);
    }
+
+    return ret;
 }

-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
 {
    struct qemu_laio_state *s = aio_ctx;
    struct qemu_laiocb *laiocb;
@@ -270,16 +253,17 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

-    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.n++;
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
-        ioq_submit(s);
+    if (!s->io_q.plugged) {
+        if (io_submit(s->ctx, 1, &iocbs) < 0) {
+            goto out_free_aiocb;
+        }
+    } else {
+        ioq_enqueue(s, iocbs);
    }
    return &laiocb->common;

 out_free_aiocb:
-    qemu_aio_unref(laiocb);
+    qemu_aio_release(laiocb);
    return NULL;
 }

@@ -288,14 +272,12 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
    struct qemu_laio_state *s = s_;

    aio_set_event_notifier(old_context, &s->e, NULL);
-    qemu_bh_delete(s->completion_bh);
 }

 void laio_attach_aio_context(void *s_, AioContext *new_context)
 {
    struct qemu_laio_state *s = s_;

-    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
 }

--- a/block/mirror.c
+++ b/block/mirror.c
@@ -45,7 +45,6 @@ typedef struct MirrorBlockJob {
    int64_t sector_num;
    int64_t granularity;
    size_t buf_size;
-    int64_t bdev_length;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
    HBitmapIter hbi;
@@ -55,7 +54,6 @@ typedef struct MirrorBlockJob {

    unsigned long *in_flight_bitmap;
    int in_flight;
-    int sectors_in_flight;
    int ret;
 } MirrorBlockJob;

@@ -89,7 +87,6 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);

    s->in_flight--;
-    s->sectors_in_flight -= op->nb_sectors;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
@@ -101,11 +98,8 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    chunk_num = op->sector_num / sectors_per_chunk;
    nb_chunks = op->nb_sectors / sectors_per_chunk;
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
-    if (ret >= 0) {
-        if (s->cow_bitmap) {
-            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
-        }
-        s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
+    if (s->cow_bitmap && ret >= 0) {
+        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
    }

    qemu_iovec_destroy(&op->qiov);
@@ -125,9 +119,10 @@ static void mirror_write_complete(void *opaque, int ret)
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -141,9 +136,10 @@ static void mirror_read_complete(void *opaque, int ret)
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, true, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -161,21 +157,22 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
-    uint64_t delay_ns = 0;
+    uint64_t delay_ns;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+        trace_mirror_restart_iter(s,
+                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    end = s->bdev_length / BDRV_SECTOR_SIZE;
+    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
@@ -250,6 +247,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        next_chunk += added_chunks;
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
+        } else {
+            delay_ns = 0;
        }
    } while (delay_ns == 0 && next_sector < end);

@@ -283,11 +282,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        next_sector += sectors_per_chunk;
    }

-    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
+    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    s->in_flight++;
-    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
@@ -318,62 +316,14 @@ static void mirror_drain(MirrorBlockJob *s)
    }
 }

-typedef struct {
-    int ret;
-} MirrorExitData;
-
-static void mirror_exit(BlockJob *job, void *opaque)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    MirrorExitData *data = opaque;
-    AioContext *replace_aio_context = NULL;
-
-    if (s->to_replace) {
-        replace_aio_context = bdrv_get_aio_context(s->to_replace);
-        aio_context_acquire(replace_aio_context);
-    }
-
-    if (s->should_complete && data->ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
-        if (s->to_replace) {
-            to_replace = s->to_replace;
-        }
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
-        }
-        bdrv_swap(s->target, to_replace);
-        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-            /* drop the bs loop chain formed by the swap: break the loop then
-             * trigger the unref from the top one */
-            BlockDriverState *p = s->base->backing_hd;
-            bdrv_set_backing_hd(s->base, NULL);
-            bdrv_unref(p);
-        }
-    }
-    if (s->to_replace) {
-        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
-        error_free(s->replace_blocker);
-        bdrv_unref(s->to_replace);
-    }
-    if (replace_aio_context) {
-        aio_context_release(replace_aio_context);
-    }
-    g_free(s->replaces);
-    bdrv_unref(s->target);
-    block_job_completed(&s->common, data->ret);
-    g_free(data);
-}
-
 static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
-    MirrorExitData *data;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
-    char backing_filename[2]; /* we only need 2 characters because we are only
-                                 checking for a NULL string */
+    char backing_filename[1024];
    int ret = 0;
    int n;

@@ -381,11 +331,11 @@ static void coroutine_fn mirror_run(void *opaque)
        goto immediate_exit;
    }

-    s->bdev_length = bdrv_getlength(bs);
-    if (s->bdev_length < 0) {
-        ret = s->bdev_length;
+    s->common.len = bdrv_getlength(bs);
+    if (s->common.len < 0) {
+        ret = s->common.len;
        goto immediate_exit;
-    } else if (s->bdev_length == 0) {
+    } else if (s->common.len == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
@@ -396,7 +346,7 @@ static void coroutine_fn mirror_run(void *opaque)
        goto immediate_exit;
    }

-    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
+    length = DIV_ROUND_UP(s->common.len, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
@@ -416,13 +366,8 @@ static void coroutine_fn mirror_run(void *opaque)
        }
    }

-    end = s->bdev_length / BDRV_SECTOR_SIZE;
-    s->buf = qemu_try_blockalign(bs, s->buf_size);
-    if (s->buf == NULL) {
-        ret = -ENOMEM;
-        goto immediate_exit;
-    }
-
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

@@ -440,7 +385,7 @@ static void coroutine_fn mirror_run(void *opaque)

            assert(n > 0);
            if (ret == 1) {
-                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
@@ -448,7 +393,7 @@ static void coroutine_fn mirror_run(void *opaque)
        }
    }

-    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    for (;;) {
        uint64_t delay_ns = 0;
@@ -460,16 +405,10 @@ static void coroutine_fn mirror_run(void *opaque)
            goto immediate_exit;
        }

-        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
-        /* s->common.offset contains the number of bytes already processed so
-         * far, cnt is the number of dirty sectors remaining and
-         * s->sectors_in_flight is the number of sectors currently being
-         * processed; together those are the current total operation length */
-        s->common.len = s->common.offset +
-                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
+        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);

        /* Note that even when no rate limit is applied we need to yield
-         * periodically with no pending I/O so that bdrv_drain_all() returns.
+         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
@@ -482,6 +421,9 @@ static void coroutine_fn mirror_run(void *opaque)
                continue;
            } else if (cnt != 0) {
                delay_ns = mirror_iteration(s);
+                if (delay_ns == 0) {
+                    continue;
+                }
            }
        }

@@ -500,6 +442,7 @@ static void coroutine_fn mirror_run(void *opaque)
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
+                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_event_ready(&s->common);
                    s->synced = true;
@@ -507,7 +450,7 @@ static void coroutine_fn mirror_run(void *opaque)

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
+                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
            }
        }

@@ -521,13 +464,15 @@ static void coroutine_fn mirror_run(void *opaque)
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_drain(bs);
-            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
+            bdrv_drain_all();
+            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        if (!s->synced) {
+            /* Publish progress */
+            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
@@ -562,10 +507,31 @@ immediate_exit:
    g_free(s->in_flight_bitmap);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    bdrv_iostatus_disable(s->target);
-
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
+    if (s->should_complete && ret == 0) {
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
+        }
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
+        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
+            /* drop the bs loop chain formed by the swap: break the loop then
+             * trigger the unref from the top one */
+            BlockDriverState *p = s->base->backing_hd;
+            bdrv_set_backing_hd(s->base, NULL);
+            bdrv_unref(p);
+        }
+    }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
+    g_free(s->replaces);
+    bdrv_unref(s->target);
+    block_job_completed(&s->common, ret);
 }

 static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -598,34 +564,26 @@ static void mirror_complete(BlockJob *job, Error **errp)
        return;
    }
    if (!s->synced) {
-        error_set(errp, QERR_BLOCK_JOB_NOT_READY,
-                  bdrv_get_device_name(job->bs));
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

    /* check the target bs is not blocked and block all operations on it */
    if (s->replaces) {
-        AioContext *replace_aio_context;
-
        s->to_replace = check_to_replace_node(s->replaces, &local_err);
        if (!s->to_replace) {
            error_propagate(errp, local_err);
            return;
        }

-        replace_aio_context = bdrv_get_aio_context(s->to_replace);
-        aio_context_acquire(replace_aio_context);
-
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
        bdrv_ref(s->to_replace);
-
-        aio_context_release(replace_aio_context);
    }

    s->should_complete = true;
-    block_job_enter(&s->common);
+    block_job_resume(job);
 }

 static const BlockJobDriver mirror_job_driver = {
@@ -647,11 +605,11 @@ static const BlockJobDriver commit_active_job_driver = {

 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                             const char *replaces,
-                             int64_t speed, uint32_t granularity,
+                             int64_t speed, int64_t granularity,
                             int64_t buf_size,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
-                             BlockCompletionFunc *cb,
+                             BlockDriverCompletionFunc *cb,
                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base)
@@ -659,7 +617,15 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
    MirrorBlockJob *s;

    if (granularity == 0) {
-        granularity = bdrv_get_default_bitmap_granularity(target);
+        /* Choose the default granularity based on the target file's cluster
+         * size, clamped between 4k and 64k.  */
+        BlockDriverInfo bdi;
+        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
+            granularity = MAX(4096, bdi.cluster_size);
+            granularity = MIN(65536, granularity);
+        } else {
+            granularity = 65536;
+        }
    }

    assert ((granularity & (granularity - 1)) == 0);
@@ -686,7 +652,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
    s->granularity = granularity;
    s->buf_size = MAX(buf_size, granularity);

-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
    if (!s->dirty_bitmap) {
        return;
    }
@@ -700,19 +666,15 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,

 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  const char *replaces,
-                  int64_t speed, uint32_t granularity, int64_t buf_size,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  BlockCompletionFunc *cb,
+                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    bool is_none_mode;
    BlockDriverState *base;

-    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
-        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
-        return;
-    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
    mirror_start_job(bs, target, replaces,
@@ -724,7 +686,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                         int64_t speed,
                         BlockdevOnError on_error,
-                         BlockCompletionFunc *cb,
+                         BlockDriverCompletionFunc *cb,
                         void *opaque, Error **errp)
 {
    int64_t length, base_length;
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -43,23 +43,20 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
    }
 }

-static void nbd_teardown_connection(BlockDriverState *bs)
+static void nbd_teardown_connection(NbdClientSession *client)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-
    /* finish any pending coroutines */
    shutdown(client->sock, 2);
    nbd_recv_coroutines_enter_all(client);

-    nbd_client_detach_aio_context(bs);
+    nbd_client_session_detach_aio_context(client);
    closesocket(client->sock);
    client->sock = -1;
 }

 static void nbd_reply_ready(void *opaque)
 {
-    BlockDriverState *bs = opaque;
-    NbdClientSession *s = nbd_get_client_session(bs);
+    NbdClientSession *s = opaque;
    uint64_t i;
    int ret;

@@ -92,40 +89,28 @@ static void nbd_reply_ready(void *opaque)
    }

 fail:
-    nbd_teardown_connection(bs);
+    nbd_teardown_connection(s);
 }

 static void nbd_restart_write(void *opaque)
 {
-    BlockDriverState *bs = opaque;
+    NbdClientSession *s = opaque;

-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
+    qemu_coroutine_enter(s->send_coroutine, NULL);
 }

-static int nbd_co_send_request(BlockDriverState *bs,
-                               struct nbd_request *request,
-                               QEMUIOVector *qiov, int offset)
+static int nbd_co_send_request(NbdClientSession *s,
+    struct nbd_request *request,
+    QEMUIOVector *qiov, int offset)
 {
-    NbdClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
-    int rc, ret, i;
+    int rc, ret;

    qemu_co_mutex_lock(&s->send_mutex);
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i] == NULL) {
-            s->recv_coroutine[i] = qemu_coroutine_self();
-            break;
-        }
-    }
-
-    assert(i < MAX_NBD_REQUESTS);
-    request->handle = INDEX_TO_HANDLE(s, i);
    s->send_coroutine = qemu_coroutine_self();
-    aio_context = bdrv_get_aio_context(bs);
-
+    aio_context = bdrv_get_aio_context(s->bs);
    aio_set_fd_handler(aio_context, s->sock,
-                       nbd_reply_ready, nbd_restart_write, bs);
+                       nbd_reply_ready, nbd_restart_write, s);
    if (qiov) {
        if (!s->is_unix) {
            socket_set_cork(s->sock, 1);
@@ -144,7 +129,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
    } else {
        rc = nbd_send_request(s->sock, request);
    }
-    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
@@ -179,6 +164,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
 static void nbd_coroutine_start(NbdClientSession *s,
   struct nbd_request *request)
 {
+    int i;
+
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
@@ -187,7 +174,15 @@ static void nbd_coroutine_start(NbdClientSession *s,
    }
    s->in_flight++;

-    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->recv_coroutine[i] == NULL) {
+            s->recv_coroutine[i] = qemu_coroutine_self();
+            break;
+        }
+    }
+
+    assert(i < MAX_NBD_REQUESTS);
+    request->handle = INDEX_TO_HANDLE(s, i);
 }

 static void nbd_coroutine_end(NbdClientSession *s,
@@ -200,11 +195,10 @@ static void nbd_coroutine_end(NbdClientSession *s,
    }
 }

-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
+static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov,
                          int offset)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_READ };
    struct nbd_reply reply;
    ssize_t ret;
@@ -213,7 +207,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
@@ -224,16 +218,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,

 }

-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
+static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num,
                           int nb_sectors, QEMUIOVector *qiov,
                           int offset)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_WRITE };
    struct nbd_reply reply;
    ssize_t ret;

-    if (!bdrv_enable_write_cache(bs) &&
+    if (!bdrv_enable_write_cache(client->bs) &&
        (client->nbdflags & NBD_FLAG_SEND_FUA)) {
        request.type |= NBD_CMD_FLAG_FUA;
    }
@@ -242,7 +235,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, qiov, offset);
+    ret = nbd_co_send_request(client, &request, qiov, offset);
    if (ret < 0) {
        reply.error = -ret;
    } else {
@@ -256,13 +249,14 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
 * remain aligned to 4K. */
 #define NBD_MAX_SECTORS 2040

-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
+int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
 {
    int offset = 0;
    int ret;
    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        ret = nbd_co_readv_1(client, sector_num,
+                             NBD_MAX_SECTORS, qiov, offset);
        if (ret < 0) {
            return ret;
        }
@@ -270,16 +264,17 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
        sector_num += NBD_MAX_SECTORS;
        nb_sectors -= NBD_MAX_SECTORS;
    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
+    return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset);
 }

-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
+int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
+                                 int nb_sectors, QEMUIOVector *qiov)
 {
    int offset = 0;
    int ret;
    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        ret = nbd_co_writev_1(client, sector_num,
+                              NBD_MAX_SECTORS, qiov, offset);
        if (ret < 0) {
            return ret;
        }
@@ -287,12 +282,11 @@ int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
        sector_num += NBD_MAX_SECTORS;
        nb_sectors -= NBD_MAX_SECTORS;
    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+    return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset);
 }

-int nbd_client_co_flush(BlockDriverState *bs)
+int nbd_client_session_co_flush(NbdClientSession *client)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_FLUSH };
    struct nbd_reply reply;
    ssize_t ret;
@@ -309,7 +303,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
    request.len = 0;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
@@ -319,10 +313,9 @@ int nbd_client_co_flush(BlockDriverState *bs)
    return -reply.error;
 }

-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
+int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
+    int nb_sectors)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_TRIM };
    struct nbd_reply reply;
    ssize_t ret;
@@ -334,7 +327,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
+    ret = nbd_co_send_request(client, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
@@ -345,48 +338,51 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,

 }

-void nbd_client_detach_aio_context(BlockDriverState *bs)
+void nbd_client_session_detach_aio_context(NbdClientSession *client)
 {
-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+    aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
+                       NULL, NULL, NULL);
 }

-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context)
 {
-    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
-                       nbd_reply_ready, NULL, bs);
+    aio_set_fd_handler(new_context, client->sock,
+                       nbd_reply_ready, NULL, client);
 }

-void nbd_client_close(BlockDriverState *bs)
+void nbd_client_session_close(NbdClientSession *client)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
        .type = NBD_CMD_DISC,
        .from = 0,
        .len = 0
    };

+    if (!client->bs) {
+        return;
+    }
    if (client->sock == -1) {
        return;
    }

    nbd_send_request(client->sock, &request);

-    nbd_teardown_connection(bs);
+    nbd_teardown_connection(client);
+    client->bs = NULL;
 }

-int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
-                    Error **errp)
+int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
+    int sock, const char *export)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
    int ret;

    /* NBD handshake */
    logout("session init %s\n", export);
    qemu_set_block(sock);
    ret = nbd_receive_negotiate(sock, export,
-                                &client->nbdflags, &client->size, errp);
+                                &client->nbdflags, &client->size,
+                                &client->blocksize);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        closesocket(sock);
@@ -395,12 +391,13 @@ int nbd_client_init(BlockDriverState *bs, int sock, const char *export,

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_mutex_init(&client->free_sema);
+    client->bs = bs;
    client->sock = sock;

    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qemu_set_nonblock(sock);
-    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
+    nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
    return 0;
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,6 +20,7 @@ typedef struct NbdClientSession {
    int sock;
    uint32_t nbdflags;
    off_t size;
+    size_t blocksize;

    CoMutex send_mutex;
    CoMutex free_sema;
@@ -30,24 +31,24 @@ typedef struct NbdClientSession {
    struct nbd_reply reply;

    bool is_unix;
+
+    BlockDriverState *bs;
 } NbdClientSession;

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
+int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
+                            int sock, const char *export_name);
+void nbd_client_session_close(NbdClientSession *client);

-int nbd_client_init(BlockDriverState *bs, int sock, const char *export_name,
-                    Error **errp);
-void nbd_client_close(BlockDriverState *bs);
+int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
+                                  int nb_sectors);
+int nbd_client_session_co_flush(NbdClientSession *client);
+int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
+                                 int nb_sectors, QEMUIOVector *qiov);
+int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
+                                int nb_sectors, QEMUIOVector *qiov);

-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors);
-int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov);
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov);
-
-void nbd_client_detach_aio_context(BlockDriverState *bs);
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context);
+void nbd_client_session_detach_aio_context(NbdClientSession *client);
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+                                           AioContext *new_context);

 #endif /* NBD_CLIENT_H */
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -31,10 +31,8 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/sockets.h"
-#include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"

 #include <sys/types.h>
 #include <unistd.h>
@@ -215,8 +213,7 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
    }

    if (!qemu_opt_get(s->socket_opts, "port")) {
-        qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT,
-                            &error_abort);
+        qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
    }

    *export = g_strdup(qdict_get_try_str(options, "export"));
@@ -225,12 +222,6 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
    }
 }

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    return &s->client;
-}
-
 static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
@@ -248,7 +239,7 @@ static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
    /* Failed to establish connection */
    if (sock < 0) {
        logout("Failed to establish connection to NBD server\n");
-        return -EIO;
+        return -errno;
    }

    return sock;
@@ -274,12 +265,11 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
     */
    sock = nbd_establish_connection(bs, errp);
    if (sock < 0) {
-        g_free(export);
        return sock;
    }

    /* NBD handshake */
-    result = nbd_client_init(bs, sock, export, errp);
+    result = nbd_client_session_init(&s->client, bs, sock, export);
    g_free(export);
    return result;
 }
@@ -287,30 +277,35 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
 static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
 {
-    return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
+    BDRVNBDState *s = bs->opaque;
+
+    return nbd_client_session_co_readv(&s->client, sector_num,
+                                       nb_sectors, qiov);
 }

 static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
-    return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov);
+    BDRVNBDState *s = bs->opaque;
+
+    return nbd_client_session_co_writev(&s->client, sector_num,
+                                        nb_sectors, qiov);
 }

 static int nbd_co_flush(BlockDriverState *bs)
 {
-    return nbd_client_co_flush(bs);
-}
+    BDRVNBDState *s = bs->opaque;

-static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
-    bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
+    return nbd_client_session_co_flush(&s->client);
 }

 static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors)
 {
-    return nbd_client_co_discard(bs, sector_num, nb_sectors);
+    BDRVNBDState *s = bs->opaque;
+
+    return nbd_client_session_co_discard(&s->client, sector_num,
+                                         nb_sectors);
 }

 static void nbd_close(BlockDriverState *bs)
@@ -318,7 +313,7 @@ static void nbd_close(BlockDriverState *bs)
    BDRVNBDState *s = bs->opaque;

    qemu_opts_del(s->socket_opts);
-    nbd_client_close(bs);
+    nbd_client_session_close(&s->client);
 }

 static int64_t nbd_getlength(BlockDriverState *bs)
@@ -330,58 +325,17 @@ static int64_t nbd_getlength(BlockDriverState *bs)

 static void nbd_detach_aio_context(BlockDriverState *bs)
 {
-    nbd_client_detach_aio_context(bs);
+    BDRVNBDState *s = bs->opaque;
+
+    nbd_client_session_detach_aio_context(&s->client);
 }

 static void nbd_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
-    nbd_client_attach_aio_context(bs, new_context);
-}
+    BDRVNBDState *s = bs->opaque;

-static void nbd_refresh_filename(BlockDriverState *bs)
-{
-    QDict *opts = qdict_new();
-    const char *path   = qdict_get_try_str(bs->options, "path");
-    const char *host   = qdict_get_try_str(bs->options, "host");
-    const char *port   = qdict_get_try_str(bs->options, "port");
-    const char *export = qdict_get_try_str(bs->options, "export");
-
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
-
-    if (path && export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:///%s?socket=%s", export, path);
-    } else if (path && !export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix://?socket=%s", path);
-    } else if (!path && export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s/%s", host, port, export);
-    } else if (!path && export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s/%s", host, export);
-    } else if (!path && !export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s", host, port);
-    } else if (!path && !export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s", host);
-    }
-
-    if (path) {
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
-    } else if (port) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
-    } else {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-    }
-    if (export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
-    }
-
-    bs->full_open_options = opts;
+    nbd_client_session_attach_aio_context(&s->client, new_context);
 }

 static BlockDriver bdrv_nbd = {
@@ -395,11 +349,9 @@ static BlockDriver bdrv_nbd = {
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
 };

 static BlockDriver bdrv_nbd_tcp = {
@@ -413,11 +365,9 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
 };

 static BlockDriver bdrv_nbd_unix = {
@@ -431,11 +381,9 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_getlength             = nbd_getlength,
    .bdrv_detach_aio_context    = nbd_detach_aio_context,
    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
 };

 static void bdrv_nbd_init(void)
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -172,11 +172,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,

    nfs_co_init_task(client, &task);

-    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
-    if (nb_sectors && buf == NULL) {
-        return -ENOMEM;
-    }
-
+    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);

    if (nfs_pwrite_async(client->context, client->fh,
@@ -393,20 +389,16 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
+        return -EINVAL;
    }
    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
                          errp);
    if (ret < 0) {
-        goto out;
+        return ret;
    }
    bs->total_sectors = ret;
-    ret = 0;
-out:
-    qemu_opts_del(opts);
-    return ret;
+    return 0;
 }

 static QemuOptsList nfs_create_opts = {
@@ -426,13 +418,12 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
 {
    int ret = 0;
    int64_t total_size = 0;
-    NFSClient *client = g_new0(NFSClient, 1);
+    NFSClient *client = g_malloc0(sizeof(NFSClient));

    client->aio_context = qemu_get_aio_context();

    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);

    ret = nfs_client_open(client, url, O_CREAT, errp);
    if (ret < 0) {
--- a/block/null.c
+++ b/block/null.c
@@ -1,222 +0,0 @@
-/*
- * Null block driver
- *
- * Authors:
- *  Fam Zheng <famz@redhat.com>
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "block/block_int.h"
-
-#define NULL_OPT_LATENCY "latency-ns"
-
-typedef struct {
-    int64_t length;
-    int64_t latency_ns;
-} BDRVNullState;
-
-static QemuOptsList runtime_opts = {
-    .name = "null",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "",
-        },
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "size of the null block",
-        },
-        {
-            .name = NULL_OPT_LATENCY,
-            .type = QEMU_OPT_NUMBER,
-            .help = "nanoseconds (approximated) to wait "
-                    "before completing request",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    QemuOpts *opts;
-    BDRVNullState *s = bs->opaque;
-    int ret = 0;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &error_abort);
-    s->length =
-        qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
-    s->latency_ns =
-        qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
-    if (s->latency_ns < 0) {
-        error_setg(errp, "latency-ns is invalid");
-        ret = -EINVAL;
-    }
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void null_close(BlockDriverState *bs)
-{
-}
-
-static int64_t null_getlength(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-    return s->length;
-}
-
-static coroutine_fn int null_co_common(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-
-    if (s->latency_ns) {
-        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
-                        s->latency_ns);
-    }
-    return 0;
-}
-
-static coroutine_fn int null_co_readv(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      QEMUIOVector *qiov)
-{
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_writev(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors,
-                                       QEMUIOVector *qiov)
-{
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_flush(BlockDriverState *bs)
-{
-    return null_co_common(bs);
-}
-
-typedef struct {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    QEMUTimer timer;
-} NullAIOCB;
-
-static const AIOCBInfo null_aiocb_info = {
-    .aiocb_size = sizeof(NullAIOCB),
-};
-
-static void null_bh_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
-}
-
-static void null_timer_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    timer_deinit(&acb->timer);
-    qemu_aio_unref(acb);
-}
-
-static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
-                                          BlockCompletionFunc *cb,
-                                          void *opaque)
-{
-    NullAIOCB *acb;
-    BDRVNullState *s = bs->opaque;
-
-    acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
-    /* Only emulate latency after vcpu is running. */
-    if (s->latency_ns) {
-        aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
-                       QEMU_CLOCK_REALTIME, SCALE_NS,
-                       null_timer_cb, acb);
-        timer_mod_ns(&acb->timer,
-                     qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
-    } else {
-        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-        qemu_bh_schedule(acb->bh);
-    }
-    return &acb->common;
-}
-
-static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
-                                  int64_t sector_num, QEMUIOVector *qiov,
-                                  int nb_sectors,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static int null_reopen_prepare(BDRVReopenState *reopen_state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static BlockDriver bdrv_null_co = {
-    .format_name            = "null-co",
-    .protocol_name          = "null-co",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_co_readv          = null_co_readv,
-    .bdrv_co_writev         = null_co_writev,
-    .bdrv_co_flush_to_disk  = null_co_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-};
-
-static BlockDriver bdrv_null_aio = {
-    .format_name            = "null-aio",
-    .protocol_name          = "null-aio",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_aio_readv         = null_aio_readv,
-    .bdrv_aio_writev        = null_aio_writev,
-    .bdrv_aio_flush         = null_aio_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-};
-
-static void bdrv_null_init(void)
-{
-    bdrv_register(&bdrv_null_co);
-    bdrv_register(&bdrv_null_aio);
-}
-
-block_init(bdrv_null_init);
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -2,12 +2,8 @@
 * Block driver for Parallels disk image format
 *
 * Copyright (c) 2007 Alex Beregszaszi
- * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
 *
- * This code was originally based on comparing different disk images created
- * by Parallels. Currently it is based on opened OpenVZ sources
- * available at
- *     http://git.openvz.org/?p=ploop;a=summary
+ * This code is based on comparing different disk images created by Parallels.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -30,558 +26,71 @@
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "qemu/bitmap.h"
-#include "qapi/util.h"

 /**************************************************************/

 #define HEADER_MAGIC "WithoutFreeSpace"
-#define HEADER_MAGIC2 "WithouFreSpacExt"
 #define HEADER_VERSION 2
-#define HEADER_INUSE_MAGIC  (0x746F6E59)
-
-#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */
-
+#define HEADER_SIZE 64

 // always little-endian
-typedef struct ParallelsHeader {
+struct parallels_header {
    char magic[16]; // "WithoutFreeSpace"
    uint32_t version;
    uint32_t heads;
    uint32_t cylinders;
    uint32_t tracks;
-    uint32_t bat_entries;
-    uint64_t nb_sectors;
-    uint32_t inuse;
-    uint32_t data_off;
-    char padding[12];
-} QEMU_PACKED ParallelsHeader;
-
-
-typedef enum ParallelsPreallocMode {
-    PRL_PREALLOC_MODE_FALLOCATE = 0,
-    PRL_PREALLOC_MODE_TRUNCATE = 1,
-    PRL_PREALLOC_MODE_MAX = 2,
-} ParallelsPreallocMode;
-
-static const char *prealloc_mode_lookup[] = {
-    "falloc",
-    "truncate",
-    NULL,
-};
-
+    uint32_t catalog_entries;
+    uint32_t nb_sectors;
+    char padding[24];
+} QEMU_PACKED;

 typedef struct BDRVParallelsState {
-    /** Locking is conservative, the lock protects
-     *   - image file extending (truncate, fallocate)
-     *   - any access to block allocation table
-     */
    CoMutex lock;

-    ParallelsHeader *header;
-    uint32_t header_size;
-    bool header_unclean;
-
-    unsigned long *bat_dirty_bmap;
-    unsigned int  bat_dirty_block;
-
-    uint32_t *bat_bitmap;
-    unsigned int bat_size;
-
-    int64_t  data_end;
-    uint64_t prealloc_size;
-    ParallelsPreallocMode prealloc_mode;
+    uint32_t *catalog_bitmap;
+    unsigned int catalog_size;

    unsigned int tracks;
-
-    unsigned int off_multiplier;
 } BDRVParallelsState;

-
-#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
-#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
-
-static QemuOptsList parallels_runtime_opts = {
-    .name = "parallels",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
-    .desc = {
-        {
-            .name = PARALLELS_OPT_PREALLOC_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Preallocation size on image expansion",
-            .def_value_str = "128MiB",
-        },
-        {
-            .name = PARALLELS_OPT_PREALLOC_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode on image expansion "
-                    "(allowed values: falloc, truncate)",
-            .def_value_str = "falloc",
-        },
-        { /* end of list */ },
-    },
-};
-
-
-static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
+static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
-    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
-}
+    const struct parallels_header *ph = (const void *)buf;

-static uint32_t bat_entry_off(uint32_t idx)
-{
-    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
-}
+    if (buf_size < HEADER_SIZE)
+	return 0;

-static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
-{
-    uint32_t index, offset;
-
-    index = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
-
-    /* not allocated */
-    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
-        return -1;
-    }
-    return bat2sect(s, index) + offset;
-}
-
-static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
-        int nb_sectors)
-{
-    int ret = s->tracks - sector_num % s->tracks;
-    return MIN(nb_sectors, ret);
-}
-
-static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    int64_t start_off = -2, prev_end_off = -2;
-
-    *pnum = 0;
-    while (nb_sectors > 0 || start_off == -2) {
-        int64_t offset = seek_to_sector(s, sector_num);
-        int to_end;
-
-        if (start_off == -2) {
-            start_off = offset;
-            prev_end_off = offset;
-        } else if (offset != prev_end_off) {
-            break;
-        }
-
-        to_end = cluster_remainder(s, sector_num, nb_sectors);
-        nb_sectors -= to_end;
-        sector_num += to_end;
-        *pnum += to_end;
-
-        if (offset > 0) {
-            prev_end_off += to_end;
-        }
-    }
-    return start_off;
-}
-
-static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors, int *pnum)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
-
-    pos = block_status(s, sector_num, nb_sectors, pnum);
-    if (pos > 0) {
-        return pos;
-    }
-
-    idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
-    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
-    space = to_allocate * s->tracks;
-    if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
-        int ret;
-        space += s->prealloc_size;
-        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
-        } else {
-            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS);
-        }
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    for (i = 0; i < to_allocate; i++) {
-        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
-        s->data_end += s->tracks;
-        bitmap_set(s->bat_dirty_bmap,
-                   bat_entry_off(idx) / s->bat_dirty_block, 1);
-    }
-
-    return bat2sect(s, idx) + sector_num % s->tracks;
-}
-
-
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
-    unsigned long bit;
-
-    qemu_co_mutex_lock(&s->lock);
-
-    bit = find_first_bit(s->bat_dirty_bmap, size);
-    while (bit < size) {
-        uint32_t off = bit * s->bat_dirty_block;
-        uint32_t to_write = s->bat_dirty_block;
-        int ret;
-
-        if (off + to_write > s->header_size) {
-            to_write = s->header_size - off;
-        }
-        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
-        if (ret < 0) {
-            qemu_co_mutex_unlock(&s->lock);
-            return ret;
-        }
-        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
-    }
-    bitmap_zero(s->bat_dirty_bmap, size);
-
-    qemu_co_mutex_unlock(&s->lock);
-    return 0;
-}
-
-
-static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t offset;
-
-    qemu_co_mutex_lock(&s->lock);
-    offset = block_status(s, sector_num, nb_sectors, pnum);
-    qemu_co_mutex_unlock(&s->lock);
-
-    if (offset < 0) {
-        return 0;
-    }
-
-    return (offset << BDRV_SECTOR_BITS) |
-        BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-}
-
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-        if (position < 0) {
-            ret = (int)position;
-            break;
-        }
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-        ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
-        if (ret < 0) {
-            break;
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = block_status(s, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        if (position < 0) {
-            qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
-        } else {
-            qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
-            if (ret < 0) {
-                break;
-            }
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-
-static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t size, prev_off, high_off;
-    int ret;
-    uint32_t i;
-    bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
-
-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    if (s->header_unclean) {
-        fprintf(stderr, "%s image was not closed correctly\n",
-                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-        res->corruptions++;
-        if (fix & BDRV_FIX_ERRORS) {
-            /* parallels_close will do the job right */
-            res->corruptions_fixed++;
-            s->header_unclean = false;
-        }
-    }
-
-    res->bfi.total_clusters = s->bat_size;
-    res->bfi.compressed_clusters = 0; /* compression is not supported */
-
-    high_off = 0;
-    prev_off = 0;
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-        if (off == 0) {
-            prev_off = 0;
-            continue;
-        }
-
-        /* cluster outside the image */
-        if (off > size) {
-            fprintf(stderr, "%s cluster %u is outside image\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
-            res->corruptions++;
-            if (fix & BDRV_FIX_ERRORS) {
-                prev_off = 0;
-                s->bat_bitmap[i] = 0;
-                res->corruptions_fixed++;
-                flush_bat = true;
-                continue;
-            }
-        }
-
-        res->bfi.allocated_clusters++;
-        if (off > high_off) {
-            high_off = off;
-        }
-
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
-            res->bfi.fragmented_clusters++;
-        }
-        prev_off = off;
-    }
-
-    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
-        if (ret < 0) {
-            res->check_errors++;
-            return ret;
-        }
-    }
-
-    res->image_end_offset = high_off + cluster_size;
-    if (size > res->image_end_offset) {
-        int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
-        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
-                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
-                size - res->image_end_offset);
-        res->leaks += count;
-        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file, res->image_end_offset);
-            if (ret < 0) {
-                res->check_errors++;
-                return ret;
-            }
-            res->leaks_fixed += count;
-        }
-    }
+    if (!memcmp(ph->magic, HEADER_MAGIC, 16) &&
+	(le32_to_cpu(ph->version) == HEADER_VERSION))
+	return 100;

    return 0;
 }

-
-static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int64_t total_size, cl_size;
-    uint8_t tmp[BDRV_SECTOR_SIZE];
-    Error *local_err = NULL;
-    BlockDriverState *file;
-    uint32_t bat_entries, bat_sectors;
-    ParallelsHeader header;
-    int ret;
-
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
-                          DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-
-    file = NULL;
-    ret = bdrv_open(&file, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-    ret = bdrv_truncate(file, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    bat_entries = DIV_ROUND_UP(total_size, cl_size);
-    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
-    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
-
-    memset(&header, 0, sizeof(header));
-    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
-    header.version = cpu_to_le32(HEADER_VERSION);
-    /* don't care much about geometry, it is not used on image level */
-    header.heads = cpu_to_le32(16);
-    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
-    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
-    header.bat_entries = cpu_to_le32(bat_entries);
-    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
-    header.data_off = cpu_to_le32(bat_sectors);
-
-    /* write all the data */
-    memset(tmp, 0, sizeof(tmp));
-    memcpy(tmp, &header, sizeof(header));
-
-    ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = 0;
-
-done:
-    bdrv_unref(file);
-    return ret;
-
-exit:
-    error_setg_errno(errp, -ret, "Failed to create Parallels image");
-    goto done;
-}
-
-
-static int parallels_probe(const uint8_t *buf, int buf_size,
-                           const char *filename)
-{
-    const ParallelsHeader *ph = (const void *)buf;
-
-    if (buf_size < sizeof(ParallelsHeader)) {
-        return 0;
-    }
-
-    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
-        return 100;
-    }
-
-    return 0;
-}
-
-static int parallels_update_header(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
-
-    if (size > s->header_size) {
-        size = s->header_size;
-    }
-    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
-}
-
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
    BDRVParallelsState *s = bs->opaque;
-    ParallelsHeader ph;
-    int ret, size, i;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    char *buf;
+    int i;
+    struct parallels_header ph;
+    int ret;
+
+    bs->read_only = 1; // no write support yet

    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
    }

-    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
+    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
+        (le32_to_cpu(ph.version) != HEADER_VERSION)) {
+        error_setg(errp, "Image not in Parallels format");
+        ret = -EINVAL;
+        goto fail;
+    }

-    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
-        goto fail_format;
-    }
-    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
-        s->off_multiplier = 1;
-        bs->total_sectors = 0xffffffff & bs->total_sectors;
-    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
-        s->off_multiplier = le32_to_cpu(ph.tracks);
-    } else {
-        goto fail_format;
-    }
+    bs->total_sectors = le32_to_cpu(ph.nb_sectors);

    s->tracks = le32_to_cpu(ph.tracks);
    if (s->tracks == 0) {
@@ -589,165 +98,87 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -EINVAL;
        goto fail;
    }
-    if (s->tracks > INT32_MAX/513) {
-        error_setg(errp, "Invalid image: Too big cluster");
-        ret = -EFBIG;
-        goto fail;
-    }

-    s->bat_size = le32_to_cpu(ph.bat_entries);
-    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
+    s->catalog_size = le32_to_cpu(ph.catalog_entries);
+    if (s->catalog_size > INT_MAX / 4) {
        error_setg(errp, "Catalog too large");
        ret = -EFBIG;
        goto fail;
    }
+    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

-    size = bat_entry_off(s->bat_size);
-    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
-    s->header = qemu_try_blockalign(bs->file, s->header_size);
-    if (s->header == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
-    }
-
-    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
    if (ret < 0) {
        goto fail;
    }
-    s->bat_bitmap = (uint32_t *)(s->header + 1);

-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
-    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
-        s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
-    }
-
-    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    s->prealloc_size =
-        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
-            PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
-    g_free(buf);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-    if (!bdrv_has_zero_init(bs->file) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
-        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
-    }
-
-    if (flags & BDRV_O_RDWR) {
-        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
-        ret = parallels_update_header(bs);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    s->bat_dirty_block = 4 * getpagesize();
-    s->bat_dirty_bmap =
-        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
+    for (i = 0; i < s->catalog_size; i++)
+	le32_to_cpus(&s->catalog_bitmap[i]);

    qemu_co_mutex_init(&s->lock);
    return 0;

-fail_format:
-    error_setg(errp, "Image not in Parallels format");
-    ret = -EINVAL;
 fail:
-    qemu_vfree(s->header);
+    g_free(s->catalog_bitmap);
    return ret;
-
-fail_options:
-    error_propagate(errp, local_err);
-    ret = -EINVAL;
-    goto fail;
 }

+/*
+ * Translate a guest sector number into a byte offset within the image file.
+ *
+ * The virtual disk is split into runs of s->tracks sectors.  For each run,
+ * s->catalog_bitmap[] holds the sector at which its data starts in the image
+ * file; an entry of 0 is treated as "not allocated".
+ *
+ * Returns the byte offset of the requested sector, or -1 if the sector lies
+ * beyond the end of the catalog or its run is not allocated.
+ */
+static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t index, offset;
+
+    index = sector_num / s->tracks;
+    offset = sector_num % s->tracks;
+
+    /* not allocated; valid catalog indices are 0 .. catalog_size - 1 */
+    if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0)) {
+        return -1;
+    }
+    /* widen before adding so the sector sum cannot wrap around at 32 bits */
+    return ((uint64_t)s->catalog_bitmap[index] + offset) * 512;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf, one
+ * sector at a time.  Sectors for which seek_to_sector() returns a negative
+ * position are filled with zeroes instead of being read from the file.
+ *
+ * Returns 0 on success, or a negative errno value if reading from the
+ * underlying file fails (-EIO for a short read).
+ */
+static int parallels_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    while (nb_sectors > 0) {
+        int64_t position = seek_to_sector(bs, sector_num);
+        if (position >= 0) {
+            int ret = bdrv_pread(bs->file, position, buf, 512);
+            if (ret < 0) {
+                /* hand the underlying error code up instead of a bare -1 */
+                return ret;
+            }
+            if (ret != 512) {
+                return -EIO;
+            }
+        } else {
+            memset(buf, 0, 512);
+        }
+        nb_sectors--;
+        sector_num++;
+        buf += 512;
+    }
+    return 0;
+}
+
+/*
+ * Coroutine read entry point: runs parallels_read() while holding s->lock
+ * and returns its result unchanged.
+ */
+static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num,
+                                          uint8_t *buf, int nb_sectors)
+{
+    BDRVParallelsState *state = bs->opaque;
+    int status;
+
+    qemu_co_mutex_lock(&state->lock);
+    status = parallels_read(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&state->lock);
+
+    return status;
+}

 static void parallels_close(BlockDriverState *bs)
 {
    BDRVParallelsState *s = bs->opaque;
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        s->header->inuse = 0;
-        parallels_update_header(bs);
-    }
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
-    }
-
-    g_free(s->bat_dirty_bmap);
-    qemu_vfree(s->header);
+    g_free(s->catalog_bitmap);
 }

-static QemuOptsList parallels_create_opts = {
-    .name = "parallels-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size",
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Parallels image cluster size",
-            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
-        },
-        { /* end of list */ }
-    }
-};
-
 static BlockDriver bdrv_parallels = {
    .format_name	= "parallels",
    .instance_size	= sizeof(BDRVParallelsState),
    .bdrv_probe		= parallels_probe,
    .bdrv_open		= parallels_open,
+    .bdrv_read          = parallels_co_read,
    .bdrv_close		= parallels_close,
-    .bdrv_co_get_block_status = parallels_co_get_block_status,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
-    .bdrv_co_readv  = parallels_co_readv,
-    .bdrv_co_writev = parallels_co_writev,
-
-    .bdrv_create    = parallels_create,
-    .bdrv_check     = parallels_check,
-    .create_opts    = &parallels_create_opts,
 };

 static void bdrv_parallels_init(void)
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -24,17 +24,13 @@

 #include "block/qapi.h"
 #include "block/block_int.h"
-#include "block/write-threshold.h"
 #include "qmp-commands.h"
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
-#include "sysemu/block-backend.h"

-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 {
-    ImageInfo **p_image_info;
-    BlockDriverState *bs0;
    BlockDeviceInfo *info = g_malloc0(sizeof(*info));

    info->file                   = g_strdup(bs->filename);
@@ -43,13 +39,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
    info->encrypted              = bs->encrypted;
    info->encryption_key_missing = bdrv_key_required(bs);

-    info->cache = g_new(BlockdevCacheInfo, 1);
-    *info->cache = (BlockdevCacheInfo) {
-        .writeback      = bdrv_enable_write_cache(bs),
-        .direct         = !!(bs->open_flags & BDRV_O_NOCACHE),
-        .no_flush       = !!(bs->open_flags & BDRV_O_NO_FLUSH),
-    };
-
    if (bs->node_name[0]) {
        info->has_node_name = true;
        info->node_name = g_strdup(bs->node_name);
@@ -92,27 +81,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
        info->iops_size = cfg.op_size;
    }

-    info->write_threshold = bdrv_write_threshold_get(bs);
-
-    bs0 = bs;
-    p_image_info = &info->image;
-    while (1) {
-        Error *local_err = NULL;
-        bdrv_query_image_info(bs0, p_image_info, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            qapi_free_BlockDeviceInfo(info);
-            return NULL;
-        }
-        if (bs0->drv && bs0->backing_hd) {
-            bs0 = bs0->backing_hd;
-            (*p_image_info)->has_backing_image = true;
-            p_image_info = &((*p_image_info)->backing_image);
-        } else {
-            break;
-        }
-    }
-
    return info;
 }

@@ -197,24 +165,19 @@ void bdrv_query_image_info(BlockDriverState *bs,
                           ImageInfo **p_info,
                           Error **errp)
 {
-    int64_t size;
+    uint64_t total_sectors;
    const char *backing_filename;
+    char backing_filename2[1024];
    BlockDriverInfo bdi;
    int ret;
    Error *err = NULL;
-    ImageInfo *info;
+    ImageInfo *info = g_new0(ImageInfo, 1);

-    size = bdrv_getlength(bs);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get size of device '%s'",
-                         bdrv_get_device_name(bs));
-        return;
-    }
+    bdrv_get_geometry(bs, &total_sectors);

-    info = g_new0(ImageInfo, 1);
    info->filename        = g_strdup(bs->filename);
    info->format          = g_strdup(bdrv_get_format_name(bs));
-    info->virtual_size    = size;
+    info->virtual_size    = total_sectors * 512;
    info->actual_size     = bdrv_get_allocated_file_size(bs);
    info->has_actual_size = info->actual_size >= 0;
    if (bdrv_is_encrypted(bs)) {
@@ -234,16 +197,10 @@ void bdrv_query_image_info(BlockDriverState *bs,

    backing_filename = bs->backing_file;
    if (backing_filename[0] != '\0') {
-        char *backing_filename2 = g_malloc0(PATH_MAX);
        info->backing_filename = g_strdup(backing_filename);
        info->has_backing_filename = true;
-        bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
-        if (err) {
-            error_propagate(errp, err);
-            qapi_free_ImageInfo(info);
-            g_free(backing_filename2);
-            return;
-        }
+        bdrv_get_full_backing_filename(bs, backing_filename2,
+                                       sizeof(backing_filename2));

        if (strcmp(backing_filename, backing_filename2) != 0) {
            info->full_backing_filename =
@@ -255,7 +212,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
            info->backing_filename_format = g_strdup(bs->backing_format);
            info->has_backing_filename_format = true;
        }
-        g_free(backing_filename2);
    }

    ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
@@ -280,19 +236,22 @@ void bdrv_query_image_info(BlockDriverState *bs,
 }

 /* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
-                            Error **errp)
+void bdrv_query_info(BlockDriverState *bs,
+                     BlockInfo **p_info,
+                     Error **errp)
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
-    BlockDriverState *bs = blk_bs(blk);
-    info->device = g_strdup(blk_name(blk));
+    BlockDriverState *bs0;
+    ImageInfo **p_image_info;
+    Error *local_err = NULL;
+    info->device = g_strdup(bs->device_name);
    info->type = g_strdup("unknown");
-    info->locked = blk_dev_is_medium_locked(blk);
-    info->removable = blk_dev_has_removable_media(blk);
+    info->locked = bdrv_dev_is_medium_locked(bs);
+    info->removable = bdrv_dev_has_removable_media(bs);

-    if (blk_dev_has_removable_media(blk)) {
+    if (bdrv_dev_has_removable_media(bs)) {
        info->has_tray_open = true;
-        info->tray_open = blk_dev_is_tray_open(blk);
+        info->tray_open = bdrv_dev_is_tray_open(bs);
    }

    if (bdrv_iostatus_is_enabled(bs)) {
@@ -307,9 +266,23 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,

    if (bs->drv) {
        info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(bs, errp);
-        if (info->inserted == NULL) {
-            goto err;
+        info->inserted = bdrv_block_device_info(bs);
+
+        bs0 = bs;
+        p_image_info = &info->inserted->image;
+        while (1) {
+            bdrv_query_image_info(bs0, p_image_info, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                goto err;
+            }
+            if (bs0->drv && bs0->backing_hd) {
+                bs0 = bs0->backing_hd;
+                (*p_image_info)->has_backing_image = true;
+                p_image_info = &((*p_image_info)->backing_image);
+            } else {
+                break;
+            }
        }
    }

@@ -320,45 +293,36 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
-                                    bool query_backing)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 {
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

-    if (bdrv_get_device_name(bs)[0]) {
+    if (bs->device_name[0]) {
        s->has_device = true;
-        s->device = g_strdup(bdrv_get_device_name(bs));
-    }
-
-    if (bdrv_get_node_name(bs)[0]) {
-        s->has_node_name = true;
-        s->node_name = g_strdup(bdrv_get_node_name(bs));
+        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
-    s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
-    s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
-    s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
-    s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
-    s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-    s->stats->wr_highest_offset =
-        bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
+    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
+    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
+    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
+    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
+    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
+    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
+    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
+    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file, query_backing);
+        s->parent = bdrv_query_stats(bs->file);
    }

-    if (query_backing && bs->backing_hd) {
+    if (bs->backing_hd) {
        s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
+        s->backing = bdrv_query_stats(bs->backing_hd);
    }

    return s;
@@ -367,12 +331,12 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
 BlockInfoList *qmp_query_block(Error **errp)
 {
    BlockInfoList *head = NULL, **p_next = &head;
-    BlockBackend *blk;
+    BlockDriverState *bs = NULL;
    Error *local_err = NULL;

-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+     while ((bs = bdrv_next(bs))) {
        BlockInfoList *info = g_malloc0(sizeof(*info));
-        bdrv_query_info(blk, &info->value, &local_err);
+        bdrv_query_info(bs, &info->value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            goto err;
@@ -389,22 +353,17 @@ BlockInfoList *qmp_query_block(Error **errp)
    return NULL;
 }

-BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
-                                     bool query_nodes,
-                                     Error **errp)
+BlockStatsList *qmp_query_blockstats(Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
    BlockDriverState *bs = NULL;

-    /* Just to be safe if query_nodes is not always initialized */
-    query_nodes = has_query_nodes && query_nodes;
-
-    while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+     while ((bs = bdrv_next(bs))) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        AioContext *ctx = bdrv_get_aio_context(bs);

        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(bs, !query_nodes);
+        info->value = bdrv_query_stats(bs);
        aio_context_release(ctx);

        *p_next = info;
@@ -418,7 +377,7 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,

 static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
 {
-    static const char suffixes[NB_SUFFIXES] = {'K', 'M', 'G', 'T'};
+    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

@@ -523,6 +482,9 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
            QDECREF(value);
            break;
        }
+        case QTYPE_NONE:
+            break;
+        case QTYPE_MAX:
        default:
            abort();
    }
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -124,7 +124,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        snprintf(version, sizeof(version), "QCOW version %" PRIu32,
                 header.version);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_or_node_name(bs), "qcow", version);
+                  bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -182,12 +182,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_table == NULL) {
-        error_setg(errp, "Could not allocate memory for L1 table");
-        ret = -ENOMEM;
-        goto fail;
-    }
+    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));

    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
@@ -198,16 +193,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    for(i = 0;i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }
-
-    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
-    s->l2_cache =
-        qemu_try_blockalign(bs->file,
-                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (s->l2_cache == NULL) {
-        error_setg(errp, "Could not allocate L2 table cache");
-        ret = -ENOMEM;
-        goto fail;
-    }
+    /* alloc L2 cache */
+    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    s->cluster_cache = g_malloc(s->cluster_size);
    s->cluster_data = g_malloc(s->cluster_size);
    s->cluster_cache_offset = -1;
@@ -215,7 +202,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
-        if (len > 1023 || len >= sizeof(bs->backing_file)) {
+        if (len > 1023) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
@@ -229,9 +216,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Disable migration when qcow images are used */
-    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "qcow", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
@@ -239,7 +226,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,

 fail:
    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
+    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    return ret;
@@ -269,7 +256,6 @@ static int qcow_set_key(BlockDriverState *bs, const char *key)
    for(i = 0;i < len;i++) {
        keybuf[i] = key[i];
    }
-    assert(bs->encrypted);
    s->crypt_method = s->crypt_method_header;

    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
@@ -412,10 +398,9 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
-                if (bs->encrypted &&
+                if (s->crypt_method &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
-                    assert(s->crypt_method);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
@@ -532,10 +517,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
    void *orig_buf;

    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
+        buf = orig_buf = qemu_blockalign(bs, qiov->size);
    } else {
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
@@ -592,8 +574,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
            if (ret < 0) {
                break;
            }
-            if (bs->encrypted) {
-                assert(s->crypt_method);
+            if (s->crypt_method) {
                encrypt_sectors(s, sector_num, buf, buf,
                                n, 0,
                                &s->aes_decrypt_key);
@@ -638,10 +619,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    s->cluster_cache_offset = -1; /* disable compressed cache */

    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
+        buf = orig_buf = qemu_blockalign(bs, qiov->size);
        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
    } else {
        orig_buf = NULL;
@@ -664,8 +642,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
            ret = -EIO;
            break;
        }
-        if (bs->encrypted) {
-            assert(s->crypt_method);
+        if (s->crypt_method) {
            if (!cluster_data) {
                cluster_data = g_malloc0(s->cluster_size);
            }
@@ -708,7 +685,7 @@ static void qcow_close(BlockDriverState *bs)
    BDRVQcowState *s = bs->opaque;

    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
+    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);

@@ -729,8 +706,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    BlockDriverState *qcow_bs;

    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
        flags |= BLOCK_FLAG_ENCRYPT;
@@ -758,7 +734,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    memset(&header, 0, sizeof(header));
    header.magic = cpu_to_be32(QCOW_MAGIC);
    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size);
+    header.size = cpu_to_be64(total_size * 512);
    header_size = sizeof(header);
    backing_filename_len = 0;
    if (backing_file) {
@@ -780,7 +756,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }
    header_size = (header_size + 7) & ~7;
    shift = header.cluster_bits + header.l2_bits;
-    l1_size = (total_size + (1LL << shift) - 1) >> shift;
+    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
    if (flags & BLOCK_FLAG_ENCRYPT) {
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -28,68 +28,46 @@
 #include "trace.h"

 typedef struct Qcow2CachedTable {
-    int64_t  offset;
-    bool     dirty;
-    uint64_t lru_counter;
-    int      ref;
+    void*   table;
+    int64_t offset;
+    bool    dirty;
+    int     cache_hits;
+    int     ref;
 } Qcow2CachedTable;

 struct Qcow2Cache {
-    Qcow2CachedTable       *entries;
-    struct Qcow2Cache      *depends;
+    Qcow2CachedTable*       entries;
+    struct Qcow2Cache*      depends;
    int                     size;
    bool                    depends_on_flush;
-    void                   *table_array;
-    uint64_t                lru_counter;
 };

-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
-                    Qcow2Cache *c, int table)
-{
-    BDRVQcowState *s = bs->opaque;
-    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
-}
-
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
-                  Qcow2Cache *c, void *table)
-{
-    BDRVQcowState *s = bs->opaque;
-    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
-    int idx = table_offset / s->cluster_size;
-    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
-    return idx;
-}
-
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
+    int i;

-    c = g_new0(Qcow2Cache, 1);
+    c = g_malloc0(sizeof(*c));
    c->size = num_tables;
-    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file,
-                                         (size_t) num_tables * s->cluster_size);
+    c->entries = g_malloc0(sizeof(*c->entries) * num_tables);

-    if (!c->entries || !c->table_array) {
-        qemu_vfree(c->table_array);
-        g_free(c->entries);
-        g_free(c);
-        c = NULL;
+    for (i = 0; i < c->size; i++) {
+        c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
    }

    return c;
 }

-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
 {
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
+        qemu_vfree(c->entries[i].table);
    }

-    qemu_vfree(c->table_array);
    g_free(c->entries);
    g_free(c);

@@ -157,8 +135,8 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

-    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
+        s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@@ -234,51 +212,61 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        c->entries[i].offset = 0;
-        c->entries[i].lru_counter = 0;
+        c->entries[i].cache_hits = 0;
    }

-    c->lru_counter = 0;
-
    return 0;
 }

+/*
+ * Pick the cache slot to evict: among the entries that are not currently
+ * referenced, the one with the fewest recorded hits (the first such entry
+ * wins ties).  Every unreferenced entry that is examined has its hit count
+ * halved afterwards, so that more recent hits weigh more.
+ *
+ * Returns the index of the chosen entry; aborts if every entry is in use.
+ */
+static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
+{
+    int victim = -1;
+    int fewest_hits = INT_MAX;
+    int idx;
+
+    for (idx = 0; idx < c->size; idx++) {
+        Qcow2CachedTable *entry = &c->entries[idx];
+
+        /* Entries that are still referenced must not be touched */
+        if (entry->ref == 0) {
+            if (entry->cache_hits < fewest_hits) {
+                fewest_hits = entry->cache_hits;
+                victim = idx;
+            }
+
+            /* Give newer hits priority */
+            /* TODO Check how to optimize the replacement strategy */
+            entry->cache_hits /= 2;
+        }
+    }
+
+    if (victim < 0) {
+        /* This can't happen in current synchronous code, but leave the check
+         * here as a reminder for whoever starts using AIO with the cache */
+        abort();
+    }
+
+    return victim;
+}
+
 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    uint64_t offset, void **table, bool read_from_disk)
 {
    BDRVQcowState *s = bs->opaque;
    int i;
    int ret;
-    int lookup_index;
-    uint64_t min_lru_counter = UINT64_MAX;
-    int min_lru_index = -1;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
-    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
-    do {
-        const Qcow2CachedTable *t = &c->entries[i];
-        if (t->offset == offset) {
+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].offset == offset) {
            goto found;
        }
-        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
-            min_lru_counter = t->lru_counter;
-            min_lru_index = i;
-        }
-        if (++i == c->size) {
-            i = 0;
-        }
-    } while (i != lookup_index);
-
-    if (min_lru_index == -1) {
-        /* This can't happen in current synchronous code, but leave the check
-         * here as a reminder for whoever starts using AIO with the cache */
-        abort();
    }

-    /* Cache miss: write a table back and replace it */
-    i = min_lru_index;
+    /* If not, write a table back and replace it */
+    i = qcow2_cache_find_entry_to_replace(c);
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);
    if (i < 0) {
@@ -298,19 +286,22 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

-        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
-                         s->cluster_size);
+        ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
        if (ret < 0) {
            return ret;
        }
    }

+    /* Give the table some hits for the start so that it won't be replaced
+     * immediately. The number 32 is completely arbitrary. */
+    c->entries[i].cache_hits = 32;
    c->entries[i].offset = offset;

    /* And return the right table */
 found:
+    c->entries[i].cache_hits++;
    c->entries[i].ref++;
-    *table = qcow2_cache_get_table_addr(bs, c, i);
+    *table = c->entries[i].table;

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
@@ -330,24 +321,36 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    return qcow2_cache_do_get(bs, c, offset, table, false);
 }

-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, *table);
+    int i;

+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].table == *table) {
+            goto found;
+        }
+    }
+    return -ENOENT;
+
+found:
    c->entries[i].ref--;
    *table = NULL;

-    if (c->entries[i].ref == 0) {
-        c->entries[i].lru_counter = ++c->lru_counter;
-    }
-
    assert(c->entries[i].ref >= 0);
+    return 0;
 }

-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table)
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, table);
-    assert(c->entries[i].offset != 0);
+    int i;
+
+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].table == table) {
+            goto found;
+        }
+    }
+    abort();
+
+found:
    c->entries[i].dirty = true;
 }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,20 +72,14 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 #endif

    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_try_blockalign(bs->file,
-                                       align_offset(new_l1_size2, 512));
-    if (new_l1_table == NULL) {
-        return -ENOMEM;
-    }
-    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
-
+    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
-        qemu_vfree(new_l1_table);
+        g_free(new_l1_table);
        return new_l1_table_offset;
    }

@@ -119,7 +113,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
    if (ret < 0) {
        goto fail;
    }
-    qemu_vfree(s->l1_table);
+    g_free(s->l1_table);
    old_l1_table_offset = s->l1_table_offset;
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
@@ -129,7 +123,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        QCOW2_DISCARD_OTHER);
    return 0;
 fail:
-    qemu_vfree(new_l1_table);
+    g_free(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                        QCOW2_DISCARD_OTHER);
    return ret;
@@ -253,14 +247,17 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)

        memcpy(l2_table, old_table, s->cluster_size);

-        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
+        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
+        if (ret < 0) {
+            goto fail;
+        }
    }

    /* write the l2 table to the file */
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
@@ -377,10 +374,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
    }

    iov.iov_len = n * BDRV_SECTOR_SIZE;
-    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
-    if (iov.iov_base == NULL) {
-        return -ENOMEM;
-    }
+    iov.iov_base = qemu_blockalign(bs, iov.iov_len);

    qemu_iovec_init_external(&qiov, &iov, 1);

@@ -400,8 +394,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
        goto out;
    }

-    if (bs->encrypted) {
-        assert(s->crypt_method);
+    if (s->crypt_method) {
        qcow2_encrypt_sectors(s, start_sect + n_start,
                        iov.iov_base, iov.iov_base, n, 1,
                        &s->aes_encrypt_key);
@@ -486,13 +479,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
        goto out;
    }

-    if (offset_into_cluster(s, l2_offset)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
-                                " unaligned (L1 index: %#" PRIx64 ")",
-                                l2_offset, l1_index);
-        return -EIO;
-    }
-
    /* load the l2 table in memory */

    ret = l2_load(bs, l2_offset, &l2_table);
@@ -515,11 +501,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
        break;
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                    " in pre-v3 image (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
+            qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+            return -EIO;
        }
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], QCOW_OFLAG_ZERO);
@@ -535,14 +518,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
-        if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
-                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", *cluster_offset,
-                                    l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
-        }
        break;
    default:
        abort();
@@ -559,10 +534,6 @@ out:
    *num = nb_available - index_in_cluster;

    return ret;
-
-fail:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
-    return ret;
 }

 /*
@@ -598,12 +569,6 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-    if (offset_into_cluster(s, l2_offset)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
-                                " unaligned (L1 index: %#" PRIx64 ")",
-                                l2_offset, l1_index);
-        return -EIO;
-    }

    /* seek the l2 table of the given l2 offset */

@@ -690,9 +655,12 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
    l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return 0;
+    }

    return cluster_offset;
 }
@@ -736,11 +704,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

-    old_cluster = g_try_new(uint64_t, m->nb_clusters);
-    if (old_cluster == NULL) {
-        ret = -ENOMEM;
-        goto err;
-    }
+    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m, &m->cow_start);
@@ -766,7 +730,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    if (ret < 0) {
        goto err;
    }
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);

    assert(l2_index + m->nb_clusters <= s->l2_size);
    for (i = 0; i < m->nb_clusters; i++) {
@@ -784,7 +748,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     }


-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        goto err;
+    }

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
@@ -936,7 +903,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *l2_table;
    unsigned int nb_clusters;
    unsigned int keep_clusters;
-    int ret;
+    int ret, pret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);
@@ -970,15 +937,6 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
        bool offset_matches =
            (cluster_offset & L2E_OFFSET_MASK) == *host_offset;

-        if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
-                                    "%#llx unaligned (guest offset: %#" PRIx64
-                                    ")", cluster_offset & L2E_OFFSET_MASK,
-                                    guest_offset);
-            ret = -EIO;
-            goto out;
-        }
-
        if (*host_offset != 0 && !offset_matches) {
            *bytes = 0;
            ret = 0;
@@ -1003,11 +961,14 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,

    /* Cleanup */
 out:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (pret < 0) {
+        return pret;
+    }

    /* Only return a host offset if we actually made progress. Otherwise we
     * would make requirements for handle_alloc() that it can't fulfill */
-    if (ret > 0) {
+    if (ret) {
        *host_offset = (cluster_offset & L2E_OFFSET_MASK)
                     + offset_into_cluster(s, guest_offset);
    }
@@ -1128,7 +1089,10 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     * wrong with our code. */
    assert(nb_clusters > 0);

-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }

    /* Allocate, if necessary at a given offset in the image file */
    alloc_cluster_offset = start_of_cluster(s, *host_offset);
@@ -1144,17 +1108,6 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
        return 0;
    }

-    /* !*host_offset would overwrite the image header and is reserved for "no
-     * host offset preferred". If 0 was a valid host offset, it'd trigger the
-     * following overlap check; do that now to avoid having an invalid value in
-     * *host_offset. */
-    if (!alloc_cluster_offset) {
-        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                            nb_clusters * s->cluster_size);
-        assert(ret < 0);
-        goto fail;
-    }
-
    /*
     * Save info needed for meta data update.
     *
@@ -1400,7 +1353,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 * clusters.
 */
 static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-    unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
+    unsigned int nb_clusters, enum qcow2_discard_type type)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t *l2_table;
@@ -1422,30 +1375,23 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);

        /*
-         * If full_discard is false, make sure that a discarded area reads back
-         * as zeroes for v3 images (we cannot do it for v2 without actually
-         * writing a zero-filled buffer). We can skip the operation if the
-         * cluster is already marked as zero, or if it's unallocated and we
-         * don't have a backing file.
+         * Make sure that a discarded area reads back as zeroes for v3 images
+         * (we cannot do it for v2 without actually writing a zero-filled
+         * buffer). We can skip the operation if the cluster is already marked
+         * as zero, or if it's unallocated and we don't have a backing file.
         *
         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
         * holding s->lock, so that doesn't work today.
-         *
-         * If full_discard is true, the sector should not read back as zeroes,
-         * but rather fall through to the backing file.
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
            case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing_hd) {
+                if (!bs->backing_hd) {
                    continue;
                }
                break;

            case QCOW2_CLUSTER_ZERO:
-                if (!full_discard) {
-                    continue;
-                }
-                break;
+                continue;

            case QCOW2_CLUSTER_NORMAL:
            case QCOW2_CLUSTER_COMPRESSED:
@@ -1456,8 +1402,8 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        }

        /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (!full_discard && s->qcow_version >= 3) {
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        if (s->qcow_version >= 3) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
        } else {
            l2_table[l2_index + i] = cpu_to_be64(0);
@@ -1467,13 +1413,16 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
    }

-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }

    return nb_clusters;
 }

 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
+    int nb_sectors, enum qcow2_discard_type type)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t end_offset;
@@ -1496,7 +1445,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
+        ret = discard_single_l2(bs, offset, nb_clusters, type);
        if (ret < 0) {
            goto fail;
        }
@@ -1541,7 +1490,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
        old_offset = be64_to_cpu(l2_table[l2_index + i]);

        /* Update L2 entries */
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
        if (old_offset & QCOW_OFLAG_COMPRESSED) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1550,7 +1499,10 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
        }
    }

-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }

    return nb_clusters;
 }
@@ -1593,14 +1545,15 @@ fail:
 * Expands all zero clusters in a specific L1 table (or deallocates them, for
 * non-backed non-pre-allocated zero clusters).
 *
- * l1_entries and *visited_l1_entries are used to keep track of progress for
- * status_cb(). l1_entries contains the total number of L1 entries and
- * *visited_l1_entries counts all visited L1 entries.
+ * expanded_clusters is a bitmap where every bit corresponds to one cluster in
+ * the image file; a bit gets set if the corresponding cluster has been used for
+ * zero expansion (i.e., has been filled with zeroes and is referenced from an
+ * L2 table). nb_clusters contains the total cluster count of the image file,
+ * i.e., the number of bits in expanded_clusters.
 */
 static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, int64_t *visited_l1_entries,
-                                      int64_t l1_entries,
-                                      BlockDriverAmendStatusCB *status_cb)
+                                      int l1_size, uint8_t **expanded_clusters,
+                                      uint64_t *nb_clusters)
 {
    BDRVQcowState *s = bs->opaque;
    bool is_active_l1 = (l1_table == s->l1_table);
@@ -1611,34 +1564,18 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
    if (!is_active_l1) {
        /* inactive L2 tables require a buffer to be stored in when loading
         * them from disk */
-        l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
-        if (l2_table == NULL) {
-            return -ENOMEM;
-        }
+        l2_table = qemu_blockalign(bs, s->cluster_size);
    }

    for (i = 0; i < l1_size; i++) {
        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
        bool l2_dirty = false;
-        uint64_t l2_refcount;

        if (!l2_offset) {
            /* unallocated */
-            (*visited_l1_entries)++;
-            if (status_cb) {
-                status_cb(bs, *visited_l1_entries, l1_entries);
-            }
            continue;
        }

-        if (offset_into_cluster(s, l2_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
-                                    PRIx64 " unaligned (L1 index: %#x)",
-                                    l2_offset, i);
-            ret = -EIO;
-            goto fail;
-        }
-
        if (is_active_l1) {
            /* get active L2 tables from cache */
            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
@@ -1652,19 +1589,33 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
            goto fail;
        }

-        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
-                                 &l2_refcount);
-        if (ret < 0) {
-            goto fail;
-        }
-
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            int64_t offset = l2_entry & L2E_OFFSET_MASK;
+            int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
            int cluster_type = qcow2_get_cluster_type(l2_entry);
            bool preallocated = offset != 0;

-            if (cluster_type != QCOW2_CLUSTER_ZERO) {
+            if (cluster_type == QCOW2_CLUSTER_NORMAL) {
+                cluster_index = offset >> s->cluster_bits;
+                assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
+                if ((*expanded_clusters)[cluster_index / 8] &
+                    (1 << (cluster_index % 8))) {
+                    /* Probably a shared L2 table; this cluster was a zero
+                     * cluster which has been expanded, its refcount
+                     * therefore most likely requires an update. */
+                    ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
+                                                        QCOW2_DISCARD_NEVER);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+                    /* Since we just increased the refcount, the COPIED flag may
+                     * no longer be set. */
+                    l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
+                    l2_dirty = true;
+                }
+                continue;
+            }
+            else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
                continue;
            }

@@ -1682,33 +1633,6 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    ret = offset;
                    goto fail;
                }
-
-                if (l2_refcount > 1) {
-                    /* For shared L2 tables, set the refcount accordingly (it is
-                     * already 1 and needs to be l2_refcount) */
-                    ret = qcow2_update_cluster_refcount(bs,
-                            offset >> s->cluster_bits,
-                            refcount_diff(1, l2_refcount), false,
-                            QCOW2_DISCARD_OTHER);
-                    if (ret < 0) {
-                        qcow2_free_clusters(bs, offset, s->cluster_size,
-                                            QCOW2_DISCARD_OTHER);
-                        goto fail;
-                    }
-                }
-            }
-
-            if (offset_into_cluster(s, offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
-                                        "%#" PRIx64 " unaligned (L2 offset: %#"
-                                        PRIx64 ", L2 index: %#x)", offset,
-                                        l2_offset, j);
-                if (!preallocated) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                ret = -EIO;
-                goto fail;
            }

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
@@ -1730,20 +1654,41 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                goto fail;
            }

-            if (l2_refcount == 1) {
-                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
-            } else {
-                l2_table[j] = cpu_to_be64(offset);
-            }
+            l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
            l2_dirty = true;
+
+            cluster_index = offset >> s->cluster_bits;
+
+            if (cluster_index >= *nb_clusters) {
+                uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
+                uint64_t new_bitmap_size;
+                /* The offset may lie beyond the old end of the underlying image
+                 * file for growable files only */
+                assert(bs->file->growable);
+                *nb_clusters = size_to_clusters(s, bs->file->total_sectors *
+                                                BDRV_SECTOR_SIZE);
+                new_bitmap_size = (*nb_clusters + 7) / 8;
+                *expanded_clusters = g_realloc(*expanded_clusters,
+                                               new_bitmap_size);
+                /* clear the newly allocated space */
+                memset(&(*expanded_clusters)[old_bitmap_size], 0,
+                       new_bitmap_size - old_bitmap_size);
+            }
+
+            assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
+            (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
        }

        if (is_active_l1) {
            if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
                qcow2_cache_depends_on_flush(s->l2_table_cache);
            }
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+            ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+            if (ret < 0) {
+                l2_table = NULL;
+                goto fail;
+            }
        } else {
            if (l2_dirty) {
                ret = qcow2_pre_write_overlap_check(bs,
@@ -1760,11 +1705,6 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                }
            }
        }
-
-        (*visited_l1_entries)++;
-        if (status_cb) {
-            status_cb(bs, *visited_l1_entries, l1_entries);
-        }
    }

    ret = 0;
@@ -1774,7 +1714,12 @@ fail:
        if (!is_active_l1) {
            qemu_vfree(l2_table);
        } else {
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+            if (ret < 0) {
+                qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+            } else {
+                ret = qcow2_cache_put(bs, s->l2_table_cache,
+                        (void **)&l2_table);
+            }
        }
    }
    return ret;
@@ -1786,25 +1731,21 @@ fail:
 * allocation for pre-allocated ones). This is important for downgrading to a
 * qcow2 version which doesn't yet support metadata zero clusters.
 */
-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb)
+int qcow2_expand_zero_clusters(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table = NULL;
-    int64_t l1_entries = 0, visited_l1_entries = 0;
+    uint64_t nb_clusters;
+    uint8_t *expanded_clusters;
    int ret;
    int i, j;

-    if (status_cb) {
-        l1_entries = s->l1_size;
-        for (i = 0; i < s->nb_snapshots; i++) {
-            l1_entries += s->snapshots[i].l1_size;
-        }
-    }
+    nb_clusters = size_to_clusters(s, bs->file->total_sectors *
+                                   BDRV_SECTOR_SIZE);
+    expanded_clusters = g_malloc0((nb_clusters + 7) / 8);

    ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
-                                     &visited_l1_entries, l1_entries,
-                                     status_cb);
+                                     &expanded_clusters, &nb_clusters);
    if (ret < 0) {
        goto fail;
    }
@@ -1838,8 +1779,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
        }

        ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
-                                         &visited_l1_entries, l1_entries,
-                                         status_cb);
+                                         &expanded_clusters, &nb_clusters);
        if (ret < 0) {
            goto fail;
        }
@@ -1848,6 +1788,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
    ret = 0;

 fail:
+    g_free(expanded_clusters);
    g_free(l1_table);
    return ret;
 }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -58,7 +58,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
    }

    offset = s->snapshots_offset;
-    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
+    s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));

    for(i = 0; i < s->nb_snapshots; i++) {
        /* Read statically sized part of the snapshot header */
@@ -351,8 +351,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    memset(sn, 0, sizeof(*sn));

-    /* Generate an ID */
-    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    /* Generate an ID if it wasn't passed */
+    if (sn_info->id_str[0] == '\0') {
+        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    }

    /* Check that the ID is unique */
    if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
@@ -379,12 +381,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

-    l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_size && l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }
@@ -415,7 +412,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    }

    /* Append the new snapshot to the snapshot list */
-    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
+    new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
@@ -439,7 +436,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
                           align_offset(sn->vm_state_size, s->cluster_size)
                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER, false);
+                           QCOW2_DISCARD_NEVER);

 #ifdef DEBUG_ALLOC
    {
@@ -502,11 +499,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
-    sn_l1_table = g_try_malloc0(cur_l1_bytes);
-    if (cur_l1_bytes && sn_l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
+    sn_l1_table = g_malloc0(cur_l1_bytes);

    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
@@ -659,7 +652,7 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        return s->nb_snapshots;
    }

-    sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
+    sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
    for(i = 0; i < s->nb_snapshots; i++) {
        sn_info = sn_tab + i;
        sn = s->snapshots + i;
@@ -700,26 +693,22 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
    sn = &s->snapshots[snapshot_index];

    /* Allocate and read in the snapshot's L1 table */
-    if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
+    if (sn->l1_size > QCOW_MAX_L1_SIZE) {
        error_setg(errp, "Snapshot L1 table too large");
        return -EFBIG;
    }
    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = qemu_try_blockalign(bs->file,
-                                       align_offset(new_l1_bytes, 512));
-    if (new_l1_table == NULL) {
-        return -ENOMEM;
-    }
+    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));

    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
    if (ret < 0) {
        error_setg(errp, "Failed to read l1 table for snapshot");
-        qemu_vfree(new_l1_table);
+        g_free(new_l1_table);
        return ret;
    }

    /* Switch the L1 table */
-    qemu_vfree(s->l1_table);
+    g_free(s->l1_table);

    s->l1_size = sn->l1_size;
    s->l1_table_offset = sn->l1_table_offset;
--- a/block/qcow2.c
+++ b/block/qcow2.c
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -59,19 +59,15 @@
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1ULL << 0)

+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
 #define MIN_CLUSTER_BITS 9
 #define MAX_CLUSTER_BITS 21

-#define MIN_L2_CACHE_SIZE 1 /* cluster */
+#define L2_CACHE_SIZE 16

 /* Must be at least 4 to cover all cases of refcount table growth */
-#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
-
-#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
+#define REFCOUNT_CACHE_SIZE 4

 #define DEFAULT_CLUSTER_SIZE 65536

@@ -81,7 +77,6 @@
 #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
 #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
 #define QCOW2_OPT_OVERLAP "overlap-check"
-#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
 #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
@@ -90,9 +85,6 @@
 #define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
 #define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
 #define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
-#define QCOW2_OPT_CACHE_SIZE "cache-size"
-#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
-#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"

 typedef struct QCowHeader {
    uint32_t magic;
@@ -213,11 +205,6 @@ typedef struct Qcow2DiscardRegion {
    QTAILQ_ENTRY(Qcow2DiscardRegion) next;
 } Qcow2DiscardRegion;

-typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
-                                      uint64_t index);
-typedef void Qcow2SetRefcountFunc(void *refcount_array,
-                                  uint64_t index, uint64_t value);
-
 typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
@@ -226,8 +213,6 @@ typedef struct BDRVQcowState {
    int l2_size;
    int l1_size;
    int l1_vm_state_index;
-    int refcount_block_bits;
-    int refcount_block_size;
    int csize_shift;
    int csize_mask;
    uint64_t cluster_offset_mask;
@@ -263,16 +248,10 @@ typedef struct BDRVQcowState {
    int qcow_version;
    bool use_lazy_refcounts;
    int refcount_order;
-    int refcount_bits;
-    uint64_t refcount_max;
-
-    Qcow2GetRefcountFunc *get_refcount;
-    Qcow2SetRefcountFunc *set_refcount;

    bool discard_passthrough[QCOW2_DISCARD_MAX];

    int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
-    bool signaled_corruption;

    uint64_t incompatible_features;
    uint64_t compatible_features;
@@ -283,14 +262,19 @@ typedef struct BDRVQcowState {
    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
    QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
    bool cache_discards;
-
-    /* Backing file path and format as stored in the image (this is not the
-     * effective path/format, which may be the result of a runtime option
-     * override) */
-    char *image_backing_file;
-    char *image_backing_format;
 } BDRVQcowState;

+/* XXX: use std qcow open function ? */
+typedef struct QCowCreateState {
+    int cluster_size;
+    int cluster_bits;
+    uint16_t *refcount_block;
+    uint64_t *refcount_table;
+    int64_t l1_table_offset;
+    int64_t refcount_table_offset;
+    int64_t refcount_block_offset;
+} QCowCreateState;
+
 struct QCowAIOCB;

 typedef struct Qcow2COWRegion {
@@ -473,11 +457,6 @@ static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
        + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
 }

-static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
-{
-    return r1 > r2 ? r1 - r2 : r2 - r1;
-}
-
 // FIXME Need qcow2_ prefix to global functions

 /* qcow2.c functions */
@@ -489,20 +468,12 @@ int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);

-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
-                             int64_t size, const char *message_format, ...)
-                             GCC_FMT_ATTR(5, 6);
-
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);

-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint64_t *refcount);
-
 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
-                                  uint64_t addend, bool decrease,
-                                  enum qcow2_discard_type type);
+                                  int addend, enum qcow2_discard_type type);

 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
 int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
@@ -548,11 +519,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
+    int nb_sectors, enum qcow2_discard_type type);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);

-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb);
+int qcow2_expand_zero_clusters(BlockDriverState *bs);

 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
@@ -574,8 +544,7 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);

-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table);
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency);
@@ -587,6 +556,6 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);

 #endif
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -227,10 +227,8 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
    };
    int ret;

-    check.used_clusters = g_try_new0(uint32_t, (check.nclusters + 31) / 32);
-    if (check.nclusters && check.used_clusters == NULL) {
-        return -ENOMEM;
-    }
+    check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
+                                       sizeof(check.used_clusters[0]));

    check.result->bfi.total_clusters =
        (s->header.image_size + s->header.cluster_size - 1) /
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -13,7 +13,7 @@

 #include "qed.h"

-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
 {
    GenericCB *gencb = g_malloc(len);
    gencb->cb = cb;
@@ -24,7 +24,7 @@ void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
 void gencb_complete(void *opaque, int ret)
 {
    GenericCB *gencb = opaque;
-    BlockCompletionFunc *cb = gencb->cb;
+    BlockDriverCompletionFunc *cb = gencb->cb;
    void *user_opaque = gencb->opaque;

    g_free(gencb);
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -49,7 +49,7 @@ out:
 }

 static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockCompletionFunc *cb, void *opaque)
+                           BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
                                                cb, opaque);
@@ -119,7 +119,7 @@ out:
 */
 static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush,
-                            BlockCompletionFunc *cb, void *opaque)
+                            BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDWriteTableCB *write_table_cb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
@@ -180,7 +180,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
 }

 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque)
+                        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
    qed_write_table(s, s->header.l1_table_offset,
@@ -235,7 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
 }

 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque)
+                       BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDReadL2TableCB *read_l2_table_cb;

@@ -275,7 +275,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset

 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque)
+                        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
    qed_write_table(s, request->l2_table->offset,
--- a/block/qed.c
+++ b/block/qed.c
@@ -18,8 +18,22 @@
 #include "qapi/qmp/qerror.h"
 #include "migration/migration.h"

+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
+    bool finished = false;
+
+    /* Wait for the request to finish */
+    acb->finished = &finished;
+    while (!finished) {
+        aio_poll(aio_context, true);
+    }
+}
+
 static const AIOCBInfo qed_aiocb_info = {
    .aiocb_size         = sizeof(QEDAIOCB),
+    .cancel             = qed_aio_cancel,
 };

 static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
@@ -130,7 +144,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
-static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
                             void *opaque)
 {
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
@@ -408,7 +422,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bdrv_get_device_or_node_name(bs), "QED", buf);
+            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -436,14 +450,9 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
-    s->l2_shift = ctz32(s->header.cluster_size);
+    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
-    s->l1_shift = s->l2_shift + ctz32(s->table_nelems);
-
-    /* Header size calculation must not overflow uint32_t */
-    if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
-        return -EINVAL;
-    }
+    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
@@ -639,8 +648,7 @@ static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
    char *backing_fmt = NULL;
    int ret;

-    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
    cluster_size = qemu_opt_get_size_del(opts,
@@ -764,7 +772,7 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
                                  QEMUIOVector **backing_qiov,
-                                  BlockCompletionFunc *cb, void *opaque)
+                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
    uint64_t backing_length = 0;
    size_t size;
@@ -856,7 +864,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
 */
 static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
                                       uint64_t len, uint64_t offset,
-                                       BlockCompletionFunc *cb,
+                                       BlockDriverCompletionFunc *cb,
                                       void *opaque)
 {
    CopyFromBackingFileCB *copy_cb;
@@ -907,15 +915,21 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
 static void qed_aio_complete_bh(void *opaque)
 {
    QEDAIOCB *acb = opaque;
-    BlockCompletionFunc *cb = acb->common.cb;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;
+    bool *finished = acb->finished;

    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);

    /* Invoke callback */
    cb(user_opaque, ret);
+
+    /* Signal cancel completion */
+    if (finished) {
+        *finished = true;
+    }
 }

 static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -1069,7 +1083,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos);
-    BlockCompletionFunc *next_fn;
+    BlockDriverCompletionFunc *next_fn;

    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);

@@ -1169,7 +1183,7 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
 static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
    BDRVQEDState *s = acb_to_s(acb);
-    BlockCompletionFunc *cb;
+    BlockDriverCompletionFunc *cb;

    /* Cancel timer when the first allocating request comes in */
    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1226,11 +1240,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
        struct iovec *iov = acb->qiov->iov;

        if (!iov->iov_base) {
-            iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
-            if (iov->iov_base == NULL) {
-                qed_aio_complete(acb, -ENOMEM);
-                return;
-            }
+            iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
            memset(iov->iov_base, 0, iov->iov_len);
        }
    }
@@ -1370,11 +1380,11 @@ static void qed_aio_next_io(void *opaque, int ret)
                      io_fn, acb);
 }

-static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov, int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque, int flags)
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov, int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque, int flags)
 {
    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);

@@ -1382,6 +1392,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
                        opaque, flags);

    acb->flags = flags;
+    acb->finished = NULL;
    acb->qiov = qiov;
    acb->qiov_offset = 0;
    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
@@ -1395,20 +1406,20 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov, int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov, int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 }

-static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov, int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
                         opaque, QED_AIOCB_WRITE);
@@ -1436,7 +1447,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
                                                 int nb_sectors,
                                                 BdrvRequestFlags flags)
 {
-    BlockAIOCB *blockacb;
+    BlockDriverAIOCB *blockacb;
    BDRVQEDState *s = bs->opaque;
    QEDWriteZeroesCB cb = { .done = false };
    QEMUIOVector qiov;
--- a/block/qed.h
+++ b/block/qed.h
@@ -128,11 +128,12 @@ enum {
 };

 typedef struct QEDAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int bh_ret;                     /* final return status for completion bh */
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    int flags;                      /* QED_AIOCB_* bits ORed together */
+    bool *finished;                 /* signal for cancel completion */
    uint64_t end_pos;               /* request end on block device, in bytes */

    /* User scatter-gather list */
@@ -202,11 +203,11 @@ typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t l
 * Generic callback for chaining async callbacks
 */
 typedef struct {
-    BlockCompletionFunc *cb;
+    BlockDriverCompletionFunc *cb;
    void *opaque;
 } GenericCB;

-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
 void gencb_complete(void *opaque, int ret);

 /**
@@ -229,16 +230,16 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 */
 int qed_read_l1_table_sync(BDRVQEDState *s);
 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque);
+                        BlockDriverCompletionFunc *cb, void *opaque);
 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque);
+                       BlockDriverCompletionFunc *cb, void *opaque);
 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque);
+                        BlockDriverCompletionFunc *cb, void *opaque);
 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);

--- a/block/quorum.c
+++ b/block/quorum.c
@@ -16,12 +16,7 @@
 #include <gnutls/gnutls.h>
 #include <gnutls/crypto.h>
 #include "block/block_int.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qint.h"
 #include "qapi/qmp/qjson.h"
-#include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qstring.h"
 #include "qapi-event.h"

 #define HASH_LENGTH 32
@@ -29,7 +24,6 @@
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
 #define QUORUM_OPT_REWRITE        "rewrite-corrupted"
-#define QUORUM_OPT_READ_PATTERN   "read-pattern"

 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -80,8 +74,6 @@ typedef struct BDRVQuorumState {
    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
                            * block if Quorum is reached.
                            */
-
-    QuorumReadPattern read_pattern;
 } BDRVQuorumState;

 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -92,7 +84,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockAIOCB *aiocb;
+    BlockDriverAIOCB *aiocb;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -105,7 +97,7 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;

    /* Request metadata */
    uint64_t sector_num;
@@ -125,12 +117,11 @@ struct QuorumAIOCB {

    bool is_read;
    int vote_ret;
-    int child_iter;             /* which child to read in fifo pattern */
 };

 static bool quorum_vote(QuorumAIOCB *acb);

-static void quorum_aio_cancel(BlockAIOCB *blockacb)
+static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
    BDRVQuorumState *s = acb->common.bs->opaque;
@@ -138,19 +129,21 @@ static void quorum_aio_cancel(BlockAIOCB *blockacb)

    /* cancel all callbacks */
    for (i = 0; i < s->num_children; i++) {
-        if (acb->qcrs[i].aiocb) {
-            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
-        }
+        bdrv_aio_cancel(acb->qcrs[i].aiocb);
    }
+
+    g_free(acb->qcrs);
+    qemu_aio_release(acb);
 }

 static AIOCBInfo quorum_aiocb_info = {
    .aiocb_size         = sizeof(QuorumAIOCB),
-    .cancel_async       = quorum_aio_cancel,
+    .cancel             = quorum_aio_cancel,
 };

 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
+    BDRVQuorumState *s = acb->common.bs->opaque;
    int i, ret = 0;

    if (acb->vote_ret) {
@@ -160,15 +153,14 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
    acb->common.cb(acb->common.opaque, ret);

    if (acb->is_read) {
-        /* on the quorum case acb->child_iter == s->num_children - 1 */
-        for (i = 0; i <= acb->child_iter; i++) {
+        for (i = 0; i < s->num_children; i++) {
            qemu_vfree(acb->qcrs[i].buf);
            qemu_iovec_destroy(&acb->qcrs[i].qiov);
        }
    }

    g_free(acb->qcrs);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
 }

 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -186,7 +178,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
                                   QEMUIOVector *qiov,
                                   uint64_t sector_num,
                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
+                                   BlockDriverCompletionFunc *cb,
                                   void *opaque)
 {
    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
@@ -226,7 +218,10 @@ static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
+    const char *reference = acb->common.bs->device_name[0] ?
+                            acb->common.bs->device_name :
+                            acb->common.bs->node_name;
+
    qapi_event_send_quorum_failure(reference, acb->sector_num,
                                   acb->nb_sectors, &error_abort);
 }
@@ -261,21 +256,6 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret)
    quorum_aio_finalize(acb);
 }

-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
-
-static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
-{
-    int i;
-    assert(dest->niov == source->niov);
-    assert(dest->size == source->size);
-    for (i = 0; i < source->niov; i++) {
-        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
-        memcpy(dest->iov[i].iov_base,
-               source->iov[i].iov_base,
-               source->iov[i].iov_len);
-    }
-}
-
 static void quorum_aio_cb(void *opaque, int ret)
 {
    QuorumChildRequest *sacb = opaque;
@@ -283,21 +263,6 @@ static void quorum_aio_cb(void *opaque, int ret)
    BDRVQuorumState *s = acb->common.bs->opaque;
    bool rewrite = false;

-    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
-        /* We try to read next child in FIFO order if we fail to read */
-        if (ret < 0 && ++acb->child_iter < s->num_children) {
-            read_fifo_child(acb);
-            return;
-        }
-
-        if (ret == 0) {
-            quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
-        }
-        acb->vote_ret = ret;
-        quorum_aio_finalize(acb);
-        return;
-    }
-
    sacb->ret = ret;
    acb->count++;
    if (ret == 0) {
@@ -378,6 +343,19 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
    return count;
 }

+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+    for (i = 0; i < source->niov; i++) {
+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+    }
+}
+
 static void quorum_count_vote(QuorumVotes *votes,
                              QuorumVoteValue *value,
                              int index)
@@ -637,68 +615,40 @@ free_exit:
    return rewrite;
 }

-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
-        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
-        qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
-    }
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
-                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
-    }
-
-    return &acb->common;
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-
-    acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
-                                                     acb->qiov->size);
-    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
-                     acb->qcrs[acb->child_iter].buf);
-    bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
-                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
-                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);
-
-    return &acb->common;
-}
-
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                    int64_t sector_num,
-                                    QEMUIOVector *qiov,
-                                    int nb_sectors,
-                                    BlockCompletionFunc *cb,
-                                    void *opaque)
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockDriverCompletionFunc *cb,
+                                         void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
                                      nb_sectors, cb, opaque);
+    int i;
+
    acb->is_read = true;

-    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        acb->child_iter = s->num_children - 1;
-        return read_quorum_children(acb);
+    for (i = 0; i < s->num_children; i++) {
+        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size);
+        qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov);
+        qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf);
    }

-    acb->child_iter = 0;
-    return read_fifo_child(acb);
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors,
+                       quorum_aio_cb, &acb->qcrs[i]);
+    }
+
+    return &acb->common;
 }

-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     QEMUIOVector *qiov,
-                                     int nb_sectors,
-                                     BlockCompletionFunc *cb,
-                                     void *opaque)
+static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
@@ -832,39 +782,16 @@ static QemuOptsList quorum_runtime_opts = {
            .type = QEMU_OPT_BOOL,
            .help = "Rewrite corrupted block on read quorum",
        },
-        {
-            .name = QUORUM_OPT_READ_PATTERN,
-            .type = QEMU_OPT_STRING,
-            .help = "Allowed pattern: quorum, fifo. Quorum is default",
-        },
        { /* end of list */ }
    },
 };

-static int parse_read_pattern(const char *opt)
-{
-    int i;
-
-    if (!opt) {
-        /* Set quorum as default */
-        return QUORUM_READ_PATTERN_QUORUM;
-    }
-
-    for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
-        if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
-            return i;
-        }
-    }
-
-    return -EINVAL;
-}
-
 static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                       Error **errp)
 {
    BDRVQuorumState *s = bs->opaque;
    Error *local_err = NULL;
-    QemuOpts *opts = NULL;
+    QemuOpts *opts;
    bool *opened;
    QDict *sub = NULL;
    QList *list = NULL;
@@ -900,37 +827,28 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
    }

    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
-    ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
+
+    /* and validate it against s->num_children */
+    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
    if (ret < 0) {
-        error_setg(&local_err, "Please set read-pattern as fifo or quorum");
        goto exit;
    }
-    s->read_pattern = ret;

-    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        /* and validate it against s->num_children */
-        ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
-        if (ret < 0) {
-            goto exit;
-        }
+    /* is the driver in blkverify mode */
+    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+        s->num_children == 2 && s->threshold == 2) {
+        s->is_blkverify = true;
+    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                "and using two files with vote_threshold=2\n");
+    }

-        /* is the driver in blkverify mode */
-        if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
-            s->num_children == 2 && s->threshold == 2) {
-            s->is_blkverify = true;
-        } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
-            fprintf(stderr, "blkverify mode is set by setting blkverify=on "
-                    "and using two files with vote_threshold=2\n");
-        }
-
-        s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
-                                                 false);
-        if (s->rewrite_corrupted && s->is_blkverify) {
-            error_setg(&local_err,
-                       "rewrite-corrupted=on cannot be used with blkverify=on");
-            ret = -EINVAL;
-            goto exit;
-        }
+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+    if (s->rewrite_corrupted && s->is_blkverify) {
+        error_setg(&local_err,
+                   "rewrite-corrupted=on cannot be used with blkverify=on");
+        ret = -EINVAL;
+        goto exit;
    }

    /* allocate the children BlockDriverState array */
@@ -985,7 +903,6 @@ close_exit:
    g_free(s->bs);
    g_free(opened);
 exit:
-    qemu_opts_del(opts);
    /* propagate error */
    if (local_err) {
        error_propagate(errp, local_err);
@@ -1028,39 +945,6 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
    }
 }

-static void quorum_refresh_filename(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    QDict *opts;
-    QList *children;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_refresh_filename(s->bs[i]);
-        if (!s->bs[i]->full_open_options) {
-            return;
-        }
-    }
-
-    children = qlist_new();
-    for (i = 0; i < s->num_children; i++) {
-        QINCREF(s->bs[i]->full_open_options);
-        qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
-    }
-
-    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
-    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
-                  QOBJECT(qint_from_int(s->threshold)));
-    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
-                  QOBJECT(qbool_from_int(s->is_blkverify)));
-    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
-                  QOBJECT(qbool_from_int(s->rewrite_corrupted)));
-    qdict_put_obj(opts, "children", QOBJECT(children));
-
-    bs->full_open_options = opts;
-}
-
 static BlockDriver bdrv_quorum = {
    .format_name                        = "quorum",
    .protocol_name                      = "quorum",
@@ -1069,7 +953,6 @@ static BlockDriver bdrv_quorum = {

    .bdrv_file_open                     = quorum_open,
    .bdrv_close                         = quorum_close,
-    .bdrv_refresh_filename              = quorum_refresh_filename,

    .bdrv_co_flush_to_disk              = quorum_co_flush,

--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,13 +35,13 @@
 #ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
 void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
 void laio_detach_aio_context(void *s, AioContext *old_context);
 void laio_attach_aio_context(void *s, AioContext *new_context);
 void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
 #endif

 #ifdef _WIN32
@@ -49,10 +49,10 @@ typedef struct QEMUWin32AIOState QEMUWin32AIOState;
 QEMUWin32AIOState *win32_aio_init(void);
 void win32_aio_cleanup(QEMUWin32AIOState *aio);
 int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context);
 void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -101,7 +101,7 @@ static int aio_worker(void *arg)
    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
    case QEMU_AIO_READ:
        count = handle_aiocb_rw(aiocb);
-        if (count < aiocb->aio_nbytes) {
+        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
            /* A short read means that we have reached EOF. Pad the buffer
             * with zeros for bytes after EOF. */
            iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
@@ -138,9 +138,9 @@ static int aio_worker(void *arg)
    return ret;
 }

-static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
 {
    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
    ThreadPool *pool;
@@ -369,9 +369,9 @@ fail:
    return ret;
 }

-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                         BlockCompletionFunc *cb, void *opaque)
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -383,9 +383,9 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
    }
 }

-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
+                          BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -397,8 +397,8 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
    }
 }

-static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
-                         BlockCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
@@ -511,8 +511,8 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
    strstart(filename, "file:", &filename);

    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size =
+        qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;

    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                   0644);
@@ -521,7 +521,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        return -EIO;
    }
    set_sparse(fd);
-    ftruncate(fd, total_size);
+    ftruncate(fd, total_size * 512);
    qemu_close(fd);
    return 0;
 }
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -58,58 +58,8 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
-    void *buf = NULL;
-    BlockDriver *drv;
-    QEMUIOVector local_qiov;
-    int ret;
-
-    if (bs->probed && sector_num == 0) {
-        /* As long as these conditions are true, we can't get partial writes to
-         * the probe buffer and can just directly check the request. */
-        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
-        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
-
-        if (nb_sectors == 0) {
-            /* qemu_iovec_to_buf() would fail, but we want to return success
-             * instead of -EINVAL in this case. */
-            return 0;
-        }
-
-        buf = qemu_try_blockalign(bs->file, 512);
-        if (!buf) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-
-        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
-        if (ret != 512) {
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        drv = bdrv_probe_all(buf, 512, NULL);
-        if (drv != bs->drv) {
-            ret = -EPERM;
-            goto fail;
-        }
-
-        /* Use the checked buffer, a malicious guest might be overwriting its
-         * original buffer in the background. */
-        qemu_iovec_init(&local_qiov, qiov->niov + 1);
-        qemu_iovec_add(&local_qiov, buf, 512);
-        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
-        qiov = &local_qiov;
-    }
-
    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
-
-fail:
-    if (qiov == &local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(buf);
-    return ret;
+    return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
 }

 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
@@ -179,10 +129,10 @@ static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
    return bdrv_ioctl(bs->file, req, buf);
 }

-static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-                                 unsigned long int req, void *buf,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque)
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+                                       unsigned long int req, void *buf,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
 {
    return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
 }
@@ -208,18 +158,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    bs->sg = bs->file->sg;
-
-    if (bs->probed && !bdrv_is_read_only(bs)) {
-        fprintf(stderr,
-                "WARNING: Image format was not specified for '%s' and probing "
-                "guessed raw.\n"
-                "         Automatically detecting the format is dangerous for "
-                "raw images, write operations on block 0 will be restricted.\n"
-                "         Specify the 'raw' format explicitly to remove the "
-                "restrictions.\n",
-                bs->file->filename);
-    }
-
    return 0;
 }

@@ -235,16 +173,6 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
    return 1;
 }

-static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
-{
-    return bdrv_probe_blocksizes(bs->file, bsz);
-}
-
-static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
-{
-    return bdrv_probe_geometry(bs->file, geo);
-}
-
 BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .bdrv_probe           = &raw_probe,
@@ -262,8 +190,6 @@ BlockDriver bdrv_raw = {
    .has_variable_length  = true,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
-    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
-    .bdrv_probe_geometry  = &raw_probe_geometry,
    .bdrv_is_inserted     = &raw_is_inserted,
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -68,7 +68,7 @@ typedef enum {
 } RBDAIOCmd;

 typedef struct RBDAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int64_t ret;
    QEMUIOVector *qiov;
@@ -77,6 +77,7 @@ typedef struct RBDAIOCB {
    int64_t sector_num;
    int error;
    struct BDRVRBDState *s;
+    int cancelled;
    int status;
 } RBDAIOCB;

@@ -313,8 +314,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* Read out options */
-    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                     BDRV_SECTOR_SIZE);
+    bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
    if (objsize) {
        if ((objsize - 1) & objsize) {    /* not a power of 2? */
@@ -325,7 +325,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
            error_setg(errp, "obj size too small");
            return -EINVAL;
        }
-        obj_order = ctz32(objsize);
+        obj_order = ffs(objsize) - 1;
    }

    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
@@ -407,7 +407,9 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
    acb->status = 0;

-    qemu_aio_unref(acb);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }

 /* TODO Convert to fine grained options */
@@ -459,7 +461,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
-        error_setg(errp, "error initializing");
+        error_setg(&local_err, "error initializing");
        goto failed_opts;
    }

@@ -495,19 +497,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,

    r = rados_connect(s->cluster);
    if (r < 0) {
-        error_setg(errp, "error connecting");
+        error_setg(&local_err, "error connecting");
        goto failed_shutdown;
    }

    r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
    if (r < 0) {
-        error_setg(errp, "error opening pool %s", pool);
+        error_setg(&local_err, "error opening pool %s", pool);
        goto failed_shutdown;
    }

    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
-        error_setg(errp, "error reading header from %s", s->name);
+        error_setg(&local_err, "error reading header from %s", s->name);
        goto failed_open;
    }

@@ -536,8 +538,25 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rados_shutdown(s->cluster);
 }

+/*
+ * Cancel aio. Since we don't reference acb in any non-QEMU thread,
+ * it is safe to access it here.
+ */
+static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    RBDAIOCB *acb = (RBDAIOCB *) blockacb;
+    acb->cancelled = 1;
+
+    while (acb->status == -EINPROGRESS) {
+        aio_poll(bdrv_get_aio_context(acb->common.bs), true);
+    }
+
+    qemu_aio_release(acb);
+}
+
 static const AIOCBInfo rbd_aiocb_info = {
    .aiocb_size = sizeof(RBDAIOCB),
+    .cancel = qemu_rbd_aio_cancel,
 };

 static void rbd_finish_bh(void *opaque)
@@ -589,16 +608,16 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
 #endif
 }

-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov,
-                                 int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque,
-                                 RBDAIOCmd cmd)
+static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov,
+                                       int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque,
+                                       RBDAIOCmd cmd)
 {
    RBDAIOCB *acb;
-    RADOSCB *rcb = NULL;
+    RADOSCB *rcb;
    rbd_completion_t c;
    int64_t off, size;
    char *buf;
@@ -612,14 +631,12 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
        acb->bounce = NULL;
    } else {
-        acb->bounce = qemu_try_blockalign(bs, qiov->size);
-        if (acb->bounce == NULL) {
-            goto failed;
-        }
+        acb->bounce = qemu_blockalign(bs, qiov->size);
    }
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;
+    acb->cancelled = 0;
    acb->bh = NULL;
    acb->status = -EINPROGRESS;

@@ -632,7 +649,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

-    rcb = g_new(RADOSCB, 1);
+    rcb = g_malloc(sizeof(RADOSCB));
    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
@@ -671,36 +688,36 @@ failed_completion:
 failed:
    g_free(rcb);
    qemu_vfree(acb->bounce);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
    return NULL;
 }

-static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_READ);
 }

-static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov,
-                                       int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov,
+                                             int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_WRITE);
 }

 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
 }
@@ -842,7 +859,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
    int max_snaps = RBD_MAX_SNAPS;

    do {
-        snaps = g_new(rbd_snap_info_t, max_snaps);
+        snaps = g_malloc(sizeof(*snaps) * max_snaps);
        snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
        if (snap_count <= 0) {
            g_free(snaps);
@@ -853,7 +870,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
        goto done;
    }

-    sn_tab = g_new0(QEMUSnapshotInfo, snap_count);
+    sn_tab = g_malloc0(snap_count * sizeof(QEMUSnapshotInfo));

    for (i = 0; i < snap_count; i++) {
        const char *snap_name = snaps[i].name;
@@ -876,29 +893,17 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 }

 #ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
-                                        int64_t sector_num,
-                                        int nb_sectors,
-                                        BlockCompletionFunc *cb,
-                                        void *opaque)
+static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
 {
    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif

-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-static void qemu_rbd_invalidate_cache(BlockDriverState *bs,
-                                      Error **errp)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r = rbd_invalidate_cache(s->image);
-    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to invalidate the cache");
-    }
-}
-#endif
-
 static QemuOptsList qemu_rbd_create_opts = {
    .name = "rbd-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
@@ -948,9 +953,6 @@ static BlockDriver bdrv_rbd = {
    .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
    .bdrv_snapshot_list     = qemu_rbd_snap_list,
    .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-    .bdrv_invalidate_cache  = qemu_rbd_invalidate_cache,
-#endif
 };

 static void bdrv_rbd_init(void)
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -37,7 +37,6 @@
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
 #define SD_OP_DEL_VDI        0x17
-#define SD_OP_GET_CLUSTER_DEFAULT   0x18

 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -92,7 +91,6 @@
 #define SD_NR_VDIS   (1U << 24)
 #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
 #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
-#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
 /*
 * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
 * (SD_EC_MAX_STRIP - 1) for parity strips
@@ -105,9 +103,6 @@
 #define SD_INODE_SIZE (sizeof(SheepdogInode))
 #define CURRENT_VDI_ID 0

-#define LOCK_TYPE_NORMAL 0
-#define LOCK_TYPE_SHARED 1      /* for iSCSI multipath */
-
 typedef struct SheepdogReq {
    uint8_t proto_ver;
    uint8_t opcode;
@@ -169,11 +164,9 @@ typedef struct SheepdogVdiReq {
    uint32_t base_vdi_id;
    uint8_t copies;
    uint8_t copy_policy;
-    uint8_t store_policy;
-    uint8_t block_size_shift;
+    uint8_t reserved[2];
    uint32_t snapid;
-    uint32_t type;
-    uint32_t pad[2];
+    uint32_t pad[3];
 } SheepdogVdiReq;

 typedef struct SheepdogVdiRsp {
@@ -189,21 +182,6 @@ typedef struct SheepdogVdiRsp {
    uint32_t pad[5];
 } SheepdogVdiRsp;

-typedef struct SheepdogClusterRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint8_t nr_copies;
-    uint8_t copy_policy;
-    uint8_t block_size_shift;
-    uint8_t __pad1;
-    uint32_t __pad2[6];
-} SheepdogClusterRsp;
-
 typedef struct SheepdogInode {
    char name[SD_MAX_VDI_LEN];
    char tag[SD_MAX_VDI_TAG_LEN];
@@ -319,7 +297,7 @@ enum AIOCBState {
 };

 struct SheepdogAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;

    QEMUIOVector *qiov;

@@ -333,6 +311,7 @@ struct SheepdogAIOCB {
    void (*aio_done_func)(SheepdogAIOCB *);

    bool cancelable;
+    bool *finished;
    int nr_pending;
 };

@@ -463,7 +442,10 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
 static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
 {
    qemu_coroutine_enter(acb->coroutine, NULL);
-    qemu_aio_unref(acb);
+    if (acb->finished) {
+        *acb->finished = true;
+    }
+    qemu_aio_release(acb);
 }

 /*
@@ -491,38 +473,41 @@ static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
    return true;
 }

-static void sd_aio_cancel(BlockAIOCB *blockacb)
+static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
    BDRVSheepdogState *s = acb->common.bs->opaque;
    AIOReq *aioreq, *next;
+    bool finished = false;

-    if (sd_acb_cancelable(acb)) {
-        /* Remove outstanding requests from pending and failed queues.  */
-        QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
+    acb->finished = &finished;
+    while (!finished) {
+        if (sd_acb_cancelable(acb)) {
+            /* Remove outstanding requests from pending and failed queues.  */
+            QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
+                               next) {
+                if (aioreq->aiocb == acb) {
+                    free_aio_req(s, aioreq);
+                }
            }
-        }
-        QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
+            QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
+                               next) {
+                if (aioreq->aiocb == acb) {
+                    free_aio_req(s, aioreq);
+                }
            }
-        }

-        assert(acb->nr_pending == 0);
-        if (acb->common.cb) {
-            acb->common.cb(acb->common.opaque, -ECANCELED);
+            assert(acb->nr_pending == 0);
+            sd_finish_aiocb(acb);
+            return;
        }
-        sd_finish_aiocb(acb);
+        aio_poll(s->aio_context, true);
    }
 }

 static const AIOCBInfo sd_aiocb_info = {
-    .aiocb_size     = sizeof(SheepdogAIOCB),
-    .cancel_async   = sd_aio_cancel,
+    .aiocb_size = sizeof(SheepdogAIOCB),
+    .cancel = sd_aio_cancel,
 };

 static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
@@ -539,13 +524,13 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,

    acb->aio_done_func = NULL;
    acb->cancelable = true;
+    acb->finished = NULL;
    acb->coroutine = qemu_coroutine_self();
    acb->ret = 0;
    acb->nr_pending = 0;
    return acb;
 }

-/* Return -EIO in case of error, file descriptor on success */
 static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
 {
    int fd;
@@ -565,14 +550,11 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)

    if (fd >= 0) {
        qemu_set_nonblock(fd);
-    } else {
-        fd = -EIO;
    }

    return fd;
 }

-/* Return 0 on success and -errno in case of error */
 static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
                                    unsigned int *wlen)
 {
@@ -581,13 +563,11 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
    if (ret != sizeof(*hdr)) {
        error_report("failed to send a req, %s", strerror(errno));
-        ret = -socket_error();
        return ret;
    }

    ret = qemu_co_send(sockfd, data, *wlen);
    if (ret != *wlen) {
-        ret = -socket_error();
        error_report("failed to send a req, %s", strerror(errno));
    }

@@ -662,11 +642,6 @@ out:
    srco->finished = true;
 }

-/*
- * Send the request to the sheep in a synchronous manner.
- *
- * Return 0 on success, -errno in case of error.
- */
 static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
                  void *data, unsigned int *wlen, unsigned int *rlen)
 {
@@ -737,6 +712,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)

 static coroutine_fn void reconnect_to_sdog(void *opaque)
 {
+    Error *local_err = NULL;
    BDRVSheepdogState *s = opaque;
    AIOReq *aio_req, *next;

@@ -751,11 +727,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)

    /* Try to reconnect the sheepdog server every one second. */
    while (s->fd < 0) {
-        Error *local_err = NULL;
        s->fd = get_sheep_fd(s, &local_err);
        if (s->fd < 0) {
            DPRINTF("Wait for connection to be established\n");
-            error_report_err(local_err);
+            error_report("%s", error_get_pretty(local_err));
+            error_free(local_err);
            co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
                            1000000000ULL);
        }
@@ -1114,7 +1090,6 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
    memset(&hdr, 0, sizeof(hdr));
    if (lock) {
        hdr.opcode = SD_OP_LOCK_VDI;
-        hdr.type = LOCK_TYPE_NORMAL;
    } else {
        hdr.opcode = SD_OP_GET_VDI_INFO;
    }
@@ -1135,8 +1110,6 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
                   sd_strerror(rsp->result), filename, snapid, tag);
        if (rsp->result == SD_RES_NO_VDI) {
            ret = -ENOENT;
-        } else if (rsp->result == SD_RES_VDI_LOCKED) {
-            ret = -EBUSY;
        } else {
            ret = -EIO;
        }
@@ -1311,7 +1284,8 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        return -EIO;
    }

@@ -1319,7 +1293,8 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)

    ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
    if (ret) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        goto out;
    }

@@ -1570,7 +1545,6 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
    hdr.vdi_size = s->inode.vdi_size;
    hdr.copy_policy = s->inode.copy_policy;
    hdr.copies = s->inode.nr_copies;
-    hdr.block_size_shift = s->inode.block_size_shift;

    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);

@@ -1596,12 +1570,9 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
 static int sd_prealloc(const char *filename, Error **errp)
 {
    BlockDriverState *bs = NULL;
-    BDRVSheepdogState *base = NULL;
-    unsigned long buf_size;
    uint32_t idx, max_idx;
-    uint32_t object_size;
    int64_t vdi_size;
-    void *buf = NULL;
+    void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
    int ret;

    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
@@ -1615,24 +1586,18 @@ static int sd_prealloc(const char *filename, Error **errp)
        ret = vdi_size;
        goto out;
    }
-
-    base = bs->opaque;
-    object_size = (UINT32_C(1) << base->inode.block_size_shift);
-    buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
-    buf = g_malloc0(buf_size);
-
-    max_idx = DIV_ROUND_UP(vdi_size, buf_size);
+    max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);

    for (idx = 0; idx < max_idx; idx++) {
        /*
         * The created image can be a cloned image, so we need to read
         * a data from the source image.
         */
-        ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+        ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
        if (ret < 0) {
            goto out;
        }
-        ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+        ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
        if (ret < 0) {
            goto out;
        }
@@ -1705,27 +1670,6 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
    return 0;
 }

-static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
-{
-    struct SheepdogInode *inode = &s->inode;
-    uint64_t object_size;
-    int obj_order;
-
-    object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE, 0);
-    if (object_size) {
-        if ((object_size - 1) & object_size) {    /* not a power of 2? */
-            return -EINVAL;
-        }
-        obj_order = ctz32(object_size);
-        if (obj_order < 20 || obj_order > 31) {
-            return -EINVAL;
-        }
-        inode->block_size_shift = (uint8_t)obj_order;
-    }
-
-    return 0;
-}
-
 static int sd_create(const char *filename, QemuOpts *opts,
                     Error **errp)
 {
@@ -1736,10 +1680,9 @@ static int sd_create(const char *filename, QemuOpts *opts,
    BDRVSheepdogState *s;
    char tag[SD_MAX_VDI_TAG_LEN];
    uint32_t snapid;
-    uint64_t max_vdi_size;
    bool prealloc = false;

-    s = g_new0(BDRVSheepdogState, 1);
+    s = g_malloc0(sizeof(BDRVSheepdogState));

    memset(tag, 0, sizeof(tag));
    if (strstr(filename, "://")) {
@@ -1752,8 +1695,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
        goto out;
    }

-    s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                                 BDRV_SECTOR_SIZE);
+    s->inode.vdi_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    if (!buf || !strcmp(buf, "off")) {
@@ -1775,11 +1717,10 @@ static int sd_create(const char *filename, QemuOpts *opts,
            goto out;
        }
    }
-    ret = parse_block_size_shift(s, opts);
-    if (ret < 0) {
-        error_setg(errp, "Invalid object_size."
-                         " obect_size needs to be power of 2"
-                         " and be limited from 2^20 to 2^31");
+
+    if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
+        error_setg(errp, "too big image size");
+        ret = -EINVAL;
        goto out;
    }

@@ -1789,7 +1730,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
        BlockDriver *drv;

        /* Currently, only Sheepdog backing image is supported. */
-        drv = bdrv_find_protocol(backing_file, true, NULL);
+        drv = bdrv_find_protocol(backing_file, true);
        if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
            error_setg(errp, "backing_file must be a sheepdog image");
            ret = -EINVAL;
@@ -1816,51 +1757,6 @@ static int sd_create(const char *filename, QemuOpts *opts,
    }

    s->aio_context = qemu_get_aio_context();
-
-    /* if block_size_shift is not specified, get cluster default value */
-    if (s->inode.block_size_shift == 0) {
-        SheepdogVdiReq hdr;
-        SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
-        Error *local_err = NULL;
-        int fd;
-        unsigned int wlen = 0, rlen = 0;
-
-        fd = connect_to_sdog(s, &local_err);
-        if (fd < 0) {
-            error_report("%s", error_get_pretty(local_err));
-            error_free(local_err);
-            ret = -EIO;
-            goto out;
-        }
-
-        memset(&hdr, 0, sizeof(hdr));
-        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
-        hdr.proto_ver = SD_PROTO_VER;
-
-        ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
-                     NULL, &wlen, &rlen);
-        closesocket(fd);
-        if (ret) {
-            error_setg_errno(errp, -ret, "failed to get cluster default");
-            goto out;
-        }
-        if (rsp->result == SD_RES_SUCCESS) {
-            s->inode.block_size_shift = rsp->block_size_shift;
-        } else {
-            s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT;
-        }
-    }
-
-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-
-    if (s->inode.vdi_size > max_vdi_size) {
-        error_setg(errp, "An image is too large."
-                         " The maximum image size is %"PRIu64 "GB",
-                         max_vdi_size / 1024 / 1024 / 1024);
-        ret = -EINVAL;
-        goto out;
-    }
-
    ret = do_sd_create(s, &vid, 0, errp);
    if (ret) {
        goto out;
@@ -1889,14 +1785,14 @@ static void sd_close(BlockDriverState *bs)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        return;
    }

    memset(&hdr, 0, sizeof(hdr));

    hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.type = LOCK_TYPE_NORMAL;
    hdr.base_vdi_id = s->inode.vdi_id;
    wlen = strlen(s->name) + 1;
    hdr.data_length = wlen;
@@ -1930,20 +1826,19 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
    BDRVSheepdogState *s = bs->opaque;
    int ret, fd;
    unsigned int datalen;
-    uint64_t max_vdi_size;

-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
    if (offset < s->inode.vdi_size) {
        error_report("shrinking is not supported");
        return -EINVAL;
-    } else if (offset > max_vdi_size) {
+    } else if (offset > SD_MAX_VDI_SIZE) {
        error_report("too big image size");
        return -EINVAL;
    }

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        return fd;
    }

@@ -2016,7 +1911,8 @@ static bool sd_delete(BDRVSheepdogState *s)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        return false;
    }

@@ -2063,7 +1959,8 @@ static int sd_create_branch(BDRVSheepdogState *s)
    deleted = sd_delete(s);
    ret = do_sd_create(s, &vid, !deleted, &local_err);
    if (ret) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        goto out;
    }

@@ -2071,7 +1968,8 @@ static int sd_create_branch(BDRVSheepdogState *s)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        ret = fd;
        goto out;
    }
@@ -2114,10 +2012,9 @@ static int coroutine_fn sd_co_rw_vector(void *p)
    SheepdogAIOCB *acb = p;
    int ret = 0;
    unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
-    unsigned long idx;
-    uint32_t object_size;
+    unsigned long idx = acb->sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE;
    uint64_t oid;
-    uint64_t offset;
+    uint64_t offset = (acb->sector_num * BDRV_SECTOR_SIZE) % SD_DATA_OBJ_SIZE;
    BDRVSheepdogState *s = acb->common.bs->opaque;
    SheepdogInode *inode = &s->inode;
    AIOReq *aio_req;
@@ -2134,10 +2031,6 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        }
    }

-    object_size = (UINT32_C(1) << inode->block_size_shift);
-    idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
-    offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size;
-
    /*
     * Make sure we don't free the aiocb before we are done with all requests.
     * This additional reference is dropped at the end of this function.
@@ -2151,7 +2044,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)

        oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);

-        len = MIN(total - done, object_size - offset);
+        len = MIN(total - done, SD_DATA_OBJ_SIZE - offset);

        switch (acb->aiocb_type) {
        case AIOCB_READ_UDATA:
@@ -2175,7 +2068,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
             * We discard the object only when the whole object is
             * 1) allocated 2) trimmed. Otherwise, simply skip it.
             */
-            if (len != object_size || inode->data_vdi_id[idx] == 0) {
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
                goto done;
            }
            break;
@@ -2223,7 +2116,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
    BDRVSheepdogState *s = bs->opaque;

-    if (offset > s->inode.vdi_size) {
+    if (bs->growable && offset > s->inode.vdi_size) {
        ret = sd_truncate(bs, offset);
        if (ret < 0) {
            return ret;
@@ -2236,7 +2129,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return ret;
    }

@@ -2257,7 +2150,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return ret;
    }

@@ -2324,7 +2217,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    /* refresh inode. */
    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        ret = fd;
        goto cleanup;
    }
@@ -2339,9 +2233,10 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    ret = do_sd_create(s, &new_vid, 1, &local_err);
    if (ret < 0) {
-        error_report("failed to create inode for snapshot: %s",
-                     error_get_pretty(local_err));
+        error_report("%s", error_get_pretty(local_err));;
        error_free(local_err);
+        error_report("failed to create inode for snapshot. %s",
+                     strerror(errno));
        goto cleanup;
    }

@@ -2378,7 +2273,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
    uint32_t snapid = 0;
    int ret = 0;

-    old_s = g_new(BDRVSheepdogState, 1);
+    old_s = g_malloc(sizeof(BDRVSheepdogState));

    memcpy(old_s, s, sizeof(BDRVSheepdogState));

@@ -2440,7 +2335,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        ret = fd;
        goto out;
    }
@@ -2461,7 +2357,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        goto out;
    }

-    sn_tab = g_new0(QEMUSnapshotInfo, nr);
+    sn_tab = g_malloc0(nr * sizeof(*sn_tab));

    /* calculate a vdi id with hash function */
    hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
@@ -2469,7 +2365,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        ret = fd;
        goto out;
    }
@@ -2528,19 +2425,19 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
    uint64_t offset;
    uint32_t vdi_index;
    uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
-    uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift);

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
-        error_report_err(local_err);
+        error_report("%s", error_get_pretty(local_err));;
+        error_free(local_err);
        return fd;
    }

    while (remaining) {
-        vdi_index = pos / object_size;
-        offset = pos % object_size;
+        vdi_index = pos / SD_DATA_OBJ_SIZE;
+        offset = pos % SD_DATA_OBJ_SIZE;

-        data_len = MIN(remaining, object_size - offset);
+        data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);

        vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);

@@ -2612,7 +2509,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return ret;
    }

@@ -2627,11 +2524,10 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 {
    BDRVSheepdogState *s = bs->opaque;
    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
-    unsigned long start = offset / object_size,
+    unsigned long start = offset / SD_DATA_OBJ_SIZE,
                  end = DIV_ROUND_UP((sector_num + nb_sectors) *
-                                     BDRV_SECTOR_SIZE, object_size);
+                                     BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
    unsigned long idx;
    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;

@@ -2650,7 +2546,7 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
        }
    }

-    *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
+    *pnum = (idx - start) * SD_DATA_OBJ_SIZE / BDRV_SECTOR_SIZE;
    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
@@ -2661,15 +2557,14 @@ static int64_t sd_get_allocated_file_size(BlockDriverState *bs)
 {
    BDRVSheepdogState *s = bs->opaque;
    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size);
+    unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE);
    uint64_t size = 0;

    for (i = 0; i < last; i++) {
        if (inode->data_vdi_id[i] == 0) {
            continue;
        }
-        size += object_size;
+        size += SD_DATA_OBJ_SIZE;
    }
    return size;
 }
@@ -2698,11 +2593,6 @@ static QemuOptsList sd_create_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Redundancy of the image"
        },
-        {
-            .name = BLOCK_OPT_OBJECT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Object size of the image"
-        },
        { /* end of list */ }
    }
 };
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -246,9 +246,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
    if (bs->file) {
        return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
    }
-    error_setg(errp, "Block format '%s' used by device '%s' "
-               "does not support internal snapshot deletion",
-               drv->format_name, bdrv_get_device_name(bs));
+    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              drv->format_name, bdrv_get_device_name(bs),
+              "internal snapshot deletion");
    return -ENOTSUP;
 }

@@ -329,9 +329,9 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
    }
-    error_setg(errp, "Block format '%s' used by device '%s' "
-               "does not support temporarily loading internal snapshots",
-               drv->format_name, bdrv_get_device_name(bs));
+    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              drv->format_name, bdrv_get_device_name(bs),
+              "temporarily load internal snapshot");
    return -ENOTSUP;
 }

--- a/block/ssh.c
+++ b/block/ssh.c
@@ -517,11 +517,6 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    const char *host, *user, *path, *host_key_check;
    int port;

-    if (!qdict_haskey(options, "host")) {
-        ret = -EINVAL;
-        error_setg(errp, "No hostname was specified");
-        goto err;
-    }
    host = qdict_get_str(options, "host");

    if (qdict_haskey(options, "port")) {
@@ -530,11 +525,6 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        port = 22;
    }

-    if (!qdict_haskey(options, "path")) {
-        ret = -EINVAL;
-        error_setg(errp, "No path was specified");
-        goto err;
-    }
    path = qdict_get_str(options, "path");

    if (qdict_haskey(options, "user")) {
@@ -710,8 +700,7 @@ static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
    ssh_state_init(&s);

    /* Get desired file size. */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    DPRINTF("total_size=%" PRIi64, total_size);

    uri_options = qdict_new();
--- a/block/stream.c
+++ b/block/stream.c
@@ -79,39 +79,9 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
    bdrv_refresh_limits(top, NULL);
 }

-typedef struct {
-    int ret;
-    bool reached_end;
-} StreamCompleteData;
-
-static void stream_complete(BlockJob *job, void *opaque)
-{
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
-    StreamCompleteData *data = opaque;
-    BlockDriverState *base = s->base;
-
-    if (!block_job_is_cancelled(&s->common) && data->reached_end &&
-        data->ret == 0) {
-        const char *base_id = NULL, *base_fmt = NULL;
-        if (base) {
-            base_id = s->backing_file_str;
-            if (base->drv) {
-                base_fmt = base->drv->format_name;
-            }
-        }
-        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        close_unused_images(job->bs, base, base_id);
-    }
-
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, data->ret);
-    g_free(data);
-}
-
 static void coroutine_fn stream_run(void *opaque)
 {
    StreamBlockJob *s = opaque;
-    StreamCompleteData *data;
    BlockDriverState *bs = s->common.bs;
    BlockDriverState *base = s->base;
    int64_t sector_num, end;
@@ -213,13 +183,21 @@ wait:
    /* Do not remove the backing file if an error was there but ignored.  */
    ret = error;

-    qemu_vfree(buf);
+    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
+        const char *base_id = NULL, *base_fmt = NULL;
+        if (base) {
+            base_id = s->backing_file_str;
+            if (base->drv) {
+                base_fmt = base->drv->format_name;
+            }
+        }
+        ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+        close_unused_images(bs, base, base_id);
+    }

-    /* Modify backing chain and close BDSes in main loop */
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    data->reached_end = sector_num == end;
-    block_job_defer_to_main_loop(&s->common, stream_complete, data);
+    qemu_vfree(buf);
+    g_free(s->backing_file_str);
+    block_job_completed(&s->common, ret);
 }

 static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -242,7 +220,7 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
                  const char *backing_file_str, int64_t speed,
                  BlockdevOnError on_error,
-                  BlockCompletionFunc *cb,
+                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    StreamBlockJob *s;
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,7 +53,13 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
-#include "block/coroutine.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif

 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -121,18 +127,8 @@ typedef unsigned char uuid_t[16];

 #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)

-/* The bmap will take up VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) bytes; since
- * the bmap is read and written in a single operation, its size needs to be
- * limited to INT_MAX; furthermore, when opening an image, the bmap size is
- * rounded up to be aligned on BDRV_SECTOR_SIZE.
- * Therefore this should satisfy the following:
- * VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) + BDRV_SECTOR_SIZE == INT_MAX + 1
- * (INT_MAX + 1 is the first value not representable as an int)
- * This guarantees that any value below or equal to the constant will, when
- * multiplied by sizeof(uint32_t) and rounded up to a BDRV_SECTOR_SIZE boundary,
- * still be below or equal to INT_MAX. */
-#define VDI_BLOCKS_IN_IMAGE_MAX \
-    ((unsigned)((INT_MAX + 1u - BDRV_SECTOR_SIZE) / sizeof(uint32_t)))
+/* max blocks in image is (0xffffffff / 4) */
+#define VDI_BLOCKS_IN_IMAGE_MAX  0x3fffffff
 #define VDI_DISK_SIZE_MAX        ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
                                  (uint64_t)DEFAULT_CLUSTER_SIZE)

@@ -148,14 +144,12 @@ static inline int uuid_is_null(const uuid_t uu)
    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
 }

-# if defined(CONFIG_VDI_DEBUG)
 static inline void uuid_unparse(const uuid_t uu, char *out)
 {
    snprintf(out, 37, UUID_FMT,
            uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
            uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
 }
-# endif
 #endif

 typedef struct {
@@ -197,8 +191,6 @@ typedef struct {
    /* VDI header (converted to host endianness). */
    VdiHeader header;

-    CoMutex write_lock;
-
    Error *migration_blocker;
 } BDRVVdiState;

@@ -307,12 +299,7 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
        return -ENOTSUP;
    }

-    bmap = g_try_new(uint32_t, s->header.blocks_in_image);
-    if (s->header.blocks_in_image && bmap == NULL) {
-        res->check_errors++;
-        return -ENOMEM;
-    }
-
+    bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
    memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));

    /* Check block map and value of blocks_allocated. */
@@ -370,23 +357,23 @@ static int vdi_make_empty(BlockDriverState *bs)
 static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
    const VdiHeader *header = (const VdiHeader *)buf;
-    int ret = 0;
+    int result = 0;

    logout("\n");

    if (buf_size < sizeof(*header)) {
        /* Header too small, no VDI. */
    } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
-        ret = 100;
+        result = 100;
    }

-    if (ret == 0) {
+    if (result == 0) {
        logout("no vdi image\n");
    } else {
        logout("%s", header->text);
    }

-    return ret;
+    return result;
 }

 static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
@@ -422,7 +409,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
           We accept them but round the disk size to the next multiple of
           SECTOR_SIZE. */
        logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
-        header.disk_size = ROUND_UP(header.disk_size, SECTOR_SIZE);
+        header.disk_size += SECTOR_SIZE - 1;
+        header.disk_size &= ~(SECTOR_SIZE - 1);
    }

    if (header.signature != VDI_SIGNATURE) {
@@ -489,30 +477,23 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    s->header = header;

    bmap_size = header.blocks_in_image * sizeof(uint32_t);
-    bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
-    s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
-    if (s->bmap == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
+    s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
    if (ret < 0) {
        goto fail_free_bmap;
    }

    /* Disable migration when vdi images are used */
-    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "vdi", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

-    qemu_co_mutex_init(&s->write_lock);
-
    return 0;

 fail_free_bmap:
-    qemu_vfree(s->bmap);
+    g_free(s->bmap);

 fail:
    return ret;
@@ -644,31 +625,11 @@ static int vdi_co_write(BlockDriverState *bs,
                   buf, n_sectors * SECTOR_SIZE);
            memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
                   (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
-
-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
            ret = bdrv_write(bs->file, offset, block, s->block_sectors);
-            qemu_co_mutex_unlock(&s->write_lock);
        } else {
            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                              (uint64_t)bmap_entry * s->block_sectors +
                              sector_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
            ret = bdrv_write(bs->file, offset, buf, n_sectors);
        }

@@ -720,7 +681,8 @@ static int vdi_co_write(BlockDriverState *bs,

 static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
 {
-    int ret = 0;
+    int fd;
+    int result = 0;
    uint64_t bytes = 0;
    uint32_t blocks;
    size_t block_size = DEFAULT_CLUSTER_SIZE;
@@ -728,16 +690,12 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    VdiHeader header;
    size_t i;
    size_t bmap_size;
-    int64_t offset = 0;
-    Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
-    uint32_t *bmap = NULL;
+    bool nocow = false;

    logout("\n");

    /* Read out options. */
-    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                     BDRV_SECTOR_SIZE);
+    bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
 #if defined(CONFIG_VDI_BLOCK_SIZE)
    /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
    block_size = qemu_opt_get_size_del(opts,
@@ -749,33 +707,45 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
        image_type = VDI_TYPE_STATIC;
    }
 #endif
+    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);

    if (bytes > VDI_DISK_SIZE_MAX) {
-        ret = -ENOTSUP;
+        result = -ENOTSUP;
        error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
                          ", max supported is 0x%" PRIx64 ")",
                          bytes, VDI_DISK_SIZE_MAX);
        goto exit;
    }

-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    fd = qemu_open(filename,
+                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                   0644);
+    if (fd < 0) {
+        result = -errno;
        goto exit;
    }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    NULL, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
+
+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
    }

    /* We need enough blocks to store the given disk size,
       so always round up. */
-    blocks = DIV_ROUND_UP(bytes, block_size);
+    blocks = (bytes + block_size - 1) / block_size;

    bmap_size = blocks * sizeof(uint32_t);
-    bmap_size = ROUND_UP(bmap_size, SECTOR_SIZE);
+    bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1));

    memset(&header, 0, sizeof(header));
    pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
@@ -799,20 +769,13 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    vdi_header_print(&header);
 #endif
    vdi_header_to_le(&header);
-    ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
-    if (ret < 0) {
-        error_setg(errp, "Error writing header to %s", filename);
-        goto exit;
+    if (write(fd, &header, sizeof(header)) < 0) {
+        result = -errno;
+        goto close_and_exit;
    }
-    offset += sizeof(header);

    if (bmap_size > 0) {
-        bmap = g_try_malloc0(bmap_size);
-        if (bmap == NULL) {
-            ret = -ENOMEM;
-            error_setg(errp, "Could not allocate bmap");
-            goto exit;
-        }
+        uint32_t *bmap = g_malloc0(bmap_size);
        for (i = 0; i < blocks; i++) {
            if (image_type == VDI_TYPE_STATIC) {
                bmap[i] = i;
@@ -820,33 +783,35 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
                bmap[i] = VDI_UNALLOCATED;
            }
        }
-        ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
-        if (ret < 0) {
-            error_setg(errp, "Error writing bmap to %s", filename);
-            goto exit;
+        if (write(fd, bmap, bmap_size) < 0) {
+            result = -errno;
+            g_free(bmap);
+            goto close_and_exit;
        }
-        offset += bmap_size;
+        g_free(bmap);
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = bdrv_truncate(bs, offset + blocks * block_size);
-        if (ret < 0) {
-            error_setg(errp, "Failed to statically allocate %s", filename);
-            goto exit;
+        if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
+            result = -errno;
+            goto close_and_exit;
        }
    }

+close_and_exit:
+    if ((close(fd) < 0) && !result) {
+        result = -errno;
+    }
+
 exit:
-    bdrv_unref(bs);
-    g_free(bmap);
-    return ret;
+    return result;
 }

 static void vdi_close(BlockDriverState *bs)
 {
    BDRVVdiState *s = bs->opaque;

-    qemu_vfree(s->bmap);
+    g_free(s->bmap);

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
@@ -877,6 +842,11 @@ static QemuOptsList vdi_create_opts = {
            .def_value_str = "off"
        },
 #endif
+        {
+            .name = BLOCK_OPT_NOCOW,
+            .type = QEMU_OPT_BOOL,
+            .help = "Turn off copy-on-write (valid only on btrfs)"
+        },
        /* TODO: An additional option to set UUID values might be useful. */
        { /* end of list */ }
    }
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -82,6 +82,8 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
    assert(d != NULL);

    le32_to_cpus(&d->signature);
+    le32_to_cpus(&d->trailing_bytes);
+    le64_to_cpus(&d->leading_bytes);
    le64_to_cpus(&d->file_offset);
    le64_to_cpus(&d->sequence_number);
 }
@@ -97,15 +99,6 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
    cpu_to_le64s(&d->sequence_number);
 }

-void vhdx_log_data_le_import(VHDXLogDataSector *d)
-{
-    assert(d != NULL);
-
-    le32_to_cpus(&d->data_signature);
-    le32_to_cpus(&d->sequence_high);
-    le32_to_cpus(&d->sequence_low);
-}
-
 void vhdx_log_data_le_export(VHDXLogDataSector *d)
 {
    assert(d != NULL);
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -84,7 +84,6 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
    if (ret < 0) {
        goto exit;
    }
-    vhdx_log_entry_hdr_le_import(hdr);

 exit:
    return ret;
@@ -212,7 +211,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
 {
    int valid = false;

-    if (hdr->signature != VHDX_LOG_SIGNATURE) {
+    if (memcmp(&hdr->signature, "loge", 4)) {
        goto exit;
    }

@@ -276,12 +275,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
        goto exit;
    }

-    if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
+    if (!memcmp(&desc->signature, "zero", 4)) {
        if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
            /* valid */
            ret = true;
        }
-    } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
+    } else if (!memcmp(&desc->signature, "desc", 4)) {
            /* valid */
            ret = true;
    }
@@ -328,15 +327,13 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
 * passed into this function. Each descriptor will also be validated,
 * and error returned if any are invalid. */
 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
-                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
-                              bool convert_endian)
+                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
 {
    int ret = 0;
    uint32_t desc_sectors;
    uint32_t sectors_read;
    VHDXLogEntryHeader hdr;
    VHDXLogDescEntries *desc_entries = NULL;
-    VHDXLogDescriptor desc;
    int i;

    assert(*buffer == NULL);
@@ -345,19 +342,14 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
    if (ret < 0) {
        goto exit;
    }
-
+    vhdx_log_entry_hdr_le_import(&hdr);
    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
        ret = -EINVAL;
        goto exit;
    }

    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    desc_entries = qemu_try_blockalign(bs->file,
-                                       desc_sectors * VHDX_LOG_SECTOR_SIZE);
-    if (desc_entries == NULL) {
-        ret = -ENOMEM;
-        goto exit;
-    }
+    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);

    ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
                                desc_sectors, false);
@@ -371,19 +363,12 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,

    /* put in proper endianness, and validate each desc */
    for (i = 0; i < hdr.descriptor_count; i++) {
-        desc = desc_entries->desc[i];
-        vhdx_log_desc_le_import(&desc);
-        if (convert_endian) {
-            desc_entries->desc[i] = desc;
-        }
-        if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
+        vhdx_log_desc_le_import(&desc_entries->desc[i]);
+        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
            ret = -EINVAL;
            goto free_and_exit;
        }
    }
-    if (convert_endian) {
-        desc_entries->hdr = hdr;
-    }

    *buffer = desc_entries;
    goto exit;
@@ -418,7 +403,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,

    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);

-    if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
+    if (!memcmp(&desc->signature, "desc", 4)) {
        /* data sector */
        if (data == NULL) {
            ret = -EFAULT;
@@ -446,15 +431,10 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,

        memcpy(buffer+offset, &desc->trailing_bytes, 4);

-    } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
+    } else if (!memcmp(&desc->signature, "zero", 4)) {
        /* write 'count' sectors of sector */
        memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
        count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
-    } else {
-        error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
-                      desc->signature);
-        ret = -EINVAL;
-        goto exit;
    }

    file_offset = desc->file_offset;
@@ -513,13 +493,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            goto exit;
        }

-        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
+        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
        if (ret < 0) {
            goto exit;
        }

        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
-            if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
+            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
                /* data sector, so read a sector to flush */
                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
                                            data, 1, false);
@@ -530,7 +510,6 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                    ret = -EINVAL;
                    goto exit;
                }
-                vhdx_log_data_le_import(data);
            }

            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@@ -579,6 +558,9 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
        goto inc_and_exit;
    }

+    vhdx_log_entry_hdr_le_import(&hdr);
+
+
    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
        goto inc_and_exit;
    }
@@ -591,13 +573,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,

    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);

-    /* Read all log sectors, and calculate log checksum */
+    /* Read desc sectors, and calculate log checksum */

    total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;


    /* read_desc() will increment the read idx */
-    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
+    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
    if (ret < 0) {
        goto free_and_exit;
    }
@@ -620,7 +602,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
        }
    }
    crc ^= 0xffffffff;
-    if (crc != hdr.checksum) {
+    if (crc != desc_buffer->hdr.checksum) {
        goto free_and_exit;
    }

@@ -923,7 +905,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
    buffer = qemu_blockalign(bs, total_length);
    memcpy(buffer, &new_hdr, sizeof(new_hdr));

-    new_desc = buffer + sizeof(new_hdr);
+    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
    data_tmp = data;

@@ -980,6 +962,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
     * last data sector */
    vhdx_update_checksum(buffer, total_length,
                         offsetof(VHDXLogEntryHeader, checksum));
+    cpu_to_le32s((uint32_t *)(buffer + 4));

    /* now write to the log */
    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -99,8 +99,7 @@ static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
 /* Each parent type must have a valid GUID; this is for parent images
 * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
 * need to make up our own QCOW2 GUID type */
-static const MSGUID parent_vhdx_guid __attribute__((unused))
-                                     = { .data1 = 0xb04aefb7,
+static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
                                         .data2 = 0xd19e,
                                         .data3 = 0x4a81,
                                         .data4 = { 0xb7, 0x89, 0x25, 0xb8,
@@ -136,8 +135,10 @@ typedef struct VHDXSectorInfo {
 * buf: buffer pointer
 * size: size of buffer (must be > crc_offset+4)
 *
- * Note: The buffer should have all multi-byte data in little-endian format,
- *       and the resulting checksum is in little endian format.
+ * Note: The resulting checksum is in the CPU endianness, not necessarily
+ *       in the file format endianness (LE).  Any header export to disk should
+ *       make sure that vhdx_header_le_export() is used to convert to the
+ *       correct endianness.
 */
 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
 {
@@ -148,7 +149,6 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)

    memset(buf + crc_offset, 0, sizeof(crc));
    crc =  crc32c(0xffffffff, buf, size);
-    cpu_to_le32s(&crc);
    memcpy(buf + crc_offset, &crc, sizeof(crc));

    return crc;
@@ -300,7 +300,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
 {
    uint8_t *buffer = NULL;
    int ret;
-    VHDXHeader *header_le;
+    VHDXHeader header_le;

    assert(bs_file != NULL);
    assert(hdr != NULL);
@@ -321,12 +321,11 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
    }

    /* overwrite the actual VHDXHeader portion */
-    header_le = (VHDXHeader *)buffer;
-    memcpy(header_le, hdr, sizeof(VHDXHeader));
-    vhdx_header_le_export(hdr, header_le);
-    vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
-                         offsetof(VHDXHeader, checksum));
-    ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
+    memcpy(buffer, hdr, sizeof(VHDXHeader));
+    hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+                                         offsetof(VHDXHeader, checksum));
+    vhdx_header_le_export(hdr, &header_le);
+    ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));

 exit:
    qemu_vfree(buffer);
@@ -433,14 +432,13 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
    }
    /* copy over just the relevant portion that we need */
    memcpy(header1, buffer, sizeof(VHDXHeader));
+    vhdx_header_le_import(header1);

-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
-        vhdx_header_le_import(header1);
-        if (header1->signature == VHDX_HEADER_SIGNATURE &&
-            header1->version == 1) {
-            h1_seq = header1->sequence_number;
-            h1_valid = true;
-        }
+    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
+        !memcmp(&header1->signature, "head", 4)             &&
+        header1->version == 1) {
+        h1_seq = header1->sequence_number;
+        h1_valid = true;
    }

    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
@@ -449,14 +447,13 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
    }
    /* copy over just the relevant portion that we need */
    memcpy(header2, buffer, sizeof(VHDXHeader));
+    vhdx_header_le_import(header2);

-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
-        vhdx_header_le_import(header2);
-        if (header2->signature == VHDX_HEADER_SIGNATURE &&
-            header2->version == 1) {
-            h2_seq = header2->sequence_number;
-            h2_valid = true;
-        }
+    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
+        !memcmp(&header2->signature, "head", 4)             &&
+        header2->version == 1) {
+        h2_seq = header2->sequence_number;
+        h2_valid = true;
    }

    /* If there is only 1 valid header (or no valid headers), we
@@ -522,21 +519,15 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
        goto fail;
    }
    memcpy(&s->rt, buffer, sizeof(s->rt));
+    vhdx_region_header_le_import(&s->rt);
    offset += sizeof(s->rt);

-    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
+    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
+        memcmp(&s->rt.signature, "regi", 4)) {
        ret = -EINVAL;
        goto fail;
    }

-    vhdx_region_header_le_import(&s->rt);
-
-    if (s->rt.signature != VHDX_REGION_SIGNATURE) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-
    /* Per spec, maximum region table entry count is 2047 */
    if (s->rt.entry_count > 2047) {
        ret = -EINVAL;
@@ -639,7 +630,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)

    vhdx_metadata_header_le_import(&s->metadata_hdr);

-    if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
+    if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
        ret = -EINVAL;
        goto exit;
    }
@@ -959,11 +950,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* s->bat is freed in vhdx_close() */
-    s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
-    if (s->bat == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
+    s->bat = qemu_blockalign(bs, s->bat_rt.length);

    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
    if (ret < 0) {
@@ -1002,9 +989,9 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    /* TODO: differencing files */

    /* Disable migration when VHDX images are used */
-    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+            QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+            "vhdx", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
@@ -1109,9 +1096,8 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
            /* check the payload block state */
            switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNDEFINED:
-            case PAYLOAD_BLOCK_UNMAPPED:
-            case PAYLOAD_BLOCK_UNMAPPED_v095:
+            case PAYLOAD_BLOCK_UNDEFINED:   /* fall through */
+            case PAYLOAD_BLOCK_UNMAPPED:    /* fall through */
            case PAYLOAD_BLOCK_ZERO:
                /* return zero */
                qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
@@ -1174,18 +1160,7 @@ static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s,
 {
    /* The BAT entry is a uint64, with 44 bits for the file offset in units of
     * 1MB, and 3 bits for the block state. */
-    if ((state == PAYLOAD_BLOCK_ZERO)        ||
-        (state == PAYLOAD_BLOCK_UNDEFINED)   ||
-        (state == PAYLOAD_BLOCK_NOT_PRESENT) ||
-        (state == PAYLOAD_BLOCK_UNMAPPED)) {
-        s->bat[sinfo->bat_idx]  = 0;  /* For PAYLOAD_BLOCK_ZERO, the
-                                         FileOffsetMB field is denoted as
-                                         'reserved' in the v1.0 spec.  If it is
-                                         non-zero, MS Hyper-V will fail to read
-                                         the disk image */
-    } else {
-        s->bat[sinfo->bat_idx]  = sinfo->file_offset;
-    }
+    s->bat[sinfo->bat_idx]  = sinfo->file_offset;

    s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;

@@ -1269,7 +1244,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                        iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
                        memset(iov1.iov_base, 0, iov1.iov_len);
                        qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
-                                              iov1.iov_len);
+                                              sinfo.block_offset);
                        sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
                    }

@@ -1285,15 +1260,15 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                        iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
                        memset(iov2.iov_base, 0, iov2.iov_len);
                        qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
-                                              iov2.iov_len);
+                                              sinfo.block_offset);
                        sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
                    }
                }
+
                /* fall through */
            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNMAPPED:
-            case PAYLOAD_BLOCK_UNMAPPED_v095:
-            case PAYLOAD_BLOCK_UNDEFINED:
+            case PAYLOAD_BLOCK_UNMAPPED:    /* fall through */
+            case PAYLOAD_BLOCK_UNDEFINED:   /* fall through */
                bat_prior_offset = sinfo.file_offset;
                ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
                if (ret < 0) {
@@ -1394,7 +1369,7 @@ static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
    int ret = 0;
    VHDXHeader *hdr = NULL;

-    hdr = g_new0(VHDXHeader, 1);
+    hdr = g_malloc0(sizeof(VHDXHeader));

    hdr->signature       = VHDX_HEADER_SIGNATURE;
    hdr->sequence_number = g_random_int();
@@ -1420,12 +1395,6 @@ exit:
    return ret;
 }

-#define VHDX_METADATA_ENTRY_BUFFER_SIZE \
-                                    (sizeof(VHDXFileParameters)               +\
-                                     sizeof(VHDXVirtualDiskSize)              +\
-                                     sizeof(VHDXPage83Data)                   +\
-                                     sizeof(VHDXVirtualDiskLogicalSectorSize) +\
-                                     sizeof(VHDXVirtualDiskPhysicalSectorSize))

 /*
 * Create the Metadata entries.
@@ -1464,7 +1433,11 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
    VHDXVirtualDiskLogicalSectorSize  *mt_log_sector_size;
    VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;

-    entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE);
+    entry_buffer = g_malloc0(sizeof(VHDXFileParameters)               +
+                             sizeof(VHDXVirtualDiskSize)              +
+                             sizeof(VHDXPage83Data)                   +
+                             sizeof(VHDXVirtualDiskLogicalSectorSize) +
+                             sizeof(VHDXVirtualDiskPhysicalSectorSize));

    mt_file_params = entry_buffer;
    offset += sizeof(VHDXFileParameters);
@@ -1545,7 +1518,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
    }

    ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
-                      VHDX_METADATA_ENTRY_BUFFER_SIZE);
+                      VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
        goto exit;
    }
@@ -1567,8 +1540,7 @@ exit:
 */
 static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
                           uint64_t image_size, VHDXImageType type,
-                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length)
+                           bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
 {
    int ret = 0;
    uint64_t data_file_offset;
@@ -1583,7 +1555,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
    /* this gives a data start after BAT/bitmap entries, and well
     * past any metadata entries (with a 4 MB buffer for future
     * expansion */
-    data_file_offset = file_offset + length + 5 * MiB;
+    data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
    total_sectors = image_size >> s->logical_sector_size_bits;

    if (type == VHDX_TYPE_DYNAMIC) {
@@ -1607,11 +1579,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
                use_zero_blocks ||
                bdrv_has_zero_init(bs) == 0) {
        /* for a fixed file, the default BAT entry is not zero */
-        s->bat = g_try_malloc0(length);
-        if (length && s->bat == NULL) {
-            ret = -ENOMEM;
-            goto exit;
-        }
+        s->bat = g_malloc0(rt_bat->length);
        block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
                                                PAYLOAD_BLOCK_NOT_PRESENT;
        block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
@@ -1626,7 +1594,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
            cpu_to_le64s(&s->bat[sinfo.bat_idx]);
            sector_num += s->sectors_per_block;
        }
-        ret = bdrv_pwrite(bs, file_offset, s->bat, length);
+        ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
        if (ret < 0) {
            goto exit;
        }
@@ -1658,8 +1626,6 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
    int ret = 0;
    uint32_t offset = 0;
    void *buffer = NULL;
-    uint64_t bat_file_offset;
-    uint32_t bat_length;
    BDRVVHDXState *s = NULL;
    VHDXRegionTableHeader *region_table;
    VHDXRegionTableEntry *rt_bat;
@@ -1669,7 +1635,7 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,

    /* Populate enough of the BDRVVHDXState to be able to use the
     * pre-existing BAT calculation, translation, and update functions */
-    s = g_new0(BDRVVHDXState, 1);
+    s = g_malloc0(sizeof(BDRVVHDXState));

    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
                     (uint64_t) sector_size / (uint64_t) block_size;
@@ -1708,26 +1674,19 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
    rt_metadata->length      = 1 * MiB; /* min size, and more than enough */
    *metadata_offset = rt_metadata->file_offset;

-    bat_file_offset = rt_bat->file_offset;
-    bat_length = rt_bat->length;
-
-    vhdx_region_header_le_export(region_table);
-    vhdx_region_entry_le_export(rt_bat);
-    vhdx_region_entry_le_export(rt_metadata);
-
    vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
                         offsetof(VHDXRegionTableHeader, checksum));


    /* The region table gives us the data we need to create the BAT,
     * so do that now */
-    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
-                          bat_file_offset, bat_length);
-    if (ret < 0) {
-        goto exit;
-    }
+    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);

    /* Now write out the region headers to disk */
+    vhdx_region_header_le_export(region_table);
+    vhdx_region_entry_le_export(rt_bat);
+    vhdx_region_entry_le_export(rt_metadata);
+
    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
                      VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
@@ -1740,6 +1699,7 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
        goto exit;
    }

+
 exit:
    g_free(s);
    g_free(buffer);
@@ -1780,12 +1740,11 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    VHDXImageType image_type;
    Error *local_err = NULL;

-    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    log_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
    block_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
    type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
-    use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, true);
+    use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, false);

    if (image_size > VHDX_MAX_IMAGE_SIZE) {
        error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
@@ -1890,6 +1849,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    }


+
 delete_and_exit:
    bdrv_unref(bs);
 exit:
@@ -1947,9 +1907,7 @@ static QemuOptsList vhdx_create_opts = {
       {
           .name = VHDX_BLOCK_OPT_ZERO,
           .type = QEMU_OPT_BOOL,
-           .help = "Force use of payload blocks of type 'ZERO'. "\
-                   "Non-standard, but default.  Do not set to 'off' when "\
-                   "using 'qemu-img convert' with subformat=dynamic."
+           .help = "Force use of payload blocks of type 'ZERO'.  Non-standard."
       },
       { NULL }
    }
@@ -1967,7 +1925,6 @@ static BlockDriver bdrv_vhdx = {
    .bdrv_create            = vhdx_create,
    .bdrv_get_info          = vhdx_get_info,
    .bdrv_check             = vhdx_check,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,

    .create_opts            = &vhdx_create_opts,
 };
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -226,8 +226,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
 #define PAYLOAD_BLOCK_NOT_PRESENT       0
 #define PAYLOAD_BLOCK_UNDEFINED         1
 #define PAYLOAD_BLOCK_ZERO              2
-#define PAYLOAD_BLOCK_UNMAPPED          3
-#define PAYLOAD_BLOCK_UNMAPPED_v095     5
+#define PAYLOAD_BLOCK_UNMAPPED          5
 #define PAYLOAD_BLOCK_FULLY_PRESENT     6
 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7

@@ -436,7 +435,6 @@ void vhdx_header_le_import(VHDXHeader *h);
 void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
 void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
 void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
-void vhdx_log_data_le_import(VHDXLogDataSector *d);
 void vhdx_log_data_le_export(VHDXLogDataSector *d);
 void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
 void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -28,7 +28,6 @@
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include <zlib.h>
-#include <glib.h>

 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
@@ -107,7 +106,6 @@ typedef struct VmdkExtent {
    uint32_t l2_cache_counts[L2_CACHE_SIZE];

    int64_t cluster_sectors;
-    int64_t next_cluster_sector;
    char *type;
 } VmdkExtent;

@@ -126,6 +124,7 @@ typedef struct BDRVVmdkState {
 } BDRVVmdkState;

 typedef struct VmdkMetaData {
+    uint32_t offset;
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
@@ -234,7 +233,7 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
        return;
    }
    s->num_extents--;
-    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
+    s->extents = g_realloc(s->extents, s->num_extents * sizeof(VmdkExtent));
 }

 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
@@ -398,7 +397,6 @@ static int vmdk_add_extent(BlockDriverState *bs,
 {
    VmdkExtent *extent;
    BDRVVmdkState *s = bs->opaque;
-    int64_t nb_sectors;

    if (cluster_sectors > 0x200000) {
        /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@@ -414,12 +412,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
        return -EFBIG;
    }

-    nb_sectors = bdrv_nb_sectors(file);
-    if (nb_sectors < 0) {
-        return nb_sectors;
-    }
-
-    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
+    s->extents = g_realloc(s->extents,
+                              (s->num_extents + 1) * sizeof(VmdkExtent));
    extent = &s->extents[s->num_extents];
    s->num_extents++;

@@ -433,7 +427,6 @@ static int vmdk_add_extent(BlockDriverState *bs,
    extent->l1_entry_sectors = l2_size * cluster_sectors;
    extent->l2_size = l2_size;
    extent->cluster_sectors = flat ? sectors : cluster_sectors;
-    extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);

    if (s->num_extents > 1) {
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -451,16 +444,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
                            Error **errp)
 {
    int ret;
-    size_t l1_size;
-    int i;
+    int l1_size, i;

    /* read the L1 table */
    l1_size = extent->l1_size * sizeof(uint32_t);
-    extent->l1_table = g_try_malloc(l1_size);
-    if (l1_size && extent->l1_table == NULL) {
-        return -ENOMEM;
-    }
-
+    extent->l1_table = g_malloc(l1_size);
    ret = bdrv_pread(extent->file,
                     extent->l1_table_offset,
                     extent->l1_table,
@@ -476,11 +464,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
    }

    if (extent->l1_backup_table_offset) {
-        extent->l1_backup_table = g_try_malloc(l1_size);
-        if (l1_size && extent->l1_backup_table == NULL) {
-            ret = -ENOMEM;
-            goto fail_l1;
-        }
+        extent->l1_backup_table = g_malloc(l1_size);
        ret = bdrv_pread(extent->file,
                         extent->l1_backup_table_offset,
                         extent->l1_backup_table,
@@ -497,7 +481,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
    }

    extent->l2_cache =
-        g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
+        g_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail_l1b:
    g_free(extent->l1_backup_table);
@@ -524,7 +508,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    }
    ret = vmdk_add_extent(bs, file, false,
                          le32_to_cpu(header.disk_sectors),
-                          (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
+                          le32_to_cpu(header.l1dir_offset) << 9,
                          0,
                          le32_to_cpu(header.l1dir_size),
                          4096,
@@ -558,16 +542,8 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
        return NULL;
    }

-    if (size < 4) {
-        /* Both descriptor file and sparse image must be much larger than 4
-         * bytes, also callers of vmdk_read_desc want to compare the first 4
-         * bytes with VMDK4_MAGIC, let's error out if less is read. */
-        error_setg(errp, "File is too small, not a valid image");
-        return NULL;
-    }
-
-    size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
-    buf = g_malloc(size + 1);
+    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
+    buf = g_malloc0(size + 1);

    ret = bdrv_pread(file, desc_offset, buf, size);
    if (ret < 0) {
@@ -575,7 +551,6 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
        g_free(buf);
        return NULL;
    }
-    buf[ret] = 0;

    return buf;
 }
@@ -646,7 +621,6 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            bs->file->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to read footer");
            return ret;
        }

@@ -658,7 +632,6 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            le32_to_cpu(footer.eos_marker.size) != 0  ||
            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
        {
-            error_setg(errp, "Invalid footer");
            return -EINVAL;
        }

@@ -670,7 +643,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
                 le32_to_cpu(header.version));
        error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_or_node_name(bs), "vmdk", buf);
+                  bs->device_name, "vmdk", buf);
        return -ENOTSUP;
    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
        /* VMware KB 2064959 explains that version 3 added support for
@@ -689,7 +662,6 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
-        error_setg(errp, "L1 entry size is invalid");
        return -EINVAL;
    }
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
@@ -697,7 +669,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
-    if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+    if (bdrv_getlength(file) <
+            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
        error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
                   (int64_t)(le64_to_cpu(header.grain_offset)
                             * BDRV_SECTOR_SIZE));
@@ -786,44 +759,41 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                              const char *desc_file_path, Error **errp)
 {
    int ret;
-    int matches;
    char access[11];
    char type[11];
    char fname[512];
    const char *p = desc;
    int64_t sectors = 0;
    int64_t flat_offset;
-    char *extent_path;
+    char extent_path[PATH_MAX];
    BlockDriverState *extent_file;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;

    while (*p) {
-        /* parse extent line in one of below formats:
-         *
+        /* parse extent line:
         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
+         * or
         * RW [size in sectors] SPARSE "file-name.vmdk"
-         * RW [size in sectors] VMFS "file-name.vmdk"
-         * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
         */
        flat_offset = -1;
-        matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
-                         access, &sectors, type, fname, &flat_offset);
-        if (matches < 4 || strcmp(access, "RW")) {
+        ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
+                access, &sectors, type, fname, &flat_offset);
+        if (ret < 4 || strcmp(access, "RW")) {
            goto next_line;
        } else if (!strcmp(type, "FLAT")) {
-            if (matches != 5 || flat_offset < 0) {
+            if (ret != 5 || flat_offset < 0) {
                error_setg(errp, "Invalid extent lines: \n%s", p);
                return -EINVAL;
            }
        } else if (!strcmp(type, "VMFS")) {
-            if (matches == 4) {
+            if (ret == 4) {
                flat_offset = 0;
            } else {
                error_setg(errp, "Invalid extent lines:\n%s", p);
                return -EINVAL;
            }
-        } else if (matches != 4) {
+        } else if (ret != 4) {
            error_setg(errp, "Invalid extent lines:\n%s", p);
            return -EINVAL;
        }
@@ -835,20 +805,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            goto next_line;
        }

-        if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
-            !desc_file_path[0])
-        {
-            error_setg(errp, "Cannot use relative extent paths with VMDK "
-                       "descriptor file '%s'", bs->file->filename);
-            return -EINVAL;
-        }
-
-        extent_path = g_malloc0(PATH_MAX);
-        path_combine(extent_path, PATH_MAX, desc_file_path, fname);
+        path_combine(extent_path, sizeof(extent_path),
+                desc_file_path, fname);
        extent_file = NULL;
        ret = bdrv_open(&extent_file, extent_path, NULL, NULL,
                        bs->open_flags | BDRV_O_PROTOCOL, NULL, errp);
-        g_free(extent_path);
        if (ret) {
            return ret;
        }
@@ -860,7 +821,6 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            ret = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, 0, &extent, errp);
            if (ret < 0) {
-                bdrv_unref(extent_file);
                return ret;
            }
            extent->flat_start_offset = flat_offset << 9;
@@ -872,15 +832,14 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            } else {
                ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
            }
-            g_free(buf);
            if (ret) {
+                g_free(buf);
                bdrv_unref(extent_file);
                return ret;
            }
            extent = &s->extents[s->num_extents - 1];
        } else {
            error_setg(errp, "Unsupported extent type '%s'", type);
-            bdrv_unref(extent_file);
            return -ENOTSUP;
        }
        extent->type = g_strdup(type);
@@ -920,7 +879,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
    }
    s->create_type = g_strdup(ct);
    s->desc_offset = 0;
-    ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, errp);
+    ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
 exit:
    return ret;
 }
@@ -928,7 +887,7 @@ exit:
 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
-    char *buf;
+    char *buf = NULL;
    int ret;
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
@@ -963,9 +922,9 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VMDK images are used */
-    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "vmdk", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);
    g_free(buf);
    return 0;
@@ -993,97 +952,57 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-/**
- * get_whole_cluster
- *
- * Copy backing file's cluster that covers @sector_num, otherwise write zero,
- * to the cluster at @cluster_sector_num.
- *
- * If @skip_start_sector < @skip_end_sector, the relative range
- * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
- * it for call to write user data in the request.
- */
 static int get_whole_cluster(BlockDriverState *bs,
-                             VmdkExtent *extent,
-                             uint64_t cluster_sector_num,
-                             uint64_t sector_num,
-                             uint64_t skip_start_sector,
-                             uint64_t skip_end_sector)
+                VmdkExtent *extent,
+                uint64_t cluster_offset,
+                uint64_t offset,
+                bool allocate)
 {
    int ret = VMDK_OK;
-    int64_t cluster_bytes;
-    uint8_t *whole_grain;
+    uint8_t *whole_grain = NULL;

-    /* For COW, align request sector_num to cluster start */
-    sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
-    cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
-    whole_grain = qemu_blockalign(bs, cluster_bytes);
-
-    if (!bs->backing_hd) {
-        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
-        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
-               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
-    }
-
-    assert(skip_end_sector <= extent->cluster_sectors);
    /* we will be here if it's first write on non-exist grain(cluster).
     * try to read from parent image, if exist */
-    if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
-        ret = VMDK_ERROR;
-        goto exit;
-    }
-
-    /* Read backing data before skip range */
-    if (skip_start_sector > 0) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num,
-                            whole_grain, skip_start_sector);
-            if (ret < 0) {
-                ret = VMDK_ERROR;
-                goto exit;
-            }
+    if (bs->backing_hd) {
+        whole_grain =
+            qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
+        if (!vmdk_is_cid_valid(bs)) {
+            ret = VMDK_ERROR;
+            goto exit;
        }
-        ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
-                         skip_start_sector);
+
+        /* floor offset to cluster */
+        offset -= offset % (extent->cluster_sectors * 512);
+        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
+                extent->cluster_sectors);
+        if (ret < 0) {
+            ret = VMDK_ERROR;
+            goto exit;
+        }
+
+        /* Write grain only into the active image */
+        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
+                extent->cluster_sectors);
        if (ret < 0) {
            ret = VMDK_ERROR;
            goto exit;
        }
    }
-    /* Read backing data after skip range */
-    if (skip_end_sector < extent->cluster_sectors) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
-                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                            extent->cluster_sectors - skip_end_sector);
-            if (ret < 0) {
-                ret = VMDK_ERROR;
-                goto exit;
-            }
-        }
-        ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
-                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                         extent->cluster_sectors - skip_end_sector);
-        if (ret < 0) {
-            ret = VMDK_ERROR;
-            goto exit;
-        }
-    }
-
 exit:
    qemu_vfree(whole_grain);
    return ret;
 }

-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
-                         uint32_t offset)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
-    offset = cpu_to_le32(offset);
+    uint32_t offset;
+    QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
+    offset = cpu_to_le32(m_data->offset);
    /* update L2 table */
    if (bdrv_pwrite_sync(
                extent->file,
                ((int64_t)m_data->l2_offset * 512)
-                    + (m_data->l2_index * sizeof(offset)),
+                    + (m_data->l2_index * sizeof(m_data->offset)),
                &offset, sizeof(offset)) < 0) {
        return VMDK_ERROR;
    }
@@ -1093,7 +1012,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
        if (bdrv_pwrite_sync(
                    extent->file,
                    ((int64_t)m_data->l2_offset * 512)
-                        + (m_data->l2_index * sizeof(offset)),
+                        + (m_data->l2_index * sizeof(m_data->offset)),
                    &offset, sizeof(offset)) < 0) {
            return VMDK_ERROR;
        }
@@ -1105,41 +1024,17 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
    return VMDK_OK;
 }

-/**
- * get_cluster_offset
- *
- * Look up cluster offset in extent file by sector number, and store in
- * @cluster_offset.
- *
- * For flat extents, the start offset as parsed from the description file is
- * returned.
- *
- * For sparse extents, look up in L1, L2 table. If allocate is true, return an
- * offset for a new cluster and update L2 cache. If there is a backing file,
- * COW is done before returning; otherwise, zeroes are written to the allocated
- * cluster. Both COW and zero writing skips the sector range
- * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
- * has new data to write there.
- *
- * Returns: VMDK_OK if cluster exists and mapped in the image.
- *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
- *          VMDK_ERROR if failed.
- */
 static int get_cluster_offset(BlockDriverState *bs,
-                              VmdkExtent *extent,
-                              VmdkMetaData *m_data,
-                              uint64_t offset,
-                              bool allocate,
-                              uint64_t *cluster_offset,
-                              uint64_t skip_start_sector,
-                              uint64_t skip_end_sector)
+                                    VmdkExtent *extent,
+                                    VmdkMetaData *m_data,
+                                    uint64_t offset,
+                                    int allocate,
+                                    uint64_t *cluster_offset)
 {
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table;
    bool zeroed = false;
-    int64_t ret;
-    int64_t cluster_sector;

    if (m_data) {
        m_data->valid = 0;
@@ -1193,41 +1088,52 @@ static int get_cluster_offset(BlockDriverState *bs,
    extent->l2_cache_counts[min_index] = 1;
 found:
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
-    cluster_sector = le32_to_cpu(l2_table[l2_index]);
+    *cluster_offset = le32_to_cpu(l2_table[l2_index]);

    if (m_data) {
        m_data->valid = 1;
        m_data->l1_index = l1_index;
        m_data->l2_index = l2_index;
+        m_data->offset = *cluster_offset;
        m_data->l2_offset = l2_offset;
        m_data->l2_cache_entry = &l2_table[l2_index];
    }
-    if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
+    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
        zeroed = true;
    }

-    if (!cluster_sector || zeroed) {
+    if (!*cluster_offset || zeroed) {
        if (!allocate) {
            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
        }

-        cluster_sector = extent->next_cluster_sector;
-        extent->next_cluster_sector += extent->cluster_sectors;
+        /* Avoid the L2 tables update for the images that have snapshots. */
+        *cluster_offset = bdrv_getlength(extent->file);
+        if (!extent->compressed) {
+            bdrv_truncate(
+                extent->file,
+                *cluster_offset + (extent->cluster_sectors << 9)
+            );
+        }
+
+        *cluster_offset >>= 9;
+        l2_table[l2_index] = cpu_to_le32(*cluster_offset);

        /* First of all we write grain itself, to avoid race condition
         * that may to corrupt the image.
         * This problem may occur because of insufficient space on host disk
         * or inappropriate VM shutdown.
         */
-        ret = get_whole_cluster(bs, extent,
-                                cluster_sector,
-                                offset >> BDRV_SECTOR_BITS,
-                                skip_start_sector, skip_end_sector);
-        if (ret) {
-            return ret;
+        if (get_whole_cluster(
+                bs, extent, *cluster_offset, offset, allocate) == -1) {
+            return VMDK_ERROR;
+        }
+
+        if (m_data) {
+            m_data->offset = *cluster_offset;
        }
    }
-    *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
+    *cluster_offset <<= 9;
    return VMDK_OK;
 }

@@ -1262,8 +1168,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
    }
    qemu_co_mutex_lock(&s->lock);
    ret = get_cluster_offset(bs, extent, NULL,
-                             sector_num * 512, false, &offset,
-                             0, 0);
+                            sector_num * 512, 0, &offset);
    qemu_co_mutex_unlock(&s->lock);

    switch (ret) {
@@ -1303,8 +1208,6 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
    uLongf buf_len;
    const uint8_t *write_buf = buf;
    int write_len = nb_sectors * 512;
-    int64_t write_offset;
-    int64_t write_end_sector;

    if (extent->compressed) {
        if (!extent->has_marker) {
@@ -1323,14 +1226,10 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
        write_buf = (uint8_t *)data;
        write_len = buf_len + sizeof(VmdkGrainMarker);
    }
-    write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len);
-
-    write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
-
-    extent->next_cluster_sector = MAX(extent->next_cluster_sector,
-                                      write_end_sector);
-
+    ret = bdrv_pwrite(extent->file,
+                        cluster_offset + offset_in_cluster,
+                        write_buf,
+                        write_len);
    if (ret != write_len) {
        ret = ret < 0 ? ret : -EIO;
        goto out;
@@ -1422,9 +1321,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
        if (!extent) {
            return -EIO;
        }
-        ret = get_cluster_offset(bs, extent, NULL,
-                                 sector_num << 9, false, &cluster_offset,
-                                 0, 0);
+        ret = get_cluster_offset(
+                            bs, extent, NULL,
+                            sector_num << 9, 0, &cluster_offset);
        extent_begin_sector = extent->end_sector - extent->sectors;
        extent_relative_sector_num = sector_num - extent_begin_sector;
        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@@ -1505,17 +1404,12 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
        if (!extent) {
            return -EIO;
        }
-        extent_begin_sector = extent->end_sector - extent->sectors;
-        extent_relative_sector_num = sector_num - extent_begin_sector;
-        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
-                                 !(extent->compressed || zeroed),
-                                 &cluster_offset,
-                                 index_in_cluster, index_in_cluster + n);
+        ret = get_cluster_offset(
+                                bs,
+                                extent,
+                                &m_data,
+                                sector_num << 9, !extent->compressed,
+                                &cluster_offset);
        if (extent->compressed) {
            if (ret == VMDK_OK) {
                /* Refuse write to allocated cluster for streamOptimized */
@@ -1524,13 +1418,24 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                return -EIO;
            } else {
                /* allocate */
-                ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
-                                         true, &cluster_offset, 0, 0);
+                ret = get_cluster_offset(
+                                        bs,
+                                        extent,
+                                        &m_data,
+                                        sector_num << 9, 1,
+                                        &cluster_offset);
            }
        }
        if (ret == VMDK_ERROR) {
            return -EINVAL;
        }
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors) {
+            n = nb_sectors;
+        }
        if (zeroed) {
            /* Do zeroed write, buf is ignored */
            if (extent->has_zero_grain &&
@@ -1538,9 +1443,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                    n >= extent->cluster_sectors) {
                n = extent->cluster_sectors;
                if (!zero_dry_run) {
+                    m_data.offset = VMDK_GTE_ZEROED;
                    /* update L2 tables */
-                    if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
-                            != VMDK_OK) {
+                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
                        return -EIO;
                    }
                }
@@ -1556,9 +1461,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
            }
            if (m_data.valid) {
                /* update L2 tables */
-                if (vmdk_L2update(extent, &m_data,
-                                  cluster_offset >> BDRV_SECTOR_BITS)
-                        != VMDK_OK) {
+                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
                    return -EIO;
                }
            }
@@ -1570,7 +1473,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
        /* update CID on the first write every time the virtual disk is
         * opened */
        if (!s->cid_updated) {
-            ret = vmdk_write_cid(bs, g_random_int());
+            ret = vmdk_write_cid(bs, time(NULL));
            if (ret < 0) {
                return ret;
            }
@@ -1804,15 +1707,10 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    int ret = 0;
    bool flat, split, compress;
    GString *ext_desc_lines;
-    char *path = g_malloc0(PATH_MAX);
-    char *prefix = g_malloc0(PATH_MAX);
-    char *postfix = g_malloc0(PATH_MAX);
-    char *desc_line = g_malloc0(BUF_SIZE);
-    char *ext_filename = g_malloc0(PATH_MAX);
-    char *desc_filename = g_malloc0(PATH_MAX);
+    char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
    const char *desc_extent_line;
-    char *parent_desc_line = g_malloc0(BUF_SIZE);
+    char parent_desc_line[BUF_SIZE] = "";
    uint32_t parent_cid = 0xffffffff;
    uint32_t number_heads = 16;
    bool zeroed_grain = false;
@@ -1844,8 +1742,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
        goto exit;
    }
    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
@@ -1905,19 +1802,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    }
    if (backing_file) {
        BlockDriverState *bs = NULL;
-        char *full_backing = g_new0(char, PATH_MAX);
-        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
-                                                     full_backing, PATH_MAX,
-                                                     &local_err);
-        if (local_err) {
-            g_free(full_backing);
-            error_propagate(errp, local_err);
-            ret = -ENOENT;
-            goto exit;
-        }
-        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
                        errp);
-        g_free(full_backing);
        if (ret != 0) {
            goto exit;
        }
@@ -1928,27 +1814,33 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
        }
        parent_cid = vmdk_read_cid(bs, 0);
        bdrv_unref(bs);
-        snprintf(parent_desc_line, BUF_SIZE,
+        snprintf(parent_desc_line, sizeof(parent_desc_line),
                "parentFileNameHint=\"%s\"", backing_file);
    }

    /* Create extents */
    filesize = total_size;
    while (filesize > 0) {
+        char desc_line[BUF_SIZE];
+        char ext_filename[PATH_MAX];
+        char desc_filename[PATH_MAX];
        int64_t size = filesize;

        if (split && size > split_size) {
            size = split_size;
        }
        if (split) {
-            snprintf(desc_filename, PATH_MAX, "%s-%c%03d%s",
+            snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
                    prefix, flat ? 'f' : 's', ++idx, postfix);
        } else if (flat) {
-            snprintf(desc_filename, PATH_MAX, "%s-flat%s", prefix, postfix);
+            snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
+                    prefix, postfix);
        } else {
-            snprintf(desc_filename, PATH_MAX, "%s%s", prefix, postfix);
+            snprintf(desc_filename, sizeof(desc_filename), "%s%s",
+                    prefix, postfix);
        }
-        snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename);
+        snprintf(ext_filename, sizeof(ext_filename), "%s%s",
+                path, desc_filename);

        if (vmdk_create_extent(ext_filename, size,
                               flat, compress, zeroed_grain, opts, errp)) {
@@ -1958,13 +1850,13 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
        filesize -= size;

        /* Format description line */
-        snprintf(desc_line, BUF_SIZE,
+        snprintf(desc_line, sizeof(desc_line),
                    desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
        g_string_append(ext_desc_lines, desc_line);
    }
    /* generate descriptor file */
    desc = g_strdup_printf(desc_template,
-                           g_random_int(),
+                           (uint32_t)time(NULL),
                           parent_cid,
                           fmt,
                           parent_desc_line,
@@ -2013,13 +1905,6 @@ exit:
    g_free(backing_file);
    g_free(fmt);
    g_free(desc);
-    g_free(path);
-    g_free(prefix);
-    g_free(postfix);
-    g_free(desc_line);
-    g_free(ext_filename);
-    g_free(desc_filename);
-    g_free(parent_desc_line);
    g_string_free(ext_desc_lines, true);
    return ret;
 }
@@ -2114,7 +1999,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    int64_t sector_num = 0;
-    int64_t total_sectors = bdrv_nb_sectors(bs);
+    int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
    int ret;
    uint64_t cluster_offset;

@@ -2135,7 +2020,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
        }
        ret = get_cluster_offset(bs, extent, NULL,
                                 sector_num << BDRV_SECTOR_BITS,
-                                 false, &cluster_offset, 0, 0);
+                                 0, &cluster_offset);
        if (ret == VMDK_ERROR) {
            fprintf(stderr,
                    "ERROR: could not get cluster_offset for sector %"
@@ -2186,29 +2071,23 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
    return spec_info;
 }

-static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
-{
-    return a->flat == b->flat &&
-           a->compressed == b->compressed &&
-           (a->flat || a->cluster_sectors == b->cluster_sectors);
-}
-
 static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    int i;
    BDRVVmdkState *s = bs->opaque;
    assert(s->num_extents);
-
-    /* See if we have multiple extents but they have different cases */
-    for (i = 1; i < s->num_extents; i++) {
-        if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
-            return -ENOTSUP;
-        }
-    }
    bdi->needs_compressed_writes = s->extents[0].compressed;
    if (!s->extents[0].flat) {
        bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
    }
+    /* See if we have multiple extents but they have different cases */
+    for (i = 1; i < s->num_extents; i++) {
+        if (bdi->needs_compressed_writes != s->extents[i].compressed ||
+            (bdi->cluster_size && bdi->cluster_size !=
+                s->extents[i].cluster_sectors << BDRV_SECTOR_BITS)) {
+            return -ENOTSUP;
+        }
+    }
    return 0;
 }

--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,6 +29,13 @@
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
 #endif
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif

 /**************************************************************/

@@ -46,7 +53,6 @@ enum vhd_type {
 #define VHD_TIMESTAMP_BASE 946684800

 #define VHD_MAX_SECTORS       (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY      (65535LL *  16 * 255)

 // always big-endian
 typedef struct vhd_footer {
@@ -66,7 +72,7 @@ typedef struct vhd_footer {
    char        creator_os[4]; // "Wi2k"

    uint64_t    orig_size;
-    uint64_t    current_size;
+    uint64_t    size;

    uint16_t    cyls;
    uint8_t     heads;
@@ -208,7 +214,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
            "incorrect.\n", bs->filename);

    /* Write 'checksum' back to footer, or else will leave it with zero. */
-    footer->checksum = cpu_to_be32(checksum);
+    footer->checksum = be32_to_cpu(checksum);

    // The visible size of a image in Virtual PC depends on the geometry
    // rather than on the size stored in the footer (the size in the footer
@@ -216,12 +222,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    bs->total_sectors = (int64_t)
        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;

-    /* Images that have exactly the maximum geometry are probably bigger and
-     * would be truncated if we adhered to the geometry for them. Rely on
-     * footer->current_size for them. */
-    if (bs->total_sectors == VHD_MAX_GEOMETRY) {
-        bs->total_sectors = be64_to_cpu(footer->current_size) /
-                            BDRV_SECTOR_SIZE;
+    /* images created with disk2vhd report a far higher virtual size
+     * than expected with the cyls * heads * sectors_per_cyl formula.
+     * use the footer->size instead if the image was created with
+     * disk2vhd.
+     */
+    if (!strncmp(footer->creator_app, "d2v", 4)) {
+        bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
    }

    /* Allow a maximum disk size of approximately 2 TB */
@@ -269,11 +276,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
            goto fail;
        }

-        s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
-        if (s->pagetable == NULL) {
-            ret = -ENOMEM;
-            goto fail;
-        }
+        s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);

        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);

@@ -318,9 +321,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VHD images are used */
-    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "vpc", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;
@@ -376,6 +379,38 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
    }

+//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
+//	sector_num, pagetable_index, pageentry_index,
+//	bitmap_offset, block_offset);
+
+// disabled by reason
+#if 0
+#ifdef CACHE
+    if (bitmap_offset != s->last_bitmap)
+    {
+	lseek(s->fd, bitmap_offset, SEEK_SET);
+
+	s->last_bitmap = bitmap_offset;
+
+	// Scary! Bitmap is stored as big endian 32bit entries,
+	// while we used to look it up byte by byte
+	read(s->fd, s->pageentry_u8, 512);
+	for (i = 0; i < 128; i++)
+	    be32_to_cpus(&s->pageentry_u32[i]);
+    }
+
+    if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1)
+	return -1;
+#else
+    lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET);
+
+    read(s->fd, &bitmap_entry, 1);
+
+    if ((bitmap_entry >> (pageentry_index % 8)) & 1)
+	return -1; // not allocated
+#endif
+#endif
+
    return block_offset;
 }

@@ -440,7 +475,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)

    // Write BAT entry to disk
    bat_offset = s->bat_offset + (4 * index);
-    bat_value = cpu_to_be32(s->pagetable[index]);
+    bat_value = be32_to_cpu(s->pagetable[index]);
    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
    if (ret < 0)
        goto fail;
@@ -457,7 +492,7 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
    VHDFooter *footer = (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) != VHD_FIXED) {
+    if (cpu_to_be32(footer->type) != VHD_FIXED) {
        bdi->cluster_size = s->block_size;
    }

@@ -474,7 +509,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
    int64_t sectors, sectors_per_block;
    VHDFooter *footer = (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (cpu_to_be32(footer->type) == VHD_FIXED) {
        return bdrv_read(bs->file, sector_num, buf, nb_sectors);
    }
    while (nb_sectors > 0) {
@@ -523,7 +558,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
    int ret;
    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (cpu_to_be32(footer->type) == VHD_FIXED) {
        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
    }
    while (nb_sectors > 0) {
@@ -565,49 +600,6 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
    return ret;
 }

-static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum)
-{
-    BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer = (VHDFooter*) s->footer_buf;
-    int64_t start, offset;
-    bool allocated;
-    int n;
-
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        *pnum = nb_sectors;
-        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
-               (sector_num << BDRV_SECTOR_BITS);
-    }
-
-    offset = get_sector_offset(bs, sector_num, 0);
-    start = offset;
-    allocated = (offset != -1);
-    *pnum = 0;
-
-    do {
-        /* All sectors in a block are contiguous (without using the bitmap) */
-        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
-          - sector_num;
-        n = MIN(n, nb_sectors);
-
-        *pnum += n;
-        sector_num += n;
-        nb_sectors -= n;
-        /* *pnum can't be greater than one block for allocated
-         * sectors since there is always a bitmap in between. */
-        if (allocated) {
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-        }
-        if (nb_sectors == 0) {
-            break;
-        }
-        offset = get_sector_offset(bs, sector_num, 0);
-    } while (offset == -1);
-
-    return 0;
-}
-
 /*
 * Calculates the number of cylinders, heads and sectors per cylinder
 * based on a given number of sectors. This is the algorithm described
@@ -625,20 +617,26 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 {
    uint32_t cyls_times_heads;

-    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
+    /* Allow a maximum disk size of approximately 2 TB */
+    if (total_sectors > 65535LL * 255 * 255) {
+        return -EFBIG;
+    }

-    if (total_sectors >= 65535LL * 16 * 63) {
+    if (total_sectors > 65535 * 16 * 63) {
        *secs_per_cyl = 255;
-        *heads = 16;
+        if (total_sectors > 65535 * 16 * 255) {
+            *heads = 255;
+        } else {
+            *heads = 16;
+        }
        cyls_times_heads = total_sectors / *secs_per_cyl;
    } else {
        *secs_per_cyl = 17;
        cyls_times_heads = total_sectors / *secs_per_cyl;
        *heads = (cyls_times_heads + 1023) / 1024;

-        if (*heads < 4) {
+        if (*heads < 4)
            *heads = 4;
-        }

        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
            *secs_per_cyl = 31;
@@ -658,41 +656,39 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
    return 0;
 }

-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
-                               int64_t total_sectors)
+static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
 {
    VHDDynDiskHeader *dyndisk_header =
        (VHDDynDiskHeader *) buf;
    size_t block_size, num_bat_entries;
    int i;
-    int ret;
-    int64_t offset = 0;
+    int ret = -EIO;

    // Write the footer (twice: at the beginning and at the end)
    block_size = 0x200000;
    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);

-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
-    if (ret) {
+    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
        goto fail;
    }

-    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
-    if (ret < 0) {
+    if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
+        goto fail;
+    }
+    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
        goto fail;
    }

    // Write the initial BAT
-    offset = 3 * 512;
+    if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
+        goto fail;
+    }

    memset(buf, 0xFF, 512);
    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        ret = bdrv_pwrite_sync(bs, offset, buf, 512);
-        if (ret < 0) {
+        if (write(fd, buf, 512) != 512) {
            goto fail;
        }
-        offset += 512;
    }

    // Prepare the Dynamic Disk Header
@@ -704,44 +700,48 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
     * Note: The spec is actually wrong here for data_offset, it says
     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
     */
-    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
-    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
-    dyndisk_header->version = cpu_to_be32(0x00010000);
-    dyndisk_header->block_size = cpu_to_be32(block_size);
-    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
+    dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL);
+    dyndisk_header->table_offset = be64_to_cpu(3 * 512);
+    dyndisk_header->version = be32_to_cpu(0x00010000);
+    dyndisk_header->block_size = be32_to_cpu(block_size);
+    dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries);

-    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
+    dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));

    // Write the header
-    offset = 512;
-
-    ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
-    if (ret < 0) {
+    if (lseek(fd, 512, SEEK_SET) < 0) {
        goto fail;
    }

+    if (write(fd, buf, 1024) != 1024) {
+        goto fail;
+    }
+    ret = 0;
+
 fail:
    return ret;
 }

-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
-                             int64_t total_size)
+static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
 {
-    int ret;
+    int ret = -EIO;

    /* Add footer to total size */
-    total_size += HEADER_SIZE;
-
-    ret = bdrv_truncate(bs, total_size);
-    if (ret < 0) {
-        return ret;
+    total_size += 512;
+    if (ftruncate(fd, total_size) != 0) {
+        ret = -errno;
+        goto fail;
+    }
+    if (lseek(fd, -512, SEEK_END) < 0) {
+        goto fail;
+    }
+    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+        goto fail;
    }

-    ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
-    if (ret < 0) {
-        return ret;
-    }
+    ret = 0;

+ fail:
    return ret;
 }

@@ -750,7 +750,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    uint8_t buf[1024];
    VHDFooter *footer = (VHDFooter *) buf;
    char *disk_type_param;
-    int i;
+    int fd, i;
    uint16_t cyls = 0;
    uint8_t heads = 0;
    uint8_t secs_per_cyl = 0;
@@ -758,12 +758,10 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    int64_t total_size;
    int disk_type;
    int ret = -EIO;
-    Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
+    bool nocow = false;

    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
    if (disk_type_param) {
        if (!strcmp(disk_type_param, "dynamic")) {
@@ -777,44 +775,46 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    } else {
        disk_type = VHD_DYNAMIC;
    }
+    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);

-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    /* Create the file */
+    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0) {
+        ret = -EIO;
        goto out;
    }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    NULL, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto out;
+
+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
    }

    /*
     * Calculate matching total_size and geometry. Increase the number of
     * sectors requested until we get enough (or fail). This ensures that
     * qemu-img convert doesn't truncate images, but rather rounds up.
-     *
-     * If the image size can't be represented by a spec conform CHS geometry,
-     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
-     * the image size from the VHD footer to calculate total_sectors.
     */
-    total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+    total_sectors = total_size / BDRV_SECTOR_SIZE;
    for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
-        calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+        if (calculate_geometry(total_sectors + i, &cyls, &heads,
+                               &secs_per_cyl))
+        {
+            ret = -EFBIG;
+            goto fail;
+        }
    }

-    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
-        total_sectors = total_size / BDRV_SECTOR_SIZE;
-        /* Allow a maximum disk size of approximately 2 TB */
-        if (total_sectors > VHD_MAX_SECTORS) {
-            ret = -EFBIG;
-            goto out;
-        }
-    } else {
-        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
-        total_size = total_sectors * BDRV_SECTOR_SIZE;
-    }
+    total_sectors = (int64_t) cyls * heads * secs_per_cyl;

    /* Prepare the Hard Disk Footer */
    memset(buf, 0, 1024);
@@ -824,40 +824,46 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    memcpy(footer->creator_app, "qemu", 4);
    memcpy(footer->creator_os, "Wi2k", 4);

-    footer->features = cpu_to_be32(0x02);
-    footer->version = cpu_to_be32(0x00010000);
+    footer->features = be32_to_cpu(0x02);
+    footer->version = be32_to_cpu(0x00010000);
    if (disk_type == VHD_DYNAMIC) {
-        footer->data_offset = cpu_to_be64(HEADER_SIZE);
+        footer->data_offset = be64_to_cpu(HEADER_SIZE);
    } else {
-        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
+        footer->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL);
    }
-    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
+    footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);

    /* Version of Virtual PC 2007 */
-    footer->major = cpu_to_be16(0x0005);
-    footer->minor = cpu_to_be16(0x0003);
-    footer->orig_size = cpu_to_be64(total_size);
-    footer->current_size = cpu_to_be64(total_size);
-    footer->cyls = cpu_to_be16(cyls);
+    footer->major = be16_to_cpu(0x0005);
+    footer->minor = be16_to_cpu(0x0003);
+    if (disk_type == VHD_DYNAMIC) {
+        footer->orig_size = be64_to_cpu(total_sectors * 512);
+        footer->size = be64_to_cpu(total_sectors * 512);
+    } else {
+        footer->orig_size = be64_to_cpu(total_size);
+        footer->size = be64_to_cpu(total_size);
+    }
+    footer->cyls = be16_to_cpu(cyls);
    footer->heads = heads;
    footer->secs_per_cyl = secs_per_cyl;

-    footer->type = cpu_to_be32(disk_type);
+    footer->type = be32_to_cpu(disk_type);

 #if defined(CONFIG_UUID)
    uuid_generate(footer->uuid);
 #endif

-    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
+    footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));

    if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(bs, buf, total_sectors);
+        ret = create_dynamic_disk(fd, buf, total_sectors);
    } else {
-        ret = create_fixed_disk(bs, buf, total_size);
+        ret = create_fixed_disk(fd, buf, total_size);
    }

+fail:
+    qemu_close(fd);
 out:
-    bdrv_unref(bs);
    g_free(disk_type_param);
    return ret;
 }
@@ -867,7 +873,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
    BDRVVPCState *s = bs->opaque;
    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (cpu_to_be32(footer->type) == VHD_FIXED) {
        return bdrv_has_zero_init(bs->file);
    } else {
        return 1;
@@ -902,6 +908,11 @@ static QemuOptsList vpc_create_opts = {
                "Type of virtual hard disk format. Supported formats are "
                "{dynamic (default) | fixed} "
        },
+        {
+            .name = BLOCK_OPT_NOCOW,
+            .type = QEMU_OPT_BOOL,
+            .help = "Turn off copy-on-write (valid only on btrfs)"
+        },
        { /* end of list */ }
    }
 };
@@ -916,9 +927,8 @@ static BlockDriver bdrv_vpc = {
    .bdrv_reopen_prepare    = vpc_reopen_prepare,
    .bdrv_create            = vpc_create,

-    .bdrv_read                  = vpc_co_read,
-    .bdrv_write                 = vpc_co_write,
-    .bdrv_co_get_block_status   = vpc_co_get_block_status,
+    .bdrv_read              = vpc_co_read,
+    .bdrv_write             = vpc_co_write,

    .bdrv_get_info          = vpc_get_info,

--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -52,6 +52,10 @@

 #define DLOG(a) a

+#undef stderr
+#define stderr STDERR
+FILE* stderr = NULL;
+
 static void checkpoint(void);

 #ifdef __MINGW32__
@@ -728,7 +732,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
 	if(first_cluster == 0 && (is_dotdot || is_dot))
 	    continue;

-	buffer = g_malloc(length);
+	buffer=(char*)g_malloc(length);
 	snprintf(buffer,length,"%s/%s",dirname,entry->d_name);

 	if(stat(buffer,&st)<0) {
@@ -763,7 +767,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)

 	/* create mapping for this file */
 	if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) {
-	    s->current_mapping = array_get_next(&(s->mapping));
+	    s->current_mapping=(mapping_t*)array_get_next(&(s->mapping));
 	    s->current_mapping->begin=0;
 	    s->current_mapping->end=st.st_size;
 	    /*
@@ -807,12 +811,12 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
    }

     /* reget the mapping, since s->mapping was possibly realloc()ed */
-    mapping = array_get(&(s->mapping), mapping_index);
+    mapping = (mapping_t*)array_get(&(s->mapping), mapping_index);
    first_cluster += (s->directory.next - mapping->info.dir.first_dir_index)
 	* 0x20 / s->cluster_size;
    mapping->end = first_cluster;

-    direntry = array_get(&(s->directory), mapping->dir_index);
+    direntry = (direntry_t*)array_get(&(s->directory), mapping->dir_index);
    set_begin_of_direntry(direntry, mapping->begin);

    return 0;
@@ -1078,6 +1082,11 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    vvv = s;
 #endif

+DLOG(if (stderr == NULL) {
+    stderr = fopen("vvfat.log", "a");
+    setbuf(stderr, NULL);
+})
+
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -1180,10 +1189,9 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    /* Disable migration when vvfat is used rw */
    if (s->qcow) {
-        error_setg(&s->migration_blocker,
-                   "The vvfat (rw) format used by node '%s' "
-                   "does not support live migration",
-                   bdrv_get_device_or_node_name(bs));
+        error_set(&s->migration_blocker,
+                  QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+                  "vvfat (rw)", bs->device_name, "live migration");
        migrate_add_blocker(s->migration_blocker);
    }

@@ -2910,8 +2918,8 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)

    array_init(&(s->commits), sizeof(commit_t));

-    s->qcow_filename = g_malloc(PATH_MAX);
-    ret = get_tmp_filename(s->qcow_filename, PATH_MAX);
+    s->qcow_filename = g_malloc(1024);
+    ret = get_tmp_filename(s->qcow_filename, 1024);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "can't create temporary file");
        goto err;
@@ -2925,9 +2933,8 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    }

    opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
-    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512,
-                        &error_abort);
-    qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort);
+    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512);
+    qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:");

    ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
    qemu_opts_del(opts);
@@ -2947,9 +2954,9 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    unlink(s->qcow_filename);
 #endif

-    bdrv_set_backing_hd(s->bs, bdrv_new());
+    bdrv_set_backing_hd(s->bs, bdrv_new("", &error_abort));
    s->bs->backing_hd->drv = &vvfat_write_target;
-    s->bs->backing_hd->opaque = g_new(void *, 1);
+    s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
    *(void**)s->bs->backing_hd->opaque = s;

    return 0;
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -44,7 +44,7 @@ struct QEMUWin32AIOState {
 };

 typedef struct QEMUWin32AIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    struct QEMUWin32AIOState *ctx;
    int nbytes;
    OVERLAPPED ov;
@@ -88,7 +88,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,


    waiocb->common.cb(waiocb->common.opaque, ret);
-    qemu_aio_unref(waiocb);
+    qemu_aio_release(waiocb);
 }

 static void win32_aio_completion_cb(EventNotifier *e)
@@ -106,14 +106,28 @@ static void win32_aio_completion_cb(EventNotifier *e)
    }
 }

+static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
+
+    /*
+     * CancelIoEx is only supported in Vista and newer.  For now, just
+     * wait for completion.
+     */
+    while (!HasOverlappedIoCompleted(&waiocb->ov)) {
+        aio_poll(bdrv_get_aio_context(blockacb->bs), true);
+    }
+}
+
 static const AIOCBInfo win32_aiocb_info = {
    .aiocb_size         = sizeof(QEMUWin32AIOCB),
+    .cancel             = win32_aio_cancel,
 };

-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
 {
    struct QEMUWin32AIOCB *waiocb;
    uint64_t offset = sector_num * 512;
@@ -125,10 +139,7 @@ BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
    waiocb->is_read = (type == QEMU_AIO_READ);

    if (qiov->niov > 1) {
-        waiocb->buf = qemu_try_blockalign(bs, qiov->size);
-        if (waiocb->buf == NULL) {
-            goto out;
-        }
+        waiocb->buf = qemu_blockalign(bs, qiov->size);
        if (type & QEMU_AIO_WRITE) {
            iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
        }
@@ -157,8 +168,7 @@ BlockAIOCB *win32_aio_submit(BlockDriverState *bs,

 out_dec_count:
    aio->count--;
-out:
-    qemu_aio_unref(waiocb);
+    qemu_aio_release(waiocb);
    return NULL;
 }

--- a/block/write-threshold.c
+++ b/block/write-threshold.c
@@ -1,125 +0,0 @@
-/*
- * QEMU System Emulator block write threshold notification
- *
- * Copyright Red Hat, Inc. 2014
- *
- * Authors:
- *  Francesco Romani <fromani@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- */
-
-#include "block/block_int.h"
-#include "block/coroutine.h"
-#include "block/write-threshold.h"
-#include "qemu/notify.h"
-#include "qapi-event.h"
-#include "qmp-commands.h"
-
-
-uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
-{
-    return bs->write_threshold_offset;
-}
-
-bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
-{
-    return bs->write_threshold_offset > 0;
-}
-
-static void write_threshold_disable(BlockDriverState *bs)
-{
-    if (bdrv_write_threshold_is_set(bs)) {
-        notifier_with_return_remove(&bs->write_threshold_notifier);
-        bs->write_threshold_offset = 0;
-    }
-}
-
-uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
-                                       const BdrvTrackedRequest *req)
-{
-    if (bdrv_write_threshold_is_set(bs)) {
-        if (req->offset > bs->write_threshold_offset) {
-            return (req->offset - bs->write_threshold_offset) + req->bytes;
-        }
-        if ((req->offset + req->bytes) > bs->write_threshold_offset) {
-            return (req->offset + req->bytes) - bs->write_threshold_offset;
-        }
-    }
-    return 0;
-}
-
-static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
-                                            void *opaque)
-{
-    BdrvTrackedRequest *req = opaque;
-    BlockDriverState *bs = req->bs;
-    uint64_t amount = 0;
-
-    amount = bdrv_write_threshold_exceeded(bs, req);
-    if (amount > 0) {
-        qapi_event_send_block_write_threshold(
-            bs->node_name,
-            amount,
-            bs->write_threshold_offset,
-            &error_abort);
-
-        /* autodisable to avoid flooding the monitor */
-        write_threshold_disable(bs);
-    }
-
-    return 0; /* should always let other notifiers run */
-}
-
-static void write_threshold_register_notifier(BlockDriverState *bs)
-{
-    bs->write_threshold_notifier.notify = before_write_notify;
-    notifier_with_return_list_add(&bs->before_write_notifiers,
-                                  &bs->write_threshold_notifier);
-}
-
-static void write_threshold_update(BlockDriverState *bs,
-                                   int64_t threshold_bytes)
-{
-    bs->write_threshold_offset = threshold_bytes;
-}
-
-void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
-{
-    if (bdrv_write_threshold_is_set(bs)) {
-        if (threshold_bytes > 0) {
-            write_threshold_update(bs, threshold_bytes);
-        } else {
-            write_threshold_disable(bs);
-        }
-    } else {
-        if (threshold_bytes > 0) {
-            /* avoid multiple registration */
-            write_threshold_register_notifier(bs);
-            write_threshold_update(bs, threshold_bytes);
-        }
-        /* discard bogus disable request */
-    }
-}
-
-void qmp_block_set_write_threshold(const char *node_name,
-                                   uint64_t threshold_bytes,
-                                   Error **errp)
-{
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    bs = bdrv_find_node(node_name);
-    if (!bs) {
-        error_setg(errp, "Device '%s' not found", node_name);
-        return;
-    }
-
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
-    bdrv_write_threshold_set(bs, threshold_bytes);
-
-    aio_context_release(aio_context);
-}
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -10,7 +10,6 @@
 */

 #include "sysemu/blockdev.h"
-#include "sysemu/block-backend.h"
 #include "hw/block/block.h"
 #include "monitor/monitor.h"
 #include "qapi/qmp/qerror.h"
@@ -47,9 +46,8 @@ void qmp_nbd_server_start(SocketAddress *addr, Error **errp)
    }
 }

-/*
- * Hook into the BlockBackend notifiers to close the export when the
- * backend is closed.
+/* Hook into the BlockDriverState notifiers to close the export when
+ * the file is closed.
 */
 typedef struct NBDCloseNotifier {
    Notifier n;
@@ -75,7 +73,7 @@ static void nbd_close_notifier(Notifier *n, void *data)
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
                        Error **errp)
 {
-    BlockBackend *blk;
+    BlockDriverState *bs;
    NBDExport *exp;
    NBDCloseNotifier *n;

@@ -89,12 +87,12 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
        return;
    }

-    blk = blk_by_name(device);
-    if (!blk) {
+    bs = bdrv_find(device);
+    if (!bs) {
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
        return;
    }
-    if (!blk_is_inserted(blk)) {
+    if (!bdrv_is_inserted(bs)) {
        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
        return;
    }
@@ -102,22 +100,18 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
    if (!has_writable) {
        writable = false;
    }
-    if (blk_is_read_only(blk)) {
+    if (bdrv_is_read_only(bs)) {
        writable = false;
    }

-    exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL,
-                         errp);
-    if (!exp) {
-        return;
-    }
+    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);

    nbd_export_set_name(exp, device);

-    n = g_new0(NBDCloseNotifier, 1);
+    n = g_malloc0(sizeof(NBDCloseNotifier));
    n->n.notify = nbd_close_notifier;
    n->exp = exp;
-    blk_add_close_notifier(blk, &n->n);
+    bdrv_add_close_notifier(bs, &n->n);
    QTAILQ_INSERT_TAIL(&close_notifiers, n, next);
 }

--- a/blockdev.c
+++ b/blockdev.c
--- a/blockjob.c
+++ b/blockjob.c
@@ -36,7 +36,7 @@
 #include "qapi-event.h"

 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
-                       int64_t speed, BlockCompletionFunc *cb,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
                       void *opaque, Error **errp)
 {
    BlockJob *job;
@@ -50,7 +50,6 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
    bdrv_op_block_all(bs, job->blocker);
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
    job->bs            = bs;
@@ -107,9 +106,8 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)

 void block_job_complete(BlockJob *job, Error **errp)
 {
-    if (job->pause_count || job->cancelled || !job->driver->complete) {
-        error_set(errp, QERR_BLOCK_JOB_NOT_READY,
-                  bdrv_get_device_name(job->bs));
+    if (job->paused || job->cancelled || !job->driver->complete) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

@@ -118,26 +116,17 @@ void block_job_complete(BlockJob *job, Error **errp)

 void block_job_pause(BlockJob *job)
 {
-    job->pause_count++;
+    job->paused = true;
 }

 bool block_job_is_paused(BlockJob *job)
 {
-    return job->pause_count > 0;
+    return job->paused;
 }

 void block_job_resume(BlockJob *job)
 {
-    assert(job->pause_count > 0);
-    job->pause_count--;
-    if (job->pause_count) {
-        return;
-    }
-    block_job_enter(job);
-}
-
-void block_job_enter(BlockJob *job)
-{
+    job->paused = false;
    block_job_iostatus_reset(job);
    if (job->co && !job->busy) {
        qemu_coroutine_enter(job->co, NULL);
@@ -147,7 +136,7 @@ void block_job_enter(BlockJob *job)
 void block_job_cancel(BlockJob *job)
 {
    job->cancelled = true;
-    block_job_enter(job);
+    block_job_resume(job);
 }

 bool block_job_is_cancelled(BlockJob *job)
@@ -163,30 +152,27 @@ void block_job_iostatus_reset(BlockJob *job)
    }
 }

-struct BlockFinishData {
+struct BlockCancelData {
    BlockJob *job;
-    BlockCompletionFunc *cb;
+    BlockDriverCompletionFunc *cb;
    void *opaque;
    bool cancelled;
    int ret;
 };

-static void block_job_finish_cb(void *opaque, int ret)
+static void block_job_cancel_cb(void *opaque, int ret)
 {
-    struct BlockFinishData *data = opaque;
+    struct BlockCancelData *data = opaque;

    data->cancelled = block_job_is_cancelled(data->job);
    data->ret = ret;
    data->cb(data->opaque, ret);
 }

-static int block_job_finish_sync(BlockJob *job,
-                                 void (*finish)(BlockJob *, Error **errp),
-                                 Error **errp)
+int block_job_cancel_sync(BlockJob *job)
 {
-    struct BlockFinishData data;
+    struct BlockCancelData data;
    BlockDriverState *bs = job->bs;
-    Error *local_err = NULL;

    assert(bs->job == job);

@@ -197,37 +183,15 @@ static int block_job_finish_sync(BlockJob *job,
    data.cb = job->cb;
    data.opaque = job->opaque;
    data.ret = -EINPROGRESS;
-    job->cb = block_job_finish_cb;
+    job->cb = block_job_cancel_cb;
    job->opaque = &data;
-    finish(job, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return -EBUSY;
-    }
+    block_job_cancel(job);
    while (data.ret == -EINPROGRESS) {
        aio_poll(bdrv_get_aio_context(bs), true);
    }
    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }

-/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
- * used with block_job_finish_sync() without the need for (rather nasty)
- * function pointer casts there. */
-static void block_job_cancel_err(BlockJob *job, Error **errp)
-{
-    block_job_cancel(job);
-}
-
-int block_job_cancel_sync(BlockJob *job)
-{
-    return block_job_finish_sync(job, &block_job_cancel_err, NULL);
-}
-
-int block_job_complete_sync(BlockJob *job, Error **errp)
-{
-    return block_job_finish_sync(job, &block_job_complete, errp);
-}
-
 void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
 {
    assert(job->busy);
@@ -241,7 +205,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
    if (block_job_is_paused(job)) {
        qemu_coroutine_yield();
    } else {
-        co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
+        co_sleep_ns(type, ns);
    }
    job->busy = true;
 }
@@ -267,11 +231,10 @@ BlockJobInfo *block_job_query(BlockJob *job)
    info->device    = g_strdup(bdrv_get_device_name(job->bs));
    info->len       = job->len;
    info->busy      = job->busy;
-    info->paused    = job->pause_count > 0;
+    info->paused    = job->paused;
    info->offset    = job->offset;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
-    info->ready     = job->ready;
    return info;
 }

@@ -307,8 +270,6 @@ void block_job_event_completed(BlockJob *job, const char *msg)

 void block_job_event_ready(BlockJob *job)
 {
-    job->ready = true;
-
    qapi_event_send_block_job_ready(job->driver->job_type,
                                    bdrv_get_device_name(job->bs),
                                    job->len,
@@ -344,8 +305,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
                                    IO_OPERATION_TYPE_WRITE,
                                    action, &error_abort);
    if (action == BLOCK_ERROR_ACTION_STOP) {
-        /* make the pause user visible, which will be resumed from QMP. */
-        job->user_paused = true;
        block_job_pause(job);
        block_job_iostatus_set_err(job, error);
        if (bs != job->bs) {
@@ -354,48 +313,3 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
    }
    return action;
 }
-
-typedef struct {
-    BlockJob *job;
-    QEMUBH *bh;
-    AioContext *aio_context;
-    BlockJobDeferToMainLoopFn *fn;
-    void *opaque;
-} BlockJobDeferToMainLoopData;
-
-static void block_job_defer_to_main_loop_bh(void *opaque)
-{
-    BlockJobDeferToMainLoopData *data = opaque;
-    AioContext *aio_context;
-
-    qemu_bh_delete(data->bh);
-
-    /* Prevent race with block_job_defer_to_main_loop() */
-    aio_context_acquire(data->aio_context);
-
-    /* Fetch BDS AioContext again, in case it has changed */
-    aio_context = bdrv_get_aio_context(data->job->bs);
-    aio_context_acquire(aio_context);
-
-    data->fn(data->job, data->opaque);
-
-    aio_context_release(aio_context);
-
-    aio_context_release(data->aio_context);
-
-    g_free(data);
-}
-
-void block_job_defer_to_main_loop(BlockJob *job,
-                                  BlockJobDeferToMainLoopFn *fn,
-                                  void *opaque)
-{
-    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
-    data->job = job;
-    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
-    data->aio_context = bdrv_get_aio_context(job->bs);
-    data->fn = fn;
-    data->opaque = opaque;
-
-    qemu_bh_schedule(data->bh);
-}
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -1,341 +0,0 @@
-/*
- * QEMU Boot Device Implement
- *
- * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "sysemu/sysemu.h"
-#include "qapi/visitor.h"
-#include "qemu/error-report.h"
-#include "hw/hw.h"
-
-typedef struct FWBootEntry FWBootEntry;
-
-struct FWBootEntry {
-    QTAILQ_ENTRY(FWBootEntry) link;
-    int32_t bootindex;
-    DeviceState *dev;
-    char *suffix;
-};
-
-static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
-    QTAILQ_HEAD_INITIALIZER(fw_boot_order);
-static QEMUBootSetHandler *boot_set_handler;
-static void *boot_set_opaque;
-
-void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
-{
-    boot_set_handler = func;
-    boot_set_opaque = opaque;
-}
-
-void qemu_boot_set(const char *boot_order, Error **errp)
-{
-    Error *local_err = NULL;
-
-    if (!boot_set_handler) {
-        error_setg(errp, "no function defined to set boot device list for"
-                         " this architecture");
-        return;
-    }
-
-    validate_bootdevices(boot_order, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    boot_set_handler(boot_set_opaque, boot_order, errp);
-}
-
-void validate_bootdevices(const char *devices, Error **errp)
-{
-    /* We just do some generic consistency checks */
-    const char *p;
-    int bitmap = 0;
-
-    for (p = devices; *p != '\0'; p++) {
-        /* Allowed boot devices are:
-         * a-b: floppy disk drives
-         * c-f: IDE disk drives
-         * g-m: machine implementation dependent drives
-         * n-p: network devices
-         * It's up to each machine implementation to check if the given boot
-         * devices match the actual hardware implementation and firmware
-         * features.
-         */
-        if (*p < 'a' || *p > 'p') {
-            error_setg(errp, "Invalid boot device '%c'", *p);
-            return;
-        }
-        if (bitmap & (1 << (*p - 'a'))) {
-            error_setg(errp, "Boot device '%c' was given twice", *p);
-            return;
-        }
-        bitmap |= 1 << (*p - 'a');
-    }
-}
-
-void restore_boot_order(void *opaque)
-{
-    char *normal_boot_order = opaque;
-    static int first = 1;
-
-    /* Restore boot order and remove ourselves after the first boot */
-    if (first) {
-        first = 0;
-        return;
-    }
-
-    if (boot_set_handler) {
-        qemu_boot_set(normal_boot_order, &error_abort);
-    }
-
-    qemu_unregister_reset(restore_boot_order, normal_boot_order);
-    g_free(normal_boot_order);
-}
-
-void check_boot_index(int32_t bootindex, Error **errp)
-{
-    FWBootEntry *i;
-
-    if (bootindex >= 0) {
-        QTAILQ_FOREACH(i, &fw_boot_order, link) {
-            if (i->bootindex == bootindex) {
-                error_setg(errp, "The bootindex %d has already been used",
-                           bootindex);
-                return;
-            }
-        }
-    }
-}
-
-void del_boot_device_path(DeviceState *dev, const char *suffix)
-{
-    FWBootEntry *i;
-
-    if (dev == NULL) {
-        return;
-    }
-
-    QTAILQ_FOREACH(i, &fw_boot_order, link) {
-        if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
-             i->dev == dev) {
-            QTAILQ_REMOVE(&fw_boot_order, i, link);
-            g_free(i->suffix);
-            g_free(i);
-
-            break;
-        }
-    }
-}
-
-void add_boot_device_path(int32_t bootindex, DeviceState *dev,
-                          const char *suffix)
-{
-    FWBootEntry *node, *i;
-
-    if (bootindex < 0) {
-        del_boot_device_path(dev, suffix);
-        return;
-    }
-
-    assert(dev != NULL || suffix != NULL);
-
-    del_boot_device_path(dev, suffix);
-
-    node = g_malloc0(sizeof(FWBootEntry));
-    node->bootindex = bootindex;
-    node->suffix = g_strdup(suffix);
-    node->dev = dev;
-
-    QTAILQ_FOREACH(i, &fw_boot_order, link) {
-        if (i->bootindex == bootindex) {
-            error_report("Two devices with same boot index %d", bootindex);
-            exit(1);
-        } else if (i->bootindex < bootindex) {
-            continue;
-        }
-        QTAILQ_INSERT_BEFORE(i, node, link);
-        return;
-    }
-    QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
-}
-
-DeviceState *get_boot_device(uint32_t position)
-{
-    uint32_t counter = 0;
-    FWBootEntry *i = NULL;
-    DeviceState *res = NULL;
-
-    if (!QTAILQ_EMPTY(&fw_boot_order)) {
-        QTAILQ_FOREACH(i, &fw_boot_order, link) {
-            if (counter == position) {
-                res = i->dev;
-                break;
-            }
-            counter++;
-        }
-    }
-    return res;
-}
-
-/*
- * This function returns null terminated string that consist of new line
- * separated device paths.
- *
- * memory pointed by "size" is assigned total length of the array in bytes
- *
- */
-char *get_boot_devices_list(size_t *size, bool ignore_suffixes)
-{
-    FWBootEntry *i;
-    size_t total = 0;
-    char *list = NULL;
-
-    QTAILQ_FOREACH(i, &fw_boot_order, link) {
-        char *devpath = NULL,  *suffix = NULL;
-        char *bootpath;
-        char *d;
-        size_t len;
-
-        if (i->dev) {
-            devpath = qdev_get_fw_dev_path(i->dev);
-            assert(devpath);
-        }
-
-        if (!ignore_suffixes) {
-            if (i->dev) {
-                d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus,
-                                                          i->dev);
-                if (d) {
-                    assert(!i->suffix);
-                    suffix = d;
-                } else {
-                    suffix = g_strdup(i->suffix);
-                }
-            } else {
-                suffix = g_strdup(i->suffix);
-            }
-        }
-
-        bootpath = g_strdup_printf("%s%s",
-                                   devpath ? devpath : "",
-                                   suffix ? suffix : "");
-        g_free(devpath);
-        g_free(suffix);
-
-        if (total) {
-            list[total-1] = '\n';
-        }
-        len = strlen(bootpath) + 1;
-        list = g_realloc(list, total + len);
-        memcpy(&list[total], bootpath, len);
-        total += len;
-        g_free(bootpath);
-    }
-
-    *size = total;
-
-    if (boot_strict && *size > 0) {
-        list[total-1] = '\n';
-        list = g_realloc(list, total + 5);
-        memcpy(&list[total], "HALT", 5);
-        *size = total + 5;
-    }
-    return list;
-}
-
-typedef struct {
-    int32_t *bootindex;
-    const char *suffix;
-    DeviceState *dev;
-} BootIndexProperty;
-
-static void device_get_bootindex(Object *obj, Visitor *v, void *opaque,
-                                 const char *name, Error **errp)
-{
-    BootIndexProperty *prop = opaque;
-    visit_type_int32(v, prop->bootindex, name, errp);
-}
-
-static void device_set_bootindex(Object *obj, Visitor *v, void *opaque,
-                                 const char *name, Error **errp)
-{
-    BootIndexProperty *prop = opaque;
-    int32_t boot_index;
-    Error *local_err = NULL;
-
-    visit_type_int32(v, &boot_index, name, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    /* check whether bootindex is present in fw_boot_order list  */
-    check_boot_index(boot_index, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    /* change bootindex to a new one */
-    *prop->bootindex = boot_index;
-
-    add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix);
-
-out:
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-}
-
-static void property_release_bootindex(Object *obj, const char *name,
-                                       void *opaque)
-
-{
-    BootIndexProperty *prop = opaque;
-
-    del_boot_device_path(prop->dev, prop->suffix);
-    g_free(prop);
-}
-
-void device_add_bootindex_property(Object *obj, int32_t *bootindex,
-                                   const char *name, const char *suffix,
-                                   DeviceState *dev, Error **errp)
-{
-    Error *local_err = NULL;
-    BootIndexProperty *prop = g_malloc0(sizeof(*prop));
-
-    prop->bootindex = bootindex;
-    prop->suffix = suffix;
-    prop->dev = dev;
-
-    object_property_add(obj, name, "int32",
-                        device_get_bootindex,
-                        device_set_bootindex,
-                        property_release_bootindex,
-                        prop, &local_err);
-
-    if (local_err) {
-        error_propagate(errp, local_err);
-        g_free(prop);
-        return;
-    }
-    /* initialize devices' bootindex property to -1 */
-    object_property_set_int(obj, -1, name, NULL);
-}
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -351,10 +351,8 @@ static inline void init_thread(struct target_pt_regs *_regs, struct image_info *

    _regs->gpr[1] = infop->start_stack;
 #if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
-    get_user_u64(entry, infop->entry);
-    entry += infop->load_addr;
-    get_user_u64(toc, infop->entry + 8);
-    toc += infop->load_addr;
+    entry = ldq_raw(infop->entry) + infop->load_addr;
+    toc = ldq_raw(infop->entry + 8) + infop->load_addr;
    _regs->gpr[2] = toc;
    infop->entry = entry;
 #endif
@@ -367,9 +365,8 @@ static inline void init_thread(struct target_pt_regs *_regs, struct image_info *
    get_user_ual(_regs->gpr[3], pos);
    pos += sizeof(abi_ulong);
    _regs->gpr[4] = pos;
-    for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong)) {
-        get_user_ual(tmp, pos);
-    }
+    for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong))
+        tmp = ldl(pos);
    _regs->gpr[5] = pos;
 }

--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -108,6 +108,10 @@ void cpu_list_unlock(void)
 /***********************************************************/
 /* CPUX86 core interface */

+void cpu_smm_update(CPUX86State *env)
+{
+}
+
 uint64_t cpu_get_tsc(CPUX86State *env)
 {
    return cpu_get_real_ticks();
@@ -901,14 +905,15 @@ int main(int argc, char **argv)
 #endif
    }
    tcg_exec_init(0);
+    cpu_exec_init_all();
    /* NOTE: we need to init the CPU at this stage to get
       qemu_host_page_size */
-    cpu = cpu_init(cpu_model);
-    if (!cpu) {
+    env = cpu_init(cpu_model);
+    if (!env) {
        fprintf(stderr, "Unable to find CPU definition\n");
        exit(1);
    }
-    env = cpu->env_ptr;
+    cpu = ENV_GET_CPU(env);
 #if defined(TARGET_SPARC) || defined(TARGET_PPC)
    cpu_reset(cpu);
 #endif
--- a/configure
+++ b/configure
@@ -103,8 +103,7 @@ update_cxxflags() {
 }

 compile_object() {
-  local_cflags="$1"
-  do_cc $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC
+  do_cc $QEMU_CFLAGS -c -o $TMPO $TMPC
 }

 compile_prog() {
@@ -310,11 +309,10 @@ rbd=""
 smartcard_nss=""
 libusb=""
 usb_redir=""
-opengl=""
+glx=""
 zlib="yes"
 lzo=""
 snappy=""
-bzip2=""
 guest_agent=""
 guest_agent_with_vss="no"
 vss_win32_sdk=""
@@ -328,7 +326,7 @@ seccomp=""
 glusterfs=""
 glusterfs_discard="no"
 glusterfs_zerofill="no"
-archipelago="no"
+virtio_blk_data_plane=""
 gtk=""
 gtkabi=""
 vte=""
@@ -337,7 +335,6 @@ libssh2=""
 vhdx=""
 quorum=""
 numa=""
-tcmalloc="no"

 # parse CC options first
 for opt do
@@ -353,7 +350,7 @@ for opt do
  ;;
  --cpu=*) cpu="$optarg"
  ;;
-  --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg"
+  --extra-cflags=*) QEMU_CFLAGS="$optarg $QEMU_CFLAGS"
                    EXTRA_CFLAGS="$optarg"
  ;;
  --extra-ldflags=*) LDFLAGS="$optarg $LDFLAGS"
@@ -391,7 +388,6 @@ cpp="${CPP-$cc -E}"
 objcopy="${OBJCOPY-${cross_prefix}objcopy}"
 ld="${LD-${cross_prefix}ld}"
 libtool="${LIBTOOL-${cross_prefix}libtool}"
-nm="${NM-${cross_prefix}nm}"
 strip="${STRIP-${cross_prefix}strip}"
 windres="${WINDRES-${cross_prefix}windres}"
 pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}"
@@ -437,12 +433,6 @@ EOF
  compile_object
 }

-write_c_skeleton() {
-    cat > $TMPC <<EOF
-int main(void) { return 0; }
-EOF
-}
-
 if check_define __linux__ ; then
  targetos="Linux"
 elif check_define _WIN32 ; then
@@ -712,7 +702,9 @@ if test "$mingw32" = "yes" ; then
  # enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later)
  QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS"
  LIBS="-lwinmm -lws2_32 -liphlpapi $LIBS"
-  write_c_skeleton;
+cat > $TMPC << EOF
+int main(void) { return 0; }
+EOF
  if compile_prog "" "-liberty" ; then
    LIBS="-liberty $LIBS"
  fi
@@ -1033,9 +1025,9 @@ for opt do
  ;;
  --enable-vhost-scsi) vhost_scsi="yes"
  ;;
-  --disable-opengl) opengl="no"
+  --disable-glx) glx="no"
  ;;
-  --enable-opengl) opengl="yes"
+  --enable-glx) glx="yes"
  ;;
  --disable-rbd) rbd="no"
  ;;
@@ -1067,10 +1059,6 @@ for opt do
  ;;
  --enable-snappy) snappy="yes"
  ;;
-  --disable-bzip2) bzip2="no"
-  ;;
-  --enable-bzip2) bzip2="yes"
-  ;;
  --enable-guest-agent) guest_agent="yes"
  ;;
  --disable-guest-agent) guest_agent="no"
@@ -1099,12 +1087,9 @@ for opt do
  ;;
  --enable-glusterfs) glusterfs="yes"
  ;;
-  --disable-archipelago) archipelago="no"
+  --disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
  ;;
-  --enable-archipelago) archipelago="yes"
-  ;;
-  --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
-      echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
+  --enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
  ;;
  --disable-gtk) gtk="no"
  ;;
@@ -1140,10 +1125,6 @@ for opt do
  ;;
  --enable-numa) numa="yes"
  ;;
-  --disable-tcmalloc) tcmalloc="no"
-  ;;
-  --enable-tcmalloc) tcmalloc="yes"
-  ;;
  *)
      echo "ERROR: unknown option $opt"
      echo "Try '$0 --help' for more information"
@@ -1363,7 +1344,7 @@ Advanced options (experts only):
  --enable-linux-aio       enable Linux AIO support
  --disable-cap-ng         disable libcap-ng support
  --enable-cap-ng          enable libcap-ng support
-  --disable-attr           disable attr and xattr support
+  --disable-attr           disables attr and xattr support
  --enable-attr            enable attr and xattr support
  --disable-blobs          disable installing provided firmware blobs
  --enable-docs            enable documentation build
@@ -1389,36 +1370,30 @@ Advanced options (experts only):
  --enable-usb-redir       enable usb network redirection support
  --enable-lzo             enable the support of lzo compression library
  --enable-snappy          enable the support of snappy compression library
-  --enable-bzip2           enable the support of bzip2 compression library (for
-                           reading bzip2-compressed dmg images)
  --disable-guest-agent    disable building of the QEMU Guest Agent
  --enable-guest-agent     enable building of the QEMU Guest Agent
  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
  --disable-seccomp        disable seccomp support
-  --enable-seccomp         enable seccomp support
+  --enable-seccomp         enables seccomp support
  --with-coroutine=BACKEND coroutine backend. Supported options:
                           gthread, ucontext, sigaltstack, windows
  --disable-coroutine-pool disable coroutine freelist (worse performance)
  --enable-coroutine-pool  enable coroutine freelist (better performance)
  --enable-glusterfs       enable GlusterFS backend
  --disable-glusterfs      disable GlusterFS backend
-  --enable-archipelago     enable Archipelago backend
-  --disable-archipelago    disable Archipelago backend
  --enable-gcov            enable test coverage analysis with gcov
  --gcov=GCOV              use specified gcov [$gcov_tool]
  --disable-tpm            disable TPM support
  --enable-tpm             enable TPM support
  --disable-libssh2        disable ssh block device support
  --enable-libssh2         enable ssh block device support
-  --disable-vhdx           disable support for the Microsoft VHDX image format
+  --disable-vhdx           disables support for the Microsoft VHDX image format
  --enable-vhdx            enable support for the Microsoft VHDX image format
  --disable-quorum         disable quorum block filter support
  --enable-quorum          enable quorum block filter support
  --disable-numa           disable libnuma support
  --enable-numa            enable libnuma support
-  --disable-tcmalloc       disable tcmalloc support
-  --enable-tcmalloc        enable tcmalloc support

 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -1450,7 +1425,10 @@ if test -z "$werror" ; then
 fi

 # check that the C compiler works.
-write_c_skeleton;
+cat > $TMPC <<EOF
+int main(void) { return 0; }
+EOF
+
 if compile_object ; then
  : C compiler works ok
 else
@@ -1498,20 +1476,16 @@ gcc_flags="-Wno-string-plus-int $gcc_flags"
 # enable it for all configure tests. If a configure test failed due
 # to -Werror this would just silently disable some features,
 # so it's too error prone.
-
-cc_has_warning_flag() {
-    write_c_skeleton;
-
+cat > $TMPC << EOF
+int main(void) { return 0; }
+EOF
+for flag in $gcc_flags; do
    # Use the positive sense of the flag when testing for -Wno-wombat
    # support (gcc will happily accept the -Wno- form of unknown
    # warning options).
-    optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
-    compile_prog "-Werror $optflag" ""
-}
-
-for flag in $gcc_flags; do
-    if cc_has_warning_flag $flag ; then
-        QEMU_CFLAGS="$QEMU_CFLAGS $flag"
+    optflag="$(echo $flag | sed -e 's/^-Wno-/-W/')"
+    if compile_prog "-Werror $optflag" "" ; then
+	QEMU_CFLAGS="$QEMU_CFLAGS $flag"
    fi
 done

@@ -1562,17 +1536,6 @@ if test "$static" = "yes" ; then
  fi
 fi

-# Unconditional check for compiler __thread support
-  cat > $TMPC << EOF
-static __thread int tls_var;
-int main(void) { return tls_var; }
-EOF
-
-if ! compile_prog "-Werror" "" ; then
-    error_exit "Your compiler does not support the __thread specifier for " \
-	"Thread-Local Storage (TLS). Please upgrade to a version that does."
-fi
-
 if test "$pie" = ""; then
  case "$cpu-$targetos" in
    i386-Linux|x86_64-Linux|x32-Linux|i386-OpenBSD|x86_64-OpenBSD)
@@ -1613,7 +1576,7 @@ EOF
    fi
  fi

-  if compile_prog "-Werror -fno-pie" "-nopie"; then
+  if compile_prog "-fno-pie" "-nopie"; then
    CFLAGS_NOPIE="-fno-pie"
    LDFLAGS_NOPIE="-nopie"
  fi
@@ -1850,36 +1813,17 @@ EOF
    fi
 fi

-##########################################
-# bzip2 check
-
-if test "$bzip2" != "no" ; then
-    cat > $TMPC << EOF
-#include <bzlib.h>
-int main(void) { BZ2_bzlibVersion(); return 0; }
-EOF
-    if compile_prog "" "-lbz2" ; then
-        bzip2="yes"
-    else
-        if test "$bzip2" = "yes"; then
-            feature_not_found "libbzip2" "Install libbzip2 devel"
-        fi
-        bzip2="no"
-    fi
-fi
-
 ##########################################
 # libseccomp check

 if test "$seccomp" != "no" ; then
-    if test "$cpu" = "i386" || test "$cpu" = "x86_64" &&
-        $pkg_config --atleast-version=2.1.1 libseccomp; then
+    if $pkg_config --atleast-version=2.1.0 libseccomp; then
        libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`"
        QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`"
 	seccomp="yes"
    else
 	if test "$seccomp" = "yes"; then
-            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1"
+            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.0"
 	fi
 	seccomp="no"
    fi
@@ -1918,32 +1862,6 @@ EOF
 #if !defined(HVM_MAX_VCPUS)
 # error HVM_MAX_VCPUS not defined
 #endif
-int main(void) {
-  xc_interface *xc;
-  xs_daemon_open();
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_gnttab_open(NULL, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
-  return 0;
-}
-EOF
-      compile_prog "" "$xen_libs"
-    then
-    xen_ctrl_version=450
-    xen=yes
-
-  elif
-      cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
 int main(void) {
  xc_interface *xc;
  xs_daemon_open();
@@ -2108,15 +2026,6 @@ if test "$sparse" != "no" ; then
  fi
 fi

-##########################################
-# X11 probe
-x11_cflags=
-x11_libs=-lX11
-if $pkg_config --exists "x11"; then
-    x11_cflags=`$pkg_config --cflags x11`
-    x11_libs=`$pkg_config --libs x11`
-fi
-
 ##########################################
 # GTK probe

@@ -2144,8 +2053,7 @@ if test "$gtk" != "no"; then
        gtk_cflags=`$pkg_config --cflags $gtkpackage`
        gtk_libs=`$pkg_config --libs $gtkpackage`
        if $pkg_config --exists "$gtkx11package >= $gtkversion"; then
-            gtk_cflags="$gtk_cflags $x11_cflags"
-            gtk_libs="$gtk_libs $x11_libs"
+            gtk_libs="$gtk_libs -lX11"
        fi
        libs_softmmu="$gtk_libs $libs_softmmu"
        gtk="yes"
@@ -2270,9 +2178,8 @@ if test "$sdl" = "yes" ; then
 #endif
 int main(void) { return 0; }
 EOF
-  if compile_prog "$sdl_cflags $x11_cflags" "$sdl_libs $x11_libs" ; then
-    sdl_cflags="$sdl_cflags $x11_cflags"
-    sdl_libs="$sdl_libs $x11_libs"
+  if compile_prog "$sdl_cflags" "$sdl_libs" ; then
+    sdl_libs="$sdl_libs -lX11"
  fi
  libs_softmmu="$sdl_libs $libs_softmmu"
 fi
@@ -2779,7 +2686,12 @@ fi
 ##########################################
 # glib support probe

-glib_req_ver=2.22
+if test "$mingw32" = yes; then
+    # g_poll is required in order to integrate with the glib main loop.
+    glib_req_ver=2.20
+else
+    glib_req_ver=2.12
+fi
 glib_modules=gthread-2.0
 if test "$modules" = yes; then
    glib_modules="$glib_modules gmodule-2.0"
@@ -2797,30 +2709,12 @@ for i in $glib_modules; do
    fi
 done

-# g_test_trap_subprocess added in 2.38. Used by some tests.
-glib_subprocess=yes
-if ! $pkg_config --atleast-version=2.38 glib-2.0; then
-    glib_subprocess=no
-fi
-
-# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage
-cat > $TMPC << EOF
-#include <glib.h>
-int main(void) { return 0; }
-EOF
-if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then
-    if cc_has_warning_flag "-Wno-unknown-attributes"; then
-        glib_cflags="-Wno-unknown-attributes $glib_cflags"
-        CFLAGS="-Wno-unknown-attributes $CFLAGS"
-    fi
-fi
-
 ##########################################
 # SHA command probe for modules
 if test "$modules" = yes; then
    shacmd_probe="sha1sum sha1 shasum"
    for c in $shacmd_probe; do
-        if has $c; then
+        if which $c >/dev/null 2>&1; then
            shacmd="$c"
            break
        fi
@@ -2836,7 +2730,7 @@ fi
 if test "$pixman" = ""; then
  if test "$want_tools" = "no" -a "$softmmu" = "no"; then
    pixman="none"
-  elif $pkg_config --atleast-version=0.21.8 pixman-1 > /dev/null 2>&1; then
+  elif $pkg_config pixman-1 > /dev/null 2>&1; then
    pixman="system"
  else
    pixman="internal"
@@ -2852,12 +2746,11 @@ if test "$pixman" = "none"; then
  pixman_cflags=
  pixman_libs=
 elif test "$pixman" = "system"; then
-  # pixman version has been checked above
  pixman_cflags=`$pkg_config --cflags pixman-1`
  pixman_libs=`$pkg_config --libs pixman-1`
 else
  if test ! -d ${source_path}/pixman/pixman; then
-    error_exit "pixman >= 0.21.8 not present. Your options:" \
+    error_exit "pixman not present. Your options:" \
        "  (1) Preferred: Install the pixman devel package (any recent" \
        "      distro should have packages as Xorg needs pixman too)." \
        "  (2) Fetch the pixman submodule, using:" \
@@ -3035,6 +2928,16 @@ else
  tpm_passthrough=no
 fi

+##########################################
+# adjust virtio-blk-data-plane based on linux-aio
+
+if test "$virtio_blk_data_plane" = "yes" -a \
+	"$linux_aio" != "yes" ; then
+  error_exit "virtio-blk-data-plane requires Linux AIO, please try --enable-linux-aio"
+elif test -z "$virtio_blk_data_plane" ; then
+  virtio_blk_data_plane=$linux_aio
+fi
+
 ##########################################
 # attr probe

@@ -3115,11 +3018,9 @@ fi
 if test "$fdt" != "no" ; then
  fdt_libs="-lfdt"
  # explicitly check for libfdt_env.h as it is missing in some stable installs
-  # and test for required functions to make sure we are on a version >= 1.4.0
  cat > $TMPC << EOF
-#include <libfdt.h>
 #include <libfdt_env.h>
-int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
+int main(void) { return 0; }
 EOF
  if compile_prog "" "$fdt_libs" ; then
    # system DTC is good - use it
@@ -3137,7 +3038,7 @@ EOF
    fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
  elif test "$fdt" = "yes" ; then
    # have neither and want - prompt for system/submodule install
-    error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
+    error_exit "DTC (libfdt) not present. Your options:" \
        "  (1) Preferred: Install the DTC (libfdt) devel package" \
        "  (2) Fetch the DTC submodule, using:" \
        "      git submodule update --init dtc"
@@ -3151,71 +3052,26 @@ fi
 libs_softmmu="$libs_softmmu $fdt_libs"

 ##########################################
-# opengl probe (for sdl2, milkymist-tmu2)
-
 # GLX probe, used by milkymist-tmu2
-# this is temporary, code will be switched to egl mid-term.
-cat > $TMPC << EOF
+if test "$glx" != "no" ; then
+  glx_libs="-lGL -lX11"
+  cat > $TMPC << EOF
 #include <X11/Xlib.h>
 #include <GL/gl.h>
 #include <GL/glx.h>
 int main(void) { glBegin(0); glXQueryVersion(0,0,0); return 0; }
 EOF
-if compile_prog "" "-lGL -lX11" ; then
-  have_glx=yes
-else
-  have_glx=no
-fi
-
-if test "$opengl" != "no" ; then
-  opengl_pkgs="gl glesv2 epoxy egl"
-  if $pkg_config $opengl_pkgs x11 && test "$have_glx" = "yes"; then
-    opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags"
-    opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs"
-    opengl=yes
+  if compile_prog "" "-lGL -lX11" ; then
+    glx=yes
  else
-    if test "$opengl" = "yes" ; then
-      feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs"
+    if test "$glx" = "yes" ; then
+      feature_not_found "glx" "Install GL devel (e.g. MESA)"
    fi
-    opengl_cflags=""
-    opengl_libs=""
-    opengl=no
+    glx_libs=
+    glx=no
  fi
 fi

-
-##########################################
-# archipelago probe
-if test "$archipelago" != "no" ; then
-    cat > $TMPC <<EOF
-#include <stdio.h>
-#include <xseg/xseg.h>
-#include <xseg/protocol.h>
-int main(void) {
-    xseg_initialize();
-    return 0;
-}
-EOF
-    archipelago_libs=-lxseg
-    if compile_prog "" "$archipelago_libs"; then
-        archipelago="yes"
-        libs_tools="$archipelago_libs $libs_tools"
-        libs_softmmu="$archipelago_libs $libs_softmmu"
-
-	echo "WARNING: Please check the licenses of QEMU and libxseg carefully."
-	echo "GPLv3 versions of libxseg may not be compatible with QEMU's"
-	echo "license and therefore prevent redistribution."
-	echo
-	echo "To disable Archipelago, use --disable-archipelago"
-    else
-      if test "$archipelago" = "yes" ; then
-        feature_not_found "Archipelago backend support" "Install libxseg devel"
-      fi
-      archipelago="no"
-    fi
-fi
-
-
 ##########################################
 # glusterfs probe
 if test "$glusterfs" != "no" ; then
@@ -3231,8 +3087,7 @@ if test "$glusterfs" != "no" ; then
    fi
  else
    if test "$glusterfs" = "yes" ; then
-      feature_not_found "GlusterFS backend support" \
-          "Install glusterfs-api devel >= 3"
+      feature_not_found "GlusterFS backend support" "Install glusterfs-api devel"
    fi
    glusterfs="no"
  fi
@@ -3363,22 +3218,6 @@ EOF
  fi
 fi

-##########################################
-# tcmalloc probe
-
-if test "$tcmalloc" = "yes" ; then
-  cat > $TMPC << EOF
-#include <stdlib.h>
-int main(void) { malloc(1); return 0; }
-EOF
-
-  if compile_prog "" "-ltcmalloc" ; then
-    LIBS="-ltcmalloc $LIBS"
-  else
-    feature_not_found "tcmalloc" "install gperftools devel"
-  fi
-fi
-
 ##########################################
 # signalfd probe
 signalfd="no"
@@ -3438,37 +3277,6 @@ if compile_prog "" "" ; then
  fallocate_punch_hole=yes
 fi

-# check that fallocate supports range zeroing inside the file
-fallocate_zero_range=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-#include <linux/falloc.h>
-
-int main(void)
-{
-    fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  fallocate_zero_range=yes
-fi
-
-# check for posix_fallocate
-posix_fallocate=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-
-int main(void)
-{
-    posix_fallocate(0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    posix_fallocate=yes
-fi
-
 # check for sync_file_range
 sync_file_range=no
 cat > $TMPC << EOF
@@ -3613,37 +3421,6 @@ if compile_prog "" "" ; then
  sendfile=yes
 fi

-# check for timerfd support (glibc 2.8 and newer)
-timerfd=no
-cat > $TMPC << EOF
-#include <sys/timerfd.h>
-
-int main(void)
-{
-    return(timerfd_create(CLOCK_REALTIME, 0));
-}
-EOF
-if compile_prog "" "" ; then
-  timerfd=yes
-fi
-
-# check for setns and unshare support
-setns=no
-cat > $TMPC << EOF
-#include <sched.h>
-
-int main(void)
-{
-    int ret;
-    ret = setns(0, 0);
-    ret = unshare(0);
-    return ret;
-}
-EOF
-if compile_prog "" "" ; then
-  setns=yes
-fi
-
 # Check if tools are available to build documentation.
 if test "$docs" != "no" ; then
  if has makeinfo && has pod2man; then
@@ -3755,8 +3532,7 @@ EOF
    spice_server_version=$($pkg_config --modversion spice-server)
  else
    if test "$spice" = "yes" ; then
-      feature_not_found "spice" \
-          "Install spice-server(>=0.12.0) and spice-protocol(>=0.12.3) devel"
+      feature_not_found "spice" "Install spice-server and spice-protocol devel"
    fi
    spice="no"
  fi
@@ -3787,7 +3563,7 @@ EOF
        smartcard_nss="yes"
    else
        if test "$smartcard_nss" = "yes"; then
-            feature_not_found "nss" "Install nss devel >= 3.12.8"
+            feature_not_found "nss" "Install nss devel"
        fi
        smartcard_nss="no"
    fi
@@ -3803,7 +3579,7 @@ if test "$libusb" != "no" ; then
        libs_softmmu="$libs_softmmu $libusb_libs"
    else
        if test "$libusb" = "yes"; then
-            feature_not_found "libusb" "Install libusb devel >= 1.0.13"
+            feature_not_found "libusb" "Install libusb devel"
        fi
        libusb="no"
    fi
@@ -4117,11 +3893,12 @@ else
 fi

 ########################################
-# check if we have valgrind/valgrind.h
+# check if we have valgrind/valgrind.h and valgrind/memcheck.h

 valgrind_h=no
 cat > $TMPC << EOF
 #include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
 int main(void) {
  return 0;
 }
@@ -4207,33 +3984,6 @@ if compile_prog "" "" ; then
    getauxval=yes
 fi

-########################################
-# check if ccache is interfering with
-# semantic analysis of macros
-
-ccache_cpp2=no
-cat > $TMPC << EOF
-static const int Z = 1;
-#define fn() ({ Z; })
-#define TAUT(X) ((X) == Z)
-#define PAREN(X, Y) (X == Y)
-#define ID(X) (X)
-int main(int argc, char *argv[])
-{
-    int x = 0, y = 0;
-    x = ID(x);
-    x = fn();
-    fn();
-    if (PAREN(x, y)) return 0;
-    if (TAUT(Z)) return 0;
-    return 0;
-}
-EOF
-
-if ! compile_object "-Werror"; then
-    ccache_cpp2=yes
-fi
-
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -4254,7 +4004,7 @@ if test "$libnfs" != "no" ; then
    LIBS="$LIBS $libnfs_libs"
  else
    if test "$libnfs" = "yes" ; then
-      feature_not_found "libnfs" "Install libnfs devel >= 1.9.3"
+      feature_not_found "libnfs" "Install libnfs devel"
    fi
    libnfs="no"
  fi
@@ -4384,9 +4134,9 @@ EOF
  fi
 fi

-# prepend pixman and ftd flags after all config tests are done
-QEMU_CFLAGS="$pixman_cflags $fdt_cflags $QEMU_CFLAGS"
-libs_softmmu="$pixman_libs $libs_softmmu"
+# add pixman and fdt flags after all config tests are done
+QEMU_CFLAGS="$QEMU_CFLAGS $pixman_cflags $fdt_cflags"
+libs_softmmu="$libs_softmmu $pixman_libs"

 echo "Install prefix    $prefix"
 echo "BIOS directory    `eval echo $qemu_datadir`"
@@ -4455,9 +4205,6 @@ if test -n "$sparc_cpu"; then
    echo "Target Sparc Arch $sparc_cpu"
 fi
 echo "xen support       $xen"
-if test "$xen" = "yes" ; then
-  echo "xen ctrl version  $xen_ctrl_version"
-fi
 echo "brlapi support    $brlapi"
 echo "bluez  support    $bluez"
 echo "Documentation     $docs"
@@ -4495,7 +4242,7 @@ echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
 echo "libusb            $libusb"
 echo "usb net redir     $usb_redir"
-echo "OpenGL support    $opengl"
+echo "GLX support       $glx"
 echo "libiscsi support  $libiscsi"
 echo "libnfs support    $libnfs"
 echo "build guest agent $guest_agent"
@@ -4504,7 +4251,7 @@ echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
 echo "coroutine pool    $coroutine_pool"
 echo "GlusterFS support $glusterfs"
-echo "Archipelago support $archipelago"
+echo "virtio-blk-data-plane $virtio_blk_data_plane"
 echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
 echo "TPM support       $tpm"
@@ -4515,9 +4262,7 @@ echo "vhdx              $vhdx"
 echo "Quorum            $quorum"
 echo "lzo support       $lzo"
 echo "snappy support    $snappy"
-echo "bzip2 support     $bzip2"
 echo "NUMA host support $numa"
-echo "tcmalloc support  $tcmalloc"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -4715,12 +4460,6 @@ fi
 if test "$fallocate_punch_hole" = "yes" ; then
  echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
 fi
-if test "$fallocate_zero_range" = "yes" ; then
-  echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak
-fi
-if test "$posix_fallocate" = "yes" ; then
-  echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak
-fi
 if test "$sync_file_range" = "yes" ; then
  echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
 fi
@@ -4748,12 +4487,6 @@ fi
 if test "$sendfile" = "yes" ; then
  echo "CONFIG_SENDFILE=y" >> $config_host_mak
 fi
-if test "$timerfd" = "yes" ; then
-  echo "CONFIG_TIMERFD=y" >> $config_host_mak
-fi
-if test "$setns" = "yes" ; then
-  echo "CONFIG_SETNS=y" >> $config_host_mak
-fi
 if test "$inotify" = "yes" ; then
  echo "CONFIG_INOTIFY=y" >> $config_host_mak
 fi
@@ -4778,9 +4511,6 @@ if test "$bluez" = "yes" ; then
  echo "CONFIG_BLUEZ=y" >> $config_host_mak
  echo "BLUEZ_CFLAGS=$bluez_cflags" >> $config_host_mak
 fi
-if test "glib_subprocess" = "yes" ; then
-  echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak
-fi
 echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
 if test "$gtk" = "yes" ; then
  echo "CONFIG_GTK=y" >> $config_host_mak
@@ -4862,10 +4592,9 @@ if test "$usb_redir" = "yes" ; then
  echo "CONFIG_USB_REDIR=y" >> $config_host_mak
 fi

-if test "$opengl" = "yes" ; then
-  echo "CONFIG_OPENGL=y" >> $config_host_mak
-  echo "OPENGL_CFLAGS=$opengl_cflags" >> $config_host_mak
-  echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak
+if test "$glx" = "yes" ; then
+  echo "CONFIG_GLX=y" >> $config_host_mak
+  echo "GLX_LIBS=$glx_libs" >> $config_host_mak
 fi

 if test "$lzo" = "yes" ; then
@@ -4876,11 +4605,6 @@ if test "$snappy" = "yes" ; then
  echo "CONFIG_SNAPPY=y" >> $config_host_mak
 fi

-if test "$bzip2" = "yes" ; then
-  echo "CONFIG_BZIP2=y" >> $config_host_mak
-  echo "BZIP2_LIBS=-lbz2" >> $config_host_mak
-fi
-
 if test "$libiscsi" = "yes" ; then
  echo "CONFIG_LIBISCSI=m" >> $config_host_mak
  echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
@@ -4965,11 +4689,6 @@ if test "$glusterfs_zerofill" = "yes" ; then
  echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
 fi

-if test "$archipelago" = "yes" ; then
-  echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak
-  echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak
-fi
-
 if test "$libssh2" = "yes" ; then
  echo "CONFIG_LIBSSH2=m" >> $config_host_mak
  echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
@@ -4980,6 +4699,10 @@ if test "$quorum" = "yes" ; then
  echo "CONFIG_QUORUM=y" >> $config_host_mak
 fi

+if test "$virtio_blk_data_plane" = "yes" ; then
+  echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
+fi
+
 if test "$vhdx" = "yes" ; then
  echo "CONFIG_VHDX=y" >> $config_host_mak
 fi
@@ -5086,7 +4809,6 @@ echo "AS=$as" >> $config_host_mak
 echo "CPP=$cpp" >> $config_host_mak
 echo "OBJCOPY=$objcopy" >> $config_host_mak
 echo "LD=$ld" >> $config_host_mak
-echo "NM=$nm" >> $config_host_mak
 echo "WINDRES=$windres" >> $config_host_mak
 echo "LIBTOOL=$libtool" >> $config_host_mak
 echo "CFLAGS=$CFLAGS" >> $config_host_mak
@@ -5095,8 +4817,6 @@ echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
 echo "QEMU_INCLUDES=$QEMU_INCLUDES" >> $config_host_mak
 if test "$sparse" = "yes" ; then
  echo "CC           := REAL_CC=\"\$(CC)\" cgcc"       >> $config_host_mak
-  echo "CPP          := REAL_CC=\"\$(CPP)\" cgcc"      >> $config_host_mak
-  echo "CXX          := REAL_CC=\"\$(CXX)\" cgcc"      >> $config_host_mak
  echo "HOST_CC      := REAL_CC=\"\$(HOST_CC)\" cgcc"  >> $config_host_mak
  echo "QEMU_CFLAGS  += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
 fi
@@ -5217,7 +4937,7 @@ case "$target_name" in
  aarch64)
    TARGET_BASE_ARCH=arm
    bflt="yes"
-    gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
+    gdb_xml_files="aarch64-core.xml aarch64-fpu.xml"
  ;;
  cris)
  ;;
@@ -5294,9 +5014,6 @@ case "$target_name" in
    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml"
-  ;;
-  tricore)
  ;;
  unicore32)
  ;;
@@ -5351,9 +5068,7 @@ case "$target_name" in
      \( "$target_name" = "ppcemb" -a "$cpu" = "ppc64" \) -o \
      \( "$target_name" = "mipsel" -a "$cpu" = "mips" \) -o \
      \( "$target_name" = "x86_64" -a "$cpu" = "i386"   \) -o \
-      \( "$target_name" = "i386"   -a "$cpu" = "x86_64" \) -o \
-      \( "$target_name" = "x86_64" -a "$cpu" = "x32"   \) -o \
-      \( "$target_name" = "i386"   -a "$cpu" = "x32" \) \) ; then
+      \( "$target_name" = "i386"   -a "$cpu" = "x86_64" \) \) ; then
      echo "CONFIG_KVM=y" >> $config_target_mak
      if test "$vhost_net" = "yes" ; then
        echo "CONFIG_VHOST_NET=y" >> $config_target_mak
@@ -5527,10 +5242,6 @@ if test "$numa" = "yes"; then
  echo "CONFIG_NUMA=y" >> $config_host_mak
 fi

-if test "$ccache_cpp2" = "yes"; then
-  echo "export CCACHE_CPP2=y" >> $config_host_mak
-fi
-
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
 DIRS="$DIRS fsdev"
@@ -5582,6 +5293,10 @@ for rom in seabios vgabios ; do
    echo "LD=$ld" >> $config_mak
 done

+if test "$docs" = "yes" ; then
+  mkdir -p QMP
+fi
+
 # set up qemu-iotests in this build directory
 iotests_common_env="tests/qemu-iotests/common.env"
 iotests_check="tests/qemu-iotests/check"
--- a/coroutine-sigaltstack.c
+++ b/coroutine-sigaltstack.c
@@ -155,7 +155,7 @@ Coroutine *qemu_coroutine_new(void)
    stack_t oss;
    sigset_t sigs;
    sigset_t osigs;
-    sigjmp_buf old_env;
+    jmp_buf old_env;

    /* The way to manipulate stack is with the sigaltstack function. We
     * prepare a stack, with it delivering a signal to ourselves and then
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include <setjmp.h>
 #include <stdint.h>
+#include <pthread.h>
 #include <ucontext.h>
 #include "qemu-common.h"
 #include "block/coroutine_int.h"
@@ -47,8 +48,15 @@ typedef struct {
 /**
 * Per-thread coroutine bookkeeping
 */
-static __thread CoroutineUContext leader;
-static __thread Coroutine *current;
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;

 /*
 * va_args to makecontext() must be type 'int', so passing
@@ -60,6 +68,36 @@ union cc_arg {
    int i[2];
 };

+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+    /* First call on this thread: allocate its state, starting in the leader */
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+    /* Key destructor (see coroutine_init()): releases this thread's state */
+    g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+    /* pthread_key_create() reports failure via its return value, not errno */
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(ret));
+        abort();
+    }
+}
+
 static void coroutine_trampoline(int i0, int i1)
 {
    union cc_arg arg;
@@ -155,23 +193,15 @@ void qemu_coroutine_delete(Coroutine *co_)
    g_free(co);
 }

-/* This function is marked noinline to prevent GCC from inlining it
- * into coroutine_trampoline(). If we allow it to do that then it
- * hoists the code to get the address of the TLS variable "current"
- * out of the while() loop. This is an invalid transformation because
- * the sigsetjmp() call may be called when running thread A but
- * return in thread B, and so we might be in a different thread
- * context each time round the loop.
- */
-CoroutineAction __attribute__((noinline))
-qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                      CoroutineAction action)
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
 {
    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
    int ret;

-    current = to_;
+    s->current = to_;

    ret = sigsetjmp(from->env, 0);
    if (ret == 0) {
@@ -182,13 +212,14 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,

 Coroutine *qemu_coroutine_self(void)
 {
-    if (!current) {
-        current = &leader.base;
-    }
-    return current;
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
 }

 bool qemu_in_coroutine(void)
 {
-    return current && current->caller;
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
 }
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -18,114 +18,10 @@
 */
 #include "config.h"
 #include "cpu.h"
-#include "trace.h"
 #include "disas/disas.h"
 #include "tcg.h"
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
-#include "qemu/timer.h"
-#include "exec/address-spaces.h"
-#include "exec/memory-internal.h"
-#include "qemu/rcu.h"
-
-/* -icount align implementation. */
-
-typedef struct SyncClocks {
-    int64_t diff_clk;
-    int64_t last_cpu_icount;
-    int64_t realtime_clock;
-} SyncClocks;
-
-#if !defined(CONFIG_USER_ONLY)
-/* Allow the guest to have a max 3ms advance.
- * The difference between the 2 clocks could therefore
- * oscillate around 0.
- */
-#define VM_CLOCK_ADVANCE 3000000
-#define THRESHOLD_REDUCE 1.5
-#define MAX_DELAY_PRINT_RATE 2000000000LL
-#define MAX_NB_PRINTS 100
-
-static void align_clocks(SyncClocks *sc, const CPUState *cpu)
-{
-    int64_t cpu_icount;
-
-    if (!icount_align_option) {
-        return;
-    }
-
-    cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
-    sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount);
-    sc->last_cpu_icount = cpu_icount;
-
-    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
-#ifndef _WIN32
-        struct timespec sleep_delay, rem_delay;
-        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
-        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
-        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
-            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
-        } else {
-            sc->diff_clk = 0;
-        }
-#else
-        Sleep(sc->diff_clk / SCALE_MS);
-        sc->diff_clk = 0;
-#endif
-    }
-}
-
-static void print_delay(const SyncClocks *sc)
-{
-    static float threshold_delay;
-    static int64_t last_realtime_clock;
-    static int nb_prints;
-
-    if (icount_align_option &&
-        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
-        nb_prints < MAX_NB_PRINTS) {
-        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
-            (-sc->diff_clk / (float)1000000000LL <
-             (threshold_delay - THRESHOLD_REDUCE))) {
-            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
-            printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
-                   threshold_delay - 1,
-                   threshold_delay);
-            nb_prints++;
-            last_realtime_clock = sc->realtime_clock;
-        }
-    }
-}
-
-static void init_delay_params(SyncClocks *sc,
-                              const CPUState *cpu)
-{
-    if (!icount_align_option) {
-        return;
-    }
-    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
-    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
-    sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
-    if (sc->diff_clk < max_delay) {
-        max_delay = sc->diff_clk;
-    }
-    if (sc->diff_clk > max_advance) {
-        max_advance = sc->diff_clk;
-    }
-
-    /* Print every 2s max if the guest is late. We limit the number
-       of printed messages to NB_PRINT_MAX(currently 100) */
-    print_delay(sc);
-}
-#else
-static void align_clocks(SyncClocks *sc, const CPUState *cpu)
-{
-}
-
-static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
-{
-}
-#endif /* CONFIG USER ONLY */

 void cpu_loop_exit(CPUState *cpu)
 {
@@ -144,33 +40,6 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
    cpu->exception_index = -1;
    siglongjmp(cpu->jmp_env, 1);
 }
-
-void cpu_reload_memory_map(CPUState *cpu)
-{
-    AddressSpaceDispatch *d;
-
-    if (qemu_in_vcpu_thread()) {
-        /* Do not let the guest prolong the critical section as much as it
-         * as it desires.
-         *
-         * Currently, this is prevented by the I/O thread's periodinc kicking
-         * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread)
-         * but this will go away once TCG's execution moves out of the global
-         * mutex.
-         *
-         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
-         * only protects cpu->as->dispatch.  Since we reload it below, we can
-         * split the critical section.
-         */
-        rcu_read_unlock();
-        rcu_read_lock();
-    }
-
-    /* The CPU and TLB are protected by the iothread lock.  */
-    d = atomic_rcu_read(&cpu->as->dispatch);
-    cpu->memory_dispatch = d;
-    tlb_flush(cpu, 1);
-}
 #endif

 /* Execute a TB, and fix up the CPU state afterwards if necessary */
@@ -195,12 +64,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
    }
 #endif /* DEBUG_DISAS */

-    cpu->can_do_io = 0;
    next_tb = tcg_qemu_tb_exec(env, tb_ptr);
-    cpu->can_do_io = 1;
-    trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
-                       next_tb & TB_EXIT_MASK);
-
    if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
@@ -231,22 +95,16 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
 {
    CPUState *cpu = ENV_GET_CPU(env);
    TranslationBlock *tb;
-    target_ulong pc = orig_tb->pc;
-    target_ulong cs_base = orig_tb->cs_base;
-    uint64_t flags = orig_tb->flags;

    /* Should never happen.
       We only end up here when an existing TB is too long.  */
    if (max_cycles > CF_COUNT_MASK)
        max_cycles = CF_COUNT_MASK;

-    /* tb_gen_code can flush our orig_tb, invalidate it now */
-    tb_phys_invalidate(orig_tb, -1);
-    tb = tb_gen_code(cpu, pc, cs_base, flags,
-                     max_cycles | CF_NOCACHE);
+    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
+                     max_cycles);
    cpu->current_tb = tb;
    /* execute the generated code */
-    trace_exec_tb_nocache(tb, tb->pc);
    cpu_tb_exec(cpu, tb->tc_ptr);
    cpu->current_tb = NULL;
    tb_phys_invalidate(tb, -1);
@@ -329,10 +187,16 @@ static inline TranslationBlock *tb_find_fast(CPUArchState *env)
    return tb;
 }

+static CPUDebugExcpHandler *debug_excp_handler;
+/* Set the hook cpu_handle_debug_exception() calls; NULL disables it */
+void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
+{
+    debug_excp_handler = handler;
+}
+
 static void cpu_handle_debug_exception(CPUArchState *env)
 {
    CPUState *cpu = ENV_GET_CPU(env);
-    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
@@ -340,8 +204,9 @@ static void cpu_handle_debug_exception(CPUArchState *env)
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
-
-    cc->debug_excp_handler(cpu);
+    if (debug_excp_handler) {
+        debug_excp_handler(env);
+    }
 }

 /* main execution loop */
@@ -351,7 +216,10 @@ volatile sig_atomic_t exit_request;
 int cpu_exec(CPUArchState *env)
 {
    CPUState *cpu = ENV_GET_CPU(env);
+#if !(defined(CONFIG_USER_ONLY) && \
+      (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
    CPUClass *cc = CPU_GET_CLASS(cpu);
+#endif
 #ifdef TARGET_I386
    X86CPU *x86_cpu = X86_CPU(cpu);
 #endif
@@ -359,8 +227,6 @@ int cpu_exec(CPUArchState *env)
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    uintptr_t next_tb;
-    SyncClocks sc;
-
    /* This must be volatile so it is not trashed by longjmp() */
    volatile bool have_tb_lock = false;

@@ -382,20 +248,40 @@ int cpu_exec(CPUArchState *env)
     * an instruction scheduling constraint on modern architectures.  */
    smp_mb();

-    rcu_read_lock();
-
    if (unlikely(exit_request)) {
        cpu->exit_request = 1;
    }

-    cc->cpu_exec_enter(cpu);
-
-    /* Calculate difference between guest clock and host clock.
-     * This delay includes the delay of the last cycle, so
-     * what we have to do is sleep until it is 0. As for the
-     * advance/delay we gain here, we try to fix it next time.
-     */
-    init_delay_params(&sc, cpu);
+#if defined(TARGET_I386)
+    /* put eflags in CPU temporary format */
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    env->df = 1 - (2 * ((env->eflags >> 10) & 1));
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+#elif defined(TARGET_SPARC)
+#elif defined(TARGET_M68K)
+    env->cc_op = CC_OP_FLAGS;
+    env->cc_dest = env->sr & 0xf;
+    env->cc_x = (env->sr >> 4) & 1;
+#elif defined(TARGET_ALPHA)
+#elif defined(TARGET_ARM)
+#elif defined(TARGET_UNICORE32)
+#elif defined(TARGET_PPC)
+    env->reserve_addr = -1;
+#elif defined(TARGET_LM32)
+#elif defined(TARGET_MICROBLAZE)
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_MOXIE)
+#elif defined(TARGET_OPENRISC)
+#elif defined(TARGET_SH4)
+#elif defined(TARGET_CRIS)
+#elif defined(TARGET_S390X)
+#elif defined(TARGET_XTENSA)
+    /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+    cpu->exception_index = -1;

    /* prepare setjmp context for exception handling */
    for(;;) {
@@ -408,7 +294,6 @@ int cpu_exec(CPUArchState *env)
                    if (ret == EXCP_DEBUG) {
                        cpu_handle_debug_exception(env);
                    }
-                    cpu->exception_index = -1;
                    break;
                } else {
 #if defined(CONFIG_USER_ONLY)
@@ -419,7 +304,6 @@ int cpu_exec(CPUArchState *env)
                    cc->do_interrupt(cpu);
 #endif
                    ret = cpu->exception_index;
-                    cpu->exception_index = -1;
                    break;
 #else
                    cc->do_interrupt(cpu);
@@ -441,12 +325,16 @@ int cpu_exec(CPUArchState *env)
                        cpu->exception_index = EXCP_DEBUG;
                        cpu_loop_exit(cpu);
                    }
+#if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
+    defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
+    defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32)
                    if (interrupt_request & CPU_INTERRUPT_HALT) {
                        cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
                        cpu->halted = 1;
                        cpu->exception_index = EXCP_HLT;
                        cpu_loop_exit(cpu);
                    }
+#endif
 #if defined(TARGET_I386)
                    if (interrupt_request & CPU_INTERRUPT_INIT) {
                        cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
@@ -459,15 +347,251 @@ int cpu_exec(CPUArchState *env)
                        cpu_reset(cpu);
                    }
 #endif
-                    /* The target hook has 3 exit conditions:
-                       False when the interrupt isn't processed,
-                       True when it is, and we should restart on a new TB,
-                       and via longjmp via cpu_loop_exit.  */
-                    if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
+#if defined(TARGET_I386)
+#if !defined(CONFIG_USER_ONLY)
+                    if (interrupt_request & CPU_INTERRUPT_POLL) {
+                        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
+                        apic_poll_irq(x86_cpu->apic_state);
+                    }
+#endif
+                    if (interrupt_request & CPU_INTERRUPT_SIPI) {
+                            do_cpu_sipi(x86_cpu);
+                    } else if (env->hflags2 & HF2_GIF_MASK) {
+                        if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+                            !(env->hflags & HF_SMM_MASK)) {
+                            cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
+                                                          0);
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
+                            do_smm_enter(x86_cpu);
+                            next_tb = 0;
+                        } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
+                                   !(env->hflags2 & HF2_NMI_MASK)) {
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
+                            env->hflags2 |= HF2_NMI_MASK;
+                            do_interrupt_x86_hardirq(env, EXCP02_NMI, 1);
+                            next_tb = 0;
+                        } else if (interrupt_request & CPU_INTERRUPT_MCE) {
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_MCE;
+                            do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0);
+                            next_tb = 0;
+                        } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                                   (((env->hflags2 & HF2_VINTR_MASK) && 
+                                     (env->hflags2 & HF2_HIF_MASK)) ||
+                                    (!(env->hflags2 & HF2_VINTR_MASK) && 
+                                     (env->eflags & IF_MASK && 
+                                      !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
+                            int intno;
+                            cpu_svm_check_intercept_param(env, SVM_EXIT_INTR,
+                                                          0);
+                            cpu->interrupt_request &= ~(CPU_INTERRUPT_HARD |
+                                                        CPU_INTERRUPT_VIRQ);
+                            intno = cpu_get_pic_interrupt(env);
+                            qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing hardware INT=0x%02x\n", intno);
+                            do_interrupt_x86_hardirq(env, intno, 1);
+                            /* ensure that no TB jump will be modified as
+                               the program flow was changed */
+                            next_tb = 0;
+#if !defined(CONFIG_USER_ONLY)
+                        } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) &&
+                                   (env->eflags & IF_MASK) && 
+                                   !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
+                            int intno;
+                            /* FIXME: this should respect TPR */
+                            cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR,
+                                                          0);
+                            intno = ldl_phys(cpu->as,
+                                             env->vm_vmcb
+                                             + offsetof(struct vmcb,
+                                                        control.int_vector));
+                            qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
+                            do_interrupt_x86_hardirq(env, intno, 1);
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+                            next_tb = 0;
+#endif
+                        }
+                    }
+#elif defined(TARGET_PPC)
+                    if (interrupt_request & CPU_INTERRUPT_HARD) {
+                        ppc_hw_interrupt(env);
+                        if (env->pending_interrupts == 0) {
+                            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
+                        }
                        next_tb = 0;
                    }
-                    /* Don't use the cached interrupt_request value,
-                       do_interrupt may have updated the EXITTB flag. */
+#elif defined(TARGET_LM32)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD)
+                        && (env->ie & IE_IE)) {
+                        cpu->exception_index = EXCP_IRQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_MICROBLAZE)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD)
+                        && (env->sregs[SR_MSR] & MSR_IE)
+                        && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))
+                        && !(env->iflags & (D_FLAG | IMM_FLAG))) {
+                        cpu->exception_index = EXCP_IRQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_MIPS)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                        cpu_mips_hw_interrupts_pending(env)) {
+                        /* Raise it */
+                        cpu->exception_index = EXCP_EXT_INTERRUPT;
+                        env->error_code = 0;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_OPENRISC)
+                    {
+                        int idx = -1;
+                        if ((interrupt_request & CPU_INTERRUPT_HARD)
+                            && (env->sr & SR_IEE)) {
+                            idx = EXCP_INT;
+                        }
+                        if ((interrupt_request & CPU_INTERRUPT_TIMER)
+                            && (env->sr & SR_TEE)) {
+                            idx = EXCP_TICK;
+                        }
+                        if (idx >= 0) {
+                            cpu->exception_index = idx;
+                            cc->do_interrupt(cpu);
+                            next_tb = 0;
+                        }
+                    }
+#elif defined(TARGET_SPARC)
+                    if (interrupt_request & CPU_INTERRUPT_HARD) {
+                        if (cpu_interrupts_enabled(env) &&
+                            env->interrupt_index > 0) {
+                            int pil = env->interrupt_index & 0xf;
+                            int type = env->interrupt_index & 0xf0;
+
+                            if (((type == TT_EXTINT) &&
+                                  cpu_pil_allowed(env, pil)) ||
+                                  type != TT_EXTINT) {
+                                cpu->exception_index = env->interrupt_index;
+                                cc->do_interrupt(cpu);
+                                next_tb = 0;
+                            }
+                        }
+                    }
+#elif defined(TARGET_ARM)
+                    if (interrupt_request & CPU_INTERRUPT_FIQ
+                        && !(env->daif & PSTATE_F)) {
+                        cpu->exception_index = EXCP_FIQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+                    /* ARMv7-M interrupt return works by loading a magic value
+                       into the PC.  On real hardware the load causes the
+                       return to occur.  The qemu implementation performs the
+                       jump normally, then does the exception return when the
+                       CPU tries to execute code at the magic address.
+                       This will cause the magic PC value to be pushed to
+                       the stack if an interrupt occurred at the wrong time.
+                       We avoid this by disabling interrupts when
+                       pc contains a magic address.  */
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && ((IS_M(env) && env->regs[15] < 0xfffffff0)
+                            || !(env->daif & PSTATE_I))) {
+                        cpu->exception_index = EXCP_IRQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_UNICORE32)
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && !(env->uncached_asr & ASR_I)) {
+                        cpu->exception_index = UC32_EXCP_INTR;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_SH4)
+                    if (interrupt_request & CPU_INTERRUPT_HARD) {
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_ALPHA)
+                    {
+                        int idx = -1;
+                        /* ??? This hard-codes the OSF/1 interrupt levels.  */
+                        switch (env->pal_mode ? 7 : env->ps & PS_INT_MASK) {
+                        case 0 ... 3:
+                            if (interrupt_request & CPU_INTERRUPT_HARD) {
+                                idx = EXCP_DEV_INTERRUPT;
+                            }
+                            /* FALLTHRU */
+                        case 4:
+                            if (interrupt_request & CPU_INTERRUPT_TIMER) {
+                                idx = EXCP_CLK_INTERRUPT;
+                            }
+                            /* FALLTHRU */
+                        case 5:
+                            if (interrupt_request & CPU_INTERRUPT_SMP) {
+                                idx = EXCP_SMP_INTERRUPT;
+                            }
+                            /* FALLTHRU */
+                        case 6:
+                            if (interrupt_request & CPU_INTERRUPT_MCHK) {
+                                idx = EXCP_MCHK;
+                            }
+                        }
+                        if (idx >= 0) {
+                            cpu->exception_index = idx;
+                            env->error_code = 0;
+                            cc->do_interrupt(cpu);
+                            next_tb = 0;
+                        }
+                    }
+#elif defined(TARGET_CRIS)
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && (env->pregs[PR_CCS] & I_FLAG)
+                        && !env->locked_irq) {
+                        cpu->exception_index = EXCP_IRQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+                    if (interrupt_request & CPU_INTERRUPT_NMI) {
+                        unsigned int m_flag_archval;
+                        if (env->pregs[PR_VR] < 32) {
+                            m_flag_archval = M_FLAG_V10;
+                        } else {
+                            m_flag_archval = M_FLAG_V32;
+                        }
+                        if ((env->pregs[PR_CCS] & m_flag_archval)) {
+                            cpu->exception_index = EXCP_NMI;
+                            cc->do_interrupt(cpu);
+                            next_tb = 0;
+                        }
+                    }
+#elif defined(TARGET_M68K)
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && ((env->sr & SR_I) >> SR_I_SHIFT)
+                            < env->pending_level) {
+                        /* Real hardware gets the interrupt vector via an
+                           IACK cycle at this point.  Current emulated
+                           hardware doesn't rely on this, so we
+                           provide/save the vector when the interrupt is
+                           first signalled.  */
+                        cpu->exception_index = env->pending_vector;
+                        do_interrupt_m68k_hardirq(env);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_S390X) && !defined(CONFIG_USER_ONLY)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                        (env->psw.mask & PSW_MASK_EXT)) {
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#elif defined(TARGET_XTENSA)
+                    if (interrupt_request & CPU_INTERRUPT_HARD) {
+                        cpu->exception_index = EXC_IRQ;
+                        cc->do_interrupt(cpu);
+                        next_tb = 0;
+                    }
+#endif
+                   /* Don't use the cached interrupt_request value,
+                      do_interrupt may have updated the EXITTB flag. */
                    if (cpu->interrupt_request & CPU_INTERRUPT_EXITTB) {
                        cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
                        /* ensure that no TB jump will be modified as
@@ -513,7 +637,6 @@ int cpu_exec(CPUArchState *env)
                cpu->current_tb = tb;
                barrier();
                if (likely(!cpu->exit_request)) {
-                    trace_exec_tb(tb, tb->pc);
                    tc_ptr = tb->tc_ptr;
                    /* execute the generated code */
                    next_tb = cpu_tb_exec(cpu, tc_ptr);
@@ -526,24 +649,29 @@ int cpu_exec(CPUArchState *env)
                         * interrupt_request) which we will handle
                         * next time around the loop.
                         */
+                        tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
                        next_tb = 0;
                        break;
                    case TB_EXIT_ICOUNT_EXPIRED:
                    {
                        /* Instruction counter expired.  */
-                        int insns_left = cpu->icount_decr.u32;
+                        int insns_left;
+                        tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+                        insns_left = cpu->icount_decr.u32;
                        if (cpu->icount_extra && insns_left >= 0) {
                            /* Refill decrementer and continue execution.  */
                            cpu->icount_extra += insns_left;
-                            insns_left = MIN(0xffff, cpu->icount_extra);
+                            if (cpu->icount_extra > 0xffff) {
+                                insns_left = 0xffff;
+                            } else {
+                                insns_left = cpu->icount_extra;
+                            }
                            cpu->icount_extra -= insns_left;
                            cpu->icount_decr.u16.low = insns_left;
                        } else {
                            if (insns_left > 0) {
                                /* Execute remaining instructions.  */
-                                tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
                                cpu_exec_nocache(env, insns_left, tb);
-                                align_clocks(&sc, cpu);
                            }
                            cpu->exception_index = EXCP_INTERRUPT;
                            next_tb = 0;
@@ -556,9 +684,6 @@ int cpu_exec(CPUArchState *env)
                    }
                }
                cpu->current_tb = NULL;
-                /* Try to align the host and virtual clocks
-                   if the guest is in advance */
-                align_clocks(&sc, cpu);
                /* reset soft MMU for next block (it can currently
                   only be set by a memory fault) */
            } /* for(;;) */
@@ -567,8 +692,10 @@ int cpu_exec(CPUArchState *env)
             * local variables as longjmp is marked 'noreturn'. */
            cpu = current_cpu;
            env = cpu->env_ptr;
+#if !(defined(CONFIG_USER_ONLY) && \
+      (defined(TARGET_M68K) || defined(TARGET_PPC) || defined(TARGET_S390X)))
            cc = CPU_GET_CLASS(cpu);
-            cpu->can_do_io = 1;
+#endif
 #ifdef TARGET_I386
            x86_cpu = X86_CPU(cpu);
 #endif
@@ -579,8 +706,35 @@ int cpu_exec(CPUArchState *env)
        }
    } /* for(;;) */

-    cc->cpu_exec_exit(cpu);
-    rcu_read_unlock();
+
+#if defined(TARGET_I386)
+    /* restore flags in standard format */
+    env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
+        | (env->df & DF_MASK);
+#elif defined(TARGET_ARM)
+    /* XXX: Save/restore host FPU exception state?  */
+#elif defined(TARGET_UNICORE32)
+#elif defined(TARGET_SPARC)
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_LM32)
+#elif defined(TARGET_M68K)
+    cpu_m68k_flush_flags(env, env->cc_op);
+    env->cc_op = CC_OP_FLAGS;
+    env->sr = (env->sr & 0xffe0)
+              | env->cc_dest | (env->cc_x << 4);
+#elif defined(TARGET_MICROBLAZE)
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_MOXIE)
+#elif defined(TARGET_OPENRISC)
+#elif defined(TARGET_SH4)
+#elif defined(TARGET_ALPHA)
+#elif defined(TARGET_CRIS)
+#elif defined(TARGET_S390X)
+#elif defined(TARGET_XTENSA)
+    /* XXXXX */
+#else
+#error unsupported target CPU
+#endif

    /* fail safe : never use current_cpu outside cpu_exec() */
    current_cpu = NULL;
--- a/cpus.c
+++ b/cpus.c
@@ -40,7 +40,6 @@
 #include "qemu/bitmap.h"
 #include "qemu/seqlock.h"
 #include "qapi-event.h"
-#include "hw/nmi.h"

 #ifndef _WIN32
 #include "qemu/compatfd.h"
@@ -65,8 +64,6 @@
 #endif /* CONFIG_LINUX */

 static CPUState *next_cpu;
-int64_t max_delay;
-int64_t max_advance;

 bool cpu_is_stopped(CPUState *cpu)
 {
@@ -105,13 +102,17 @@ static bool all_cpu_threads_idle(void)

 /* Protected by TimersState seqlock */

-static bool icount_sleep = true;
-static int64_t vm_clock_warp_start = -1;
+/* Compensate for varying guest execution speed.  */
+static int64_t qemu_icount_bias;
+static int64_t vm_clock_warp_start;
 /* Conversion factor from emulated instructions to virtual clock ticks.  */
 static int icount_time_shift;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10

+/* Only written by TCG thread */
+static int64_t qemu_icount;
+
 static QEMUTimer *icount_rt_timer;
 static QEMUTimer *icount_vm_timer;
 static QEMUTimer *icount_warp_timer;
@@ -128,36 +129,24 @@ typedef struct TimersState {
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
-
-    /* Compensate for varying guest execution speed.  */
-    int64_t qemu_icount_bias;
-    /* Only written by TCG thread */
-    int64_t qemu_icount;
 } TimersState;

 static TimersState timers_state;

-int64_t cpu_get_icount_raw(void)
+/* Return the virtual CPU time, based on the instruction counter.  */
+static int64_t cpu_get_icount_locked(void)
 {
    int64_t icount;
    CPUState *cpu = current_cpu;

-    icount = timers_state.qemu_icount;
+    icount = qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
-            fprintf(stderr, "Bad icount read\n");
-            exit(1);
+            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
-    return icount;
-}
-
-/* Return the virtual CPU time, based on the instruction counter.  */
-static int64_t cpu_get_icount_locked(void)
-{
-    int64_t icount = cpu_get_icount_raw();
-    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
+    return qemu_icount_bias + (icount << icount_time_shift);
 }

 int64_t cpu_get_icount(void)
@@ -173,11 +162,6 @@ int64_t cpu_get_icount(void)
    return icount;
 }

-int64_t cpu_icount_to_ns(int64_t icount)
-{
-    return icount << icount_time_shift;
-}
-
 /* return the host CPU cycle counter and handle stop/restart */
 /* Caller must hold the BQL */
 int64_t cpu_get_ticks(void)
@@ -300,15 +284,14 @@ static void icount_adjust(void)
        icount_time_shift++;
    }
    last_delta = delta;
-    timers_state.qemu_icount_bias = cur_icount
-                              - (timers_state.qemu_icount << icount_time_shift);
+    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }

 static void icount_adjust_rt(void *opaque)
 {
    timer_mod(icount_rt_timer,
-              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
+                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
 }

@@ -336,7 +319,7 @@ static void icount_warp_rt(void *opaque)

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
-        int64_t clock = cpu_get_clock_locked();
+        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
@@ -345,11 +328,12 @@ static void icount_warp_rt(void *opaque)
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
+            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
-            int64_t delta = clock - cur_icount;
+            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
-        timers_state.qemu_icount_bias += warp_delta;
+        qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
@@ -362,19 +346,15 @@ static void icount_warp_rt(void *opaque)
 void qtest_clock_warp(int64_t dest)
 {
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    AioContext *aio_context;
    assert(qtest_enabled());
-    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
-
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
-        timers_state.qemu_icount_bias += warp;
+        qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
-        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -394,18 +374,15 @@ void qemu_clock_warp(QEMUClockType type)
        return;
    }

-    if (icount_sleep) {
-        /*
-         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
-         * This ensures that the deadline for the timer is computed correctly
-         * below.
-         * This also makes sure that the insn counter is synchronized before
-         * the CPU starts running, in case the CPU is woken by an event other
-         * than the earliest QEMU_CLOCK_VIRTUAL timer.
-         */
-        icount_warp_rt(NULL);
-        timer_del(icount_warp_timer);
-    }
+    /*
+     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
+     * This ensures that the deadline for the timer is computed correctly below.
+     * This also makes sure that the insn counter is synchronized before the
+     * CPU starts running, in case the CPU is woken by an event other than
+     * the earliest QEMU_CLOCK_VIRTUAL timer.
+     */
+    icount_warp_rt(NULL);
+    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }
@@ -416,14 +393,9 @@ void qemu_clock_warp(QEMUClockType type)
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
-    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
+    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
-        static bool notified;
-        if (!icount_sleep && !notified) {
-            error_report("WARNING: icount sleep disabled and no active timers");
-            notified = true;
-        }
        return;
    }

@@ -434,59 +406,28 @@ void qemu_clock_warp(QEMUClockType type)
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
+         *
+         * An extreme solution for this problem would be to never let VCPUs
+         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
+         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
+         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
+         * after some "real" time (related to the time left until the next
+         * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
+         * This keeps the warps from being visible externally; for example,
+         * you will not be sending network packets continuously instead of
+         * every 100ms.
         */
-        if (!icount_sleep) {
-            /*
-             * We never let VCPUs sleep in no sleep icount mode.
-             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
-             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
-             * It is useful when we want a deterministic execution time,
-             * isolated from host latencies.
-             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock);
-            timers_state.qemu_icount_bias += deadline;
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
-            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-        } else {
-            /*
-             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
-             * "real" time, (related to the time left until the next event) has
-             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
-             * This avoids that the warps are visible externally; for example,
-             * you will not be sending network packets continuously instead of
-             * every 100ms.
-             */
-            seqlock_write_lock(&timers_state.vm_clock_seqlock);
-            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
-                vm_clock_warp_start = clock;
-            }
-            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
-            timer_mod_anticipate(icount_warp_timer, clock + deadline);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock);
+        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+            vm_clock_warp_start = clock;
        }
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
 }

-static bool icount_state_needed(void *opaque)
-{
-    return use_icount;
-}
-
-/*
- * This is a subsection for icount migration.
- */
-static const VMStateDescription icount_vmstate_timers = {
-    .name = "timer/icount",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT64(qemu_icount_bias, TimersState),
-        VMSTATE_INT64(qemu_icount, TimersState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
@@ -496,59 +437,23 @@ static const VMStateDescription vmstate_timers = {
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
-    },
-    .subsections = (VMStateSubsection[]) {
-        {
-            .vmsd = &icount_vmstate_timers,
-            .needed = icount_state_needed,
-        }, {
-            /* empty */
-        }
    }
 };

-void cpu_ticks_init(void)
+void configure_icount(const char *option)
 {
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
-}
-
-void configure_icount(QemuOpts *opts, Error **errp)
-{
-    const char *option;
-    char *rem_str = NULL;
-
-    option = qemu_opt_get(opts, "shift");
    if (!option) {
-        if (qemu_opt_get(opts, "align") != NULL) {
-            error_setg(errp, "Please specify shift option when using align");
-        }
        return;
    }

-    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
-    if (icount_sleep) {
-        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
-                                         icount_warp_rt, NULL);
-    }
-
-    icount_align_option = qemu_opt_get_bool(opts, "align", false);
-
-    if (icount_align_option && !icount_sleep) {
-        error_setg(errp, "align=on and sleep=no are incompatible");
-    }
+    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+                                          icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
-        errno = 0;
-        icount_time_shift = strtol(option, &rem_str, 0);
-        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
-            error_setg(errp, "icount: Invalid shift value");
-        }
+        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
-    } else if (icount_align_option) {
-        error_setg(errp, "shift=auto and align=on are incompatible");
-    } else if (!icount_sleep) {
-        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;
@@ -562,10 +467,10 @@ void configure_icount(QemuOpts *opts, Error **errp)
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
-    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
-                                   icount_adjust_rt, NULL);
+    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+                                        icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
-                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
+                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
@@ -810,7 +715,7 @@ static void qemu_tcg_init_cpu_signals(void)

 static QemuMutex qemu_global_mutex;
 static QemuCond qemu_io_proceeded_cond;
-static unsigned iothread_requesting_mutex;
+static bool iothread_requesting_mutex;

 static QemuThread io_thread;

@@ -959,7 +864,6 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
@@ -1000,7 +904,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
-    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
@@ -1043,12 +946,11 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
-        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
-    while (first_cpu->stopped) {
+    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
@@ -1057,9 +959,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
        }
    }

-    /* process any pending work */
-    exit_request = 1;
-
    while (1) {
        tcg_exec_all();

@@ -1143,23 +1042,22 @@ bool qemu_cpu_is_self(CPUState *cpu)
    return qemu_thread_is_self(cpu->thread);
 }

-bool qemu_in_vcpu_thread(void)
+static bool qemu_in_vcpu_thread(void)
 {
    return current_cpu && qemu_cpu_is_self(current_cpu);
 }

 void qemu_mutex_lock_iothread(void)
 {
-    atomic_inc(&iothread_requesting_mutex);
-    if (!tcg_enabled() || !first_cpu || !first_cpu->thread) {
+    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
-        atomic_dec(&iothread_requesting_mutex);
    } else {
+        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
-        atomic_dec(&iothread_requesting_mutex);
+        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
 }
@@ -1361,8 +1259,7 @@ static int tcg_cpu_exec(CPUArchState *env)
        int64_t count;
        int64_t deadline;
        int decr;
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                                    + cpu->icount_extra);
+        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
@@ -1377,7 +1274,7 @@ static int tcg_cpu_exec(CPUArchState *env)
        }

        count = qemu_icount_round(deadline);
-        timers_state.qemu_icount += count;
+        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
@@ -1385,13 +1282,12 @@ static int tcg_cpu_exec(CPUArchState *env)
    }
    ret = cpu_exec(env);
 #ifdef CONFIG_PROFILER
-    tcg_time += profile_getclock() - ti;
+    qemu_time += profile_getclock() - ti;
 #endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                        + cpu->icount_extra);
+        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
@@ -1455,9 +1351,6 @@ CpuInfoList *qmp_query_cpus(Error **errp)
 #elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
-#elif defined(TARGET_TRICORE)
-        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
-        CPUTriCoreState *env = &tricore_cpu->env;
 #endif

        cpu_synchronize_state(cpu);
@@ -1467,7 +1360,6 @@ CpuInfoList *qmp_query_cpus(Error **errp)
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
-        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
 #if defined(TARGET_I386)
        info->value->has_pc = true;
@@ -1483,9 +1375,6 @@ CpuInfoList *qmp_query_cpus(Error **errp)
 #elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
-#elif defined(TARGET_TRICORE)
-        info->value->has_PC = true;
-        info->value->PC = env->PC;
 #endif

        /* XXX: waiting for the qapi to support GSList */
@@ -1507,7 +1396,6 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
-    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
@@ -1531,8 +1419,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
-            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
-                             " specified", orig_addr, orig_size);
+            error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
@@ -1591,24 +1478,21 @@ void qmp_inject_nmi(Error **errp)
            apic_deliver_nmi(cpu->apic_state);
        }
    }
+#elif defined(TARGET_S390X)
+    CPUState *cs;
+    S390CPU *cpu;
+
+    CPU_FOREACH(cs) {
+        cpu = S390_CPU(cs);
+        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
+            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
+                error_set(errp, QERR_UNSUPPORTED);
+                return;
+            }
+            break;
+        }
+    }
 #else
-    nmi_monitor_handle(monitor_get_cpu_index(), errp);
+    error_set(errp, QERR_UNSUPPORTED);
 #endif
 }
-
-void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
-{
-    if (!use_icount) {
-        return;
-    }
-
-    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
-                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
-    if (icount_align_option) {
-        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
-        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
-    } else {
-        cpu_fprintf(f, "Max guest delay     NA\n");
-        cpu_fprintf(f, "Max guest advance   NA\n");
-    }
-}
--- a/cputlb.c
+++ b/cputlb.c
@@ -60,10 +60,8 @@ void tlb_flush(CPUState *cpu, int flush_global)
    cpu->current_tb = NULL;

    memset(env->tlb_table, -1, sizeof(env->tlb_table));
-    memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));

-    env->vtlb_index = 0;
    env->tlb_flush_addr = -1;
    env->tlb_flush_mask = 0;
    tlb_flush_count++;
@@ -110,14 +108,6 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
    }

-    /* check whether there are entries that need to be flushed in the vtlb */
-    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        int k;
-        for (k = 0; k < CPU_VTLB_SIZE; k++) {
-            tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
-        }
-    }
-
    tb_flush_jmp_cache(cpu, addr);
 }

@@ -125,13 +115,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
   can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
-                                             DIRTY_MEMORY_CODE);
+    cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
+                                    DIRTY_MEMORY_CODE);
 }

 /* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
-void tlb_unprotect_code(ram_addr_t ram_addr)
+void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr,
+                             target_ulong vaddr)
 {
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 }
@@ -181,11 +172,6 @@ void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
                                      start1, length);
            }
-
-            for (i = 0; i < CPU_VTLB_SIZE; i++) {
-                tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
-                                      start1, length);
-            }
        }
    }
 }
@@ -209,13 +195,6 @@ void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
    }
-
-    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        int k;
-        for (k = 0; k < CPU_VTLB_SIZE; k++) {
-            tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
-        }
-    }
 }

 /* Our TLB does not support large pages, so remember the area covered by
@@ -242,15 +221,11 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
 }

 /* Add a new TLB entry. At most one entry for a given virtual address
- * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
- * supplied size is only used by tlb_flush_page.
- *
- * Called from TCG-generated code, which is under an RCU read-side
- * critical section.
- */
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
-                             hwaddr paddr, MemTxAttrs attrs, int prot,
-                             int mmu_idx, target_ulong size)
+   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+   supplied size is only used by tlb_flush_page.  */
+void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+                  hwaddr paddr, int prot,
+                  int mmu_idx, target_ulong size)
 {
    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
@@ -260,7 +235,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    uintptr_t addend;
    CPUTLBEntry *te;
    hwaddr iotlb, xlat, sz;
-    unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;

    assert(size >= TARGET_PAGE_SIZE);
    if (size != TARGET_PAGE_SIZE) {
@@ -268,12 +242,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    }

    sz = size;
-    section = address_space_translate_for_iotlb(cpu, paddr, &xlat, &sz);
+    section = address_space_translate_for_iotlb(cpu->as, paddr,
+                                                &xlat, &sz);
    assert(sz >= TARGET_PAGE_SIZE);

 #if defined(DEBUG_TLB)
-    qemu_log_mask(CPU_LOG_MMU,
-           "tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
           " prot=%x idx=%d\n",
           vaddr, paddr, prot, mmu_idx);
 #endif
@@ -293,15 +267,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                                            prot, &address);

    index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    env->iotlb[mmu_idx][index] = iotlb - vaddr;
    te = &env->tlb_table[mmu_idx][index];
-
-    /* do not discard the translation in te, evict it into a victim tlb */
-    env->tlb_v_table[mmu_idx][vidx] = *te;
-    env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
-
-    /* refill the tlb */
-    env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
-    env->iotlb[mmu_idx][index].attrs = attrs;
    te->addend = addend - vaddr;
    if (prot & PAGE_READ) {
        te->addr_read = address;
@@ -331,17 +298,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    }
 }

-/* Add a new TLB entry, but without specifying the memory
- * transaction attributes to be used.
- */
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
-                  hwaddr paddr, int prot,
-                  int mmu_idx, target_ulong size)
-{
-    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
-                            prot, mmu_idx, size);
-}
-
 /* NOTE: this function can trigger an exception */
 /* NOTE2: the returned address is not exactly the physical address: it
 * is actually a ram_addr_t (in system mode; the user mode emulation
@@ -360,8 +316,8 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
                 (addr & TARGET_PAGE_MASK))) {
        cpu_ldub_code(env1, addr);
    }
-    pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(cpu, pd);
+    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
+    mr = iotlb_to_region(cpu->as, pd);
    if (memory_region_is_unassigned(mr)) {
        CPUClass *cc = CPU_GET_CLASS(cpu);

--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -3,4 +3,4 @@
 # We support all the 32 bit boards so need all their config
 include arm-softmmu.mak

-CONFIG_XLNX_ZYNQMP=y
+# Currently no 64-bit specific config requirements
--- a/default-configs/alpha-softmmu.mak
+++ b/default-configs/alpha-softmmu.mak
@@ -5,6 +5,8 @@ include usb.mak
 CONFIG_SERIAL=y
 CONFIG_I8254=y
 CONFIG_PCKBD=y
+CONFIG_VGA=y
+CONFIG_VGA_PCI=y
 CONFIG_VGA_CIRRUS=y
 CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -32,10 +32,7 @@ CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
 CONFIG_MICRODRIVE=y
-CONFIG_USB=y
 CONFIG_USB_MUSB=y
-CONFIG_USB_EHCI_SYSBUS=y
-CONFIG_PLATFORM_BUS=y

 CONFIG_ARM11MPCORE=y
 CONFIG_A9MPCORE=y
@@ -81,24 +78,13 @@ CONFIG_NSERIES=y
 CONFIG_REALVIEW=y
 CONFIG_ZAURUS=y
 CONFIG_ZYNQ=y
-CONFIG_STM32F2XX_TIMER=y
-CONFIG_STM32F2XX_USART=y
-CONFIG_STM32F2XX_SYSCFG=y
-CONFIG_STM32F205_SOC=y

 CONFIG_VERSATILE_PCI=y
 CONFIG_VERSATILE_I2C=y

-CONFIG_PCI_GENERIC=y
-
 CONFIG_SDHCI=y
 CONFIG_INTEGRATOR_DEBUG=y

 CONFIG_ALLWINNER_A10_PIT=y
 CONFIG_ALLWINNER_A10_PIC=y
 CONFIG_ALLWINNER_A10=y
-
-CONFIG_XIO3130=y
-CONFIG_IOH3420=y
-CONFIG_I82801B11=y
-CONFIG_ACPI=y
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -3,7 +3,9 @@
 include pci.mak
 include sound.mak
 include usb.mak
+CONFIG_VGA=y
 CONFIG_QXL=$(CONFIG_SPICE)
+CONFIG_VGA_PCI=y
 CONFIG_VGA_ISA=y
 CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
@@ -15,9 +17,6 @@ CONFIG_PCSPK=y
 CONFIG_PCKBD=y
 CONFIG_FDC=y
 CONFIG_ACPI=y
-CONFIG_ACPI_X86=y
-CONFIG_ACPI_MEMORY_HOTPLUG=y
-CONFIG_ACPI_CPU_HOTPLUG=y
 CONFIG_APM=y
 CONFIG_I8257=y
 CONFIG_IDE_ISA=y
@@ -29,6 +28,7 @@ CONFIG_APPLESMC=y
 CONFIG_I8259=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_TPM_TIS=$(CONFIG_TPM)
+CONFIG_PCI_HOTPLUG_OLD=y
 CONFIG_MC146818RTC=y
 CONFIG_PAM=y
 CONFIG_PCI_PIIX=y
@@ -45,6 +45,3 @@ CONFIG_IOAPIC=y
 CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
-CONFIG_XIO3130=y
-CONFIG_IOH3420=y
-CONFIG_I82801B11=y
--- a/default-configs/lm32-softmmu.mak
+++ b/default-configs/lm32-softmmu.mak
@@ -2,7 +2,7 @@

 CONFIG_LM32=y
 CONFIG_MILKYMIST=y
-CONFIG_MILKYMIST_TMU2=$(CONFIG_OPENGL)
+CONFIG_MILKYMIST_TMU2=$(CONFIG_GLX)
 CONFIG_FRAMEBUFFER=y
 CONFIG_PTIMER=y
 CONFIG_PFLASH_CFI01=y
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .3.50
 .1.3