Update version for 2.8.1 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
spapr: fix off-by-one error in spapr_ovec_populate_dt()
2017-03-30 23:02:53 -05:00 · 2017-03-30 15:45:06 -05:00 · 2017-03-30 14:53:59 -05:00 · 2017-03-30 12:52:04 -05:00 · 2017-03-30 12:47:49 -05:00 · 2017-03-30 12:35:02 -05:00
2278 changed files with 52366 additions and 138833 deletions
--- a/.gdbinit
+++ b/.gdbinit
@@ -1,8 +0,0 @@
-# GDB may have ./.gdbinit loading disabled by default.  In that case you can
-# follow the instructions it prints.  They boil down to adding the following to
-# your home directory's ~/.gdbinit file:
-#
-#   add-auto-load-safe-path /path/to/qemu/.gdbinit
-
-# Load QEMU-specific sub-commands and settings
-source scripts/qemu-gdb.py
--- a/.gitignore
+++ b/.gitignore
@@ -6,12 +6,18 @@
 /config.status
 /config-temp
 /trace-events-all
+/trace/generated-tracers.h
+/trace/generated-tracers.c
+/trace/generated-tracers-dtrace.h
+/trace/generated-tracers.dtrace
 /trace/generated-events.h
 /trace/generated-events.c
 /trace/generated-helpers-wrappers.h
 /trace/generated-helpers.h
 /trace/generated-helpers.c
 /trace/generated-tcg-tracers.h
+/trace/generated-ust-provider.h
+/trace/generated-ust.c
 /ui/shader/texture-blit-frag.h
 /ui/shader/texture-blit-vert.h
 *-timestamp
@@ -34,7 +40,6 @@
 /qmp-marshal.c
 /qemu-doc.html
 /qemu-doc.info
-/qemu-doc.txt
 /qemu-img
 /qemu-nbd
 /qemu-options.def
@@ -50,12 +55,12 @@
 /qemu-version.h.tmp
 /module_block.h
 /vscclient
-/vhost-user-scsi
 /fsdev/virtfs-proxy-helper
 *.[1-9]
 *.a
 *.aux
 *.cp
+*.dvi
 *.exe
 *.msi
 *.dll
@@ -77,6 +82,10 @@
 *.d
 !/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
+*.lo
+*.la
+*.pc
+.libs
 .sdk
 *.gcda
 *.gcno
@@ -100,35 +109,9 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
-/docs/interop/qemu-ga-qapi.texi
-/docs/interop/qemu-ga-ref.html
-/docs/interop/qemu-ga-ref.info*
-/docs/interop/qemu-ga-ref.txt
-/docs/interop/qemu-qmp-qapi.texi
-/docs/interop/qemu-qmp-ref.html
-/docs/interop/qemu-qmp-ref.info*
-/docs/interop/qemu-qmp-ref.txt
-/docs/version.texi
-*.tps
 .stgit-*
 cscope.*
 tags
 TAGS
 docker-src.*
 *~
-trace.h
-trace.c
-trace-ust.h
-trace-ust.h
-trace-dtrace.h
-trace-dtrace.dtrace
-trace-root.h
-trace-root.c
-trace-ust-root.h
-trace-ust-root.h
-trace-ust-all.h
-trace-ust-all.c
-trace-dtrace-root.h
-trace-dtrace-root.dtrace
-trace-ust-all.h
-trace-ust-all.c
--- a/.gitmodules
+++ b/.gitmodules
@@ -34,6 +34,3 @@
 [submodule "roms/skiboot"]
 	path = roms/skiboot
 	url = git://git.qemu.org/skiboot.git
-[submodule "roms/QemuMacDrivers"]
-	path = roms/QemuMacDrivers
-	url = git://git.qemu.org/QemuMacDrivers.git
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -1,21 +0,0 @@
-language: c
-env:
-  matrix:
-    - IMAGE=debian-armhf-cross
-      TARGET_LIST=arm-softmmu,arm-linux-user
-    - IMAGE=debian-arm64-cross
-      TARGET_LIST=aarch64-softmmu,aarch64-linux-user
-    - IMAGE=debian-s390x-cross
-      TARGET_LIST=s390x-softmmu,s390x-linux-user
-build:
-  pre_ci:
-    - make docker-image-${IMAGE}
-  pre_ci_boot:
-    image_name: qemu
-    image_tag: ${IMAGE}
-    pull: false
-    options: "-e HOME=/root"
-  ci:
-    - unset CC
-    - ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
-    - make -j2
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@ python:
  - "2.4"
 compiler:
  - gcc
+  - clang
 cache: ccache
 addons:
  apt:
@@ -67,9 +68,6 @@ script:
  - make -j3 && ${TEST_CMD}
 matrix:
  include:
-    # Test with CLang for compile portability
-    - env: CONFIG=""
-      compiler: clang
    # gprof/gcov are GCC features
    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
      compiler: gcc
@@ -86,11 +84,14 @@ matrix:
    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
+    - env: CONFIG="--with-coroutine=gthread"
+           TEST_CMD=""
+      compiler: gcc
    - env: CONFIG=""
      os: osx
      compiler: clang
-    # Plain Trusty System Build
-    - env: CONFIG="--disable-linux-user"
+    # Plain Trusty Build
+    - env: CONFIG=""
      sudo: required
      addons:
      dist: trusty
@@ -100,55 +101,6 @@ matrix:
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
-    # Plain Trusty Linux User Build
-    - env: CONFIG="--disable-system"
-      sudo: required
-      addons:
-      dist: trusty
-      compiler: gcc
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-    # Trusty System build with latest stable clang
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
-    # Trusty Linux User build with latest stable clang
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
    # Using newer GCC with sanitizers
    - addons:
        apt:
@@ -188,7 +140,7 @@ matrix:
      compiler: none
      env:
        - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5
-        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user"
+        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread"
        - TEST_CMD=""
      before_script:
        - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -116,10 +116,3 @@ if (a == 1) {
 Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
 Besides, good compilers already warn users when '==' is mis-typed as '=',
 even when the constant is on the right.
-
-7. Comment style
-
-We use traditional C-style /* */ comments and avoid // comments.
-
-Rationale: The // form is valid in C99, so this is purely a matter of
-consistency of style. The checkpatch script will warn you about this.
--- a/HACKING
+++ b/HACKING
@@ -1,28 +1,10 @@
 1. Preprocessor

-1.1. Variadic macros
-
 For variadic macros, stick with this C99-like syntax:

 #define DPRINTF(fmt, ...)                                       \
    do { printf("IRQ: " fmt, ## __VA_ARGS__); } while (0)

-1.2. Include directives
-
-Order include directives as follows:
-
-#include "qemu/osdep.h"  /* Always first... */
-#include <...>           /* then system headers... */
-#include "..."           /* and finally QEMU headers. */
-
-The "qemu/osdep.h" header contains preprocessor macros that affect the behavior
-of core system headers like <stdint.h>.  It must be the first include so that
-core system headers included by external libraries get the preprocessor macros
-that QEMU depends on.
-
-Do not include "qemu/osdep.h" from header files since the .c file will have
-already included it.
-
 2. C types

 It should be common sense to use the right type, but we have collected
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12,8 +12,6 @@ consult qemu-devel and not any specific individual privately.
 Descriptions of section entries:

 	M: Mail patches to: FullName <address@domain>
-	R: Designated reviewer: FullName <address@domain>
-	   These reviewers should be CCed on patches.
 	L: Mailing list that is relevant to this area
 	W: Web-page with status/info
 	Q: Patchwork web based patch tracking system site
@@ -108,7 +106,7 @@ F: include/fpu/
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: target/alpha/
+F: target-alpha/
 F: hw/alpha/
 F: tests/tcg/alpha/
 F: disas/alpha.c
@@ -117,7 +115,7 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target/arm/
+F: target-arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
 F: include/hw/cpu/a*mpcore.h
@@ -128,22 +126,16 @@ F: disas/libvixl/
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target/cris/
+F: target-cris/
 F: hw/cris/
 F: include/hw/cris/
 F: tests/tcg/cris/
 F: disas/cris.c

-HPPA (PA-RISC)
-M: Richard Henderson <rth@twiddle.net>
-S: Maintained
-F: target/hppa/
-F: disas/hppa.c
-
 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
-F: target/lm32/
+F: target-lm32/
 F: disas/lm32.c
 F: hw/lm32/
 F: hw/*/lm32_*
@@ -155,13 +147,13 @@ F: tests/tcg/lm32/
 M68K
 M: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
-F: target/m68k/
+F: target-m68k/
 F: disas/m68k.c

 MicroBlaze
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target/microblaze/
+F: target-microblaze/
 F: hw/microblaze/
 F: disas/microblaze.c

@@ -169,7 +161,7 @@ MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
 M: Yongbok Kim <yongbok.kim@imgtec.com>
 S: Maintained
-F: target/mips/
+F: target-mips/
 F: hw/mips/
 F: hw/misc/mips_*
 F: hw/intc/mips_gic.c
@@ -184,23 +176,15 @@ F: disas/mips.c
 Moxie
 M: Anthony Green <green@moxielogic.com>
 S: Maintained
-F: target/moxie/
+F: target-moxie/
 F: disas/moxie.c
 F: hw/moxie/
 F: default-configs/moxie-softmmu.mak

-NiosII
-M: Chris Wulff <crwulff@gmail.com>
-M: Marek Vasut <marex@denx.de>
-S: Maintained
-F: target/nios2/
-F: hw/nios2/
-F: disas/nios2.c
-
 OpenRISC
-M: Stafford Horne <shorne@gmail.com>
-S: Odd Fixes
-F: target/openrisc/
+M: Jia Liu <proljc@gmail.com>
+S: Maintained
+F: target-openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/

@@ -209,7 +193,7 @@ M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
-F: target/ppc/
+F: target-ppc/
 F: hw/ppc/
 F: include/hw/ppc/
 F: disas/ppc.c
@@ -218,14 +202,14 @@ S390
 M: Richard Henderson <rth@twiddle.net>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/s390x/
+F: target-s390x/
 F: hw/s390x/
 F: disas/s390.c

 SH4
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
-F: target/sh4/
+F: target-sh4/
 F: hw/sh4/
 F: disas/sh4.c
 F: include/hw/sh4/
@@ -234,7 +218,7 @@ SPARC
 M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 M: Artyom Tarasenko <atar4qemu@gmail.com>
 S: Maintained
-F: target/sparc/
+F: target-sparc/
 F: hw/sparc/
 F: hw/sparc64/
 F: disas/sparc.c
@@ -242,7 +226,7 @@ F: disas/sparc.c
 UniCore32
 M: Guan Xuetao <gxt@mprc.pku.edu.cn>
 S: Maintained
-F: target/unicore32/
+F: target-unicore32/
 F: hw/unicore32/
 F: include/hw/unicore32/

@@ -251,7 +235,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Richard Henderson <rth@twiddle.net>
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
-F: target/i386/
+F: target-i386/
 F: hw/i386/
 F: disas/i386.c

@@ -259,14 +243,14 @@ Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
 W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
-F: target/xtensa/
+F: target-xtensa/
 F: hw/xtensa/
 F: tests/tcg/xtensa/

 TriCore
 M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
 S: Maintained
-F: target/tricore/
+F: target-tricore/
 F: hw/tricore/
 F: include/hw/tricore/

@@ -285,26 +269,26 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target/arm/kvm.c
+F: target-arm/kvm.c

 MIPS
 M: James Hogan <james.hogan@imgtec.com>
 S: Maintained
-F: target/mips/kvm.c
+F: target-mips/kvm.c

 PPC
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/ppc/kvm.c
+F: target-ppc/kvm.c

 S390
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/s390x/kvm.c
-F: target/s390x/ioinst.[ch]
-F: target/s390x/machine.c
+F: target-s390x/kvm.c
+F: target-s390x/ioinst.[ch]
+F: target-s390x/machine.c
 F: hw/intc/s390_flic.c
 F: hw/intc/s390_flic_kvm.c
 F: include/hw/s390x/s390_flic.h
@@ -317,7 +301,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: target/i386/kvm.c
+F: target-i386/kvm.c

 Guest CPU Cores (Xen):
 ----------------------
@@ -325,11 +309,10 @@ Guest CPU Cores (Xen):
 X86
 M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
-L: xen-devel@lists.xenproject.org
+L: xen-devel@lists.xensource.com
 S: Supported
 F: xen-*
 F: */xen*
-F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
@@ -354,12 +337,6 @@ L: qemu-devel@nongnu.org
 S: Maintained
 F: *posix*

-NETBSD
-L: qemu-devel@nongnu.org
-M: Kamil Rytarowski <kamil@netbsd.org>
-S: Maintained
-K: (?i)NetBSD
-
 W32, W64
 L: qemu-devel@nongnu.org
 M: Stefan Weil <sw@weilnetz.de>
@@ -531,6 +508,7 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/virt-acpi-build.c
+F: include/hw/arm/virt-acpi-build.h

 STM32F205
 M: Alistair Francis <alistair@alistair23.me>
@@ -570,19 +548,20 @@ F: hw/lm32/milkymist.c
 M68K Machines
 -------------
 an5206
-M: Thomas Huth <huth@tuxfamily.org>
-S: Odd Fixes
+S: Orphan
 F: hw/m68k/an5206.c
 F: hw/m68k/mcf5206.c

+dummy_m68k
+S: Orphan
+F: hw/m68k/dummy_m68k.c
+
 mcf5208
-M: Thomas Huth <huth@tuxfamily.org>
-S: Odd Fixes
+S: Orphan
 F: hw/m68k/mcf5208.c
 F: hw/m68k/mcf_intc.c
 F: hw/char/mcf_uart.c
 F: hw/net/mcf_fec.c
-F: include/hw/m68k/mcf*.h

 MicroBlaze Machines
 -------------------
@@ -609,28 +588,15 @@ S: Maintained
 F: hw/mips/mips_malta.c

 Mipssim
-M: Yongbok Kim <yongbok.kim@imgtec.com>
-S: Odd Fixes
+L: qemu-devel@nongnu.org
+S: Orphan
 F: hw/mips/mips_mipssim.c
-F: hw/net/mipsnet.c

 R4000
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: hw/mips/mips_r4k.c

-Fulong 2E
-M: Yongbok Kim <yongbok.kim@imgtec.com>
-S: Odd Fixes
-F: hw/mips/mips_fulong2e.c
-
-Boston
-M: Paul Burton <paul.burton@imgtec.com>
-S: Maintained
-F: hw/core/loader-fit.c
-F: hw/mips/boston.c
-F: hw/pci-host/xilinx-pcie.c
-
 OpenRISC Machines
 -----------------
 or1k-sim
@@ -654,6 +620,7 @@ F: hw/ppc/ppc440_bamboo.c

 e500
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500.[hc]
@@ -664,6 +631,7 @@ F: pc-bios/u-boot.e500

 mpc8544ds
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/mpc8544ds.c
@@ -690,13 +658,10 @@ F: hw/misc/macio/
 F: hw/intc/heathrow_pic.c

 PReP
-M: Hervé Poussineau <hpoussin@reactos.org>
 L: qemu-devel@nongnu.org
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc/prep.c
-F: hw/ppc/prep_systemio.c
-F: hw/ppc/rs6000_mc.c
 F: hw/pci-host/prep.[hc]
 F: hw/isa/pc87312.[hc]
 F: pc-bios/ppc_rom.bin
@@ -761,13 +726,6 @@ S: Maintained
 F: hw/sparc64/sun4u.c
 F: pc-bios/openbios-sparc64

-Sun4v
-M: Artyom Tarasenko <atar4qemu@gmail.com>
-S: Maintained
-F: hw/sparc64/sun4v.c
-F: hw/timer/sun4v-rtc.c
-F: include/hw/timer/sun4v-rtc.h
-
 Leon3
 M: Fabien Chouteau <chouteau@adacore.com>
 S: Maintained
@@ -849,7 +807,6 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 M: Marcel Apfelbaum <marcel@redhat.com>
 S: Supported
 F: hw/core/machine.c
-F: hw/core/null-machine.c
 F: include/hw/boards.h

 Xtensa Machines
@@ -928,8 +885,7 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
-F: tests/bios-tables-test.c
-F: tests/acpi-utils.[hc]
+F: include/hw/arm/virt-acpi-build.h

 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -940,6 +896,7 @@ F: include/hw/ppc/ppc4xx.h

 ppce500
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500*
@@ -1005,14 +962,6 @@ S: Supported
 F: hw/vfio/*
 F: include/hw/vfio/

-vfio-ccw
-M: Cornelia Huck <cornelia.huck@de.ibm.com>
-S: Supported
-F: hw/vfio/ccw.c
-F: hw/s390x/s390-ccw.c
-F: include/hw/s390x/s390-ccw.h
-T: git git://github.com/cohuck/qemu.git s390-next
-
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -1062,7 +1011,7 @@ F: hw/input/virtio-input*.c
 F: include/hw/virtio/virtio-input.h

 virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1071,7 +1020,7 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c

 virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
@@ -1151,15 +1100,6 @@ F: hw/nvram/chrp_nvram.c
 F: include/hw/nvram/chrp_nvram.h
 F: tests/prom-env-test.c

-VM Generation ID
-M: Ben Warren <ben@skyportsystems.com>
-S: Maintained
-F: hw/acpi/vmgenid.c
-F: include/hw/acpi/vmgenid.h
-F: docs/specs/vmgenid.txt
-F: tests/vmgenid-test.c
-F: stubs/vmgenid.c
-
 Subsystems
 ----------
 Audio
@@ -1184,7 +1124,6 @@ F: include/block/
 F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
-F: util/qemu-progress.c
 T: git git://repo.or.cz/qemu/kevin.git block

 Block I/O path
@@ -1192,8 +1131,8 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Fam Zheng <famz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
-F: util/async.c
-F: util/aio-*.c
+F: async.c
+F: aio-*.c
 F: block/io.c
 F: migration/block*
 F: include/block/aio.h
@@ -1235,24 +1174,15 @@ T: git git://github.com/jnsnow/qemu.git bitmaps

 Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
-M: Marc-André Lureau <marcandre.lureau@redhat.com>
 S: Maintained
-F: chardev/
-F: include/chardev/
+F: qemu-char.c
+F: backends/msmouse.c
+F: backends/testdev.c

 Character Devices (Braille)
 M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 S: Maintained
-F: chardev/baum.c
-
-Command line option argument parsing
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
-F: include/qemu/option.h
-F: tests/test-keyval.c
-F: tests/test-qemu-opts.c
-F: util/keyval.c
-F: util/qemu-option.c
+F: backends/baum.c

 Coverity model
 M: Markus Armbruster <armbru@redhat.com>
@@ -1321,8 +1251,8 @@ Main loop
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: cpus.c
-F: util/main-loop.c
-F: util/qemu-timer.c
+F: main-loop.c
+F: qemu-timer.c
 F: vl.c

 Human Monitor (HMP)
@@ -1388,9 +1318,7 @@ X: include/qapi/qmp/
 F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
 F: tests/test-*-visitor.c
-F: tests/test-qapi-*.c
 F: tests/test-qmp-*.c
-F: tests/test-visitor-serialization.c
 F: scripts/qapi*
 F: docs/qapi*
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1409,7 +1337,6 @@ S: Supported
 F: qobject/
 F: include/qapi/qmp/
 X: include/qapi/qmp/dispatch.h
-F: scripts/coccinelle/qobject.cocci
 F: tests/check-qdict.c
 F: tests/check-qfloat.c
 F: tests/check-qint.c
@@ -1443,7 +1370,6 @@ F: qmp.c
 F: monitor.c
 F: docs/*qmp-*
 F: scripts/qmp/
-F: tests/qmp-test.c
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

 Register API
@@ -1481,7 +1407,7 @@ F: scripts/checkpatch.pl

 Migration
 M: Juan Quintela <quintela@redhat.com>
-M: Dr. David Alan Gilbert <dgilbert@redhat.com>
+M: Amit Shah <amit.shah@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
@@ -1501,7 +1427,6 @@ S: Maintained
 F: crypto/
 F: include/crypto/
 F: tests/test-crypto-*
-F: qemu.sasl

 Coroutines
 M: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1564,18 +1489,6 @@ F: net/colo*
 F: net/filter-rewriter.c
 F: net/filter-mirror.c

-Record/replay
-M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
-R: Paolo Bonzini <pbonzini@redhat.com>
-W: http://wiki.qemu.org/Features/record-replay
-S: Supported
-F: replay/*
-F: block/blkreplay.c
-F: net/filter-replay.c
-F: include/sysemu/replay.h
-F: docs/replay.txt
-F: stubs/replay.c
-
 Usermode Emulation
 ------------------
 Overall
@@ -1583,7 +1496,6 @@ M: Riku Voipio <riku.voipio@iki.fi>
 S: Maintained
 F: thunk.c
 F: user-exec.c
-F: user-exec-stub.c

 BSD user
 S: Orphan
@@ -1592,7 +1504,6 @@ F: default-configs/*-bsd-user.mak

 Linux user
 M: Riku Voipio <riku.voipio@iki.fi>
-R: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
 F: linux-user/
 F: default-configs/*-linux-user.mak
@@ -1709,7 +1620,6 @@ M: Peter Lieven <pl@kamp.de>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/iscsi.c
-F: block/iscsi-opts.c

 NFS
 M: Jeff Cody <jcody@redhat.com>
@@ -1727,6 +1637,14 @@ S: Supported
 F: block/ssh.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block

+ARCHIPELAGO
+M: Chrysostomos Nanakos <chris@include.gr>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/archipelago.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
 CURL
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
@@ -1802,9 +1720,9 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/linux-aio.c
 F: include/block/raw-aio.h
-F: block/raw-format.c
-F: block/file-posix.c
-F: block/file-win32.c
+F: block/raw-posix.c
+F: block/raw-win32.c
+F: block/raw_bsd.c
 F: block/win32-aio.c

 qcow2
@@ -1846,8 +1764,8 @@ S: Supported
 F: tests/image-fuzzer/

 Replication
-M: Wen Congyang <wencongyang2@huawei.com>
-M: Xie Changlong <xiechanglong.d@gmail.com>
+M: Wen Congyang <wency@cn.fujitsu.com>
+M: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
 S: Supported
 F: replication*
 F: block/replication.c
@@ -1857,14 +1775,9 @@ F: docs/block-replication.txt
 Build and test automation
 -------------------------
 M: Alex Bennée <alex.bennee@linaro.org>
-M: Fam Zheng <famz@redhat.com>
 L: qemu-devel@nongnu.org
-S: Maintained
+S: Supported
 F: .travis.yml
-F: .shippable.yml
-F: tests/docker/
-W: https://travis-ci.org/qemu/qemu
-W: http://patchew.org/QEMU/

 Documentation
 -------------
@@ -1873,3 +1786,9 @@ M: Daniel P. Berrange <berrange@redhat.com>
 S: Odd Fixes
 F: docs/build-system.txt

+Docker testing
+--------------
+Docker based testing framework and cases
+M: Fam Zheng <famz@redhat.com>
+S: Maintained
+F: tests/docker/
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,6 @@ endif

 CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
 CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
-CONFIG_XEN := $(CONFIG_XEN_BACKEND)
 CONFIG_ALL=y
 -include config-all-devices.mak
 -include config-all-disas.mak
@@ -51,153 +50,38 @@ endif

 include $(SRC_PATH)/rules.mak

-GENERATED_FILES = qemu-version.h config-host.h qemu-options.def
-GENERATED_FILES += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
-GENERATED_FILES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
-GENERATED_FILES += qmp-introspect.h
-GENERATED_FILES += qmp-introspect.c
+GENERATED_HEADERS = qemu-version.h config-host.h qemu-options.def
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
+GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
+GENERATED_HEADERS += qmp-introspect.h
+GENERATED_SOURCES += qmp-introspect.c

-GENERATED_FILES += trace/generated-tcg-tracers.h
+GENERATED_HEADERS += trace/generated-tracers.h
+ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
+GENERATED_HEADERS += trace/generated-tracers-dtrace.h
+endif
+GENERATED_SOURCES += trace/generated-tracers.c

-GENERATED_FILES += trace/generated-helpers-wrappers.h
-GENERATED_FILES += trace/generated-helpers.h
-GENERATED_FILES += trace/generated-helpers.c
+GENERATED_HEADERS += trace/generated-tcg-tracers.h

-ifdef CONFIG_TRACE_UST
-GENERATED_FILES += trace-ust-all.h
-GENERATED_FILES += trace-ust-all.c
+GENERATED_HEADERS += trace/generated-helpers-wrappers.h
+GENERATED_HEADERS += trace/generated-helpers.h
+GENERATED_SOURCES += trace/generated-helpers.c
+
+ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
+GENERATED_HEADERS += trace/generated-ust-provider.h
+GENERATED_SOURCES += trace/generated-ust.c
 endif

-GENERATED_FILES += module_block.h
-
-TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
-TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
-TRACE_DTRACE =
-ifdef CONFIG_TRACE_DTRACE
-TRACE_HEADERS += trace-dtrace-root.h $(trace-events-subdirs:%=%/trace-dtrace.h)
-TRACE_DTRACE += trace-dtrace-root.dtrace $(trace-events-subdirs:%=%/trace-dtrace.dtrace)
-endif
-ifdef CONFIG_TRACE_UST
-TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
-endif
-
-GENERATED_FILES += $(TRACE_HEADERS)
-GENERATED_FILES += $(TRACE_SOURCES)
-GENERATED_FILES += $(BUILD_DIR)/trace-events-all
-
-trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
-
-tracetool-y = $(SRC_PATH)/scripts/tracetool.py
-tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
-
-%/trace.h: %/trace.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace.c: %/trace.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace.c-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=c \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-ust.h: %/trace-ust.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace-ust.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-dtrace.dtrace: %/trace-dtrace.dtrace-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace-dtrace.dtrace-timestamp: $(SRC_PATH)/%/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=d \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y)
-	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
-
-%/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y)
-
-
-trace-root.h: trace-root.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-root.c: trace-root.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-root.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=c \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-root.h: trace-ust-root.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-all.h: trace-ust-all.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-all.h-timestamp: $(trace-events-files) $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-all.c: trace-ust-all.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-all.c-timestamp: $(trace-events-files) $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
-		--format=ust-events-c \
-		--backends=$(TRACE_BACKENDS) \
-		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-dtrace-root.dtrace: trace-dtrace-root.dtrace-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=d \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-dtrace-root.h: trace-dtrace-root.dtrace
-	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
-
-trace-dtrace-root.o: trace-dtrace-root.dtrace
+GENERATED_HEADERS += module_block.h

 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
 configure: ;

-.PHONY: all clean cscope distclean html info install install-doc \
-	pdf txt recurse-all speed test dist msi FORCE
+.PHONY: all clean cscope distclean dvi html info install install-doc \
+	pdf recurse-all speed test dist msi FORCE

 $(call set-vpath, $(SRC_PATH))

@@ -206,9 +90,7 @@ LIBS+=-lz $(LIBS_TOOLS)
 HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
-DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
-DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
+DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -263,13 +145,10 @@ endif

 dummy := $(call unnest-vars,, \
                stub-obj-y \
-                chardev-obj-y \
                util-obj-y \
                qga-obj-y \
                ivshmem-client-obj-y \
                ivshmem-server-obj-y \
-                libvhost-user-obj-y \
-                vhost-user-scsi-obj-y \
                qga-vss-dll-obj-y \
                block-obj-y \
                block-obj-m \
@@ -278,8 +157,7 @@ dummy := $(call unnest-vars,, \
                qom-obj-y \
                io-obj-y \
                common-obj-y \
-                common-obj-m \
-                trace-obj-y)
+                common-obj-m)

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile.include
@@ -305,11 +183,7 @@ qemu-version.h: FORCE
 				printf '""\n'; \
 			fi; \
 		fi) > $@.tmp)
-	$(call quiet-command, if ! cmp -s $@ $@.tmp; then \
-	  mv $@.tmp $@; \
-	 else \
-	  rm $@.tmp; \
-	 fi)
+	$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -346,8 +220,7 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
-	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -358,30 +231,30 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o")
+$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
+	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo")

-Makefile: $(version-obj-y)
+Makefile: $(version-obj-y) $(version-lobj-y)

 ######################################################################
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y) $(trace-obj-y)
+libqemuutil.a: $(util-obj-y)

 ######################################################################

-COMMON_LDADDS = libqemuutil.a libqemustub.a
-
 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a

-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap

 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
@@ -445,7 +318,7 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)

-qemu-ga$(EXESUF): $(qga-obj-y) $(COMMON_LDADDS)
+qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)

 ifdef QEMU_GA_MSI_ENABLED
@@ -470,11 +343,9 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif

-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)
-ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
-	$(call LINK, $^)
-vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)

 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -487,15 +358,17 @@ clean:
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
 	rm -f *.msi
-	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
+	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
-	@# May not be present in GENERATED_FILES
+	@# May not be present in GENERATED_HEADERS
 	rm -f trace/generated-tracers-dtrace.dtrace*
 	rm -f trace/generated-tracers-dtrace.h*
-	rm -f $(foreach f,$(GENERATED_FILES),$(f) $(f)-timestamp)
+	rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp)
+	rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
 	rm -rf qapi-generated
 	rm -rf qga/qapi-generated
 	for d in $(ALL_SUBDIRS); do \
@@ -516,18 +389,12 @@ distclean: clean
 	rm -f config-all-devices.mak config-all-disas.mak config.status
 	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
-	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps
+	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
 	rm -f qemu-doc.log qemu-doc.pdf qemu-doc.pg qemu-doc.toc qemu-doc.tp
-	rm -f qemu-doc.vr qemu-doc.txt
+	rm -f qemu-doc.vr
 	rm -f config.log
 	rm -f linux-headers/asm
-	rm -f docs/version.texi
-	rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi
-	rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7
-	rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
-	rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
-	rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
        done
@@ -556,8 +423,7 @@ multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
 s390-ccw.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
-u-boot.e500 \
-qemu_vga.ndrv
+u-boot.e500
 else
 BLOBS=
 endif
@@ -565,14 +431,10 @@ endif
 install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
-	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
 ifneq ($(TOOLS),)
 	$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -580,9 +442,6 @@ ifneq ($(TOOLS),)
 endif
 ifneq (,$(findstring qemu-ga,$(TOOLS)))
 	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
 endif
 endif
 ifdef CONFIG_VIRTFS
@@ -601,7 +460,8 @@ endif
 endif


-install: all $(if $(BUILD_DOCS),install-doc) install-datadir install-localstatedir
+install: all $(if $(BUILD_DOCS),install-doc) \
+install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
 endif
@@ -670,27 +530,20 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \

 # documentation
 MAKEINFO=makeinfo
-MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D)
-MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES)
-TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)"
-TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES)
+MAKEINFOFLAGS=--no-headers --no-split --number-sections
+TEXIFLAG=$(if $(V),,--quiet)
+%.dvi: %.texi
+	$(call quiet-command,texi2dvi $(TEXIFLAG) -I . $<,"GEN","$@")

-docs/version.texi: $(SRC_PATH)/VERSION
-	$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
+%.html: %.texi
+	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --html $< -o $@, \
+	"GEN","$@")

-%.html: %.texi docs/version.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
-	--html $< -o $@,"GEN","$@")
+%.info: %.texi
+	$(call quiet-command,$(MAKEINFO) $< -o $@,"GEN","$@")

-%.info: %.texi docs/version.texi
-	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
-
-%.txt: %.texi docs/version.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
-	--plaintext $< -o $@,"GEN","$@")
-
-%.pdf: %.texi docs/version.texi
-	$(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@")
+%.pdf: %.texi
+	$(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<,"GEN","$@")

 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -704,42 +557,47 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")

-docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
-
-docs/interop/qemu-qmp-qapi.texi: $(qapi-modules)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
-
-docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
-
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
+	  "GEN","$@")
 qemu.1: qemu-option-trace.texi
+
 qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
+	  "GEN","$@")
+
 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
+	  "GEN","$@")
+
 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
+	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
+	  "GEN","$@")
+
 qemu-ga.8: qemu-ga.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
+	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
+	  "GEN","$@")

-html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
-info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
-pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
-txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
+dvi: qemu-doc.dvi
+html: qemu-doc.html
+info: qemu-doc.info
+pdf: qemu-doc.pdf

-qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
+qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi

-docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
-    docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
-    docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \
-	docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi
-
-docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \
-    docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \
-    docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \
-	docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi
-
-
 ifdef CONFIG_WIN32

 INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
@@ -798,15 +656,9 @@ endif # CONFIG_WIN

 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
-ifneq ($(wildcard config-host.mak),)
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
-Makefile: $(GENERATED_FILES)
+Makefile: $(GENERATED_HEADERS)
 endif
-endif
-
-.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
-	$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
-	$(TRACE_DTRACE) $(TRACE_DTRACE:%=%-timestamp)

 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
@@ -838,7 +690,7 @@ help:
 	@echo  '  docker          - Help about targets running tests inside Docker containers'
 	@echo  ''
 	@echo  'Documentation targets:'
-	@echo  '  html info pdf txt'
+	@echo  '  dvi html info pdf'
 	@echo  '                  - Build documentation in specified format'
 	@echo  ''
 ifdef CONFIG_WIN32
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -4,13 +4,15 @@ stub-obj-y = stubs/ crypto/
 util-obj-y = util/ qobject/ qapi/
 util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

-chardev-obj-y = chardev/
-
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

+block-obj-y = async.o thread-pool.o
 block-obj-y += nbd/
 block-obj-y += block.o blockjob.o
+block-obj-y += main-loop.o iohandler.o qemu-timer.o
+block-obj-$(CONFIG_POSIX) += aio-posix.o
+block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -49,9 +51,14 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration/
+common-obj-y += qemu-char.o #aio.o
+common-obj-y += page_cache.o
+
+common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

 common-obj-y += audio/
 common-obj-y += hw/
+common-obj-y += accel.o

 common-obj-y += replay/

@@ -67,7 +74,6 @@ common-obj-y += tpm.o
 common-obj-$(CONFIG_SLIRP) += slirp/

 common-obj-y += backends/
-common-obj-y += chardev/

 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o

@@ -91,6 +97,7 @@ common-obj-y += disas/
 ######################################################################
 # Resource file for Windows executables
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
+version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo

 ######################################################################
 # tracing
@@ -109,70 +116,50 @@ qga-vss-dll-obj-y = qga/
 # contrib
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
-libvhost-user-obj-y = contrib/libvhost-user/
-vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
-vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
-vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
-vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
+

 ######################################################################
-trace-events-subdirs =
-trace-events-subdirs += util
-trace-events-subdirs += crypto
-trace-events-subdirs += io
-trace-events-subdirs += migration
-trace-events-subdirs += block
-trace-events-subdirs += backends
-trace-events-subdirs += chardev
-trace-events-subdirs += hw/block
-trace-events-subdirs += hw/block/dataplane
-trace-events-subdirs += hw/char
-trace-events-subdirs += hw/intc
-trace-events-subdirs += hw/net
-trace-events-subdirs += hw/virtio
-trace-events-subdirs += hw/audio
-trace-events-subdirs += hw/misc
-trace-events-subdirs += hw/usb
-trace-events-subdirs += hw/scsi
-trace-events-subdirs += hw/nvram
-trace-events-subdirs += hw/display
-trace-events-subdirs += hw/input
-trace-events-subdirs += hw/timer
-trace-events-subdirs += hw/dma
-trace-events-subdirs += hw/sparc
-trace-events-subdirs += hw/sd
-trace-events-subdirs += hw/isa
-trace-events-subdirs += hw/mem
-trace-events-subdirs += hw/i386
-trace-events-subdirs += hw/i386/xen
-trace-events-subdirs += hw/9pfs
-trace-events-subdirs += hw/ppc
-trace-events-subdirs += hw/pci
-trace-events-subdirs += hw/s390x
-trace-events-subdirs += hw/vfio
-trace-events-subdirs += hw/acpi
-trace-events-subdirs += hw/arm
-trace-events-subdirs += hw/alpha
-trace-events-subdirs += hw/xen
-trace-events-subdirs += ui
-trace-events-subdirs += audio
-trace-events-subdirs += net
-trace-events-subdirs += target/arm
-trace-events-subdirs += target/i386
-trace-events-subdirs += target/mips
-trace-events-subdirs += target/sparc
-trace-events-subdirs += target/s390x
-trace-events-subdirs += target/ppc
-trace-events-subdirs += qom
-trace-events-subdirs += linux-user
-trace-events-subdirs += qapi
-trace-events-subdirs += accel/tcg
-trace-events-subdirs += accel/kvm
-
-trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
-
-trace-obj-y = trace-root.o
-trace-obj-y += $(trace-events-subdirs:%=%/trace.o)
-trace-obj-$(CONFIG_TRACE_UST) += trace-ust-all.o
-trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace-root.o
-trace-obj-$(CONFIG_TRACE_DTRACE) += $(trace-events-subdirs:%=%/trace-dtrace.o)
+trace-events-y = trace-events
+trace-events-y += util/trace-events
+trace-events-y += crypto/trace-events
+trace-events-y += io/trace-events
+trace-events-y += migration/trace-events
+trace-events-y += block/trace-events
+trace-events-y += hw/block/trace-events
+trace-events-y += hw/char/trace-events
+trace-events-y += hw/intc/trace-events
+trace-events-y += hw/net/trace-events
+trace-events-y += hw/virtio/trace-events
+trace-events-y += hw/audio/trace-events
+trace-events-y += hw/misc/trace-events
+trace-events-y += hw/usb/trace-events
+trace-events-y += hw/scsi/trace-events
+trace-events-y += hw/nvram/trace-events
+trace-events-y += hw/display/trace-events
+trace-events-y += hw/input/trace-events
+trace-events-y += hw/timer/trace-events
+trace-events-y += hw/dma/trace-events
+trace-events-y += hw/sparc/trace-events
+trace-events-y += hw/sd/trace-events
+trace-events-y += hw/isa/trace-events
+trace-events-y += hw/mem/trace-events
+trace-events-y += hw/i386/trace-events
+trace-events-y += hw/9pfs/trace-events
+trace-events-y += hw/ppc/trace-events
+trace-events-y += hw/pci/trace-events
+trace-events-y += hw/s390x/trace-events
+trace-events-y += hw/vfio/trace-events
+trace-events-y += hw/acpi/trace-events
+trace-events-y += hw/arm/trace-events
+trace-events-y += hw/alpha/trace-events
+trace-events-y += ui/trace-events
+trace-events-y += audio/trace-events
+trace-events-y += net/trace-events
+trace-events-y += target-arm/trace-events
+trace-events-y += target-i386/trace-events
+trace-events-y += target-sparc/trace-events
+trace-events-y += target-s390x/trace-events
+trace-events-y += target-ppc/trace-events
+trace-events-y += qom/trace-events
+trace-events-y += linux-user/trace-events
+trace-events-y += qapi/trace-events
--- a/Makefile.target
+++ b/Makefile.target
@@ -11,7 +11,7 @@ $(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
-QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target/$(TARGET_BASE_ARCH) -DNEED_CPU_H
+QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -DNEED_CPU_H

 QEMU_CFLAGS+=-I$(SRC_PATH)/include

@@ -50,7 +50,6 @@ endif

 $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(bindir)/$(QEMU_PROG) \
@@ -60,7 +59,6 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(realpath .)/$(QEMU_PROG) \
@@ -70,7 +68,6 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=simpletrace-stap \
 		--backends=$(TRACE_BACKENDS) \
 		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
@@ -79,7 +76,6 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 else
 stap:
 endif
-.PHONY: stap

 all: $(PROGS) stap

@@ -88,17 +84,19 @@ all: $(PROGS) stap

 #########################################################
 # cpu emulator library
-obj-y += exec.o
-obj-y += accel/
+obj-y = exec.o translate-all.o cpu-exec.o
+obj-y += translate-common.o
+obj-y += cpu-exec-common.o
 obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-y += tcg/tcg-common.o tcg/tcg-runtime.o
-obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
+obj-$(CONFIG_TCG_INTERPRETER) += tci.o
+obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
-obj-y += target/$(TARGET_BASE_ARCH)/
+obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
+obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
-obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
+obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
@@ -116,7 +114,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
             -I$(SRC_PATH)/linux-user

 obj-y += linux-user/
-obj-y += gdbstub.o thunk.o user-exec.o user-exec-stub.o
+obj-y += gdbstub.o thunk.o user-exec.o

 endif #CONFIG_LINUX_USER

@@ -129,7 +127,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
 			 -I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)

 obj-y += bsd-user/
-obj-y += gdbstub.o user-exec.o user-exec-stub.o
+obj-y += gdbstub.o user-exec.o

 endif #CONFIG_BSD_USER

@@ -139,12 +137,19 @@ ifdef CONFIG_SOFTMMU
 obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
 obj-y += qtest.o bootdevice.o
 obj-y += hw/
-obj-y += memory.o
+obj-$(CONFIG_KVM) += kvm-all.o
+obj-y += memory.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
-obj-y += migration/ram.o
+obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)

+# xen support
+obj-$(CONFIG_XEN) += xen-common.o
+obj-$(CONFIG_XEN_I386) += xen-hvm.o xen-mapcache.o
+obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
+obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
+
 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
 obj-y += hw/sparc64/
@@ -152,7 +157,7 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-GENERATED_FILES += hmp-commands.h hmp-commands-info.h
+GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h

 endif # CONFIG_SOFTMMU

@@ -165,14 +170,12 @@ all-obj-y := $(obj-y)
 target-obj-y :=
 block-obj-y :=
 common-obj-y :=
-chardev-obj-y :=
 include $(SRC_PATH)/Makefile.objs
 dummy := $(call unnest-vars,,target-obj-y)
 target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
-               chardev-obj-y \
               crypto-obj-y \
               crypto-aes-obj-y \
               qom-obj-y \
@@ -183,17 +186,15 @@ target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
 all-obj-y += $(qom-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) $(chardev-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
 all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

-COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a
-
 # build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
+$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
 	$(call LINK, $(filter-out %.mak, $^))
 ifdef CONFIG_DARWIN
 	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@")
@@ -227,5 +228,5 @@ ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
 endif

-GENERATED_FILES += config-target.h
-Makefile: $(GENERATED_FILES)
+GENERATED_HEADERS += config-target.h
+Makefile: $(GENERATED_HEADERS)
--- a/1
+++ b/1
@@ -45,7 +45,6 @@ of other UNIX targets. The simple steps to build QEMU are:
 Additional information can also be found online via the QEMU website:

  http://qemu-project.org/Hosts/Linux
-  http://qemu-project.org/Hosts/Mac
  http://qemu-project.org/Hosts/W32


--- a/2
+++ b/2
@@ -1 +1 @@
-2.9.50
+2.8.1
--- a/accel/accel.c
+++ b/accel/accel.c
@@ -34,6 +34,15 @@
 #include "hw/xen/xen.h"
 #include "qom/object.h"

+int tcg_tb_size;
+static bool tcg_allowed = true;
+
+static int tcg_init(MachineState *ms)
+{
+    tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    return 0;
+}
+
 static const TypeInfo accel_type = {
    .name = TYPE_ACCEL,
    .parent = TYPE_OBJECT,
@@ -120,9 +129,27 @@ void configure_accelerator(MachineState *ms)
    }
 }

+
+static void tcg_accel_class_init(ObjectClass *oc, void *data)
+{
+    AccelClass *ac = ACCEL_CLASS(oc);
+    ac->name = "tcg";
+    ac->init_machine = tcg_init;
+    ac->allowed = &tcg_allowed;
+}
+
+#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
+
+static const TypeInfo tcg_accel_type = {
+    .name = TYPE_TCG_ACCEL,
+    .parent = TYPE_ACCEL,
+    .class_init = tcg_accel_class_init,
+};
+
 static void register_accel_types(void)
 {
    type_register_static(&accel_type);
+    type_register_static(&tcg_accel_type);
 }

 type_init(register_accel_types);
--- a/accel/Makefile.objs
+++ b/accel/Makefile.objs
@@ -1,4 +0,0 @@
-obj-$(CONFIG_SOFTMMU) += accel.o
-obj-y += kvm/
-obj-y += tcg/
-obj-y += stubs/
--- a/accel/kvm/Makefile.objs
+++ b/accel/kvm/Makefile.objs
@@ -1 +0,0 @@
-obj-$(CONFIG_KVM) += kvm-all.o
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -1,15 +0,0 @@
-# Trace events for debugging and performance instrumentation
-
-# kvm-all.c
-kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
-kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
-kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
-kvm_irqchip_commit_routes(void) ""
-kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
-kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
-kvm_irqchip_release_virq(int virq) "virq %d"
-
--- a/accel/stubs/Makefile.objs
+++ b/accel/stubs/Makefile.objs
@@ -1 +0,0 @@
-obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -1,3 +0,0 @@
-obj-$(CONFIG_SOFTMMU) += tcg-all.o
-obj-$(CONFIG_SOFTMMU) += cputlb.o
-obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -1,61 +0,0 @@
-/*
- * QEMU System Emulator, accelerator interfaces
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "sysemu/accel.h"
-#include "sysemu/sysemu.h"
-#include "qom/object.h"
-
-int tcg_tb_size;
-static bool tcg_allowed = true;
-
-static int tcg_init(MachineState *ms)
-{
-    tcg_exec_init(tcg_tb_size * 1024 * 1024);
-    return 0;
-}
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "tcg";
-    ac->init_machine = tcg_init;
-    ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
-    .name = TYPE_TCG_ACCEL,
-    .parent = TYPE_ACCEL,
-    .class_init = tcg_accel_class_init,
-};
-
-static void register_accel_types(void)
-{
-    type_register_static(&tcg_accel_type);
-}
-
-type_init(register_accel_types);
--- a/accel/tcg/trace-events
+++ b/accel/tcg/trace-events
@@ -1,10 +0,0 @@
-# Trace events for debugging and performance instrumentation
-
-# TCG related tracing (mostly disabled by default)
-# cpu-exec.c
-disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
-
-# translate-all.c
-translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -16,10 +16,8 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
-#include "qemu/rcu_queue.h"
+#include "qemu/queue.h"
 #include "qemu/sockets.h"
-#include "qemu/cutils.h"
-#include "trace.h"
 #ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif
@@ -29,9 +27,6 @@ struct AioHandler
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
-    AioPollFn *io_poll;
-    IOHandler *io_poll_begin;
-    IOHandler *io_poll_end;
    int deleted;
    void *opaque;
    bool is_external;
@@ -66,7 +61,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
    AioHandler *node;
    struct epoll_event event;

-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int r;
        if (node->deleted || !node->pfd.events) {
            continue;
@@ -205,61 +200,47 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
-                        AioPollFn *io_poll,
                        void *opaque)
 {
    AioHandler *node;
    bool is_new = false;
    bool deleted = false;

-    qemu_lockcnt_lock(&ctx->list_lock);
-
    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write && !io_poll) {
+    if (!io_read && !io_write) {
        if (node == NULL) {
-            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }

        g_source_remove_poll(&ctx->source, &node->pfd);

        /* If the lock is held, just mark the node as deleted */
-        if (qemu_lockcnt_count(&ctx->list_lock)) {
+        if (ctx->walking_handlers) {
            node->deleted = 1;
            node->pfd.revents = 0;
        } else {
            /* Otherwise, delete it for real.  We can't just mark it as
-             * deleted because deleted nodes are only cleaned up while
-             * no one is walking the handlers list.
+             * deleted because deleted nodes are only cleaned up after
+             * releasing the walking_handlers lock.
             */
            QLIST_REMOVE(node, node);
            deleted = true;
        }
-
-        if (!node->io_poll) {
-            ctx->poll_disable_cnt--;
-        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
            is_new = true;
-
-            ctx->poll_disable_cnt += !io_poll;
-        } else {
-            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
        }
-
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
-        node->io_poll = io_poll;
        node->opaque = opaque;
        node->is_external = is_external;

@@ -268,127 +249,72 @@ void aio_set_fd_handler(AioContext *ctx,
    }

    aio_epoll_update(ctx, node, is_new);
-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
-
    if (deleted) {
        g_free(node);
    }
 }

-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
-{
-    AioHandler *node = find_aio_handler(ctx, fd);
-
-    if (!node) {
-        return;
-    }
-
-    node->io_poll_begin = io_poll_begin;
-    node->io_poll_end = io_poll_end;
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
-                            EventNotifierHandler *io_read,
-                            AioPollFn *io_poll)
+                            EventNotifierHandler *io_read)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
-                       (IOHandler *)io_read, NULL, io_poll, notifier);
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       is_external, (IOHandler *)io_read, NULL, notifier);
 }

-void aio_set_event_notifier_poll(AioContext *ctx,
-                                 EventNotifier *notifier,
-                                 EventNotifierHandler *io_poll_begin,
-                                 EventNotifierHandler *io_poll_end)
-{
-    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
-                    (IOHandler *)io_poll_begin,
-                    (IOHandler *)io_poll_end);
-}
-
-static void poll_set_started(AioContext *ctx, bool started)
-{
-    AioHandler *node;
-
-    if (started == ctx->poll_started) {
-        return;
-    }
-
-    ctx->poll_started = started;
-
-    qemu_lockcnt_inc(&ctx->list_lock);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        IOHandler *fn;
-
-        if (node->deleted) {
-            continue;
-        }
-
-        if (started) {
-            fn = node->io_poll_begin;
-        } else {
-            fn = node->io_poll_end;
-        }
-
-        if (fn) {
-            fn(node->opaque);
-        }
-    }
-    qemu_lockcnt_dec(&ctx->list_lock);
-}
-
-
 bool aio_prepare(AioContext *ctx)
 {
-    /* Poll mode cannot be used with glib's event loop, disable it. */
-    poll_set_started(ctx, false);
-
    return false;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-    bool result = false;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
-            result = true;
-            break;
+            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
-            result = true;
-            break;
+            return true;
        }
    }
-    qemu_lockcnt_dec(&ctx->list_lock);

-    return result;
+    return false;
 }

-static bool aio_dispatch_handlers(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
-    AioHandler *node, *tmp;
+    AioHandler *node;
    bool progress = false;

-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        progress = true;
+    }
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
        int revents;

+        ctx->walking_handlers++;
+
        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

@@ -411,28 +337,23 @@ static bool aio_dispatch_handlers(AioContext *ctx)
            progress = true;
        }

-        if (node->deleted) {
-            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-                QLIST_REMOVE(node, node);
-                g_free(node);
-                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-            }
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
        }
    }

+    /* Run our timers */
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
    return progress;
 }

-void aio_dispatch(AioContext *ctx)
-{
-    qemu_lockcnt_inc(&ctx->list_lock);
-    aio_bh_poll(ctx);
-    aio_dispatch_handlers(ctx);
-    qemu_lockcnt_dec(&ctx->list_lock);
-
-    timerlistgroup_run_timers(&ctx->tlg);
-}
-
 /* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
@@ -479,101 +400,15 @@ static void add_pollfd(AioHandler *node)
    npfd++;
 }

-static bool run_poll_handlers_once(AioContext *ctx)
-{
-    bool progress = false;
-    AioHandler *node;
-
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_poll &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_poll(node->opaque)) {
-            progress = true;
-        }
-
-        /* Caller handles freeing deleted nodes.  Don't do it here. */
-    }
-
-    return progress;
-}
-
-/* run_poll_handlers:
- * @ctx: the AioContext
- * @max_ns: maximum time to poll for, in nanoseconds
- *
- * Polls for a given time.
- *
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
- * Note that the caller must have incremented ctx->list_lock.
- *
- * Returns: true if progress was made, false otherwise
- */
-static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
-{
-    bool progress;
-    int64_t end_time;
-
-    assert(ctx->notify_me);
-    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
-    assert(ctx->poll_disable_cnt == 0);
-
-    trace_run_poll_handlers_begin(ctx, max_ns);
-
-    end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
-
-    do {
-        progress = run_poll_handlers_once(ctx);
-    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
-
-    trace_run_poll_handlers_end(ctx, progress);
-
-    return progress;
-}
-
-/* try_poll_mode:
- * @ctx: the AioContext
- * @blocking: busy polling is only attempted when blocking is true
- *
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
- * Note that the caller must have incremented ctx->list_lock.
- *
- * Returns: true if progress was made, false otherwise
- */
-static bool try_poll_mode(AioContext *ctx, bool blocking)
-{
-    if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
-        /* See qemu_soonest_timeout() uint64_t hack */
-        int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
-                             (uint64_t)ctx->poll_ns);
-
-        if (max_ns) {
-            poll_set_started(ctx, true);
-
-            if (run_poll_handlers(ctx, max_ns)) {
-                return true;
-            }
-        }
-    }
-
-    poll_set_started(ctx, false);
-
-    /* Even if we don't run busy polling, try polling once in case it can make
-     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
-     */
-    return run_poll_handlers_once(ctx);
-}
-
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    int i;
-    int ret = 0;
+    int i, ret;
    bool progress;
    int64_t timeout;
-    int64_t start = 0;
+
+    aio_context_acquire(ctx);
+    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
@@ -586,86 +421,43 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

-    qemu_lockcnt_inc(&ctx->list_lock);
+    ctx->walking_handlers++;

-    if (ctx->poll_max_ns) {
-        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    }
+    assert(npfd == 0);

-    progress = try_poll_mode(ctx, blocking);
-    if (!progress) {
-        assert(npfd == 0);
+    /* fill pollfds */

-        /* fill pollfds */
-
-        if (!aio_epoll_enabled(ctx)) {
-            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-                if (!node->deleted && node->pfd.events
-                    && aio_node_check(ctx, node->is_external)) {
-                    add_pollfd(node);
-                }
+    if (!aio_epoll_enabled(ctx)) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            if (!node->deleted && node->pfd.events
+                && aio_node_check(ctx, node->is_external)) {
+                add_pollfd(node);
            }
        }
-
-        timeout = blocking ? aio_compute_timeout(ctx) : 0;
-
-        /* wait until next event */
-        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
-            AioHandler epoll_handler;
-
-            epoll_handler.pfd.fd = ctx->epollfd;
-            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
-            npfd = 0;
-            add_pollfd(&epoll_handler);
-            ret = aio_epoll(ctx, pollfds, npfd, timeout);
-        } else  {
-            ret = qemu_poll_ns(pollfds, npfd, timeout);
-        }
    }

+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+
+    /* wait until next event */
+    if (timeout) {
+        aio_context_release(ctx);
+    }
+    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+        AioHandler epoll_handler;
+
+        epoll_handler.pfd.fd = ctx->epollfd;
+        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+        npfd = 0;
+        add_pollfd(&epoll_handler);
+        ret = aio_epoll(ctx, pollfds, npfd, timeout);
+    } else  {
+        ret = qemu_poll_ns(pollfds, npfd, timeout);
+    }
    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }
-
-    /* Adjust polling time */
-    if (ctx->poll_max_ns) {
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll_ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll_ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
-            } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll_ns);
-        }
+    if (timeout) {
+        aio_context_acquire(ctx);
    }

    aio_notify_accept(ctx);
@@ -678,29 +470,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    npfd = 0;
+    ctx->walking_handlers--;

-    progress |= aio_bh_poll(ctx);
-
-    if (ret > 0) {
-        progress |= aio_dispatch_handlers(ctx);
+    /* Run dispatch even if there were no readable fds to run timers */
+    if (aio_dispatch(ctx)) {
+        progress = true;
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
-
-    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    aio_context_release(ctx);

    return progress;
 }

 void aio_context_setup(AioContext *ctx)
 {
-    /* TODO remove this in final patch submission */
-    if (getenv("QEMU_AIO_POLL_MAX_NS")) {
-        fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
-                "been replaced with -object iothread,poll-max-ns=NUM\n");
-        exit(1);
-    }
-
 #ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
@@ -712,17 +495,3 @@ void aio_context_setup(AioContext *ctx)
    }
 #endif
 }
-
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
-                                 int64_t grow, int64_t shrink, Error **errp)
-{
-    /* No thread synchronization here, it doesn't matter if an incorrect value
-     * is used once.
-     */
-    ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
-    ctx->poll_grow = grow;
-    ctx->poll_shrink = shrink;
-
-    aio_notify(ctx);
-}
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -20,8 +20,6 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
-#include "qapi/error.h"
-#include "qemu/rcu_queue.h"

 struct AioHandler {
    EventNotifier *e;
@@ -40,13 +38,11 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
-                        AioPollFn *io_poll,
                        void *opaque)
 {
    /* fd is a SOCKET in our case */
    AioHandler *node;

-    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
@@ -56,14 +52,14 @@ void aio_set_fd_handler(AioContext *ctx,
    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
-            /* If aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
+                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -76,7 +72,7 @@ void aio_set_fd_handler(AioContext *ctx,
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
        }

        node->pfd.events = 0;
@@ -101,26 +97,16 @@ void aio_set_fd_handler(AioContext *ctx,
                       FD_CONNECT | FD_WRITE | FD_OOB);
    }

-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
-{
-    /* Not implemented */
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
-                            EventNotifierHandler *io_notify,
-                            AioPollFn *io_poll)
+                            EventNotifierHandler *io_notify)
 {
    AioHandler *node;

-    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
@@ -132,14 +118,14 @@ void aio_set_event_notifier(AioContext *ctx,
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

-            /* aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
+                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -153,7 +139,7 @@ void aio_set_event_notifier(AioContext *ctx,
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
@@ -161,18 +147,9 @@ void aio_set_event_notifier(AioContext *ctx,
        node->io_notify = io_notify;
    }

-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

-void aio_set_event_notifier_poll(AioContext *ctx,
-                                 EventNotifier *notifier,
-                                 EventNotifierHandler *io_poll_begin,
-                                 EventNotifierHandler *io_poll_end)
-{
-    /* Not implemented */
-}
-
 bool aio_prepare(AioContext *ctx)
 {
    static struct timeval tv0;
@@ -180,16 +157,10 @@ bool aio_prepare(AioContext *ctx)
    bool have_select_revents = false;
    fd_set rfds, wfds;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-
    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
@@ -199,7 +170,7 @@ bool aio_prepare(AioContext *ctx)
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
@@ -213,53 +184,45 @@ bool aio_prepare(AioContext *ctx)
        }
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-    bool result = false;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
-            result = true;
-            break;
+            return true;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            result = true;
-            break;
+            return true;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            result = true;
-            break;
+            return true;
        }
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
-    return result;
+    return false;
 }

 static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
 {
    AioHandler *node;
    bool progress = false;
-    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
        int revents = node->pfd.revents;

+        ctx->walking_handlers++;
+
        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
@@ -294,25 +257,28 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }

-        if (node->deleted) {
-            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-                QLIST_REMOVE(node, node);
-                g_free(node);
-                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-            }
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
        }
    }

    return progress;
 }

-void aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
-    qemu_lockcnt_inc(&ctx->list_lock);
-    aio_bh_poll(ctx);
-    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
-    qemu_lockcnt_dec(&ctx->list_lock);
-    timerlistgroup_run_timers(&ctx->tlg);
+    bool progress;
+
+    progress = aio_bh_poll(ctx);
+    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    return progress;
 }

 bool aio_poll(AioContext *ctx, bool blocking)
@@ -323,6 +289,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    int count;
    int timeout;

+    aio_context_acquire(ctx);
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
@@ -336,18 +303,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

-    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

+    ctx->walking_handlers++;
+
    /* fill fd sets */
    count = 0;
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

+    ctx->walking_handlers--;
    first = true;

    /* ctx->notifier is always registered.  */
@@ -363,11 +332,17 @@ bool aio_poll(AioContext *ctx, bool blocking)

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
+        if (timeout) {
+            aio_context_release(ctx);
+        }
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            atomic_sub(&ctx->notify_me, 2);
        }
+        if (timeout) {
+            aio_context_acquire(ctx);
+        }

        if (first) {
            aio_notify_accept(ctx);
@@ -390,18 +365,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

-    qemu_lockcnt_dec(&ctx->list_lock);
-
    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
+    aio_context_release(ctx);
    return progress;
 }

 void aio_context_setup(AioContext *ctx)
 {
 }
-
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
-                                 int64_t grow, int64_t shrink, Error **errp)
-{
-    error_setg(errp, "AioContext polling is not implemented on Windows");
-}
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,7 +27,8 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/arch_init.h"
 #include "hw/pci/pci.h"
-#include "hw/audio/soundhw.h"
+#include "hw/audio/audio.h"
+#include "hw/smbios/smbios.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
@@ -63,8 +64,6 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_MIPS
 #elif defined(TARGET_MOXIE)
 #define QEMU_ARCH QEMU_ARCH_MOXIE
-#elif defined(TARGET_NIOS2)
-#define QEMU_ARCH QEMU_ARCH_NIOS2
 #elif defined(TARGET_OPENRISC)
 #define QEMU_ARCH QEMU_ARCH_OPENRISC
 #elif defined(TARGET_PPC)
@@ -85,6 +84,177 @@ int graphic_depth = 32;

 const uint32_t arch_type = QEMU_ARCH;

+static struct defconfig_file {
+    const char *filename; /* path handed to qemu_read_config_file() */
+    /* Indicates it is a user config file (disabled by -no-user-config) */
+    bool userconfig;
+} default_config_files[] = {
+    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
+    { NULL }, /* end of list: iteration stops at a NULL filename */
+};
+
+int qemu_read_default_config_files(bool userconfig)
+{   /* Read each file in default_config_files; 0 or the first hard error. */
+    int ret;
+    struct defconfig_file *f;
+
+    for (f = default_config_files; f->filename; f++) {
+        if (!userconfig && f->userconfig) {
+            continue; /* user config files are skipped when !userconfig */
+        }
+        ret = qemu_read_config_file(f->filename);
+        if (ret < 0 && ret != -ENOENT) { /* -ENOENT is not a failure */
+            return ret;
+        }
+    }
+
+    return 0; /* no negative result other than -ENOENT was seen */
+}
+
+struct soundhw {
+    const char *name;   /* compared with each name given to select_soundhw() */
+    const char *descr;  /* printed next to the name in the card listing */
+    int enabled;        /* set to 1 by select_soundhw(), tested by audio_init() */
+    int isa;            /* 1: init.init_isa is valid, 0: init.init_pci is valid */
+    union {
+        int (*init_isa) (ISABus *bus); /* used by audio_init() when isa != 0 */
+        int (*init_pci) (PCIBus *bus); /* used by audio_init() when isa == 0 */
+    } init;
+};
+
+static struct soundhw soundhw[9]; /* registered cards; a NULL name ends the list */
+static int soundhw_count;         /* number of entries filled in so far */
+
+void isa_register_soundhw(const char *name, const char *descr,
+                          int (*init_isa)(ISABus *bus))
+{   /* Append an ISA card entry to the soundhw table. */
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); /* last slot stays unused */
+    soundhw[soundhw_count].name = name;   /* pointer is stored, not copied */
+    soundhw[soundhw_count].descr = descr; /* pointer is stored, not copied */
+    soundhw[soundhw_count].isa = 1;       /* audio_init() will call init_isa */
+    soundhw[soundhw_count].init.init_isa = init_isa;
+    soundhw_count++;
+}
+
+void pci_register_soundhw(const char *name, const char *descr,
+                          int (*init_pci)(PCIBus *bus))
+{   /* Append a PCI card entry to the soundhw table. */
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1); /* last slot stays unused */
+    soundhw[soundhw_count].name = name;   /* pointer is stored, not copied */
+    soundhw[soundhw_count].descr = descr; /* pointer is stored, not copied */
+    soundhw[soundhw_count].isa = 0;       /* audio_init() will call init_pci */
+    soundhw[soundhw_count].init.init_pci = init_pci;
+    soundhw_count++;
+}
+
+void select_soundhw(const char *optarg)
+{   /* Enable the cards named in optarg, or print the card list and exit. */
+    struct soundhw *c;
+
+    if (is_help_option(optarg)) {
+    show_valid_cards: /* also entered by goto after an unknown card name */
+
+        if (soundhw_count) {
+             printf("Valid sound card names (comma separated):\n");
+             for (c = soundhw; c->name; ++c) {
+                 printf ("%-11s %s\n", c->name, c->descr);
+             }
+             printf("\n-soundhw all will enable all of the above\n");
+        } else {
+             printf("Machine has no user-selectable audio hardware "
+                    "(it may or may not have always-present audio hardware).\n");
+        }
+        exit(!is_help_option(optarg)); /* 0 for a help request, 1 via the goto */
+    }
+    else {
+        size_t l;         /* length of the current comma-separated token */
+        const char *p;    /* start of the current token within optarg */
+        char *e;          /* next ',' after p, or NULL on the last token */
+        int bad_card = 0; /* set once any token matches no registered card */
+
+        if (!strcmp(optarg, "all")) {
+            for (c = soundhw; c->name; ++c) {
+                c->enabled = 1;
+            }
+            return;
+        }
+
+        p = optarg;
+        while (*p) {
+            e = strchr(p, ',');
+            l = !e ? strlen(p) : (size_t) (e - p);
+
+            for (c = soundhw; c->name; ++c) {
+                if (!strncmp(c->name, p, l) && !c->name[l]) { /* exact match */
+                    c->enabled = 1;
+                    break;
+                }
+            }
+
+            if (!c->name) { /* loop reached the terminator: nothing matched */
+                if (l > 80) {
+                    error_report("Unknown sound card name (too big to show)");
+                }
+                else {
+                    error_report("Unknown sound card name `%.*s'",
+                                 (int) l, p);
+                }
+                bad_card = 1;
+            }
+            p += l + (e != NULL); /* step over the token and its comma, if any */
+        }
+
+        if (bad_card) {
+            goto show_valid_cards; /* print the list, then exit with status 1 */
+        }
+    }
+}
+
+void audio_init(void)
+{   /* Call the init hook of every enabled entry in the soundhw table. */
+    struct soundhw *c;
+    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
+    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
+
+    for (c = soundhw; c->name; ++c) {
+        if (c->enabled) {
+            if (c->isa) {
+                if (!isa_bus) { /* lookup above returned NULL */
+                    error_report("ISA bus not available for %s", c->name);
+                    exit(1);
+                }
+                c->init.init_isa(isa_bus); /* return value is not checked */
+            } else {
+                if (!pci_bus) { /* lookup above returned NULL */
+                    error_report("PCI bus not available for %s", c->name);
+                    exit(1);
+                }
+                c->init.init_pci(pci_bus); /* return value is not checked */
+            }
+        }
+    }
+}
+
+void do_acpitable_option(const QemuOpts *opts)
+{   /* Pass opts to acpi_table_add(); the body is empty without TARGET_I386. */
+#ifdef TARGET_I386
+    Error *err = NULL;
+
+    acpi_table_add(opts, &err);
+    if (err) { /* acpi_table_add() stored an error: report it and exit(1) */
+        error_reportf_err(err, "Wrong acpi table provided: ");
+        exit(1);
+    }
+#endif
+}
+
+void do_smbios_option(QemuOpts *opts)
+{   /* Pass opts to smbios_entry_add(); the body is empty without TARGET_I386. */
+#ifdef TARGET_I386
+    smbios_entry_add(opts);
+#endif
+}
+
 int kvm_available(void)
 {
 #ifdef CONFIG_KVM
--- a/util/async.c
+++ b/util/async.c
@@ -1,8 +1,7 @@
 /*
- * Data plane event loop
+ * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -31,8 +30,6 @@
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "block/raw-aio.h"
-#include "qemu/coroutine_int.h"
-#include "trace.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -56,14 +53,14 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_lockcnt_lock(&ctx->list_lock);
+    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);
    aio_notify(ctx);
 }

@@ -76,12 +73,12 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_lockcnt_lock(&ctx->list_lock);
+    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);
    return bh;
 }

@@ -90,19 +87,19 @@ void aio_bh_call(QEMUBH *bh)
    bh->cb(bh->opaque);
 }

-/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
- * The count in ctx->list_lock is incremented before the call, and is
- * not affected by the call.
- */
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
 int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
    int ret;
-    bool deleted = false;
+
+    ctx->walking_bh++;

    ret = 0;
-    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
-        next = atomic_rcu_read(&bh->next);
+    for (bh = ctx->first_bh; bh; bh = next) {
+        /* Make sure that fetching bh happens before accessing its members */
+        smp_read_barrier_depends();
+        next = bh->next;
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
@@ -117,17 +114,13 @@ int aio_bh_poll(AioContext *ctx)
            bh->idle = 0;
            aio_bh_call(bh);
        }
-        if (bh->deleted) {
-            deleted = true;
-        }
    }

+    ctx->walking_bh--;
+
    /* remove deleted bhs */
-    if (!deleted) {
-        return ret;
-    }
-
-    if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+    if (!ctx->walking_bh) {
+        qemu_mutex_lock(&ctx->bh_lock);
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
@@ -138,8 +131,9 @@ int aio_bh_poll(AioContext *ctx)
                bhp = &bh->next;
            }
        }
-        qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+        qemu_mutex_unlock(&ctx->bh_lock);
    }
+
    return ret;
 }

@@ -193,8 +187,7 @@ aio_compute_timeout(AioContext *ctx)
    int timeout = -1;
    QEMUBH *bh;

-    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
-         bh = atomic_rcu_read(&bh->next)) {
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
@@ -277,11 +270,7 @@ aio_ctx_finalize(GSource     *source)
    }
 #endif

-    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
-    qemu_bh_delete(ctx->co_schedule_bh);
-
-    qemu_lockcnt_lock(&ctx->list_lock);
-    assert(!qemu_lockcnt_count(&ctx->list_lock));
+    qemu_mutex_lock(&ctx->bh_lock);
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

@@ -291,12 +280,12 @@ aio_ctx_finalize(GSource     *source)
        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);

-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
-    qemu_lockcnt_destroy(&ctx->list_lock);
+    qemu_mutex_destroy(&ctx->bh_lock);
    timerlistgroup_deinit(&ctx->tlg);
 }

@@ -351,7 +340,7 @@ void aio_notify_accept(AioContext *ctx)
    }
 }

-static void aio_timerlist_notify(void *opaque, QEMUClockType type)
+static void aio_timerlist_notify(void *opaque)
 {
    aio_notify(opaque);
 }
@@ -360,39 +349,6 @@ static void event_notifier_dummy_cb(EventNotifier *e)
 {
 }

-/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
-static bool event_notifier_poll(void *opaque)
-{
-    EventNotifier *e = opaque;
-    AioContext *ctx = container_of(e, AioContext, notifier);
-
-    return atomic_read(&ctx->notified);
-}
-
-static void co_schedule_bh_cb(void *opaque)
-{
-    AioContext *ctx = opaque;
-    QSLIST_HEAD(, Coroutine) straight, reversed;
-
-    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
-    QSLIST_INIT(&straight);
-
-    while (!QSLIST_EMPTY(&reversed)) {
-        Coroutine *co = QSLIST_FIRST(&reversed);
-        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
-        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
-    }
-
-    while (!QSLIST_EMPTY(&straight)) {
-        Coroutine *co = QSLIST_FIRST(&straight);
-        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
-        trace_aio_co_schedule_bh_cb(ctx, co);
-        aio_context_acquire(ctx);
-        qemu_coroutine_enter(co);
-        aio_context_release(ctx);
-    }
-}
-
 AioContext *aio_context_new(Error **errp)
 {
    int ret;
@@ -407,73 +363,24 @@ AioContext *aio_context_new(Error **errp)
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
-    qemu_lockcnt_init(&ctx->list_lock);
-
-    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
-    QSLIST_INIT(&ctx->scheduled_coroutines);
-
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
-                           event_notifier_dummy_cb,
-                           event_notifier_poll);
+                           event_notifier_dummy_cb);
 #ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
 #endif
    ctx->thread_pool = NULL;
+    qemu_mutex_init(&ctx->bh_lock);
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->poll_ns = 0;
-    ctx->poll_max_ns = 0;
-    ctx->poll_grow = 0;
-    ctx->poll_shrink = 0;
-
    return ctx;
 fail:
    g_source_destroy(&ctx->source);
    return NULL;
 }

-void aio_co_schedule(AioContext *ctx, Coroutine *co)
-{
-    trace_aio_co_schedule(ctx, co);
-    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
-                              co, co_scheduled_next);
-    qemu_bh_schedule(ctx->co_schedule_bh);
-}
-
-void aio_co_wake(struct Coroutine *co)
-{
-    AioContext *ctx;
-
-    /* Read coroutine before co->ctx.  Matches smp_wmb in
-     * qemu_coroutine_enter.
-     */
-    smp_read_barrier_depends();
-    ctx = atomic_read(&co->ctx);
-
-    aio_co_enter(ctx, co);
-}
-
-void aio_co_enter(AioContext *ctx, struct Coroutine *co)
-{
-    if (ctx != qemu_get_current_aio_context()) {
-        aio_co_schedule(ctx, co);
-        return;
-    }
-
-    if (qemu_in_coroutine()) {
-        Coroutine *self = qemu_coroutine_self();
-        assert(self != co);
-        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
-    } else {
-        aio_context_acquire(ctx);
-        qemu_aio_coroutine_enter(ctx, co);
-        aio_context_release(ctx);
-    }
-}
-
 void aio_context_ref(AioContext *ctx)
 {
    g_source_ref(&ctx->source);
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -28,7 +28,6 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "qemu/cutils.h"
-#include "sysemu/replay.h"

 #define AUDIO_CAP "audio"
 #include "audio_int.h"
@@ -1113,7 +1112,7 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
    if (audio_is_timer_needed ()) {
-        timer_mod_anticipate_ns(s->ts,
+        timer_mod (s->ts,
            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
    }
    else {
@@ -1388,7 +1387,6 @@ static void audio_run_out (AudioState *s)

        prev_rpos = hw->rpos;
        played = hw->pcm_ops->run_out (hw, live);
-        replay_audio_out(&played);
        if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
            dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
                   hw->rpos, hw->samples, played);
@@ -1452,12 +1450,9 @@ static void audio_run_in (AudioState *s)

    while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
        SWVoiceIn *sw;
-        int captured = 0, min;
+        int captured, min;

-        if (replay_mode != REPLAY_MODE_PLAY) {
-            captured = hw->pcm_ops->run_in(hw);
-        }
-        replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
+        captured = hw->pcm_ops->run_in (hw);

        min = audio_pcm_hw_find_min_in (hw);
        hw->total_samples_captured += captured - min;
@@ -2028,8 +2023,6 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
                    sw = sw1;
                }
                QLIST_REMOVE (cap, entries);
-                g_free (cap->hw.mix_buf);
-                g_free (cap->buf);
                g_free (cap);
            }
            return;
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -166,9 +166,4 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
 bool audio_is_cleaning_up(void);
 void audio_cleanup(void);

-void audio_sample_to_uint64(void *samples, int pos,
-                            uint64_t *left, uint64_t *right);
-void audio_sample_from_uint64(void *samples, int pos,
-                            uint64_t left, uint64_t right);
-
 #endif /* QEMU_AUDIO_H */
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -25,7 +25,6 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/bswap.h"
-#include "qemu/error-report.h"
 #include "audio.h"

 #define AUDIO_CAP "mixeng"
@@ -268,37 +267,6 @@ f_sample *mixeng_clip[2][2][2][3] = {
    }
 };

-
-void audio_sample_to_uint64(void *samples, int pos,
-                            uint64_t *left, uint64_t *right)
-{
-    struct st_sample *sample = samples;
-    sample += pos;
-#ifdef FLOAT_MIXENG
-    error_report(
-        "Coreaudio and floating point samples are not supported by replay yet");
-    abort();
-#else
-    *left = sample->l;
-    *right = sample->r;
-#endif
-}
-
-void audio_sample_from_uint64(void *samples, int pos,
-                            uint64_t left, uint64_t right)
-{
-    struct st_sample *sample = samples;
-    sample += pos;
-#ifdef FLOAT_MIXENG
-    error_report(
-        "Coreaudio and floating point samples are not supported by replay yet");
-    abort();
-#else
-    sample->l = left;
-    sample->r = right;
-#endif
-}
-
 /*
 * August 21, 1998
 * Copyright 1998 Fabrice Bellard.
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -38,14 +38,10 @@
 #define AUDIO_CAP "sdl"
 #include "audio_int.h"

-#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
-
 typedef struct SDLVoiceOut {
    HWVoiceOut hw;
    int live;
-#if USE_SEMAPHORE
    int rpos;
-#endif
    int decr;
 } SDLVoiceOut;

@@ -57,10 +53,8 @@ static struct {

 static struct SDLAudioState {
    int exit;
-#if USE_SEMAPHORE
    SDL_mutex *mutex;
    SDL_sem *sem;
-#endif
    int initialized;
    bool driver_created;
 } glob_sdl;
@@ -79,45 +73,31 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)

 static int sdl_lock (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_LockMutex (s->mutex)) {
        sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
        return -1;
    }
-#else
-    SDL_LockAudio();
-#endif
-
    return 0;
 }

 static int sdl_unlock (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_UnlockMutex (s->mutex)) {
        sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
        return -1;
    }
-#else
-    SDL_UnlockAudio();
-#endif
-
    return 0;
 }

 static int sdl_post (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_SemPost (s->sem)) {
        sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
        return -1;
    }
-#endif
-
    return 0;
 }

-#if USE_SEMAPHORE
 static int sdl_wait (SDLAudioState *s, const char *forfn)
 {
    if (SDL_SemWait (s->sem)) {
@@ -126,7 +106,6 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
    }
    return 0;
 }
-#endif

 static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
 {
@@ -267,7 +246,6 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        int to_mix, decr;

        /* dolog ("in callback samples=%d\n", samples); */
-#if USE_SEMAPHORE
        sdl_wait (s, "sdl_callback");
        if (s->exit) {
            return;
@@ -286,11 +264,6 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        if (!sdl->live) {
            goto again;
        }
-#else
-        if (s->exit || !sdl->live) {
-            break;
-        }
-#endif

        /* dolog ("in callback live=%d\n", live); */
        to_mix = audio_MIN (samples, sdl->live);
@@ -301,11 +274,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)

            /* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
            hw->clip (buf, src, chunk);
-#if USE_SEMAPHORE
            sdl->rpos = (sdl->rpos + chunk) % hw->samples;
-#else
-            hw->rpos = (hw->rpos + chunk) % hw->samples;
-#endif
            to_mix -= chunk;
            buf += chunk << hw->info.shift;
        }
@@ -313,21 +282,12 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        sdl->live -= decr;
        sdl->decr += decr;

-#if USE_SEMAPHORE
    again:
        if (sdl_unlock (s, "sdl_callback")) {
            return;
        }
-#endif
    }
    /* dolog ("done len=%d\n", len); */
-
-#if (SDL_MAJOR_VERSION >= 2)
-    /* SDL2 does not clear the remaining buffer for us, so do it on our own */
-    if (samples) {
-        memset(buf, 0, samples << hw->info.shift);
-    }
-#endif
 }

 static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -355,12 +315,8 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
    decr = audio_MIN (sdl->decr, live);
    sdl->decr -= decr;

-#if USE_SEMAPHORE
    sdl->live = live - decr;
    hw->rpos = sdl->rpos;
-#else
-    sdl->live = live;
-#endif

    if (sdl->live > 0) {
        sdl_unlock_and_post (s, "sdl_run_out");
@@ -449,7 +405,6 @@ static void *sdl_audio_init (void)
        return NULL;
    }

-#if USE_SEMAPHORE
    s->mutex = SDL_CreateMutex ();
    if (!s->mutex) {
        sdl_logerr ("Failed to create SDL mutex\n");
@@ -464,7 +419,6 @@ static void *sdl_audio_init (void)
        SDL_QuitSubSystem (SDL_INIT_AUDIO);
        return NULL;
    }
-#endif

    s->driver_created = true;
    return s;
@@ -474,10 +428,8 @@ static void sdl_audio_fini (void *opaque)
 {
    SDLAudioState *s = opaque;
    sdl_close (s);
-#if USE_SEMAPHORE
    SDL_DestroySemaphore (s->sem);
    SDL_DestroyMutex (s->mutex);
-#endif
    SDL_QuitSubSystem (SDL_INIT_AUDIO);
    s->driver_created = false;
 }
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -88,7 +88,6 @@ static void wav_capture_destroy (void *opaque)
    WAVState *wav = opaque;

    AUD_del_capture (wav->cap, wav);
-    g_free (wav);
 }

 static void wav_capture_info (void *opaque)
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -1,6 +1,10 @@
 common-obj-y += rng.o rng-egd.o
 common-obj-$(CONFIG_POSIX) += rng-random.o

+common-obj-y += msmouse.o testdev.o
+common-obj-$(CONFIG_BRLAPI) += baum.o
+baum.o-cflags := $(SDL_CFLAGS)
+
 common-obj-$(CONFIG_TPM) += tpm.o

 common-obj-y += hostmem.o hostmem-ram.o
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -24,13 +24,15 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"
 #include "qemu/timer.h"
 #include "hw/usb.h"
-#include "ui/console.h"
 #include <brlapi.h>
 #include <brlapi_constants.h>
 #include <brlapi_keycodes.h>
+#ifdef CONFIG_SDL
+#include <SDL_syswm.h>
+#endif

 #if 0
 #define DPRINTF(fmt, ...) \
@@ -85,7 +87,7 @@
 #define BUF_SIZE 256

 typedef struct {
-    Chardev parent;
+    CharDriverState *chr;

    brlapi_handle_t *brlapi;
    int brlapi_fd;
@@ -98,10 +100,7 @@ typedef struct {
    uint8_t out_buf_used, out_buf_ptr;

    QEMUTimer *cellCount_timer;
-} BaumChardev;
-
-#define TYPE_CHARDEV_BRAILLE "chardev-braille"
-#define BAUM_CHARDEV(obj) OBJECT_CHECK(BaumChardev, (obj), TYPE_CHARDEV_BRAILLE)
+} BaumDriverState;

 /* Let's assume NABCC by default */
 enum way {
@@ -226,10 +225,14 @@ static const uint8_t nabcc_translation[2][256] = {
 };

 /* The guest OS has started discussing with us, finish initializing BrlAPI */
-static int baum_deferred_init(BaumChardev *baum)
+static int baum_deferred_init(BaumDriverState *baum)
 {
-    int tty = BRLAPI_TTY_DEFAULT;
-    QemuConsole *con;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+    SDL_SysWMinfo info;
+#endif
+#endif
+    int tty;

    if (baum->deferred_init) {
        return 1;
@@ -240,12 +243,21 @@ static int baum_deferred_init(BaumChardev *baum)
        return 0;
    }

-    con = qemu_console_lookup_by_index(0);
-    if (con && qemu_console_is_graphic(con)) {
-        tty = qemu_console_get_window_id(con);
-        if (tty == -1)
-            tty = BRLAPI_TTY_DEFAULT;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+    memset(&info, 0, sizeof(info));
+    SDL_VERSION(&info.version);
+    if (SDL_GetWMInfo(&info)) {
+        tty = info.info.x11.wmwindow;
+    } else {
+#endif
+#endif
+        tty = BRLAPI_TTY_DEFAULT;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    }
+#endif
+#endif

    if (brlapi__enterTtyMode(baum->brlapi, tty, NULL) == -1) {
        brlapi_perror("baum: brlapi__enterTtyMode");
@@ -256,9 +268,9 @@ static int baum_deferred_init(BaumChardev *baum)
 }

 /* The serial port can receive more of our data */
-static void baum_chr_accept_input(struct Chardev *chr)
+static void baum_accept_input(struct CharDriverState *chr)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    BaumDriverState *baum = chr->opaque;
    int room, first;

    if (!baum->out_buf_used)
@@ -282,25 +294,24 @@ static void baum_chr_accept_input(struct Chardev *chr)
 }

 /* We want to send a packet */
-static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
+static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 {
-    Chardev *chr = CHARDEV(baum);
    uint8_t io_buf[1 + 2 * len], *cur = io_buf;
    int room;
    *cur++ = ESC;
    while (len--)
        if ((*cur++ = *buf++) == ESC)
            *cur++ = ESC;
-    room = qemu_chr_be_can_write(chr);
+    room = qemu_chr_be_can_write(baum->chr);
    len = cur - io_buf;
    if (len <= room) {
        /* Fits */
-        qemu_chr_be_write(chr, io_buf, len);
+        qemu_chr_be_write(baum->chr, io_buf, len);
    } else {
        int first;
        uint8_t out;
        /* Can't fit all, send what can be, and store the rest. */
-        qemu_chr_be_write(chr, io_buf, room);
+        qemu_chr_be_write(baum->chr, io_buf, room);
        len -= room;
        cur = io_buf + room;
        if (len > BUF_SIZE - baum->out_buf_used) {
@@ -325,14 +336,14 @@ static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
 /* Called when the other end seems to have a wrong idea of our display size */
 static void baum_cellCount_timer_cb(void *opaque)
 {
-    BaumChardev *baum = BAUM_CHARDEV(opaque);
+    BaumDriverState *baum = opaque;
    uint8_t cell_count[] = { BAUM_RSP_CellCount, baum->x * baum->y };
    DPRINTF("Timeout waiting for DisplayData, sending cell count\n");
    baum_write_packet(baum, cell_count, sizeof(cell_count));
 }

 /* Try to interpret a whole incoming packet */
-static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
+static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 {
    const uint8_t *cur = buf;
    uint8_t req = 0;
@@ -473,9 +484,9 @@ static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
+static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    BaumDriverState *baum = chr->opaque;
    int tocopy, cur, eaten, orig_len = len;

    if (!len)
@@ -514,16 +525,14 @@ static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
 }

 /* Send the key code to the other end */
-static void baum_send_key(BaumChardev *baum, uint8_t type, uint8_t value)
-{
+static void baum_send_key(BaumDriverState *baum, uint8_t type, uint8_t value) {
    uint8_t packet[] = { type, value };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
 }

-static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
-                           uint8_t value2)
-{
+static void baum_send_key2(BaumDriverState *baum, uint8_t type, uint8_t value,
+                           uint8_t value2) {
    uint8_t packet[] = { type, value, value2 };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
@@ -532,7 +541,7 @@ static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
 /* We got some data on the BrlAPI socket */
 static void baum_chr_read(void *opaque)
 {
-    BaumChardev *baum = BAUM_CHARDEV(opaque);
+    BaumDriverState *baum = opaque;
    brlapi_keyCode_t code;
    int ret;
    if (!baum->brlapi)
@@ -616,25 +625,41 @@ static void baum_chr_read(void *opaque)
    }
 }

-static void char_braille_finalize(Object *obj)
+static void baum_free(struct CharDriverState *chr)
 {
-    BaumChardev *baum = BAUM_CHARDEV(obj);
+    BaumDriverState *baum = chr->opaque;

    timer_free(baum->cellCount_timer);
    if (baum->brlapi) {
        brlapi__closeConnection(baum->brlapi);
        g_free(baum->brlapi);
    }
+    g_free(baum);
 }

-static void baum_chr_open(Chardev *chr,
-                          ChardevBackend *backend,
-                          bool *be_opened,
-                          Error **errp)
+static CharDriverState *chr_baum_init(const char *id,
+                                      ChardevBackend *backend,
+                                      ChardevReturn *ret,
+                                      bool *be_opened,
+                                      Error **errp)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    ChardevCommon *common = backend->u.braille.data;
+    BaumDriverState *baum;
+    CharDriverState *chr;
    brlapi_handle_t *handle;

+    chr = qemu_chr_alloc(common, errp);
+    if (!chr) {
+        return NULL;
+    }
+    baum = g_malloc0(sizeof(BaumDriverState));
+    baum->chr = chr;
+
+    chr->opaque = baum;
+    chr->chr_write = baum_write;
+    chr->chr_accept_input = baum_accept_input;
+    chr->chr_free = baum_free;
+
    handle = g_malloc0(brlapi_getHandleSize());
    baum->brlapi = handle;

@@ -642,36 +667,27 @@ static void baum_chr_open(Chardev *chr,
    if (baum->brlapi_fd == -1) {
        error_setg(errp, "brlapi__openConnection: %s",
                   brlapi_strerror(brlapi_error_location()));
-        g_free(handle);
-        return;
+        goto fail_handle;
    }
    baum->deferred_init = 0;

    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);

    qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum);
+
+    return chr;
+
+fail_handle:
+    g_free(handle);
+    g_free(chr);
+    g_free(baum);
+    return NULL;
 }

-static void char_braille_class_init(ObjectClass *oc, void *data)
-{
-    ChardevClass *cc = CHARDEV_CLASS(oc);
-
-    cc->open = baum_chr_open;
-    cc->chr_write = baum_chr_write;
-    cc->chr_accept_input = baum_chr_accept_input;
-}
-
-static const TypeInfo char_braille_type_info = {
-    .name = TYPE_CHARDEV_BRAILLE,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(BaumChardev),
-    .instance_finalize = char_braille_finalize,
-    .class_init = char_braille_class_init,
-};
-
 static void register_types(void)
 {
-    type_register_static(&char_braille_type_info);
+    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
+                         chr_baum_init);
 }

 type_init(register_types);
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -94,8 +94,6 @@ static void cryptodev_builtin_init(
    backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo);
    backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
    backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
-
-    cryptodev_backend_set_ready(backend, true);
 }

 static int
@@ -113,42 +111,23 @@ cryptodev_builtin_get_unused_session_index(
    return -1;
 }

-#define AES_KEYSIZE_128 16
-#define AES_KEYSIZE_192 24
-#define AES_KEYSIZE_256 32
-#define AES_KEYSIZE_128_XTS AES_KEYSIZE_256
-#define AES_KEYSIZE_256_XTS 64
-
 static int
-cryptodev_builtin_get_aes_algo(uint32_t key_len, int mode, Error **errp)
+cryptodev_builtin_get_aes_algo(uint32_t key_len, Error **errp)
 {
    int algo;

-    if (key_len == AES_KEYSIZE_128) {
+    if (key_len == 128 / 8) {
        algo = QCRYPTO_CIPHER_ALG_AES_128;
-    } else if (key_len == AES_KEYSIZE_192) {
+    } else if (key_len == 192 / 8) {
        algo = QCRYPTO_CIPHER_ALG_AES_192;
-    } else if (key_len == AES_KEYSIZE_256) { /* equals AES_KEYSIZE_128_XTS */
-        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-            algo = QCRYPTO_CIPHER_ALG_AES_128;
-        } else {
-            algo = QCRYPTO_CIPHER_ALG_AES_256;
-        }
-    } else if (key_len == AES_KEYSIZE_256_XTS) {
-        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-            algo = QCRYPTO_CIPHER_ALG_AES_256;
-        } else {
-            goto err;
-        }
+    } else if (key_len == 256 / 8) {
+        algo = QCRYPTO_CIPHER_ALG_AES_256;
    } else {
-        goto err;
+        error_setg(errp, "Unsupported key length :%u", key_len);
+        return -1;
    }

    return algo;
-
-err:
-   error_setg(errp, "Unsupported key length :%u", key_len);
-   return -1;
 }

 static int cryptodev_builtin_create_cipher_session(
@@ -176,48 +155,32 @@ static int cryptodev_builtin_create_cipher_session(

    switch (sess_info->cipher_alg) {
    case VIRTIO_CRYPTO_CIPHER_AES_ECB:
-        mode = QCRYPTO_CIPHER_MODE_ECB;
        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
+                                                          errp);
        if (algo < 0)  {
            return -1;
        }
+        mode = QCRYPTO_CIPHER_MODE_ECB;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CBC:
-        mode = QCRYPTO_CIPHER_MODE_CBC;
        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
+                                                          errp);
        if (algo < 0)  {
            return -1;
        }
+        mode = QCRYPTO_CIPHER_MODE_CBC;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CTR:
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                          errp);
+        if (algo < 0)  {
+            return -1;
+        }
        mode = QCRYPTO_CIPHER_MODE_CTR;
-        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
-        if (algo < 0)  {
-            return -1;
-        }
        break;
-    case VIRTIO_CRYPTO_CIPHER_AES_XTS:
-        mode = QCRYPTO_CIPHER_MODE_XTS;
-        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
-        if (algo < 0)  {
-            return -1;
-        }
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
+    case VIRTIO_CRYPTO_CIPHER_DES_ECB:
+        algo = QCRYPTO_CIPHER_ALG_DES_RFB;
        mode = QCRYPTO_CIPHER_MODE_ECB;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
-        mode = QCRYPTO_CIPHER_MODE_CBC;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
-        mode = QCRYPTO_CIPHER_MODE_CTR;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
        break;
    default:
        error_setg(errp, "Unsupported cipher alg :%u",
@@ -320,12 +283,10 @@ static int cryptodev_builtin_sym_operation(

    sess = builtin->sessions[op_info->session_id];

-    if (op_info->iv_len > 0) {
-        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
-                                   op_info->iv_len, errp);
-        if (ret < 0) {
-            return -VIRTIO_CRYPTO_ERR;
-        }
+    ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+                               op_info->iv_len, errp);
+    if (ret < 0) {
+        return -VIRTIO_CRYPTO_ERR;
    }

    if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
@@ -361,6 +322,8 @@ static void cryptodev_builtin_cleanup(
        }
    }

+    assert(queues == 1);
+
    for (i = 0; i < queues; i++) {
        cc = backend->conf.peers.ccs[i];
        if (cc) {
@@ -368,8 +331,6 @@ static void cryptodev_builtin_cleanup(
            backend->conf.peers.ccs[i] = NULL;
        }
    }
-
-    cryptodev_backend_set_ready(backend, false);
 }

 static void
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -73,6 +73,8 @@ void cryptodev_backend_cleanup(
    if (bc->cleanup) {
        bc->cleanup(backend, errp);
    }
+
+    backend->ready = false;
 }

 int64_t cryptodev_backend_sym_create_session(
@@ -187,39 +189,14 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
            goto out;
        }
    }
-
+    backend->ready = true;
    return;

 out:
+    backend->ready = false;
    error_propagate(errp, local_err);
 }

-void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
-{
-    backend->is_used = used;
-}
-
-bool cryptodev_backend_is_used(CryptoDevBackend *backend)
-{
-    return backend->is_used;
-}
-
-void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready)
-{
-    backend->ready = ready;
-}
-
-bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
-{
-    return backend->ready;
-}
-
-static bool
-cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
-{
-    return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
-}
-
 static void cryptodev_backend_instance_init(Object *obj)
 {
    object_property_add(obj, "queues", "int",
@@ -232,9 +209,7 @@ static void cryptodev_backend_instance_init(Object *obj)

 static void cryptodev_backend_finalize(Object *obj)
 {
-    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);

-    cryptodev_backend_cleanup(backend, NULL);
 }

 static void
@@ -243,7 +218,6 @@ cryptodev_backend_class_init(ObjectClass *oc, void *data)
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = cryptodev_backend_complete;
-    ucc->can_be_deleted = cryptodev_backend_can_be_deleted;

    QTAILQ_INIT(&crypto_clients);
 }
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #ifndef CONFIG_LINUX
    error_setg(errp, "-mem-path not supported on this host");
 #else
-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        gchar *path;
        backend->force_prealloc = mem_prealloc;
        path = object_get_canonical_path(OBJECT(backend));
@@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(errp, "cannot change property value");
        return;
    }
@@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(errp, "cannot change property value");
        return;
    }
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
    Error *local_err = NULL;
    uint64_t value;

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(&local_err, "cannot change property value");
        goto out;
    }
@@ -64,6 +64,14 @@ out:
    error_propagate(errp, local_err);
 }

+static uint16List **host_memory_append_node(uint16List **node,
+                                            unsigned long value)
+{
+     *node = g_malloc0(sizeof(**node));
+     (*node)->value = value;
+     return &(*node)->next;
+}
+
 static void
 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -74,13 +82,12 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
-    if (value == MAX_NODES) {
-        return;
-    }

-    *node = g_malloc0(sizeof(**node));
-    (*node)->value = value;
-    node = &(*node)->next;
+    node = host_memory_append_node(node, value);
+
+    if (value == MAX_NODES) {
+        goto out;
+    }

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
@@ -88,11 +95,10 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
            break;
        }

-        *node = g_malloc0(sizeof(**node));
-        (*node)->value = value;
-        node = &(*node)->next;
+        node = host_memory_append_node(node, value);
    } while (true);

+out:
    visit_type_uint16List(v, name, &host_nodes, errp);
 }

@@ -146,7 +152,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->merge = value;
        return;
    }
@@ -172,7 +178,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->dump = value;
        return;
    }
@@ -208,7 +214,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        }
    }

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->prealloc = value;
        return;
    }
@@ -218,7 +224,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

-        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
+        os_mem_prealloc(fd, ptr, sz, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
@@ -237,19 +243,10 @@ static void host_memory_backend_init(Object *obj)
    backend->prealloc = mem_prealloc;
 }

-bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
-{
-    /*
-     * NOTE: We forbid zero-length memory backend, so here zero means
-     * "we haven't inited the backend memory region yet".
-     */
-    return memory_region_size(&backend->mr) != 0;
-}
-
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
-    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
+    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
 }

 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
@@ -331,7 +328,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
-                            smp_cpus, &local_err);
+                            &local_err);
            if (local_err) {
                goto out;
            }
@@ -351,24 +348,6 @@ host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
    }
 }

-static char *get_id(Object *o, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    return g_strdup(backend->id);
-}
-
-static void set_id(Object *o, const char *str, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    if (backend->id) {
-        error_setg(errp, "cannot change property value");
-        return;
-    }
-    backend->id = g_strdup(str);
-}
-
 static void
 host_memory_backend_class_init(ObjectClass *oc, void *data)
 {
@@ -398,13 +377,6 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
        HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
-    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
-}
-
-static void host_memory_backend_finalize(Object *o)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-    g_free(backend->id);
 }

 static const TypeInfo host_memory_backend_info = {
@@ -415,7 +387,6 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
-    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -23,7 +23,7 @@
 */
 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"
 #include "ui/console.h"
 #include "ui/input.h"

@@ -31,23 +31,18 @@
 #define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)

 typedef struct {
-    Chardev parent;
-
+    CharDriverState *chr;
    QemuInputHandlerState *hs;
    int axis[INPUT_AXIS__MAX];
    bool btns[INPUT_BUTTON__MAX];
    bool btnc[INPUT_BUTTON__MAX];
    uint8_t outbuf[32];
    int outlen;
-} MouseChardev;
+} MouseState;

-#define TYPE_CHARDEV_MSMOUSE "chardev-msmouse"
-#define MOUSE_CHARDEV(obj)                                      \
-    OBJECT_CHECK(MouseChardev, (obj), TYPE_CHARDEV_MSMOUSE)
-
-static void msmouse_chr_accept_input(Chardev *chr)
+static void msmouse_chr_accept_input(CharDriverState *chr)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(chr);
+    MouseState *mouse = chr->opaque;
    int len;

    len = qemu_chr_be_can_write(chr);
@@ -65,7 +60,7 @@ static void msmouse_chr_accept_input(Chardev *chr)
    }
 }

-static void msmouse_queue_event(MouseChardev *mouse)
+static void msmouse_queue_event(MouseState *mouse)
 {
    unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
    int dx, dy, count = 3;
@@ -102,7 +97,7 @@ static void msmouse_queue_event(MouseChardev *mouse)
 static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
                                InputEvent *evt)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(dev);
+    MouseState *mouse = (MouseState *)dev;
    InputMoveEvent *move;
    InputBtnEvent *btn;

@@ -126,24 +121,24 @@ static void msmouse_input_event(DeviceState *dev, QemuConsole *src,

 static void msmouse_input_sync(DeviceState *dev)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(dev);
-    Chardev *chr = CHARDEV(dev);
+    MouseState *mouse = (MouseState *)dev;

    msmouse_queue_event(mouse);
-    msmouse_chr_accept_input(chr);
+    msmouse_chr_accept_input(mouse->chr);
 }

-static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
+static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
 {
    /* Ignore writes to mouse port */
    return len;
 }

-static void char_msmouse_finalize(Object *obj)
+static void msmouse_chr_free(struct CharDriverState *chr)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(obj);
+    MouseState *mouse = chr->opaque;

    qemu_input_handler_unregister(mouse->hs);
+    g_free(mouse);
 }

 static QemuInputHandler msmouse_handler = {
@@ -153,38 +148,39 @@ static QemuInputHandler msmouse_handler = {
    .sync  = msmouse_input_sync,
 };

-static void msmouse_chr_open(Chardev *chr,
-                             ChardevBackend *backend,
-                             bool *be_opened,
-                             Error **errp)
+static CharDriverState *qemu_chr_open_msmouse(const char *id,
+                                              ChardevBackend *backend,
+                                              ChardevReturn *ret,
+                                              bool *be_opened,
+                                              Error **errp)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(chr);
+    ChardevCommon *common = backend->u.msmouse.data;
+    MouseState *mouse;
+    CharDriverState *chr;

+    chr = qemu_chr_alloc(common, errp);
+    if (!chr) {
+        return NULL;
+    }
+    chr->chr_write = msmouse_chr_write;
+    chr->chr_free = msmouse_chr_free;
+    chr->chr_accept_input = msmouse_chr_accept_input;
    *be_opened = false;
+
+    mouse = g_new0(MouseState, 1);
    mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
                                            &msmouse_handler);
+
+    mouse->chr = chr;
+    chr->opaque = mouse;
+
+    return chr;
 }

-static void char_msmouse_class_init(ObjectClass *oc, void *data)
-{
-    ChardevClass *cc = CHARDEV_CLASS(oc);
-
-    cc->open = msmouse_chr_open;
-    cc->chr_write = msmouse_chr_write;
-    cc->chr_accept_input = msmouse_chr_accept_input;
-}
-
-static const TypeInfo char_msmouse_type_info = {
-    .name = TYPE_CHARDEV_MSMOUSE,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(MouseChardev),
-    .instance_finalize = char_msmouse_finalize,
-    .class_init = char_msmouse_class_init,
-};
-
 static void register_types(void)
 {
-    type_register_static(&char_msmouse_type_info);
+    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
+                         qemu_chr_open_msmouse);
 }

 type_init(register_types);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -12,7 +12,7 @@

 #include "qemu/osdep.h"
 #include "sysemu/rng.h"
-#include "chardev/char-fe.h"
+#include "sysemu/char.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"

@@ -86,7 +86,7 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);
-    Chardev *chr;
+    CharDriverState *chr;

    if (s->chr_name == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
@@ -125,7 +125,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
 static char *rng_egd_get_chardev(Object *obj, Error **errp)
 {
    RngEgd *s = RNG_EGD(obj);
-    Chardev *chr = qemu_chr_fe_get_driver(&s->chr);
+    CharDriverState *chr = qemu_chr_fe_get_driver(&s->chr);

    if (chr && chr->label) {
        return g_strdup(chr->label);
@@ -145,7 +145,7 @@ static void rng_egd_finalize(Object *obj)
 {
    RngEgd *s = RNG_EGD(obj);

-    qemu_chr_fe_deinit(&s->chr, false);
+    qemu_chr_fe_deinit(&s->chr);
    g_free(s->chr_name);
 }

--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -25,23 +25,18 @@
 */
 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"

 #define BUF_SIZE 32

 typedef struct {
-    Chardev parent;
-
+    CharDriverState *chr;
    uint8_t in_buf[32];
    int in_buf_used;
-} TestdevChardev;
-
-#define TYPE_CHARDEV_TESTDEV "chardev-testdev"
-#define TESTDEV_CHARDEV(obj)                                    \
-    OBJECT_CHECK(TestdevChardev, (obj), TYPE_CHARDEV_TESTDEV)
+} TestdevCharState;

 /* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevChardev *testdev)
+static int testdev_eat_packet(TestdevCharState *testdev)
 {
    const uint8_t *cur = testdev->in_buf;
    int len = testdev->in_buf_used;
@@ -82,9 +77,9 @@ static int testdev_eat_packet(TestdevChardev *testdev)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
+static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    TestdevChardev *testdev = TESTDEV_CHARDEV(chr);
+    TestdevCharState *testdev = chr->opaque;
    int tocopy, eaten, orig_len = len;

    while (len) {
@@ -107,23 +102,36 @@ static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
    return orig_len;
 }

-static void char_testdev_class_init(ObjectClass *oc, void *data)
+static void testdev_free(struct CharDriverState *chr)
 {
-    ChardevClass *cc = CHARDEV_CLASS(oc);
+    TestdevCharState *testdev = chr->opaque;

-    cc->chr_write = testdev_chr_write;
+    g_free(testdev);
 }

-static const TypeInfo char_testdev_type_info = {
-    .name = TYPE_CHARDEV_TESTDEV,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(TestdevChardev),
-    .class_init = char_testdev_class_init,
-};
+static CharDriverState *chr_testdev_init(const char *id,
+                                         ChardevBackend *backend,
+                                         ChardevReturn *ret,
+                                         bool *be_opened,
+                                         Error **errp)
+{
+    TestdevCharState *testdev;
+    CharDriverState *chr;
+
+    testdev = g_new0(TestdevCharState, 1);
+    testdev->chr = chr = g_new0(CharDriverState, 1);
+
+    chr->opaque = testdev;
+    chr->chr_write = testdev_write;
+    chr->chr_free = testdev_free;
+
+    return chr;
+}

 static void register_types(void)
 {
-    type_register_static(&char_testdev_type_info);
+    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
+                         chr_testdev_init);
 }

 type_init(register_types);
--- a/backends/trace-events
+++ b/backends/trace-events
--- a/balloon.c
+++ b/balloon.c
@@ -29,7 +29,7 @@
 #include "exec/cpu-common.h"
 #include "sysemu/kvm.h"
 #include "sysemu/balloon.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -6,20 +6,19 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += file-posix.o
+block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o

 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
-block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_VXHS) += vxhs.o
+block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
@@ -39,9 +38,9 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
-vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
+archipelago.o-libs := $(ARCHIPELAGO_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
 dmg-bz2.o-libs     := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -32,19 +32,15 @@
 static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
 static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;

-void block_acct_init(BlockAcctStats *stats)
-{
-    qemu_mutex_init(&stats->lock);
-    if (qtest_enabled()) {
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-}
-
-void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
-                      bool account_failed)
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+                     bool account_failed)
 {
    stats->account_invalid = account_invalid;
    stats->account_failed = account_failed;
+
+    if (qtest_enabled()) {
+        clock_type = QEMU_CLOCK_VIRTUAL;
+    }
 }

 void block_acct_cleanup(BlockAcctStats *stats)
@@ -53,7 +49,6 @@ void block_acct_cleanup(BlockAcctStats *stats)
    QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
        g_free(s);
    }
-    qemu_mutex_destroy(&stats->lock);
 }

 void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -63,15 +58,12 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)

    s = g_new0(BlockAcctTimedStats, 1);
    s->interval_length = interval_length;
-    s->stats = stats;
-    qemu_mutex_lock(&stats->lock);
    QSLIST_INSERT_HEAD(&stats->intervals, s, entries);

    for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
        timed_average_init(&s->latency[i], clock_type,
                           (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
    }
-    qemu_mutex_unlock(&stats->lock);
 }

 BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -94,8 +86,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
    cookie->type = type;
 }

-static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
-                                 bool failed)
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
 {
    BlockAcctTimedStats *s;
    int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -107,16 +98,31 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,

    assert(cookie->type < BLOCK_MAX_IOTYPE);

-    qemu_mutex_lock(&stats->lock);
+    stats->nr_bytes[cookie->type] += cookie->bytes;
+    stats->nr_ops[cookie->type]++;
+    stats->total_time_ns[cookie->type] += latency_ns;
+    stats->last_access_time_ns = time_ns;

-    if (failed) {
-        stats->failed_ops[cookie->type]++;
-    } else {
-        stats->nr_bytes[cookie->type] += cookie->bytes;
-        stats->nr_ops[cookie->type]++;
+    QSLIST_FOREACH(s, &stats->intervals, entries) {
+        timed_average_account(&s->latency[cookie->type], latency_ns);
    }
+}
+
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+    stats->failed_ops[cookie->type]++;
+
+    if (stats->account_failed) {
+        BlockAcctTimedStats *s;
+        int64_t time_ns = qemu_clock_get_ns(clock_type);
+        int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+        if (qtest_enabled()) {
+            latency_ns = qtest_latency_ns;
+        }

-    if (!failed || stats->account_failed) {
        stats->total_time_ns[cookie->type] += latency_ns;
        stats->last_access_time_ns = time_ns;

@@ -124,45 +130,29 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
            timed_average_account(&s->latency[cookie->type], latency_ns);
        }
    }
-
-    qemu_mutex_unlock(&stats->lock);
-}
-
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    block_account_one_io(stats, cookie, false);
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    block_account_one_io(stats, cookie, true);
 }

 void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
 {
    assert(type < BLOCK_MAX_IOTYPE);

-    /* block_account_one_io() updates total_time_ns[], but this one does
-     * not.  The reason is that invalid requests are accounted during their
-     * submission, therefore there's no actual I/O involved.
-     */
-    qemu_mutex_lock(&stats->lock);
+    /* block_acct_done() and block_acct_failed() update
+     * total_time_ns[], but this one does not. The reason is that
+     * invalid requests are accounted during their submission,
+     * therefore there's no actual I/O involved. */
+
    stats->invalid_ops[type]++;

    if (stats->account_invalid) {
        stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
    }
-    qemu_mutex_unlock(&stats->lock);
 }

 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                      int num_requests)
 {
    assert(type < BLOCK_MAX_IOTYPE);
-
-    qemu_mutex_lock(&stats->lock);
    stats->merged[type] += num_requests;
-    qemu_mutex_unlock(&stats->lock);
 }

 int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -177,9 +167,7 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,

    assert(type < BLOCK_MAX_IOTYPE);

-    qemu_mutex_lock(&stats->stats->lock);
    sum = timed_average_sum(&stats->latency[type], &elapsed);
-    qemu_mutex_unlock(&stats->stats->lock);

    return (double) sum / elapsed;
 }
--- a/block/archipelago.c
+++ b/block/archipelago.c
--- a/block/backup.c
+++ b/block/backup.c
@@ -24,7 +24,6 @@
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"

 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */
@@ -65,7 +64,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
        retry = false;
        QLIST_FOREACH(req, &job->inflight_reqs, list) {
            if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue, NULL);
+                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
@@ -468,14 +467,13 @@ static void coroutine_fn backup_run(void *opaque)
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (; start < end; start++) {
            bool error_is_read;
-            int alloced = 0;
-
            if (yield_and_check(job)) {
                break;
            }

            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
                int i, n;
+                int alloced = 0;

                /* Check to see if these blocks are already in the
                 * backing file. */
@@ -493,7 +491,7 @@ static void coroutine_fn backup_run(void *opaque)
                                sectors_per_cluster - i, &n);
                    i += n;

-                    if (alloced || n == 0) {
+                    if (alloced == 1 || n == 0) {
                        break;
                    }
                }
@@ -505,13 +503,8 @@ static void coroutine_fn backup_run(void *opaque)
                }
            }
            /* FULL sync mode we copy the whole drive. */
-            if (alloced < 0) {
-                ret = alloced;
-            } else {
-                ret = backup_do_cow(job, start * sectors_per_cluster,
-                                    sectors_per_cluster, &error_is_read,
-                                    false);
-            }
+            ret = backup_do_cow(job, start * sectors_per_cluster,
+                                sectors_per_cluster, &error_is_read, false);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
@@ -625,24 +618,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

-    /* job->common.len is fixed, so we can't allow resize */
-    job = block_job_create(job_id, &backup_job_driver, bs,
-                           BLK_PERM_CONSISTENT_READ,
-                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
-                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
-                           speed, creation_flags, cb, opaque, errp);
+    job = block_job_create(job_id, &backup_job_driver, bs, speed,
+                           creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

-    /* The target must match the source in size, so no resize here either */
-    job->target = blk_new(BLK_PERM_WRITE,
-                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
-                          BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
-    ret = blk_insert_bs(job->target, target, errp);
-    if (ret < 0) {
-        goto error;
-    }
+    job->target = blk_new();
+    blk_insert_bs(job->target, target);

    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
@@ -655,16 +638,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible. */
    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target->backing) {
-        /* Cluster size is not defined */
-        error_report("WARNING: The target block device doesn't provide "
-                     "information about the block size and it doesn't have a "
-                     "backing file. The default block size of %u bytes is "
-                     "used. If the actual block size of the target exceeds "
-                     "this default, the backup may be unusable",
-                     BACKUP_CLUSTER_SIZE_DEFAULT);
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target->backing) {
+    if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
@@ -678,9 +652,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }

-    /* Required permissions are already taken with target's blk_new() */
-    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
-                       &error_abort);
+    block_job_add_bdrv(&job->common, target);
    job->common.len = len;
    block_job_txn_add_job(txn, &job->common);

@@ -692,7 +664,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    }
    if (job) {
        backup_clean(&job->common);
-        block_job_early_fail(&job->common);
+        block_job_unref(&job->common);
    }

    return NULL;
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -1,7 +1,6 @@
 /*
 * Block protocol for I/O error injection
 *
- * Copyright (C) 2016-2017 Red Hat, Inc.
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -38,12 +37,7 @@
 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
-    uint64_t align;
-    uint64_t max_transfer;
-    uint64_t opt_write_zero;
-    uint64_t max_write_zero;
-    uint64_t opt_discard;
-    uint64_t max_discard;
+    int align;

    /* For blkdebug_refresh_filename() */
    char *config_file;
@@ -64,6 +58,10 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;

+static const AIOCBInfo blkdebug_aiocb_info = {
+    .aiocb_size    = sizeof(BlkdebugAIOCB),
+};
+
 enum {
    ACTION_INJECT_ERROR,
    ACTION_SET_STATE,
@@ -79,7 +77,7 @@ typedef struct BlkdebugRule {
            int error;
            int immediately;
            int once;
-            int64_t offset;
+            int64_t sector;
        } inject;
        struct {
            int new_state;
@@ -176,7 +174,6 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
    const char* event_name;
    BlkdebugEvent event;
    struct BlkdebugRule *rule;
-    int64_t sector;

    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
@@ -203,9 +200,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
        rule->options.inject.immediately =
            qemu_opt_get_bool(opts, "immediately", 0);
-        sector = qemu_opt_get_number(opts, "sector", -1);
-        rule->options.inject.offset =
-            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
+        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
        break;

    case ACTION_SET_STATE:
@@ -307,7 +302,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkdebug:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put_str(options, "x-image", filename);
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -326,7 +321,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put_str(options, "x-image", filename);
+    qdict_put(options, "x-image", qstring_from_str(filename));
 }

 static QemuOptsList runtime_opts = {
@@ -348,31 +343,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
-        {
-            .name = "max-transfer",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum transfer size in bytes",
-        },
-        {
-            .name = "opt-write-zero",
-            .type = QEMU_OPT_SIZE,
-            .help = "Optimum write zero alignment in bytes",
-        },
-        {
-            .name = "max-write-zero",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum write zero size in bytes",
-        },
-        {
-            .name = "opt-discard",
-            .type = QEMU_OPT_SIZE,
-            .help = "Optimum discard alignment in bytes",
-        },
-        {
-            .name = "max-discard",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum discard size in bytes",
-        },
        { /* end of list */ }
    },
 };
@@ -383,8 +353,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    int ret;
    uint64_t align;
+    int ret;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -413,69 +383,21 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    bs->supported_write_flags = BDRV_REQ_FUA &
-        bs->file->bs->supported_write_flags;
-    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-        bs->file->bs->supported_zero_flags;
-    ret = -EINVAL;
-
-    /* Set alignment overrides */
-    s->align = qemu_opt_get_size(opts, "align", 0);
-    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
-        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
-                   s->align);
-        goto out;
-    }
-    align = MAX(s->align, bs->file->bs->bl.request_alignment);
-
-    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
-    if (s->max_transfer &&
-        (s->max_transfer >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
-        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
-                   s->max_transfer);
-        goto out;
-    }
-
-    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
-    if (s->opt_write_zero &&
-        (s->opt_write_zero >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
-        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
-                   s->opt_write_zero);
-        goto out;
-    }
-
-    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
-    if (s->max_write_zero &&
-        (s->max_write_zero >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_write_zero,
-                          MAX(s->opt_write_zero, align)))) {
-        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
-                   s->max_write_zero);
-        goto out;
-    }
-
-    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
-    if (s->opt_discard &&
-        (s->opt_discard >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
-        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
-                   s->opt_discard);
-        goto out;
-    }
-
-    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
-    if (s->max_discard &&
-        (s->max_discard >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_discard,
-                          MAX(s->opt_discard, align)))) {
-        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
-                   s->max_discard);
-        goto out;
+    /* Set request alignment */
+    align = qemu_opt_get_size(opts, "align", 0);
+    if (align < INT_MAX && is_power_of_2(align)) {
+        s->align = align;
+    } else if (align) {
+        error_setg(errp, "Invalid alignment");
+        ret = -EINVAL;
+        goto fail_unref;
    }

    ret = 0;
+    goto out;
+
+fail_unref:
+    bdrv_unref_child(bs, bs->file);
 out:
    if (ret < 0) {
        g_free(s->config_file);
@@ -484,163 +406,103 @@ out:
    return ret;
 }

-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
+static void error_callback_bh(void *opaque)
+{
+    struct BlkdebugAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_aio_unref(acb);
+}
+
+static BlockAIOCB *inject_error(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-    int error;
-    bool immediately;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (bytes && inject_offset >= offset &&
-             inject_offset < offset + bytes))
-        {
-            break;
-        }
-    }
-
-    if (!rule || !rule->options.inject.error) {
-        return 0;
-    }
-
-    immediately = rule->options.inject.immediately;
-    error = rule->options.inject.error;
+    int error = rule->options.inject.error;
+    struct BlkdebugAIOCB *acb;
+    bool immediately = rule->options.inject.immediately;

    if (rule->options.inject.once) {
        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
        remove_rule(rule);
    }

-    if (!immediately) {
-        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
-        qemu_coroutine_yield();
+    if (immediately) {
+        return NULL;
    }

-    return -error;
+    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
+    acb->ret = -error;
+
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
+
+    return &acb->common;
 }

-static int coroutine_fn
-blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                   QEMUIOVector *qiov, int flags)
+static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err;
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    /* Sanity check block layer guarantees */
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-    if (bs->bl.max_transfer) {
-        assert(bytes <= bs->bl.max_transfer);
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
    }

-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+                          cb, opaque);
 }

-static int coroutine_fn
-blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err;
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    /* Sanity check block layer guarantees */
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-    if (bs->bl.max_transfer) {
-        assert(bytes <= bs->bl.max_transfer);
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
    }

-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+                           cb, opaque);
 }

-static int blkdebug_co_flush(BlockDriverState *bs)
+static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err = rule_check(bs, 0, 0);
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    if (err) {
-        return err;
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1) {
+            break;
+        }
    }

-    return bdrv_co_flush(bs->file->bs);
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
+    }
+
+    return bdrv_aio_flush(bs->file->bs, cb, opaque);
 }

-static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int count,
-                                                  BdrvRequestFlags flags)
-{
-    uint32_t align = MAX(bs->bl.request_alignment,
-                         bs->bl.pwrite_zeroes_alignment);
-    int err;
-
-    /* Only pass through requests that are larger than requested
-     * preferred alignment (so that we test the fallback to writes on
-     * unaligned portions), and check that the block layer never hands
-     * us anything unaligned that crosses an alignment boundary.  */
-    if (count < align) {
-        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + count, align) ||
-               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + count, align));
-        return -ENOTSUP;
-    }
-    assert(QEMU_IS_ALIGNED(offset, align));
-    assert(QEMU_IS_ALIGNED(count, align));
-    if (bs->bl.max_pwrite_zeroes) {
-        assert(count <= bs->bl.max_pwrite_zeroes);
-    }
-
-    err = rule_check(bs, offset, count);
-    if (err) {
-        return err;
-    }
-
-    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
-}
-
-static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int count)
-{
-    uint32_t align = bs->bl.pdiscard_alignment;
-    int err;
-
-    /* Only pass through requests that are larger than requested
-     * minimum alignment, and ensure that unaligned requests do not
-     * cross optimum discard boundaries. */
-    if (count < bs->bl.request_alignment) {
-        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + count, align) ||
-               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + count, align));
-        return -ENOTSUP;
-    }
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
-    if (align && count >= align) {
-        assert(QEMU_IS_ALIGNED(offset, align));
-        assert(QEMU_IS_ALIGNED(count, align));
-    }
-    if (bs->bl.max_pdiscard) {
-        assert(count <= bs->bl.max_pdiscard);
-    }
-
-    err = rule_check(bs, offset, count);
-    if (err) {
-        return err;
-    }
-
-    return bdrv_co_pdiscard(bs->file->bs, offset, count);
-}

 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -812,9 +674,9 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file->bs);
 }

-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -846,10 +708,10 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    opts = qdict_new();
-    qdict_put_str(opts, "driver", "blkdebug");
+    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));

    QINCREF(bs->file->bs->full_open_options);
-    qdict_put(opts, "image", bs->file->bs->full_open_options);
+    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));

    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image")) {
@@ -868,21 +730,6 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
    if (s->align) {
        bs->bl.request_alignment = s->align;
    }
-    if (s->max_transfer) {
-        bs->bl.max_transfer = s->max_transfer;
-    }
-    if (s->opt_write_zero) {
-        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
-    }
-    if (s->max_write_zero) {
-        bs->bl.max_pwrite_zeroes = s->max_write_zero;
-    }
-    if (s->opt_discard) {
-        bs->bl.pdiscard_alignment = s->opt_discard;
-    }
-    if (s->max_discard) {
-        bs->bl.max_pdiscard = s->max_discard;
-    }
 }

 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -900,18 +747,14 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
-    .bdrv_child_perm        = bdrv_filter_default_perms,
-
    .bdrv_getlength         = blkdebug_getlength,
    .bdrv_truncate          = blkdebug_truncate,
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
    .bdrv_refresh_limits    = blkdebug_refresh_limits,

-    .bdrv_co_preadv         = blkdebug_co_preadv,
-    .bdrv_co_pwritev        = blkdebug_co_pwritev,
-    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
-    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
-    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
+    .bdrv_aio_readv         = blkdebug_aio_readv,
+    .bdrv_aio_writev        = blkdebug_aio_writev,
+    .bdrv_aio_flush         = blkdebug_aio_flush,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -37,6 +37,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
    return ret;
 }

@@ -57,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
    Request *req = opaque;
-    aio_co_wake(req->co);
+    qemu_coroutine_enter(req->co);
    qemu_bh_delete(req->bh);
    g_free(req);
 }
@@ -134,7 +137,6 @@ static BlockDriver bdrv_blkreplay = {

    .bdrv_file_open         = blkreplay_open,
    .bdrv_close             = blkreplay_close,
-    .bdrv_child_perm        = bdrv_filter_default_perms,
    .bdrv_getlength         = blkreplay_getlength,

    .bdrv_co_preadv         = blkreplay_co_preadv,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,36 +19,38 @@ typedef struct {
    BdrvChild *test_file;
 } BDRVBlkverifyState;

-typedef struct BlkverifyRequest {
-    Coroutine *co;
-    BlockDriverState *bs;
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+    BlockAIOCB common;

    /* Request metadata */
    bool is_write;
-    uint64_t offset;
-    uint64_t bytes;
-    int flags;
-
-    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
-                      BdrvRequestFlags);
-
-    int ret;                    /* test image result */
-    int raw_ret;                /* raw image result */
+    int64_t sector_num;
+    int nb_sectors;

+    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */

    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
-} BlkverifyRequest;
+    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
+    void *buf;                  /* buffer for raw file I/O */

-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
+    void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static const AIOCBInfo blkverify_aiocb_info = {
+    .aiocb_size         = sizeof(BlkverifyAIOCB),
+};
+
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
                                             const char *fmt, ...)
 {
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
-            r->is_write ? "write" : "read", r->offset, r->bytes);
+    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->is_write ? "write" : "read", acb->sector_num,
+            acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -67,7 +69,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkverify:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put_str(options, "x-image", filename);
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -84,7 +86,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put_str(options, "x-image", filename);
+    qdict_put(options, "x-image", qstring_from_str(filename));
 }

 static QemuOptsList runtime_opts = {
@@ -142,6 +144,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -161,106 +166,113 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file->bs);
 }

-static void coroutine_fn blkverify_do_test_req(void *opaque)
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+                                         int64_t sector_num, QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque)
 {
-    BlkverifyRequest *r = opaque;
-    BDRVBlkverifyState *s = r->bs->opaque;
+    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);

-    r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
-                           r->flags);
-    r->done++;
-    qemu_coroutine_enter_if_inactive(r->co);
+    acb->is_write = is_write;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->ret = -EINPROGRESS;
+    acb->done = 0;
+    acb->qiov = qiov;
+    acb->buf = NULL;
+    acb->verify = NULL;
+    return acb;
 }

-static void coroutine_fn blkverify_do_raw_req(void *opaque)
+static void blkverify_aio_bh(void *opaque)
 {
-    BlkverifyRequest *r = opaque;
+    BlkverifyAIOCB *acb = opaque;

-    r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
-                               r->flags);
-    r->done++;
-    qemu_coroutine_enter_if_inactive(r->co);
-}
-
-static int coroutine_fn
-blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
-                  uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
-                  int flags, bool is_write)
-{
-    Coroutine *co_a, *co_b;
-
-    *r = (BlkverifyRequest) {
-        .co         = qemu_coroutine_self(),
-        .bs         = bs,
-        .offset     = offset,
-        .bytes      = bytes,
-        .qiov       = qiov,
-        .raw_qiov   = raw_qiov,
-        .flags      = flags,
-        .is_write   = is_write,
-        .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
-    };
-
-    co_a = qemu_coroutine_create(blkverify_do_test_req, r);
-    co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
-
-    qemu_coroutine_enter(co_a);
-    qemu_coroutine_enter(co_b);
-
-    while (r->done < 2) {
-        qemu_coroutine_yield();
+    if (acb->buf) {
+        qemu_iovec_destroy(&acb->raw_qiov);
+        qemu_vfree(acb->buf);
    }
-
-    if (r->ret != r->raw_ret) {
-        blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
-    }
-
-    return r->ret;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_aio_unref(acb);
 }

-static int coroutine_fn
-blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static void blkverify_aio_cb(void *opaque, int ret)
 {
-    BlkverifyRequest r;
-    QEMUIOVector raw_qiov;
-    void *buf;
-    ssize_t cmp_offset;
-    int ret;
+    BlkverifyAIOCB *acb = opaque;

-    buf = qemu_blockalign(bs->file->bs, qiov->size);
-    qemu_iovec_init(&raw_qiov, qiov->niov);
-    qemu_iovec_clone(&raw_qiov, qiov, buf);
+    switch (++acb->done) {
+    case 1:
+        acb->ret = ret;
+        break;

-    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
-                            false);
+    case 2:
+        if (acb->ret != ret) {
+            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
+        }

-    cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
-    if (cmp_offset != -1) {
-        blkverify_err(&r, "contents mismatch at offset %" PRId64,
-                      offset + cmp_offset);
+        if (acb->verify) {
+            acb->verify(acb);
+        }
+
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                blkverify_aio_bh, acb);
+        break;
    }
-
-    qemu_iovec_destroy(&raw_qiov);
-    qemu_vfree(buf);
-
-    return ret;
 }

-static int coroutine_fn
-blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                     QEMUIOVector *qiov, int flags)
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
 {
-    BlkverifyRequest r;
-    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
+    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
+    if (offset != -1) {
+        blkverify_err(acb, "contents mismatch in sector %" PRId64,
+                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+    }
 }

-static int blkverify_co_flush(BlockDriverState *bs)
+static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+                                            nb_sectors, cb, opaque);
+
+    acb->verify = blkverify_verify_readv;
+    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
+    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+                   blkverify_aio_cb, acb);
+    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+                   blkverify_aio_cb, acb);
+    return &acb->common;
+}
+
+static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+                                            nb_sectors, cb, opaque);
+
+    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+                    blkverify_aio_cb, acb);
+    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+                    blkverify_aio_cb, acb);
+    return &acb->common;
+}
+
+static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+                                       BlockCompletionFunc *cb,
+                                       void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;

    /* Only flush test file, the raw file is not important */
-    return bdrv_co_flush(s->test_file->bs);
+    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
 }

 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -288,12 +300,13 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
        && s->test_file->bs->full_open_options)
    {
        QDict *opts = qdict_new();
-        qdict_put_str(opts, "driver", "blkverify");
+        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));

        QINCREF(bs->file->bs->full_open_options);
-        qdict_put(opts, "raw", bs->file->bs->full_open_options);
+        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put(opts, "test", s->test_file->bs->full_open_options);
+        qdict_put_obj(opts, "test",
+                      QOBJECT(s->test_file->bs->full_open_options));

        bs->full_open_options = opts;
    }
@@ -316,13 +329,12 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_parse_filename              = blkverify_parse_filename,
    .bdrv_file_open                   = blkverify_open,
    .bdrv_close                       = blkverify_close,
-    .bdrv_child_perm                  = bdrv_filter_default_perms,
    .bdrv_getlength                   = blkverify_getlength,
    .bdrv_refresh_filename            = blkverify_refresh_filename,

-    .bdrv_co_preadv                   = blkverify_co_preadv,
-    .bdrv_co_pwritev                  = blkverify_co_pwritev,
-    .bdrv_co_flush                    = blkverify_co_flush,
+    .bdrv_aio_readv                   = blkverify_aio_readv,
+    .bdrv_aio_writev                  = blkverify_aio_writev,
+    .bdrv_aio_flush                   = blkverify_aio_flush,

    .is_filter                        = true,
    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -59,15 +59,9 @@ struct BlockBackend {
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

-    uint64_t perm;
-    uint64_t shared_perm;
-    bool disable_perm;
-
    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
-
-    int quiesce_counter;
 };

 typedef struct BlockBackendAIOCB {
@@ -83,7 +77,6 @@ static const AIOCBInfo block_backend_aiocb_info = {

 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-static char *blk_get_attached_dev_id(BlockBackend *blk);

 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -106,120 +99,37 @@ static void blk_root_drained_end(BdrvChild *child);
 static void blk_root_change_media(BdrvChild *child, bool load);
 static void blk_root_resize(BdrvChild *child);

-static char *blk_root_get_parent_desc(BdrvChild *child)
-{
-    BlockBackend *blk = child->opaque;
-    char *dev_id;
-
-    if (blk->name) {
-        return g_strdup(blk->name);
-    }
-
-    dev_id = blk_get_attached_dev_id(blk);
-    if (*dev_id) {
-        return dev_id;
-    } else {
-        /* TODO Callback into the BB owner for something more detailed */
-        g_free(dev_id);
-        return g_strdup("a block device");
-    }
-}
-
 static const char *blk_root_get_name(BdrvChild *child)
 {
    return blk_name(child->opaque);
 }

-/*
- * Notifies the user of the BlockBackend that migration has completed. qdev
- * devices can tighten their permissions in response (specifically revoke
- * shared write permissions that we needed for storage migration).
- *
- * If an error is returned, the VM cannot be allowed to be resumed.
- */
-static void blk_root_activate(BdrvChild *child, Error **errp)
-{
-    BlockBackend *blk = child->opaque;
-    Error *local_err = NULL;
-
-    if (!blk->disable_perm) {
-        return;
-    }
-
-    blk->disable_perm = false;
-
-    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk->disable_perm = true;
-        return;
-    }
-}
-
-static int blk_root_inactivate(BdrvChild *child)
-{
-    BlockBackend *blk = child->opaque;
-
-    if (blk->disable_perm) {
-        return 0;
-    }
-
-    /* Only inactivate BlockBackends for guest devices (which are inactive at
-     * this point because the VM is stopped) and unattached monitor-owned
-     * BlockBackends. If there is still any other user like a block job, then
-     * we simply can't inactivate the image. */
-    if (!blk->dev && !blk_name(blk)[0]) {
-        return -EPERM;
-    }
-
-    blk->disable_perm = true;
-    if (blk->root) {
-        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
-    }
-
-    return 0;
-}
-
 static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,
-    .get_parent_desc    = blk_root_get_parent_desc,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
-
-    .activate           = blk_root_activate,
-    .inactivate         = blk_root_inactivate,
 };

 /*
 * Create a new BlockBackend with a reference count of one.
- *
- * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
- * to request for a block driver node that is attached to this BlockBackend.
- * @shared_perm is a bitmask which describes which permissions may be granted
- * to other users of the attached node.
- * Both sets of permissions can be changed later using blk_set_perm().
- *
+ * Store an error through @errp on failure, unless it's null.
 * Return the new BlockBackend on success, null on failure.
 */
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
+BlockBackend *blk_new(void)
 {
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
-    blk->perm = perm;
-    blk->shared_perm = shared_perm;
    blk_set_enable_write_cache(blk, true);

-    qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
    qemu_co_queue_init(&blk->public.throttled_reqs[1]);
-    block_acct_init(&blk->stats);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);
@@ -245,38 +155,15 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
    BlockBackend *blk;
    BlockDriverState *bs;
-    uint64_t perm;

-    /* blk_new_open() is mainly used in .bdrv_create implementations and the
-     * tools where sharing isn't a concern because the BDS stays private, so we
-     * just request permission according to the flags.
-     *
-     * The exceptions are xen_disk and blockdev_init(); in these cases, the
-     * caller of blk_new_open() doesn't make use of the permissions, but they
-     * shouldn't hurt either. We can still share everything here because the
-     * guest devices will add their own blockers if they can't share. */
-    perm = BLK_PERM_CONSISTENT_READ;
-    if (flags & BDRV_O_RDWR) {
-        perm |= BLK_PERM_WRITE;
-    }
-    if (flags & BDRV_O_RESIZE) {
-        perm |= BLK_PERM_RESIZE;
-    }
-
-    blk = blk_new(perm, BLK_PERM_ALL);
+    blk = blk_new();
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       perm, BLK_PERM_ALL, blk, errp);
-    if (!blk->root) {
-        bdrv_unref(bs);
-        blk_unref(blk);
-        return NULL;
-    }
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    return blk;
 }
@@ -286,9 +173,6 @@ static void blk_delete(BlockBackend *blk)
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
-    if (blk->public.throttle_state) {
-        blk_io_limits_disable(blk);
-    }
    if (blk->root) {
        blk_remove_bs(blk);
    }
@@ -475,7 +359,7 @@ void monitor_remove_blk(BlockBackend *blk)
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
-const char *blk_name(const BlockBackend *blk)
+const char *blk_name(BlockBackend *blk)
 {
    return blk->name ?: "";
 }
@@ -611,49 +495,16 @@ void blk_remove_bs(BlockBackend *blk)
 /*
 * Associates a new BlockDriverState with @blk.
 */
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 {
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       blk->perm, blk->shared_perm, blk, errp);
-    if (blk->root == NULL) {
-        return -EPERM;
-    }
    bdrv_ref(bs);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
-
-    return 0;
-}
-
-/*
- * Sets the permission bitmasks that the user of the BlockBackend needs.
- */
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp)
-{
-    int ret;
-
-    if (blk->root && !blk->disable_perm) {
-        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    blk->perm = perm;
-    blk->shared_perm = shared_perm;
-
-    return 0;
-}
-
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
-{
-    *perm = blk->perm;
-    *shared_perm = blk->shared_perm;
 }

 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -661,19 +512,10 @@ static int blk_do_attach_dev(BlockBackend *blk, void *dev)
    if (blk->dev) {
        return -EBUSY;
    }
-
-    /* While migration is still incoming, we don't need to apply the
-     * permissions of guest device BlockBackends. We might still have a block
-     * job or NBD server writing to the image for storage migration. */
-    if (runstate_check(RUN_STATE_INMIGRATE)) {
-        blk->disable_perm = true;
-    }
-
    blk_ref(blk);
    blk->dev = dev;
    blk->legacy_dev = false;
    blk_iostatus_reset(blk);
-
    return 0;
 }

@@ -711,7 +553,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
-    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(blk);
 }

@@ -769,44 +610,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
 {
    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
-     * it that way, so we can assume blk->dev, if present, is a DeviceState if
-     * blk->dev_ops is set. Non-device users may use dev_ops without device. */
+     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
+     * is set. */
    assert(!blk->legacy_dev);

    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
-
-    /* Are we currently quiesced? Should we enforce this right now? */
-    if (blk->quiesce_counter && ops->drained_begin) {
-        ops->drained_begin(opaque);
-    }
 }

 /*
 * Notify @blk's attached device model of media change.
- *
- * If @load is true, notify of media load. This action can fail, meaning that
- * the medium cannot be loaded. @errp is set then.
- *
- * If @load is false, notify of media eject. This can never fail.
- *
+ * If @load is true, notify of media load.
+ * Else, notify of media eject.
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load)
 {
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;
-        Error *local_err = NULL;

        assert(!blk->legacy_dev);

        tray_was_open = blk_dev_is_tray_open(blk);
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
-        if (local_err) {
-            assert(load == true);
-            error_propagate(errp, local_err);
-            return;
-        }
+        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
@@ -820,7 +646,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)

 static void blk_root_change_media(BdrvChild *child, bool load)
 {
-    blk_dev_change_media_cb(child->opaque, load, NULL);
+    blk_dev_change_media_cb(child->opaque, load);
 }

 /*
@@ -1054,6 +880,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
 {
    QEMUIOVector qiov;
    struct iovec iov;
+    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
@@ -1070,14 +897,9 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
        .ret    = NOT_DONE,
    };

-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        co_entry(&rwco);
-    } else {
-        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
-        bdrv_coroutine_enter(blk_bs(blk), co);
-        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
-    }
+    co = qemu_coroutine_create(co_entry, &rwco);
+    qemu_coroutine_enter(co);
+    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);

    return rwco.ret;
 }
@@ -1157,6 +979,7 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 static void blk_aio_complete_bh(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
+
    assert(acb->has_returned);
    blk_aio_complete(acb);
 }
@@ -1182,7 +1005,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
-    bdrv_coroutine_enter(blk_bs(blk), co);
+    qemu_coroutine_enter(co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
@@ -1773,14 +1596,13 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset)
 {
    if (!blk_is_available(blk)) {
-        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset, errp);
+    return bdrv_truncate(blk_bs(blk), offset);
 }

 static void blk_pdiscard_entry(void *opaque)
@@ -1946,16 +1768,10 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;

-    if (++blk->quiesce_counter == 1) {
-        if (blk->dev_ops && blk->dev_ops->drained_begin) {
-            blk->dev_ops->drained_begin(blk->dev_opaque);
-        }
-    }
-
    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

-    if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
+    if (blk->public.io_limits_disabled++ == 0) {
        throttle_group_restart_blk(blk);
    }
 }
@@ -1963,14 +1779,7 @@ static void blk_root_drained_begin(BdrvChild *child)
 static void blk_root_drained_end(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;
-    assert(blk->quiesce_counter);

    assert(blk->public.io_limits_disabled);
-    atomic_dec(&blk->public.io_limits_disabled);
-
-    if (--blk->quiesce_counter == 0) {
-        if (blk->dev_ops && blk->dev_ops->drained_end) {
-            blk->dev_ops->drained_end(blk->dev_opaque);
-        }
-    }
+    --blk->public.io_limits_disabled;
 }
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,16 +104,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
    struct bochs_header bochs;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
-    if (ret < 0) {
-        return ret;
-    }
+    bs->read_only = true; /* no write support yet */

    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
@@ -296,7 +287,6 @@ static BlockDriver bdrv_bochs = {
    .instance_size	= sizeof(BDRVBochsState),
    .bdrv_probe		= bochs_probe,
    .bdrv_open		= bochs_open,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = bochs_refresh_limits,
    .bdrv_co_preadv = bochs_co_preadv,
    .bdrv_close		= bochs_close,
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,16 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    uint32_t offsets_size, max_compressed_block_size = 1, i;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
-    }
+    bs->read_only = true;

    /* read header */
    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
@@ -293,7 +284,6 @@ static BlockDriver bdrv_cloop = {
    .instance_size  = sizeof(BDRVCloopState),
    .bdrv_probe     = cloop_probe,
    .bdrv_open      = cloop_open,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = cloop_refresh_limits,
    .bdrv_co_preadv = cloop_co_preadv,
    .bdrv_close     = cloop_close,
--- a/block/commit.c
+++ b/block/commit.c
@@ -13,7 +13,6 @@
 */

 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
@@ -37,7 +36,6 @@ typedef struct CommitBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *active;
-    BlockDriverState *commit_top_bs;
    BlockBackend *top;
    BlockBackend *base;
    BlockdevOnError on_error;
@@ -85,27 +83,12 @@ static void commit_complete(BlockJob *job, void *opaque)
    BlockDriverState *active = s->active;
    BlockDriverState *top = blk_bs(s->top);
    BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
+    BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
    int ret = data->ret;
-    bool remove_commit_top_bs = false;
-
-    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
-    bdrv_ref(top);
-    bdrv_ref(overlay_bs);
-
-    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
-     * the normal backing chain can be restored. */
-    blk_unref(s->base);

    if (!block_job_is_cancelled(&s->common) && ret == 0) {
        /* success */
-        ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
-                                     s->backing_file_str);
-    } else if (overlay_bs) {
-        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
-         * after the failed/cancelled commit job is gone? If we already wrote
-         * something to base, the intermediate images aren't valid any more. */
-        remove_commit_top_bs = true;
+        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
    }

    /* restore base open flags here if appropriate (e.g., change the base back
@@ -119,18 +102,9 @@ static void commit_complete(BlockJob *job, void *opaque)
    }
    g_free(s->backing_file_str);
    blk_unref(s->top);
+    blk_unref(s->base);
    block_job_completed(&s->common, ret);
    g_free(data);
-
-    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
-     * filter driver from the backing chain. Do this as the final step so that
-     * the 'consistent read' permission can be granted.  */
-    if (remove_commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
-    }
-
-    bdrv_unref(overlay_bs);
-    bdrv_unref(top);
 }

 static void coroutine_fn commit_run(void *opaque)
@@ -158,7 +132,7 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len, NULL);
+        ret = blk_truncate(s->base, s->common.len);
        if (ret) {
            goto out;
        }
@@ -234,57 +208,10 @@ static const BlockJobDriver commit_job_driver = {
    .start         = commit_run,
 };

-static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
-}
-
-static int64_t coroutine_fn bdrv_commit_top_get_block_status(
-    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
-    BlockDriverState **file)
-{
-    *pnum = nb_sectors;
-    *file = bs->backing->bs;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
-           (sector_num << BDRV_SECTOR_BITS);
-}
-
-static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
-{
-    bdrv_refresh_filename(bs->backing->bs);
-    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-            bs->backing->bs->filename);
-}
-
-static void bdrv_commit_top_close(BlockDriverState *bs)
-{
-}
-
-static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                       const BdrvChildRole *role,
-                                       uint64_t perm, uint64_t shared,
-                                       uint64_t *nperm, uint64_t *nshared)
-{
-    *nperm = 0;
-    *nshared = BLK_PERM_ALL;
-}
-
-/* Dummy node that provides consistent read to its users without requiring it
- * from its backing file and that allows writes on the backing file chain. */
-static BlockDriver bdrv_commit_top = {
-    .format_name                = "commit_top",
-    .bdrv_co_preadv             = bdrv_commit_top_preadv,
-    .bdrv_co_get_block_status   = bdrv_commit_top_get_block_status,
-    .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
-    .bdrv_close                 = bdrv_commit_top_close,
-    .bdrv_child_perm            = bdrv_commit_top_child_perm,
-};
-
 void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
-                  const char *filter_node_name, Error **errp)
+                  Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
@@ -292,9 +219,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    int orig_base_flags;
    BlockDriverState *iter;
    BlockDriverState *overlay_bs;
-    BlockDriverState *commit_top_bs = NULL;
    Error *local_err = NULL;
-    int ret;

    assert(top != bs);
    if (top == base) {
@@ -309,8 +234,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        return;
    }

-    s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
-                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    s = block_job_create(job_id, &commit_job_driver, bs, speed,
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -331,84 +256,30 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
        if (local_err != NULL) {
            error_propagate(errp, local_err);
-            goto fail;
+            block_job_unref(&s->common);
+            return;
        }
    }

-    /* Insert commit_top block node above top, so we can block consistent read
-     * on the backing chain below it */
-    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
-                                         errp);
-    if (commit_top_bs == NULL) {
-        goto fail;
-    }
-    commit_top_bs->total_sectors = top->total_sectors;
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
-
-    bdrv_set_backing_hd(commit_top_bs, top, &local_err);
-    if (local_err) {
-        bdrv_unref(commit_top_bs);
-        commit_top_bs = NULL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-    bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
-    if (local_err) {
-        bdrv_unref(commit_top_bs);
-        commit_top_bs = NULL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    s->commit_top_bs = commit_top_bs;
-    bdrv_unref(commit_top_bs);

    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
-    for (iter = top; iter != base; iter = backing_bs(iter)) {
-        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
-         * at s->base (if writes are blocked for a node, they are also blocked
-         * for its backing file). The other options would be a second filter
-         * driver above s->base. */
-        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
-                                 errp);
-        if (ret < 0) {
-            goto fail;
-        }
+    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
+        block_job_add_bdrv(&s->common, iter);
    }
-
-    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
    /* overlay_bs must be blocked because it needs to be modified to
-     * update the backing image string. */
-    ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
-                             BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
-    if (ret < 0) {
-        goto fail;
+     * update the backing image string, but if it's the root node then
+     * don't block it again */
+    if (bs != overlay_bs) {
+        block_job_add_bdrv(&s->common, overlay_bs);
    }

-    s->base = blk_new(BLK_PERM_CONSISTENT_READ
-                      | BLK_PERM_WRITE
-                      | BLK_PERM_RESIZE,
-                      BLK_PERM_CONSISTENT_READ
-                      | BLK_PERM_GRAPH_MOD
-                      | BLK_PERM_WRITE_UNCHANGED);
-    ret = blk_insert_bs(s->base, base, errp);
-    if (ret < 0) {
-        goto fail;
-    }
+    s->base = blk_new();
+    blk_insert_bs(s->base, base);

-    /* Required permissions are already taken with block_job_add_bdrv() */
-    s->top = blk_new(0, BLK_PERM_ALL);
-    ret = blk_insert_bs(s->top, top, errp);
-    if (ret < 0) {
-        goto fail;
-    }
+    s->top = blk_new();
+    blk_insert_bs(s->top, top);

    s->active = bs;

@@ -421,19 +292,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,

    trace_commit_start(bs, base, top, s);
    block_job_start(&s->common);
-    return;
-
-fail:
-    if (s->base) {
-        blk_unref(s->base);
-    }
-    if (s->top) {
-        blk_unref(s->top);
-    }
-    if (commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
-    }
-    block_job_early_fail(&s->common);
 }


@@ -443,14 +301,11 @@ fail:
 int bdrv_commit(BlockDriverState *bs)
 {
    BlockBackend *src, *backing;
-    BlockDriverState *backing_file_bs = NULL;
-    BlockDriverState *commit_top_bs = NULL;
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;
-    Error *local_err = NULL;

    if (!drv)
        return -ENOMEDIUM;
@@ -473,34 +328,11 @@ int bdrv_commit(BlockDriverState *bs)
        }
    }

-    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
-    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    src = blk_new();
+    blk_insert_bs(src, bs);

-    ret = blk_insert_bs(src, bs, &local_err);
-    if (ret < 0) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
-
-    /* Insert commit_top block node above backing, so we can write to it */
-    backing_file_bs = backing_bs(bs);
-
-    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
-                                         &local_err);
-    if (commit_top_bs == NULL) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
-
-    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
-    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
-
-    ret = blk_insert_bs(backing, backing_file_bs, &local_err);
-    if (ret < 0) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
+    backing = blk_new();
+    blk_insert_bs(backing, bs->backing->bs);

    length = blk_getlength(src);
    if (length < 0) {
@@ -518,9 +350,8 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length, &local_err);
+        ret = blk_truncate(backing, length);
        if (ret < 0) {
-            error_report_err(local_err);
            goto ro_cleanup;
        }
    }
@@ -573,12 +404,8 @@ int bdrv_commit(BlockDriverState *bs)
 ro_cleanup:
    qemu_vfree(buf);

-    blk_unref(backing);
-    if (backing_file_bs) {
-        bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
-    }
-    bdrv_unref(commit_top_bs);
    blk_unref(src);
+    blk_unref(backing);

    if (ro) {
        /* ignoring error return here */
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -59,8 +59,8 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
                                      size_t offset,
                                      uint8_t *buf,
                                      size_t buflen,
-                                      void *opaque,
-                                      Error **errp)
+                                      Error **errp,
+                                      void *opaque)
 {
    BlockDriverState *bs = opaque;
    ssize_t ret;
@@ -86,8 +86,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
                                       size_t offset,
                                       const uint8_t *buf,
                                       size_t buflen,
-                                       void *opaque,
-                                       Error **errp)
+                                       Error **errp,
+                                       void *opaque)
 {
    struct BlockCryptoCreateData *data = opaque;
    ssize_t ret;
@@ -103,8 +103,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,

 static ssize_t block_crypto_init_func(QCryptoBlock *block,
                                      size_t headerlen,
-                                      void *opaque,
-                                      Error **errp)
+                                      Error **errp,
+                                      void *opaque)
 {
    struct BlockCryptoCreateData *data = opaque;
    int ret;
@@ -300,12 +300,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    QCryptoBlockOpenOptions *open_opts = NULL;
    unsigned int cflags = 0;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -381,8 +375,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
    return ret;
 }

-static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
 {
    BlockCrypto *crypto = bs->opaque;
    size_t payload_offset =
@@ -390,7 +383,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static void block_crypto_close(BlockDriverState *bs)
@@ -629,7 +622,6 @@ BlockDriver bdrv_crypto_luks = {
    .bdrv_probe         = block_crypto_probe_luks,
    .bdrv_open          = block_crypto_open_luks,
    .bdrv_close         = block_crypto_close,
-    .bdrv_child_perm    = bdrv_format_default_perms,
    .bdrv_create        = block_crypto_create_luks,
    .bdrv_truncate      = block_crypto_truncate,
    .create_opts        = &block_crypto_create_opts_luks,
--- a/block/curl.c
+++ b/block/curl.c
@@ -76,12 +76,15 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000

+#define FIND_RET_NONE   0
+#define FIND_RET_OK     1
+#define FIND_RET_WAIT   2
+
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
 #define CURL_BLOCK_OPT_TIMEOUT "timeout"
 #define CURL_BLOCK_OPT_COOKIE    "cookie"
-#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret"
 #define CURL_BLOCK_OPT_USERNAME "username"
 #define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
 #define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
@@ -90,17 +93,14 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    Coroutine *co;
+    BlockAIOCB common;
    QEMUIOVector *qiov;

-    uint64_t offset;
-    uint64_t bytes;
-    int ret;
+    int64_t sector_num;
+    int nb_sectors;

    size_t start;
    size_t end;
-
-    QSIMPLEQ_ENTRY(CURLAIOCB) next;
 } CURLAIOCB;

 typedef struct CURLSocket {
@@ -115,7 +115,7 @@ typedef struct CURLState
    CURL *curl;
    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
-    uint64_t buf_start;
+    size_t buf_start;
    size_t buf_off;
    size_t buf_len;
    char range[128];
@@ -126,7 +126,7 @@ typedef struct CURLState
 typedef struct BDRVCURLState {
    CURLM *multi;
    QEMUTimer timer;
-    uint64_t len;
+    size_t len;
    CURLState states[CURL_NUM_STATES];
    char *url;
    size_t readahead_size;
@@ -135,8 +135,6 @@ typedef struct BDRVCURLState {
    char *cookie;
    bool accept_range;
    AioContext *aio_context;
-    QemuMutex mutex;
-    QSIMPLEQ_HEAD(, CURLAIOCB) free_state_waitq;
    char *username;
    char *password;
    char *proxyusername;
@@ -148,7 +146,6 @@ static void curl_multi_do(void *arg);
 static void curl_multi_read(void *arg);

 #ifdef NEED_CURL_TIMER_CALLBACK
-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -165,7 +162,6 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 }
 #endif

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *userp, void *sp)
 {
@@ -196,26 +192,25 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, NULL, state);
+                               curl_multi_read, NULL, state);
            break;
        case CURL_POLL_OUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, NULL, state);
+                               NULL, curl_multi_do, state);
            break;
        case CURL_POLL_INOUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, NULL, state);
+                               curl_multi_read, curl_multi_do, state);
            break;
        case CURL_POLL_REMOVE:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL, NULL);
+                               NULL, NULL, NULL);
            break;
    }

    return 0;
 }

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -230,7 +225,6 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    return realsize;
 }

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    CURLState *s = ((CURLState*)opaque);
@@ -258,7 +252,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;

        if ((s->buf_off >= acb->end)) {
-            size_t request_length = acb->bytes;
+            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;

            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
@@ -269,11 +263,9 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
                                  request_length - offset);
            }

-            acb->ret = 0;
+            acb->common.cb(acb->common.opaque, 0);
+            qemu_aio_unref(acb);
            s->acb[i] = NULL;
-            qemu_mutex_unlock(&s->s->mutex);
-            aio_co_wake(acb->co);
-            qemu_mutex_lock(&s->s->mutex);
        }
    }

@@ -282,19 +274,18 @@ read_end:
    return size * nmemb;
 }

-/* Called with s->mutex held.  */
-static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
-                          CURLAIOCB *acb)
+static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
+                         CURLAIOCB *acb)
 {
    int i;
-    uint64_t end = start + len;
-    uint64_t clamped_end = MIN(end, s->len);
-    uint64_t clamped_len = clamped_end - start;
+    size_t end = start + len;
+    size_t clamped_end = MIN(end, s->len);
+    size_t clamped_len = clamped_end - start;

    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
-        uint64_t buf_end = (state->buf_start + state->buf_off);
-        uint64_t buf_fend = (state->buf_start + state->buf_len);
+        size_t buf_end = (state->buf_start + state->buf_off);
+        size_t buf_fend = (state->buf_start + state->buf_len);

        if (!state->orig_buf)
            continue;
@@ -313,8 +304,9 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
            if (clamped_len < len) {
                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
            }
-            acb->ret = 0;
-            return true;
+            acb->common.cb(acb->common.opaque, 0);
+
+            return FIND_RET_OK;
        }

        // Wait for unfinished chunks
@@ -332,16 +324,15 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
                    state->acb[j] = acb;
-                    return true;
+                    return FIND_RET_WAIT;
                }
            }
        }
    }

-    return false;
+    return FIND_RET_NONE;
 }

-/* Called with s->mutex held.  */
 static void curl_multi_check_completion(BDRVCURLState *s)
 {
    int msgs_in_queue;
@@ -383,11 +374,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                        continue;
                    }

-                    acb->ret = -EIO;
+                    acb->common.cb(acb->common.opaque, -EPROTO);
+                    qemu_aio_unref(acb);
                    state->acb[i] = NULL;
-                    qemu_mutex_unlock(&s->mutex);
-                    aio_co_wake(acb->co);
-                    qemu_mutex_lock(&s->mutex);
                }
            }

@@ -397,9 +386,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
    }
 }

-/* Called with s->mutex held.  */
-static void curl_multi_do_locked(CURLState *s)
+static void curl_multi_do(void *arg)
 {
+    CURLState *s = (CURLState *)arg;
    CURLSocket *socket, *next_socket;
    int running;
    int r;
@@ -417,23 +406,12 @@ static void curl_multi_do_locked(CURLState *s)
    }
 }

-static void curl_multi_do(void *arg)
-{
-    CURLState *s = (CURLState *)arg;
-
-    qemu_mutex_lock(&s->s->mutex);
-    curl_multi_do_locked(s);
-    qemu_mutex_unlock(&s->s->mutex);
-}
-
 static void curl_multi_read(void *arg)
 {
    CURLState *s = (CURLState *)arg;

-    qemu_mutex_lock(&s->s->mutex);
-    curl_multi_do_locked(s);
+    curl_multi_do(arg);
    curl_multi_check_completion(s->s);
-    qemu_mutex_unlock(&s->s->mutex);
 }

 static void curl_multi_timeout_do(void *arg)
@@ -446,38 +424,40 @@ static void curl_multi_timeout_do(void *arg)
        return;
    }

-    qemu_mutex_lock(&s->mutex);
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);

    curl_multi_check_completion(s);
-    qemu_mutex_unlock(&s->mutex);
 #else
    abort();
 #endif
 }

-/* Called with s->mutex held.  */
-static CURLState *curl_find_state(BDRVCURLState *s)
+static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
    CURLState *state = NULL;
-    int i;
+    int i, j;
+
+    do {
+        for (i=0; i<CURL_NUM_STATES; i++) {
+            for (j=0; j<CURL_NUM_ACB; j++)
+                if (s->states[i].acb[j])
+                    continue;
+            if (s->states[i].in_use)
+                continue;

-    for (i = 0; i < CURL_NUM_STATES; i++) {
-        if (!s->states[i].in_use) {
            state = &s->states[i];
            state->in_use = 1;
            break;
        }
-    }
-    return state;
-}
+        if (!state) {
+            aio_poll(bdrv_get_aio_context(bs), true);
+        }
+    } while(!state);

-static int curl_init_state(BDRVCURLState *s, CURLState *state)
-{
    if (!state->curl) {
        state->curl = curl_easy_init();
        if (!state->curl) {
-            return -EIO;
+            return NULL;
        }
        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
@@ -530,18 +510,11 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
    QLIST_INIT(&state->sockets);
    state->s = s;

-    return 0;
+    return state;
 }

-/* Called with s->mutex held.  */
 static void curl_clean_state(CURLState *s)
 {
-    CURLAIOCB *next;
-    int j;
-    for (j = 0; j < CURL_NUM_ACB; j++) {
-        assert(!s->acb[j]);
-    }
-
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);

@@ -553,20 +526,12 @@ static void curl_clean_state(CURLState *s)
    }

    s->in_use = 0;
-
-    next = QSIMPLEQ_FIRST(&s->s->free_state_waitq);
-    if (next) {
-        QSIMPLEQ_REMOVE_HEAD(&s->s->free_state_waitq, next);
-        qemu_mutex_unlock(&s->s->mutex);
-        aio_co_wake(next->co);
-        qemu_mutex_lock(&s->s->mutex);
-    }
 }

 static void curl_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    qdict_put_str(options, CURL_BLOCK_OPT_URL, filename);
+    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
 }

 static void curl_detach_aio_context(BlockDriverState *bs)
@@ -574,7 +539,6 @@ static void curl_detach_aio_context(BlockDriverState *bs)
    BDRVCURLState *s = bs->opaque;
    int i;

-    qemu_mutex_lock(&s->mutex);
    for (i = 0; i < CURL_NUM_STATES; i++) {
        if (s->states[i].in_use) {
            curl_clean_state(&s->states[i]);
@@ -590,7 +554,6 @@ static void curl_detach_aio_context(BlockDriverState *bs)
        curl_multi_cleanup(s->multi);
        s->multi = NULL;
    }
-    qemu_mutex_unlock(&s->mutex);

    timer_del(&s->timer);
 }
@@ -643,11 +606,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Pass the cookie or list of cookies with each request"
        },
-        {
-            .name = CURL_BLOCK_OPT_COOKIE_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as cookie passed with each request"
-        },
        {
            .name = CURL_BLOCK_OPT_USERNAME,
            .type = QEMU_OPT_STRING,
@@ -682,10 +640,8 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    const char *file;
    const char *cookie;
-    const char *cookie_secret;
    double d;
    const char *secretid;
-    const char *protocol_delimiter;

    static int inited = 0;

@@ -694,7 +650,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        return -EROFS;
    }

-    qemu_mutex_init(&s->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -720,22 +675,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);

    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET);
-
-    if (cookie && cookie_secret) {
-        error_setg(errp,
-                   "curl driver cannot handle both cookie and cookie secret");
-        goto out_noclean;
-    }
-
-    if (cookie_secret) {
-        s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp);
-        if (!s->cookie) {
-            goto out_noclean;
-        }
-    } else {
-        s->cookie = g_strdup(cookie);
-    }
+    s->cookie = g_strdup(cookie);

    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
    if (file == NULL) {
@@ -743,15 +683,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

-    if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
-        !strstart(protocol_delimiter, "://", NULL))
-    {
-        error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
-                   "start with '%s://')", bs->drv->protocol_name, file,
-                   bs->drv->protocol_name);
-        goto out_noclean;
-    }
-
    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);

@@ -778,22 +709,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }

    DPRINTF("CURL: Opening %s\n", file);
-    QSIMPLEQ_INIT(&s->free_state_waitq);
    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
-    qemu_mutex_lock(&s->mutex);
-    state = curl_find_state(s);
-    qemu_mutex_unlock(&s->mutex);
-    if (!state) {
+    state = curl_init_state(bs, s);
+    if (!state)
        goto out_noclean;
-    }

    // Get file size

-    if (curl_init_state(s, state) < 0) {
-        goto out;
-    }
-
    s->accept_range = false;
    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
@@ -821,7 +744,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }
 #endif

-    s->len = d;
+    s->len = (size_t)d;

    if ((!strncasecmp(s->url, "http://", strlen("http://"))
        || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -830,11 +753,9 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
                "Server does not support 'range' (byte ranges).");
        goto out;
    }
-    DPRINTF("CURL: Size = %" PRIu64 "\n", s->len);
+    DPRINTF("CURL: Size = %zd\n", s->len);

-    qemu_mutex_lock(&s->mutex);
    curl_clean_state(state);
-    qemu_mutex_unlock(&s->mutex);
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

@@ -848,51 +769,50 @@ out:
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
-    qemu_mutex_destroy(&s->mutex);
    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
    return -EINVAL;
 }

-static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
+static const AIOCBInfo curl_aiocb_info = {
+    .aiocb_size         = sizeof(CURLAIOCB),
+};
+
+
+static void curl_readv_bh_cb(void *p)
 {
    CURLState *state;
    int running;

-    BDRVCURLState *s = bs->opaque;
+    CURLAIOCB *acb = p;
+    BDRVCURLState *s = acb->common.bs->opaque;

-    uint64_t start = acb->offset;
-    uint64_t end;
-
-    qemu_mutex_lock(&s->mutex);
+    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
+    size_t end;

    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    if (curl_find_buf(s, start, acb->bytes, acb)) {
-        goto out;
+    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
+        case FIND_RET_OK:
+            qemu_aio_unref(acb);
+            // fall through
+        case FIND_RET_WAIT:
+            return;
+        default:
+            break;
    }

    // No cache found, so let's start a new request
-    for (;;) {
-        state = curl_find_state(s);
-        if (state) {
-            break;
-        }
-        QSIMPLEQ_INSERT_TAIL(&s->free_state_waitq, acb, next);
-        qemu_mutex_unlock(&s->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&s->mutex);
-    }
-
-    if (curl_init_state(s, state) < 0) {
-        curl_clean_state(state);
-        acb->ret = -EIO;
-        goto out;
+    state = curl_init_state(acb->common.bs, s);
+    if (!state) {
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_unref(acb);
+        return;
    }

    acb->start = 0;
-    acb->end = MIN(acb->bytes, s->len - start);
+    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);

    state->buf_off = 0;
    g_free(state->orig_buf);
@@ -902,41 +822,37 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
-        acb->ret = -ENOMEM;
-        goto out;
+        acb->common.cb(acb->common.opaque, -ENOMEM);
+        qemu_aio_unref(acb);
+        return;
    }
    state->acb[0] = acb;

-    snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
-    DPRINTF("CURL (AIO): Reading %" PRIu64 " at %" PRIu64 " (%s)\n",
-            acb->bytes, start, state->range);
+    snprintf(state->range, 127, "%zd-%zd", start, end);
+    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
+            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);

    /* Tell curl it needs to kick things off */
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
-
-out:
-    qemu_mutex_unlock(&s->mutex);
 }

-static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
 {
-    CURLAIOCB acb = {
-        .co = qemu_coroutine_self(),
-        .ret = -EINPROGRESS,
-        .qiov = qiov,
-        .offset = offset,
-        .bytes = bytes
-    };
+    CURLAIOCB *acb;

-    curl_setup_preadv(bs, &acb);
-    while (acb.ret == -EINPROGRESS) {
-        qemu_coroutine_yield();
-    }
-    return acb.ret;
+    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);
+
+    acb->qiov = qiov;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
+    return &acb->common;
 }

 static void curl_close(BlockDriverState *bs)
@@ -945,7 +861,6 @@ static void curl_close(BlockDriverState *bs)

    DPRINTF("CURL: Close\n");
    curl_detach_aio_context(bs);
-    qemu_mutex_destroy(&s->mutex);

    g_free(s->cookie);
    g_free(s->url);
@@ -967,7 +882,7 @@ static BlockDriver bdrv_http = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -983,7 +898,7 @@ static BlockDriver bdrv_https = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -999,7 +914,7 @@ static BlockDriver bdrv_ftp = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -1015,7 +930,7 @@ static BlockDriver bdrv_ftps = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -37,7 +37,6 @@
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
 struct BdrvDirtyBitmap {
-    QemuMutex *mutex;
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    HBitmap *meta;              /* Meta dirty bitmap */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
@@ -53,27 +52,6 @@ struct BdrvDirtyBitmapIter {
    BdrvDirtyBitmap *bitmap;
 };

-static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
-{
-    qemu_mutex_lock(&bs->dirty_bitmap_mutex);
-}
-
-static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
-{
-    qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
-}
-
-void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
-{
-    qemu_mutex_lock(bitmap->mutex);
-}
-
-void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
-{
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-/* Called with BQL or dirty_bitmap lock taken.  */
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
    BdrvDirtyBitmap *bm;
@@ -87,7 +65,6 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
    return NULL;
 }

-/* Called with BQL taken.  */
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -95,7 +72,6 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
    bitmap->name = NULL;
 }

-/* Called with BQL taken.  */
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
                                          const char *name,
@@ -120,14 +96,11 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->mutex = &bs->dirty_bitmap_mutex;
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
    bitmap->size = bitmap_size;
    bitmap->name = g_strdup(name);
    bitmap->disabled = false;
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    bdrv_dirty_bitmaps_unlock(bs);
    return bitmap;
 }

@@ -146,24 +119,20 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                                   int chunk_size)
 {
    assert(!bitmap->meta);
-    qemu_mutex_lock(bitmap->mutex);
    bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
                                       chunk_size * BITS_PER_BYTE);
-    qemu_mutex_unlock(bitmap->mutex);
 }

 void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(bitmap->meta);
-    qemu_mutex_lock(bitmap->mutex);
    hbitmap_free_meta(bitmap->bitmap);
    bitmap->meta = NULL;
-    qemu_mutex_unlock(bitmap->mutex);
 }

-int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
-                                      BdrvDirtyBitmap *bitmap, int64_t sector,
-                                      int nb_sectors)
+int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
+                               BdrvDirtyBitmap *bitmap, int64_t sector,
+                               int nb_sectors)
 {
    uint64_t i;
    int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -178,26 +147,11 @@ int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
    return false;
 }

-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
-                               BdrvDirtyBitmap *bitmap, int64_t sector,
-                               int nb_sectors)
-{
-    bool dirty;
-
-    qemu_mutex_lock(bitmap->mutex);
-    dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
-    qemu_mutex_unlock(bitmap->mutex);
-
-    return dirty;
-}
-
 void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                  BdrvDirtyBitmap *bitmap, int64_t sector,
                                  int nb_sectors)
 {
-    qemu_mutex_lock(bitmap->mutex);
    hbitmap_reset(bitmap->meta, sector, nb_sectors);
-    qemu_mutex_unlock(bitmap->mutex);
 }

 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -210,19 +164,16 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
    return bitmap->name;
 }

-/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
    return bitmap->successor;
 }

-/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
    return !(bitmap->disabled || bitmap->successor);
 }

-/* Called with BQL taken.  */
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 {
    if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -237,7 +188,6 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 /**
 * Create a successor bitmap destined to replace this bitmap after an operation.
 * Requires that the bitmap is not frozen and has no successor.
- * Called with BQL taken.
 */
 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                       BdrvDirtyBitmap *bitmap, Error **errp)
@@ -270,7 +220,6 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
 /**
 * For a bitmap with a successor, yield our name to the successor,
 * delete the old bitmap, and return a handle to the new bitmap.
- * Called with BQL taken.
 */
 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                            BdrvDirtyBitmap *bitmap,
@@ -298,7 +247,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 * In cases of failure where we can no longer safely delete the parent,
 * we may wish to re-join the parent and child/successor.
 * The merged parent will be un-frozen, but not explicitly re-enabled.
- * Called with BQL taken.
 */
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *parent,
@@ -323,30 +271,25 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,

 /**
 * Truncates _all_ bitmaps attached to a BDS.
- * Called with BQL taken.
 */
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 {
    BdrvDirtyBitmap *bitmap;
    uint64_t size = bdrv_nb_sectors(bs);

-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        assert(!bdrv_dirty_bitmap_frozen(bitmap));
        assert(!bitmap->active_iterators);
        hbitmap_truncate(bitmap->bitmap, size);
        bitmap->size = size;
    }
-    bdrv_dirty_bitmaps_unlock(bs);
 }

-/* Called with BQL taken.  */
 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
                                                  BdrvDirtyBitmap *bitmap,
                                                  bool only_named)
 {
    BdrvDirtyBitmap *bm, *next;
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
            assert(!bm->active_iterators);
@@ -358,19 +301,15 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
            g_free(bm);

            if (bitmap) {
-                goto out;
+                return;
            }
        }
    }
    if (bitmap) {
        abort();
    }
-
-out:
-    bdrv_dirty_bitmaps_unlock(bs);
 }

-/* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
@@ -379,21 +318,18 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 /**
 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
 * There must not be any frozen bitmaps attached.
- * Called with BQL taken.
 */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
 }

-/* Called with BQL taken.  */
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
 }

-/* Called with BQL taken.  */
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -406,7 +342,6 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
    BlockDirtyInfoList *list = NULL;
    BlockDirtyInfoList **plist = &list;

-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
@@ -419,14 +354,12 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
        *plist = entry;
        plist = &entry->next;
    }
-    bdrv_dirty_bitmaps_unlock(bs);

    return list;
 }

-/* Called within bdrv_dirty_bitmap_lock..unlock */
-int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                          int64_t sector)
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector)
 {
    if (bitmap) {
        return hbitmap_get(bitmap->bitmap, sector);
@@ -499,42 +432,23 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
    return hbitmap_iter_next(&iter->hbi);
 }

-/* Called within bdrv_dirty_bitmap_lock..unlock */
-void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-                                  int64_t cur_sector, int64_t nr_sectors)
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }

-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int64_t nr_sectors)
-{
-    bdrv_dirty_bitmap_lock(bitmap);
-    bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
-    bdrv_dirty_bitmap_unlock(bitmap);
-}
-
-/* Called within bdrv_dirty_bitmap_lock..unlock */
-void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-                                    int64_t cur_sector, int64_t nr_sectors)
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }

-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int64_t nr_sectors)
-{
-    bdrv_dirty_bitmap_lock(bitmap);
-    bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
-    bdrv_dirty_bitmap_unlock(bitmap);
-}
-
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    bdrv_dirty_bitmap_lock(bitmap);
    if (!out) {
        hbitmap_reset_all(bitmap->bitmap);
    } else {
@@ -543,7 +457,6 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
                                       hbitmap_granularity(backup));
        *out = backup;
    }
-    bdrv_dirty_bitmap_unlock(bitmap);
 }

 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
@@ -595,19 +508,12 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int64_t nr_sectors)
 {
    BdrvDirtyBitmap *bitmap;
-
-    if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
-        return;
-    }
-
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
            continue;
        }
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
-    bdrv_dirty_bitmaps_unlock(bs);
 }

 /**
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -413,18 +413,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    int64_t offset;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
-    }
-
    block_module_load_one("dmg-bz2");
+    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -701,7 +691,6 @@ static BlockDriver bdrv_dmg = {
    .bdrv_probe     = dmg_probe,
    .bdrv_open      = dmg_open,
    .bdrv_refresh_limits = dmg_refresh_limits,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_co_preadv = dmg_co_preadv,
    .bdrv_close     = dmg_close,
 };
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -12,7 +12,6 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
-#include "qapi/util.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
@@ -152,7 +151,7 @@ static QemuOptsList runtime_type_opts = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "inet|unix",
+            .help = "tcp|unix",
        },
        { /* end of list */ }
    },
@@ -171,14 +170,14 @@ static QemuOptsList runtime_unix_opts = {
    },
 };

-static QemuOptsList runtime_inet_opts = {
-    .name = "gluster_inet",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
+static QemuOptsList runtime_tcp_opts = {
+    .name = "gluster_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
    .desc = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "inet|unix",
+            .help = "tcp|unix",
        },
        {
            .name = GLUSTER_OPT_HOST,
@@ -321,7 +320,7 @@ static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
                                  const char *filename)
 {
-    SocketAddress *gsconf;
+    GlusterServer *gsconf;
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
@@ -332,19 +331,19 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        return -EINVAL;
    }

-    gconf->server = g_new0(SocketAddressList, 1);
-    gconf->server->value = gsconf = g_new0(SocketAddress, 1);
+    gconf->server = g_new0(GlusterServerList, 1);
+    gconf->server->value = gsconf = g_new0(GlusterServer, 1);

    /* transport */
    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_UNIX;
+        gsconf->type = GLUSTER_TRANSPORT_UNIX;
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
        error_report("Warning: rdma feature is not supported, falling "
                     "back to tcp");
    } else {
@@ -374,11 +373,11 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        }
        gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
    } else {
-        gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
+        gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
        if (uri->port) {
-            gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
+            gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
        } else {
-            gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+            gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
        }
    }

@@ -396,7 +395,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    struct glfs *glfs;
    int ret;
    int old_errno;
-    SocketAddressList *server;
+    GlusterServerList *server;
    unsigned long long port;

    glfs = glfs_find_preopened(gconf->volume);
@@ -412,27 +411,22 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    glfs_set_preopened(gconf->volume, glfs);

    for (server = gconf->server; server; server = server->next) {
-        switch (server->value->type) {
-        case SOCKET_ADDRESS_TYPE_UNIX:
-            ret = glfs_set_volfile_server(glfs, "unix",
+        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.q_unix.path, 0);
-            break;
-        case SOCKET_ADDRESS_TYPE_INET:
-            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
+        } else {
+            if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
-                           server->value->u.inet.port);
+                           server->value->u.tcp.port);
                errno = EINVAL;
                goto out;
            }
-            ret = glfs_set_volfile_server(glfs, "tcp",
-                                   server->value->u.inet.host,
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.tcp.host,
                                   (int)port);
-            break;
-        case SOCKET_ADDRESS_TYPE_VSOCK:
-        case SOCKET_ADDRESS_TYPE_FD:
-        default:
-            abort();
        }

        if (ret < 0) {
@@ -450,13 +444,13 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        error_setg(errp, "Gluster connection for volume %s, path %s failed"
                         " to connect", gconf->volume, gconf->path);
        for (server = gconf->server; server; server = server->next) {
-            if (server->value->type  == SOCKET_ADDRESS_TYPE_UNIX) {
+            if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
                error_append_hint(errp, "hint: failed on socket %s ",
                                  server->value->u.q_unix.path);
            } else {
                error_append_hint(errp, "hint: failed on host %s and port %s ",
-                                  server->value->u.inet.host,
-                                  server->value->u.inet.port);
+                                  server->value->u.tcp.host,
+                                  server->value->u.tcp.port);
            }
        }

@@ -480,6 +474,23 @@ out:
    return NULL;
 }

+static int qapi_enum_parse(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        return GLUSTER_TRANSPORT__MAX;
+    }
+
+    for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
+        if (!strcmp(opt, GlusterTransport_lookup[i])) {
+            return i;
+        }
+    }
+
+    return i;
+}
+
 /*
 * Convert the json formatted command line into qapi.
 */
@@ -487,13 +498,14 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                                  QDict *options, Error **errp)
 {
    QemuOpts *opts;
-    SocketAddress *gsconf = NULL;
-    SocketAddressList *curr = NULL;
+    GlusterServer *gsconf;
+    GlusterServerList *curr = NULL;
    QDict *backing_options = NULL;
    Error *local_err = NULL;
    char *str = NULL;
    const char *ptr;
-    int i, type, num_servers;
+    size_t num_servers;
+    int i;

    /* create opts info from runtime_json_opts list */
    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -535,32 +547,25 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+        gsconf = g_new0(GlusterServer, 1);
+        gsconf->type = qapi_enum_parse(ptr);
        if (!ptr) {
            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;

        }
-        gsconf = g_new0(SocketAddress, 1);
-        if (!strcmp(ptr, "tcp")) {
-            ptr = "inet";       /* accept legacy "tcp" */
-        }
-        type = qapi_enum_parse(SocketAddressType_lookup, ptr,
-                               SOCKET_ADDRESS_TYPE__MAX, -1, NULL);
-        if (type != SOCKET_ADDRESS_TYPE_INET
-            && type != SOCKET_ADDRESS_TYPE_UNIX) {
-            error_setg(&local_err,
-                       "Parameter '%s' may be 'inet' or 'unix'",
-                       GLUSTER_OPT_TYPE);
+        if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
+            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+                       GLUSTER_OPT_TYPE, "tcp or unix");
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;
        }
-        gsconf->type = type;
        qemu_opts_del(opts);

-        if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) {
-            /* create opts info from runtime_inet_opts list */
-            opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
+        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+            /* create opts info from runtime_tcp_opts list */
+            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
            if (local_err) {
                goto out;
@@ -573,7 +578,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.inet.host = g_strdup(ptr);
+            gsconf->u.tcp.host = g_strdup(ptr);
            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
            if (!ptr) {
                error_setg(&local_err, QERR_MISSING_PARAMETER,
@@ -581,28 +586,28 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.inet.port = g_strdup(ptr);
+            gsconf->u.tcp.port = g_strdup(ptr);

            /* defend for unsupported fields in InetSocketAddress,
             * i.e. @ipv4, @ipv6  and @to
             */
            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
            if (ptr) {
-                gsconf->u.inet.has_to = true;
+                gsconf->u.tcp.has_to = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
            if (ptr) {
-                gsconf->u.inet.has_ipv4 = true;
+                gsconf->u.tcp.has_ipv4 = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
            if (ptr) {
-                gsconf->u.inet.has_ipv6 = true;
+                gsconf->u.tcp.has_ipv6 = true;
            }
-            if (gsconf->u.inet.has_to) {
+            if (gsconf->u.tcp.has_to) {
                error_setg(&local_err, "Parameter 'to' not supported");
                goto out;
            }
-            if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
+            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
                goto out;
            }
@@ -627,18 +632,16 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        if (gconf->server == NULL) {
-            gconf->server = g_new0(SocketAddressList, 1);
+            gconf->server = g_new0(GlusterServerList, 1);
            gconf->server->value = gsconf;
            curr = gconf->server;
        } else {
-            curr->next = g_new0(SocketAddressList, 1);
+            curr->next = g_new0(GlusterServerList, 1);
            curr->next->value = gsconf;
            curr = curr->next;
        }
-        gsconf = NULL;

-        QDECREF(backing_options);
-        backing_options = NULL;
+        qdict_del(backing_options, str);
        g_free(str);
        str = NULL;
    }
@@ -647,10 +650,11 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,

 out:
    error_propagate(errp, local_err);
-    qapi_free_SocketAddress(gsconf);
    qemu_opts_del(opts);
-    g_free(str);
-    QDECREF(backing_options);
+    if (str) {
+        qdict_del(backing_options, str);
+        g_free(str);
+    }
    errno = EINVAL;
    return -errno;
 }
@@ -679,7 +683,7 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
                             "file.volume=testvol,file.path=/path/a.qcow2"
                             "[,file.debug=9]"
                             "[,file.logfile=/path/filename.log],"
-                             "file.server.0.type=inet,"
+                             "file.server.0.type=tcp,"
                             "file.server.0.host=1.2.3.4,"
                             "file.server.0.port=24007,"
                             "file.server.1.transport=unix,"
@@ -694,6 +698,13 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
    return qemu_gluster_glfs_init(gconf, errp);
 }

+static void qemu_gluster_complete_aio(void *opaque)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
+
+    qemu_coroutine_enter(acb->coroutine);
+}
+
 /*
 * AIO callback routine called from GlusterFS thread.
 */
@@ -709,7 +720,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
        acb->ret = -EIO; /* Partial read/write - fail it */
    }

-    aio_co_schedule(acb->aio_context, acb->coroutine);
+    aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
 }

 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
@@ -963,6 +974,29 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
    qemu_coroutine_yield();
    return acb.ret;
 }
+
+static inline bool gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+                                        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline bool gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+                                        int64_t size)
+{
+    return 0;
+}
 #endif

 static int qemu_gluster_create(const char *filename,
@@ -972,10 +1006,9 @@ static int qemu_gluster_create(const char *filename,
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
-    PreallocMode prealloc;
+    int prealloc = 0;
    int64_t total_size = 0;
    char *tmp = NULL;
-    Error *local_err = NULL;

    gconf = g_new0(BlockdevOptionsGluster, 1);
    gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
@@ -1003,12 +1036,13 @@ static int qemu_gluster_create(const char *filename,
                          BDRV_SECTOR_SIZE);

    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    prealloc = qapi_enum_parse(PreallocMode_lookup, tmp,
-                               PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
-                               &local_err);
-    g_free(tmp);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!tmp || !strcmp(tmp, "off")) {
+        prealloc = 0;
+    } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
+        prealloc = 1;
+    } else {
+        error_setg(errp, "Invalid preallocation mode: '%s'"
+                         " or GlusterFS doesn't support zerofill API", tmp);
        ret = -EINVAL;
        goto out;
    }
@@ -1017,48 +1051,21 @@ static int qemu_gluster_create(const char *filename,
                    O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
    if (!fd) {
        ret = -errno;
-        goto out;
-    }
-
-    switch (prealloc) {
-#ifdef CONFIG_GLUSTERFS_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        if (glfs_fallocate(fd, 0, 0, total_size)) {
-            error_setg(errp, "Could not preallocate data for the new file");
-            ret = -errno;
-        }
-        break;
-#endif /* CONFIG_GLUSTERFS_FALLOCATE */
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    case PREALLOC_MODE_FULL:
+    } else {
        if (!glfs_ftruncate(fd, total_size)) {
-            if (glfs_zerofill(fd, 0, total_size)) {
-                error_setg(errp, "Could not zerofill the new file");
+            if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
                ret = -errno;
            }
        } else {
-            error_setg(errp, "Could not resize file");
            ret = -errno;
        }
-        break;
-#endif /* CONFIG_GLUSTERFS_ZEROFILL */
-    case PREALLOC_MODE_OFF:
-        if (glfs_ftruncate(fd, total_size) != 0) {
-            ret = -errno;
-            error_setg(errp, "Could not resize file");
-        }
-        break;
-    default:
-        ret = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_lookup[prealloc]);
-        break;
-    }

-    if (glfs_close(fd) != 0) {
-        ret = -errno;
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
    }
 out:
+    g_free(tmp);
    qapi_free_BlockdevOptionsGluster(gconf);
    glfs_clear_preopened(glfs);
    return ret;
@@ -1095,17 +1102,14 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
    return acb.ret;
 }

-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
 {
    int ret;
    BDRVGlusterState *s = bs->opaque;

    ret = glfs_ftruncate(s->fd, offset);
    if (ret < 0) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Failed to truncate file");
-        return ret;
+        return -errno;
    }

    return 0;
@@ -1249,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
 * If we can't find out, return a negative errno other than -ENXIO.
 *
- * (Shamefully copied from file-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
 */
 static int find_allocation(BlockDriverState *bs, off_t start,
                           off_t *data, off_t *hole)
@@ -1278,14 +1282,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
    if (offs < 0) {
        return -errno;          /* D3 or D4 */
    }
-
-    if (offs < start) {
-        /* This is not a valid return by lseek().  We are safe to just return
-         * -EIO in this case, and we'll treat it like D4. Unfortunately some
-         *  versions of gluster server will return offs < start, so an assert
-         *  here will unnecessarily abort QEMU. */
-        return -EIO;
-    }
+    assert(offs >= start);

    if (offs > start) {
        /* D2: in hole, next data at offs */
@@ -1317,14 +1314,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
    if (offs < 0) {
        return -errno;          /* D1 and (H3 or H4) */
    }
-
-    if (offs < start) {
-        /* This is not a valid return by lseek().  We are safe to just return
-         * -EIO in this case, and we'll treat it like H4. Unfortunately some
-         *  versions of gluster server will return offs < start, so an assert
-         *  here will unnecessarily abort QEMU. */
-        return -EIO;
-    }
+    assert(offs >= start);

    if (offs > start) {
        /*
@@ -1359,7 +1349,7 @@ exit:
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
- * (Based on raw_co_get_block_status() from file-posix.c.)
+ * (Based on raw_co_get_block_status() from raw-posix.c.)
 */
 static int64_t coroutine_fn qemu_gluster_co_get_block_status(
        BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
--- a/block/io.c
+++ b/block/io.c
@@ -26,7 +26,6 @@
 #include "trace.h"
 #include "sysemu/block-backend.h"
 #include "block/blockjob.h"
-#include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
@@ -45,7 +44,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int count, BdrvRequestFlags flags);

-void bdrv_parent_drained_begin(BlockDriverState *bs)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
    BdrvChild *c;

@@ -56,7 +55,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs)
    }
 }

-void bdrv_parent_drained_end(BlockDriverState *bs)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
 {
    BdrvChild *c;

@@ -130,13 +129,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 */
 void bdrv_enable_copy_on_read(BlockDriverState *bs)
 {
-    atomic_inc(&bs->copy_on_read);
+    bs->copy_on_read++;
 }

 void bdrv_disable_copy_on_read(BlockDriverState *bs)
 {
-    int old = atomic_fetch_dec(&bs->copy_on_read);
-    assert(old >= 1);
+    assert(bs->copy_on_read > 0);
+    bs->copy_on_read--;
 }

 /* Check if any requests are in-flight (including throttled requests) */
@@ -159,7 +158,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)

 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
-    BdrvChild *child, *tmp;
+    BdrvChild *child;
    bool waited;

    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
@@ -168,25 +167,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
        bs->drv->bdrv_drain(bs);
    }

-    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-        BlockDriverState *bs = child->bs;
-        bool in_main_loop =
-            qemu_get_current_aio_context() == qemu_get_aio_context();
-        assert(bs->refcnt > 0);
-        if (in_main_loop) {
-            /* In case the recursive bdrv_drain_recurse processes a
-             * block_job_defer_to_main_loop BH and modifies the graph,
-             * let's hold a reference to bs until we are done.
-             *
-             * IOThread doesn't have such a BH, and it is not safe to call
-             * bdrv_unref without BQL, so skip doing it there.
-             */
-            bdrv_ref(bs);
-        }
-        waited |= bdrv_drain_recurse(bs);
-        if (in_main_loop) {
-            bdrv_unref(bs);
-        }
+    QLIST_FOREACH(child, &bs->children, next) {
+        waited |= bdrv_drain_recurse(child->bs);
    }

    return waited;
@@ -207,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
    bdrv_dec_in_flight(bs);
    bdrv_drained_begin(bs);
    data->done = true;
-    aio_co_wake(co);
+    qemu_coroutine_enter(co);
 }

 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -241,18 +223,20 @@ void bdrv_drained_begin(BlockDriverState *bs)
        return;
    }

-    if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
+    if (!bs->quiesce_counter++) {
        aio_disable_external(bdrv_get_aio_context(bs));
        bdrv_parent_drained_begin(bs);
    }

+    bdrv_io_unplugged_begin(bs);
    bdrv_drain_recurse(bs);
+    bdrv_io_unplugged_end(bs);
 }

 void bdrv_drained_end(BlockDriverState *bs)
 {
    assert(bs->quiesce_counter > 0);
-    if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
+    if (--bs->quiesce_counter > 0) {
        return;
    }

@@ -302,15 +286,23 @@ void bdrv_drain_all_begin(void)
    bool waited = true;
    BlockDriverState *bs;
    BdrvNextIterator it;
+    BlockJob *job = NULL;
    GSList *aio_ctxs = NULL, *ctx;

-    block_job_pause_all();
+    while ((job = block_job_next(job))) {
+        AioContext *aio_context = blk_get_aio_context(job->blk);
+
+        aio_context_acquire(aio_context);
+        block_job_pause(job);
+        aio_context_release(aio_context);
+    }

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_parent_drained_begin(bs);
+        bdrv_io_unplugged_begin(bs);
        aio_disable_external(aio_context);
        aio_context_release(aio_context);

@@ -348,17 +340,25 @@ void bdrv_drain_all_end(void)
 {
    BlockDriverState *bs;
    BdrvNextIterator it;
+    BlockJob *job = NULL;

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        aio_enable_external(aio_context);
+        bdrv_io_unplugged_end(bs);
        bdrv_parent_drained_end(bs);
        aio_context_release(aio_context);
    }

-    block_job_resume_all();
+    while ((job = block_job_next(job))) {
+        AioContext *aio_context = blk_get_aio_context(job->blk);
+
+        aio_context_acquire(aio_context);
+        block_job_resume(job);
+        aio_context_release(aio_context);
+    }
 }

 void bdrv_drain_all(void)
@@ -375,13 +375,11 @@ void bdrv_drain_all(void)
 static void tracked_request_end(BdrvTrackedRequest *req)
 {
    if (req->serialising) {
-        atomic_dec(&req->bs->serialising_in_flight);
+        req->bs->serialising_in_flight--;
    }

-    qemu_co_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }

 /**
@@ -406,9 +404,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req,

    qemu_co_queue_init(&req->wait_queue);

-    qemu_co_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
 }

 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
@@ -418,7 +414,7 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
                               - overlap_offset;

    if (!req->serialising) {
-        atomic_inc(&req->bs->serialising_in_flight);
+        req->bs->serialising_in_flight++;
        req->serialising = true;
    }

@@ -505,8 +501,7 @@ static void dummy_bh_cb(void *opaque)

 void bdrv_wakeup(BlockDriverState *bs)
 {
-    /* The barrier (or an atomic op) is in the caller.  */
-    if (atomic_read(&bs->wakeup)) {
+    if (bs->wakeup) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
    }
 }
@@ -524,13 +519,12 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
    bool retry;
    bool waited = false;

-    if (!atomic_read(&bs->serialising_in_flight)) {
+    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
-        qemu_co_mutex_lock(&bs->reqs_lock);
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
@@ -549,7 +543,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
+                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
@@ -557,7 +551,6 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                }
            }
        }
-        qemu_co_mutex_unlock(&bs->reqs_lock);
    } while (retry);

    return waited;
@@ -627,7 +620,7 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
-        bdrv_coroutine_enter(child->bs, co);
+        qemu_coroutine_enter(co);
        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
@@ -824,7 +817,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
-    aio_co_wake(co->coroutine);
+    qemu_coroutine_enter(co->coroutine);
 }

 static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -936,11 +929,9 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
    return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
 }

-static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
 {
-    BlockDriverState *bs = child->bs;
-
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
@@ -956,15 +947,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
    size_t skip_bytes;
    int ret;

-    /* FIXME We cannot require callers to have write permissions when all they
-     * are doing is a read request. If we did things right, write permissions
-     * would be obtained anyway, but internally by the copy-on-read code. As
-     * long as it is implemented here rather than in a separat filter driver,
-     * the copy-on-read code doesn't have its own BdrvChild, however, for which
-     * it could request permissions. Therefore we have to bypass the permission
-     * system for the moment. */
-    // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
-
    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
@@ -1023,11 +1005,10 @@ err:
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
-static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
 {
-    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    uint64_t bytes_remaining = bytes;
@@ -1073,7 +1054,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
        }

        if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
+            ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
            goto out;
        }
    }
@@ -1151,7 +1132,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    bdrv_inc_in_flight(bs);

    /* Don't do copy-on-read if we read data before write operation */
-    if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
+    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

@@ -1181,7 +1162,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
-    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
+    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
@@ -1329,11 +1310,10 @@ fail:
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
-static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
 {
-    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;
@@ -1356,8 +1336,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    assert(child->perm & BLK_PERM_WRITE);
-    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

@@ -1408,10 +1386,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    }
    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

-    atomic_inc(&bs->write_gen);
+    ++bs->write_gen;
    bdrv_set_dirty(bs, start_sector, end_sector - start_sector);

-    stat64_max(&bs->wr_highest_offset, offset + bytes);
+    if (bs->wr_highest_offset < offset + bytes) {
+        bs->wr_highest_offset = offset + bytes;
+    }

    if (ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, end_sector);
@@ -1421,13 +1401,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    return ret;
 }

-static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
+static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
                                                int64_t offset,
                                                unsigned int bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
 {
-    BlockDriverState *bs = child->bs;
    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    struct iovec iov;
@@ -1436,7 +1415,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
    int ret = 0;

    head_padding_bytes = offset & (align - 1);
-    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
+    tail_padding_bytes = align - ((offset + bytes) & (align - 1));


    assert(flags & BDRV_REQ_ZERO_WRITE);
@@ -1455,7 +1434,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1463,7 +1442,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
-        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
                                   align, &local_qiov,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
        if (ret < 0) {
@@ -1477,7 +1456,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        uint64_t aligned_bytes = bytes & ~(align - 1);
-        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
+        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
                                   NULL, flags);
        if (ret < 0) {
            goto fail;
@@ -1493,7 +1472,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(child, req, offset, align,
+        ret = bdrv_aligned_preadv(bs, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1501,7 +1480,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        memset(buf, 0, bytes);
-        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
+        ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
    }
 fail:
@@ -1548,7 +1527,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (!qiov) {
-        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
+        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
        goto out;
    }

@@ -1567,7 +1546,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1609,8 +1588,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
-                                  align, align, &tail_qiov, 0);
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
@@ -1628,7 +1607,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        bytes = ROUND_UP(bytes, align);
    }

-    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

@@ -1776,8 +1755,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
-                                       *pnum, pnum, file);
+        ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+                                    *pnum, pnum, file);
        goto out;
    }

@@ -1889,7 +1868,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
    } else {
        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
                                   &data);
-        bdrv_coroutine_enter(bs, co);
+        qemu_coroutine_enter(co);
        BDRV_POLL_WHILE(bs, !data.done);
    }
    return data.ret;
@@ -2015,7 +1994,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

-        bdrv_coroutine_enter(bs, co);
+        qemu_coroutine_enter(co);
        while (data.ret == -EINPROGRESS) {
            aio_poll(bdrv_get_aio_context(bs), true);
        }
@@ -2105,11 +2084,6 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
-            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
-             * assert that we're not using an I/O thread.  Thread-safe
-             * code should use bdrv_aio_cancel_async exclusively.
-             */
-            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
@@ -2232,7 +2206,7 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw, acb);
-    bdrv_coroutine_enter(child->bs, co);
+    qemu_coroutine_enter(co);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
@@ -2263,12 +2237,41 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    acb->req.error = -EINPROGRESS;

    co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
-    bdrv_coroutine_enter(bs, co);
+    qemu_coroutine_enter(co);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
 }

+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = g_malloc(aiocb_info->aiocb_size);
+    acb->aiocb_info = aiocb_info;
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+    acb->refcnt = 1;
+    return acb;
+}
+
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_free(acb);
+    }
+}
+
 /**************************************************************/
 /* Coroutine block device emulation */

@@ -2287,27 +2290,23 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)

 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
-    int current_gen;
-    int ret = 0;
+    int ret;
+
+    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+        bdrv_is_sg(bs)) {
+        return 0;
+    }

    bdrv_inc_in_flight(bs);

-    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
-        bdrv_is_sg(bs)) {
-        goto early_exit;
-    }
-
-    qemu_co_mutex_lock(&bs->reqs_lock);
-    current_gen = atomic_read(&bs->write_gen);
+    int current_gen = bs->write_gen;

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
-        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
+        qemu_co_queue_wait(&bs->flush_queue);
    }

-    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
@@ -2379,14 +2378,10 @@ out:
    if (ret == 0) {
        bs->flushed_gen = current_gen;
    }
-
-    qemu_co_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);

-early_exit:
    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -2404,7 +2399,7 @@ int bdrv_flush(BlockDriverState *bs)
        bdrv_flush_co_entry(&flush_co);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
-        bdrv_coroutine_enter(bs, co);
+        qemu_coroutine_enter(co);
        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

@@ -2528,7 +2523,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
    }
    ret = 0;
 out:
-    atomic_inc(&bs->write_gen);
+    ++bs->write_gen;
    bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                   req.bytes >> BDRV_SECTOR_BITS);
    tracked_request_end(&req);
@@ -2551,7 +2546,7 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
-        bdrv_coroutine_enter(bs, co);
+        qemu_coroutine_enter(co);
        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
    }

@@ -2655,7 +2650,7 @@ void bdrv_io_plug(BlockDriverState *bs)
        bdrv_io_plug(child->bs);
    }

-    if (atomic_fetch_inc(&bs->io_plugged) == 0) {
+    if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
@@ -2668,7 +2663,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
    BdrvChild *child;

    assert(bs->io_plugged);
-    if (atomic_fetch_dec(&bs->io_plugged) == 1) {
+    if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
@@ -2679,3 +2674,36 @@ void bdrv_io_unplug(BlockDriverState *bs)
        bdrv_io_unplug(child->bs);
    }
 }
+
+void bdrv_io_unplugged_begin(BlockDriverState *bs)
+{
+    BdrvChild *child;
+
+    if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) {
+        BlockDriver *drv = bs->drv;
+        if (drv && drv->bdrv_io_unplug) {
+            drv->bdrv_io_unplug(bs);
+        }
+    }
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_unplugged_begin(child->bs);
+    }
+}
+
+void bdrv_io_unplugged_end(BlockDriverState *bs)
+{
+    BdrvChild *child;
+
+    assert(bs->io_plug_disabled);
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_unplugged_end(child->bs);
+    }
+
+    if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
+        BlockDriver *drv = bs->drv;
+        if (drv && drv->bdrv_io_plug) {
+            drv->bdrv_io_plug(bs);
+        }
+    }
+}
--- a/block/iscsi-opts.c
+++ b/block/iscsi-opts.c
@@ -1,69 +0,0 @@
-/*
- * QEMU Block driver for iSCSI images (static options)
- *
- * Copyright (c) 2017 Peter Lieven <pl@kamp.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/config-file.h"
-
-static QemuOptsList qemu_iscsi_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
-    .desc = {
-        {
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-            .help = "username for CHAP authentication to target",
-        },{
-            .name = "password",
-            .type = QEMU_OPT_STRING,
-            .help = "password for CHAP authentication to target",
-        },{
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret providing password for CHAP "
-                    "authentication to target",
-        },{
-            .name = "header-digest",
-            .type = QEMU_OPT_STRING,
-            .help = "HeaderDigest setting. "
-                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
-        },{
-            .name = "initiator-name",
-            .type = QEMU_OPT_STRING,
-            .help = "Initiator iqn name to use when connecting",
-        },{
-            .name = "timeout",
-            .type = QEMU_OPT_NUMBER,
-            .help = "Request timeout in seconds (default 0 = no timeout)",
-        },
-        { /* end of list */ }
-    },
-};
-
-static void iscsi_block_opts_init(void)
-{
-    qemu_add_opts(&qemu_iscsi_opts);
-}
-
-block_init(iscsi_block_opts_init);
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -58,7 +58,6 @@ typedef struct IscsiLun {
    int events;
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;
-    QemuMutex mutex;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
@@ -103,6 +102,7 @@ typedef struct IscsiTask {

 typedef struct IscsiAIOCB {
    BlockAIOCB common;
+    QEMUIOVector *qiov;
    QEMUBH *bh;
    IscsiLun *iscsilun;
    struct scsi_task *task;
@@ -165,9 +165,8 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
 static void iscsi_co_generic_bh_cb(void *opaque)
 {
    struct IscsiTask *iTask = opaque;
-
    iTask->complete = 1;
-    aio_co_wake(iTask->co);
+    qemu_coroutine_enter(iTask->co);
 }

 static void iscsi_retry_timer_expired(void *opaque)
@@ -175,7 +174,7 @@ static void iscsi_retry_timer_expired(void *opaque)
    struct IscsiTask *iTask = opaque;
    iTask->complete = 1;
    if (iTask->co) {
-        aio_co_wake(iTask->co);
+        qemu_coroutine_enter(iTask->co);
    }
 }

@@ -252,7 +251,6 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
    return ret;
 }

-/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
@@ -353,7 +351,6 @@ static const AIOCBInfo iscsi_aiocb_info = {
 static void iscsi_process_read(void *arg);
 static void iscsi_process_write(void *arg);

-/* Called with QemuMutex held.  */
 static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
@@ -365,7 +362,6 @@ iscsi_set_events(IscsiLun *iscsilun)
                           false,
                           (ev & POLLIN) ? iscsi_process_read : NULL,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
-                           NULL,
                           iscsilun);
        iscsilun->events = ev;
    }
@@ -397,10 +393,8 @@ iscsi_process_read(void *arg)
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

-    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLIN);
    iscsi_set_events(iscsilun);
-    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static void
@@ -409,10 +403,8 @@ iscsi_process_write(void *arg)
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

-    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLOUT);
    iscsi_set_events(iscsilun);
-    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -591,7 +583,6 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    uint64_t lba;
    uint32_t num_sectors;
    bool fua = flags & BDRV_REQ_FUA;
-    int r = 0;

    if (fua) {
        assert(iscsilun->dpofua);
@@ -607,7 +598,6 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -636,7 +626,6 @@ retry:
    }
 #endif
    if (iTask.task == NULL) {
-        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
@@ -645,9 +634,7 @@ retry:
 #endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -662,15 +649,12 @@ retry:

    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
-        r = iTask.err_code;
-        goto out_unlock;
+        return iTask.err_code;
    }

    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);

-out_unlock:
-    qemu_mutex_unlock(&iscsilun->mutex);
-    return r;
+    return 0;
 }


@@ -703,21 +687,18 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
        goto out;
    }

-    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
-        goto out_unlock;
+        goto out;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.do_retry) {
@@ -734,20 +715,20 @@ retry:
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
-        goto out_unlock;
+        goto out;
    }

    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL) {
        ret = -EIO;
-        goto out_unlock;
+        goto out;
    }

    lbasd = &lbas->descriptors[0];

    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
        ret = -EIO;
-        goto out_unlock;
+        goto out;
    }

    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -769,8 +750,6 @@ retry:
    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
-out_unlock:
-    qemu_mutex_unlock(&iscsilun->mutex);
 out:
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
@@ -833,7 +812,6 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -864,7 +842,6 @@ retry:
    }
 #endif
    if (iTask.task == NULL) {
-        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
@@ -872,9 +849,7 @@ retry:
 #endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -886,7 +861,6 @@ retry:
        iTask.complete = 0;
        goto retry;
    }
-    qemu_mutex_unlock(&iscsilun->mutex);

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
@@ -901,19 +875,15 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
    struct IscsiTask iTask;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
-        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -925,7 +895,6 @@ retry:
        iTask.complete = 0;
        goto retry;
    }
-    qemu_mutex_unlock(&iscsilun->mutex);

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
@@ -935,7 +904,6 @@ retry:
 }

 #ifdef __linux__
-/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
                     void *command_data, void *opaque)
@@ -1060,7 +1028,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    acb->task->expxferlen = acb->ioh->dxfer_len;

    data.size = 0;
-    qemu_mutex_lock(&iscsilun->mutex);
    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
        if (acb->ioh->iovec_count == 0) {
            data.data = acb->ioh->dxferp;
@@ -1076,7 +1043,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                 iscsi_aio_ioctl_cb,
                                 (data.size > 0) ? &data : NULL,
                                 acb) != 0) {
-        qemu_mutex_unlock(&iscsilun->mutex);
        scsi_free_scsi_task(acb->task);
        qemu_aio_unref(acb);
        return NULL;
@@ -1096,7 +1062,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    }

    iscsi_set_events(iscsilun);
-    qemu_mutex_unlock(&iscsilun->mutex);

    return &acb->common;
 }
@@ -1121,7 +1086,6 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;
-    int r = 0;

    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
        return -ENOTSUP;
@@ -1136,19 +1100,15 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    list.num = count / iscsilun->block_size;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
-    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                         iscsi_co_generic_cb, &iTask) == NULL) {
-        r = -ENOMEM;
-        goto out_unlock;
+        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -1165,20 +1125,17 @@ retry:
        /* the target might fail with a check condition if it
           is not happy with the alignment of the UNMAP request
           we silently fail in this case */
-        goto out_unlock;
+        return 0;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
-        r = iTask.err_code;
-        goto out_unlock;
+        return iTask.err_code;
    }

    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                               count >> BDRV_SECTOR_BITS);

-out_unlock:
-    qemu_mutex_unlock(&iscsilun->mutex);
-    return r;
+    return 0;
 }

 static int
@@ -1190,7 +1147,6 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    uint64_t lba;
    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;
-    int r = 0;

    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
        return -ENOTSUP;
@@ -1224,7 +1180,6 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
        }
    }

-    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (use_16_for_ws) {
@@ -1239,15 +1194,12 @@ retry:
                                            0, 0, iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
-        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1257,8 +1209,7 @@ retry:
        /* WRITE SAME is not supported by the target */
        iscsilun->has_write_same = false;
        scsi_free_scsi_task(iTask.task);
-        r = -ENOTSUP;
-        goto out_unlock;
+        return -ENOTSUP;
    }

    if (iTask.task != NULL) {
@@ -1274,8 +1225,7 @@ retry:
    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                                   count >> BDRV_SECTOR_BITS);
-        r = iTask.err_code;
-        goto out_unlock;
+        return iTask.err_code;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1286,19 +1236,32 @@ retry:
                                     count >> BDRV_SECTOR_BITS);
    }

-out_unlock:
-    qemu_mutex_unlock(&iscsilun->mutex);
-    return r;
+    return 0;
 }

-static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
+static void parse_chap(struct iscsi_context *iscsi, const char *target,
                       Error **errp)
 {
+    QemuOptsList *list;
+    QemuOpts *opts;
    const char *user = NULL;
    const char *password = NULL;
    const char *secretid;
    char *secret = NULL;

+    list = qemu_find_opts("iscsi");
+    if (!list) {
+        return;
+    }
+
+    opts = qemu_opts_find(list, target);
+    if (opts == NULL) {
+        opts = QTAILQ_FIRST(&list->head);
+        if (!opts) {
+            return;
+        }
+    }
+
    user = qemu_opt_get(opts, "user");
    if (!user) {
        return;
@@ -1329,36 +1292,64 @@ static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
    g_free(secret);
 }

-static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
+static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
                                Error **errp)
 {
+    QemuOptsList *list;
+    QemuOpts *opts;
    const char *digest = NULL;

+    list = qemu_find_opts("iscsi");
+    if (!list) {
+        return;
+    }
+
+    opts = qemu_opts_find(list, target);
+    if (opts == NULL) {
+        opts = QTAILQ_FIRST(&list->head);
+        if (!opts) {
+            return;
+        }
+    }
+
    digest = qemu_opt_get(opts, "header-digest");
    if (!digest) {
-        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
-    } else if (!strcmp(digest, "crc32c")) {
+        return;
+    }
+
+    if (!strcmp(digest, "CRC32C")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
-    } else if (!strcmp(digest, "none")) {
+    } else if (!strcmp(digest, "NONE")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
-    } else if (!strcmp(digest, "crc32c-none")) {
+    } else if (!strcmp(digest, "CRC32C-NONE")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
-    } else if (!strcmp(digest, "none-crc32c")) {
+    } else if (!strcmp(digest, "NONE-CRC32C")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
    } else {
        error_setg(errp, "Invalid header-digest setting : %s", digest);
    }
 }

-static char *get_initiator_name(QemuOpts *opts)
+static char *parse_initiator_name(const char *target)
 {
+    QemuOptsList *list;
+    QemuOpts *opts;
    const char *name;
    char *iscsi_name;
    UuidInfo *uuid_info;

-    name = qemu_opt_get(opts, "initiator-name");
-    if (name) {
-        return g_strdup(name);
+    list = qemu_find_opts("iscsi");
+    if (list) {
+        opts = qemu_opts_find(list, target);
+        if (!opts) {
+            opts = QTAILQ_FIRST(&list->head);
+        }
+        if (opts) {
+            name = qemu_opt_get(opts, "initiator-name");
+            if (name) {
+                return g_strdup(name);
+            }
+        }
    }

    uuid_info = qmp_query_uuid(NULL);
@@ -1373,24 +1364,43 @@ static char *get_initiator_name(QemuOpts *opts)
    return iscsi_name;
 }

+static int parse_timeout(const char *target)
+{
+    QemuOptsList *list;
+    QemuOpts *opts;
+    const char *timeout;
+
+    list = qemu_find_opts("iscsi");
+    if (list) {
+        opts = qemu_opts_find(list, target);
+        if (!opts) {
+            opts = QTAILQ_FIRST(&list->head);
+        }
+        if (opts) {
+            timeout = qemu_opt_get(opts, "timeout");
+            if (timeout) {
+                return atoi(timeout);
+            }
+        }
+    }
+
+    return 0;
+}
+
 static void iscsi_nop_timed_event(void *opaque)
 {
    IscsiLun *iscsilun = opaque;

-    qemu_mutex_lock(&iscsilun->mutex);
    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout. Reconnecting...");
        iscsilun->request_timed_out = true;
    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
-        goto out;
+        return;
    }

    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
    iscsi_set_events(iscsilun);
-
-out:
-    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1463,6 +1473,20 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
    }
 }

+/* TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "iscsi",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the iscsi image",
+        },
+        { /* end of list */ }
+    },
+};
+
 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
                                          int evpd, int pc, void **inq, Error **errp)
 {
@@ -1506,7 +1530,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;

    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL);
    iscsilun->events = 0;

    if (iscsilun->nop_timer) {
@@ -1580,197 +1604,24 @@ out:
    }
 }

-static void iscsi_parse_iscsi_option(const char *target, QDict *options)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *user, *password, *password_secret, *initiator_name,
-               *header_digest, *timeout;
-
-    list = qemu_find_opts("iscsi");
-    if (!list) {
-        return;
-    }
-
-    opts = qemu_opts_find(list, target);
-    if (opts == NULL) {
-        opts = QTAILQ_FIRST(&list->head);
-        if (!opts) {
-            return;
-        }
-    }
-
-    user = qemu_opt_get(opts, "user");
-    if (user) {
-        qdict_set_default_str(options, "user", user);
-    }
-
-    password = qemu_opt_get(opts, "password");
-    if (password) {
-        qdict_set_default_str(options, "password", password);
-    }
-
-    password_secret = qemu_opt_get(opts, "password-secret");
-    if (password_secret) {
-        qdict_set_default_str(options, "password-secret", password_secret);
-    }
-
-    initiator_name = qemu_opt_get(opts, "initiator-name");
-    if (initiator_name) {
-        qdict_set_default_str(options, "initiator-name", initiator_name);
-    }
-
-    header_digest = qemu_opt_get(opts, "header-digest");
-    if (header_digest) {
-        /* -iscsi takes upper case values, but QAPI only supports lower case
-         * enum constant names, so we have to convert here. */
-        char *qapi_value = g_ascii_strdown(header_digest, -1);
-        qdict_set_default_str(options, "header-digest", qapi_value);
-        g_free(qapi_value);
-    }
-
-    timeout = qemu_opt_get(opts, "timeout");
-    if (timeout) {
-        qdict_set_default_str(options, "timeout", timeout);
-    }
-}
-
 /*
 * We support iscsi url's on the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
 */
-static void iscsi_parse_filename(const char *filename, QDict *options,
-                                 Error **errp)
-{
-    struct iscsi_url *iscsi_url;
-    const char *transport_name;
-    char *lun_str;
-
-    iscsi_url = iscsi_parse_full_url(NULL, filename);
-    if (iscsi_url == NULL) {
-        error_setg(errp, "Failed to parse URL : %s", filename);
-        return;
-    }
-
-#if LIBISCSI_API_VERSION >= (20160603)
-    switch (iscsi_url->transport) {
-    case TCP_TRANSPORT:
-        transport_name = "tcp";
-        break;
-    case ISER_TRANSPORT:
-        transport_name = "iser";
-        break;
-    default:
-        error_setg(errp, "Unknown transport type (%d)",
-                   iscsi_url->transport);
-        return;
-    }
-#else
-    transport_name = "tcp";
-#endif
-
-    qdict_set_default_str(options, "transport", transport_name);
-    qdict_set_default_str(options, "portal", iscsi_url->portal);
-    qdict_set_default_str(options, "target", iscsi_url->target);
-
-    lun_str = g_strdup_printf("%d", iscsi_url->lun);
-    qdict_set_default_str(options, "lun", lun_str);
-    g_free(lun_str);
-
-    /* User/password from -iscsi take precedence over those from the URL */
-    iscsi_parse_iscsi_option(iscsi_url->target, options);
-
-    if (iscsi_url->user[0] != '\0') {
-        qdict_set_default_str(options, "user", iscsi_url->user);
-        qdict_set_default_str(options, "password", iscsi_url->passwd);
-    }
-
-    iscsi_destroy_url(iscsi_url);
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "transport",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "portal",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "target",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "password",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "lun",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "initiator-name",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "header-digest",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "timeout",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = NULL;
+    struct iscsi_url *iscsi_url = NULL;
    struct scsi_task *task = NULL;
    struct scsi_inquiry_standard *inq = NULL;
    struct scsi_inquiry_supported_pages *inq_vpd;
    char *initiator_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *transport_name, *portal, *target, *filename;
-#if LIBISCSI_API_VERSION >= (20160603)
-    enum iscsi_transport_type transport;
-#endif
-    int i, ret = 0, timeout = 0, lun;
-
-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
-        iscsi_parse_filename(filename, options, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
+    const char *filename;
+    int i, ret = 0, timeout = 0;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1780,34 +1631,18 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    transport_name = qemu_opt_get(opts, "transport");
-    portal = qemu_opt_get(opts, "portal");
-    target = qemu_opt_get(opts, "target");
-    lun = qemu_opt_get_number(opts, "lun", 0);
+    filename = qemu_opt_get(opts, "filename");

-    if (!transport_name || !portal || !target) {
-        error_setg(errp, "Need all of transport, portal and target options");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (!strcmp(transport_name, "tcp")) {
-#if LIBISCSI_API_VERSION >= (20160603)
-        transport = TCP_TRANSPORT;
-    } else if (!strcmp(transport_name, "iser")) {
-        transport = ISER_TRANSPORT;
-#else
-        /* TCP is what older libiscsi versions always use */
-#endif
-    } else {
-        error_setg(errp, "Unknown transport: %s", transport_name);
+    iscsi_url = iscsi_parse_full_url(iscsi, filename);
+    if (iscsi_url == NULL) {
+        error_setg(errp, "Failed to parse URL : %s", filename);
        ret = -EINVAL;
        goto out;
    }

    memset(iscsilun, 0, sizeof(IscsiLun));

-    initiator_name = get_initiator_name(opts);
+    initiator_name = parse_initiator_name(iscsi_url->target);

    iscsi = iscsi_create_context(initiator_name);
    if (iscsi == NULL) {
@@ -1816,20 +1651,30 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }
 #if LIBISCSI_API_VERSION >= (20160603)
-    if (iscsi_init_transport(iscsi, transport)) {
+    if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
        error_setg(errp, ("Error initializing transport."));
        ret = -EINVAL;
        goto out;
    }
 #endif
-    if (iscsi_set_targetname(iscsi, target)) {
+    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
        error_setg(errp, "iSCSI: Failed to set target name.");
        ret = -EINVAL;
        goto out;
    }

+    if (iscsi_url->user[0] != '\0') {
+        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
+                                              iscsi_url->passwd);
+        if (ret != 0) {
+            error_setg(errp, "Failed to set initiator username and password");
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
    /* check if we got CHAP username/password via the options */
-    apply_chap(iscsi, opts, &local_err);
+    parse_chap(iscsi, iscsi_url->target, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
@@ -1842,8 +1687,10 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

+    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+
    /* check if we got HEADER_DIGEST via the options */
-    apply_header_digest(iscsi, opts, &local_err);
+    parse_header_digest(iscsi, iscsi_url->target, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
@@ -1851,7 +1698,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* timeout handling is broken in libiscsi before 1.15.0 */
-    timeout = qemu_opt_get_number(opts, "timeout", 0);
+    timeout = parse_timeout(iscsi_url->target);
 #if LIBISCSI_API_VERSION >= 20150621
    iscsi_set_timeout(iscsi, timeout);
 #else
@@ -1860,7 +1707,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    }
 #endif

-    if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
+    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
            iscsi_get_error(iscsi));
        ret = -EINVAL;
@@ -1869,7 +1716,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,

    iscsilun->iscsi = iscsi;
    iscsilun->aio_context = bdrv_get_aio_context(bs);
-    iscsilun->lun = lun;
+    iscsilun->lun   = iscsi_url->lun;
    iscsilun->has_write_same = true;

    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1955,7 +1802,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    scsi_free_scsi_task(task);
    task = NULL;

-    qemu_mutex_init(&iscsilun->mutex);
    iscsi_attach_aio_context(bs, iscsilun->aio_context);

    /* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1973,6 +1819,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
 out:
    qemu_opts_del(opts);
    g_free(initiator_name);
+    if (iscsi_url != NULL) {
+        iscsi_destroy_url(iscsi_url);
+    }
    if (task != NULL) {
        scsi_free_scsi_task(task);
    }
@@ -1986,7 +1835,6 @@ out:
        }
        memset(iscsilun, 0, sizeof(IscsiLun));
    }
-exit:
    return ret;
 }

@@ -2002,7 +1850,6 @@ static void iscsi_close(BlockDriverState *bs)
    iscsi_destroy_context(iscsi);
    g_free(iscsilun->zeroblock);
    iscsi_allocmap_free(iscsilun);
-    qemu_mutex_destroy(&iscsilun->mutex);
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

@@ -2079,24 +1926,22 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
    }
 }

-static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
 {
    IscsiLun *iscsilun = bs->opaque;
    Error *local_err = NULL;

    if (iscsilun->type != TYPE_DISK) {
-        error_setg(errp, "Cannot resize non-disk iSCSI devices");
        return -ENOTSUP;
    }

    iscsi_readcapacity_sync(iscsilun, &local_err);
    if (local_err != NULL) {
-        error_propagate(errp, local_err);
+        error_free(local_err);
        return -EIO;
    }

    if (offset > iscsi_getlength(bs)) {
-        error_setg(errp, "Cannot grow iSCSI devices");
        return -EINVAL;
    }

@@ -2114,7 +1959,6 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    BlockDriverState *bs;
    IscsiLun *iscsilun = NULL;
    QDict *bs_options;
-    Error *local_err = NULL;

    bs = bdrv_new();

@@ -2125,13 +1969,8 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
    iscsilun = bs->opaque;

    bs_options = qdict_new();
-    iscsi_parse_filename(filename, bs_options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-    } else {
-        ret = iscsi_open(bs, bs_options, 0, NULL);
-    }
+    qdict_put(bs_options, "filename", qstring_from_str(filename));
+    ret = iscsi_open(bs, bs_options, 0, NULL);
    QDECREF(bs_options);

    if (ret != 0) {
@@ -2191,15 +2030,15 @@ static BlockDriver bdrv_iscsi = {
    .format_name     = "iscsi",
    .protocol_name   = "iscsi",

-    .instance_size          = sizeof(IscsiLun),
-    .bdrv_parse_filename    = iscsi_parse_filename,
-    .bdrv_file_open         = iscsi_open,
-    .bdrv_close             = iscsi_close,
-    .bdrv_create            = iscsi_create,
-    .create_opts            = &iscsi_create_opts,
-    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
-    .bdrv_reopen_commit     = iscsi_reopen_commit,
-    .bdrv_invalidate_cache  = iscsi_invalidate_cache,
+    .instance_size   = sizeof(IscsiLun),
+    .bdrv_needs_filename = true,
+    .bdrv_file_open  = iscsi_open,
+    .bdrv_close      = iscsi_close,
+    .bdrv_create     = iscsi_create,
+    .create_opts     = &iscsi_create_opts,
+    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
+    .bdrv_reopen_commit    = iscsi_reopen_commit,
+    .bdrv_invalidate_cache = iscsi_invalidate_cache,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
@@ -2226,15 +2065,15 @@ static BlockDriver bdrv_iser = {
    .format_name     = "iser",
    .protocol_name   = "iser",

-    .instance_size          = sizeof(IscsiLun),
-    .bdrv_parse_filename    = iscsi_parse_filename,
-    .bdrv_file_open         = iscsi_open,
-    .bdrv_close             = iscsi_close,
-    .bdrv_create            = iscsi_create,
-    .create_opts            = &iscsi_create_opts,
-    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
-    .bdrv_reopen_commit     = iscsi_reopen_commit,
-    .bdrv_invalidate_cache  = iscsi_invalidate_cache,
+    .instance_size   = sizeof(IscsiLun),
+    .bdrv_needs_filename = true,
+    .bdrv_file_open  = iscsi_open,
+    .bdrv_close      = iscsi_close,
+    .bdrv_create     = iscsi_create,
+    .create_opts     = &iscsi_create_opts,
+    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
+    .bdrv_reopen_commit    = iscsi_reopen_commit,
+    .bdrv_invalidate_cache = iscsi_invalidate_cache,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -54,10 +54,10 @@ struct LinuxAioState {
    io_context_t ctx;
    EventNotifier e;

-    /* io queue for submit at batch.  Protected by AioContext lock. */
+    /* io queue for submit at batch */
    LaioQueue io_q;

-    /* I/O completion processing.  Only runs in I/O thread.  */
+    /* I/O completion processing */
    QEMUBH *completion_bh;
    int event_idx;
    int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
         * that!
         */
        if (!qemu_coroutine_entered(laiocb->co)) {
-            aio_co_wake(laiocb->co);
+            qemu_coroutine_enter(laiocb->co);
        }
    } else {
        laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,12 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
    qemu_laio_process_completions(s);
-
-    aio_context_acquire(s->aio_context);
    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
-    aio_context_release(s->aio_context);
 }

 static void qemu_laio_completion_bh(void *opaque)
@@ -258,20 +255,6 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }

-static bool qemu_laio_poll_cb(void *opaque)
-{
-    EventNotifier *e = opaque;
-    LinuxAioState *s = container_of(e, LinuxAioState, e);
-    struct io_event *events;
-
-    if (!io_getevents_peek(s->ctx, &events)) {
-        return false;
-    }
-
-    qemu_laio_process_completions_and_submit(s);
-    return true;
-}
-
 static void laio_cancel(BlockAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -456,9 +439,8 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,

 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL);
    qemu_bh_delete(s->completion_bh);
-    s->aio_context = NULL;
 }

 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
@@ -466,8 +448,7 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb,
-                           qemu_laio_poll_cb);
+                           qemu_laio_completion_cb);
 }

 LinuxAioState *laio_init(void)
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -12,7 +12,6 @@
 */

 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -39,10 +38,7 @@ typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockBackend *target;
-    BlockDriverState *mirror_top_bs;
-    BlockDriverState *source;
    BlockDriverState *base;
-
    /* The name of the graph node to replace */
    char *replaces;
    /* The BDS to replace */
@@ -73,7 +69,6 @@ typedef struct MirrorBlockJob {
    bool waiting_for_io;
    int target_cluster_sectors;
    int max_iov;
-    bool initial_zeroing_ongoing;
 } MirrorBlockJob;

 typedef struct MirrorOp {
@@ -122,10 +117,9 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
        if (s->cow_bitmap) {
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
-        if (!s->initial_zeroing_ongoing) {
-            s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
-        }
+        s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
    }
+
    qemu_iovec_destroy(&op->qiov);
    g_free(op);

@@ -138,8 +132,6 @@ static void mirror_write_complete(void *opaque, int ret)
 {
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
-
-    aio_context_acquire(blk_get_aio_context(s->common.blk));
    if (ret < 0) {
        BlockErrorAction action;

@@ -150,15 +142,12 @@ static void mirror_write_complete(void *opaque, int ret)
        }
    }
    mirror_iteration_done(op, ret);
-    aio_context_release(blk_get_aio_context(s->common.blk));
 }

 static void mirror_read_complete(void *opaque, int ret)
 {
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
-
-    aio_context_acquire(blk_get_aio_context(s->common.blk));
    if (ret < 0) {
        BlockErrorAction action;

@@ -169,11 +158,10 @@ static void mirror_read_complete(void *opaque, int ret)
        }

        mirror_iteration_done(op, ret);
-    } else {
-        blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
-                        0, mirror_write_complete, op);
+        return;
    }
-    aio_context_release(blk_get_aio_context(s->common.blk));
+    blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                    0, mirror_write_complete, op);
 }

 static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -331,7 +319,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,

 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = s->source;
+    BlockDriverState *source = blk_bs(s->common.blk);
    int64_t sector_num, first_chunk;
    uint64_t delay_ns = 0;
    /* At least the first dirty chunk is mirrored in one iteration. */
@@ -342,7 +330,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
                             MAX_IO_SECTORS);

-    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    sector_num = bdrv_dirty_iter_next(s->dbi);
    if (sector_num < 0) {
        bdrv_set_dirty_iter(s->dbi, 0);
@@ -350,7 +337,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(sector_num >= 0);
    }
-    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    first_chunk = sector_num / sectors_per_chunk;
    while (test_bit(first_chunk, s->in_flight_bitmap)) {
@@ -362,13 +348,12 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)

    /* Find the number of consective dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
-    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
        int64_t next_dirty;
        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
        int64_t next_chunk = next_sector / sectors_per_chunk;
        if (next_sector >= end ||
-            !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
+            !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            break;
        }
        if (test_bit(next_chunk, s->in_flight_bitmap)) {
@@ -389,13 +374,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     * calling bdrv_get_block_status_above could yield - if some blocks are
     * marked dirty in this window, we need to know.
     */
-    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
-                                  nb_chunks * sectors_per_chunk);
-    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
-
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+                            nb_chunks * sectors_per_chunk);
    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
    while (nb_chunks > 0 && sector_num < end) {
-        int64_t ret;
+        int ret;
        int io_sectors, io_sectors_acct;
        BlockDriverState *file;
        enum MirrorMethod {
@@ -506,44 +489,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    MirrorExitData *data = opaque;
    AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = s->source;
+    BlockDriverState *src = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
-    BlockDriverState *mirror_top_bs = s->mirror_top_bs;
-    Error *local_err = NULL;
-
-    bdrv_release_dirty_bitmap(src, s->dirty_bitmap);

    /* Make sure that the source BDS doesn't go away before we called
     * block_job_completed(). */
    bdrv_ref(src);
-    bdrv_ref(mirror_top_bs);
-    bdrv_ref(target_bs);
-
-    /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
-     * inserting target_bs at s->to_replace, where we might not be able to get
-     * these permissions.
-     *
-     * Note that blk_unref() alone doesn't necessarily drop permissions because
-     * we might be running nested inside mirror_drain(), which takes an extra
-     * reference, so use an explicit blk_set_perm() first. */
-    blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort);
-    blk_unref(s->target);
-    s->target = NULL;
-
-    /* We don't access the source any more. Dropping any WRITE/RESIZE is
-     * required before it could become a backing file of target_bs. */
-    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
-                            &error_abort);
-    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
-        BlockDriverState *backing = s->is_none_mode ? src : s->base;
-        if (backing_bs(target_bs) != backing) {
-            bdrv_set_backing_hd(target_bs, backing, &local_err);
-            if (local_err) {
-                error_report_err(local_err);
-                data->ret = -EPERM;
-            }
-        }
-    }

    if (s->to_replace) {
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -563,12 +514,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
        /* The mirror job has no requests in flight any more, but we need to
         * drain potential other users of the BDS before changing the graph. */
        bdrv_drained_begin(target_bs);
-        bdrv_replace_node(to_replace, target_bs, &local_err);
+        bdrv_replace_in_backing_chain(to_replace, target_bs);
        bdrv_drained_end(target_bs);
-        if (local_err) {
-            error_report_err(local_err);
-            data->ret = -EPERM;
-        }
+
+        /* We just changed the BDS the job BB refers to */
+        blk_remove_bs(job->blk);
+        blk_insert_bs(job->blk, src);
    }
    if (s->to_replace) {
        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -579,29 +530,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
-    bdrv_unref(target_bs);
-
-    /* Remove the mirror filter driver from the graph. Before this, get rid of
-     * the blockers on the intermediate nodes so that the resulting state is
-     * valid. Also give up permissions on mirror_top_bs->backing, which might
-     * block the removal. */
-    block_job_remove_all_bdrv(job);
-    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
-                            &error_abort);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
-
-    /* We just changed the BDS the job BB refers to (with either or both of the
-     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
-     * the right thing. We don't need any permissions any more now. */
-    blk_remove_bs(job->blk);
-    blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
-    blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
-
+    blk_unref(s->target);
+    s->target = NULL;
    block_job_completed(&s->common, data->ret);
-
    g_free(data);
    bdrv_drained_end(src);
-    bdrv_unref(mirror_top_bs);
    bdrv_unref(src);
 }

@@ -621,7 +554,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
    int64_t sector_num, end;
    BlockDriverState *base = s->base;
-    BlockDriverState *bs = s->source;
+    BlockDriverState *bs = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
    int ret, n;

@@ -633,7 +566,6 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            return 0;
        }

-        s->initial_zeroing_ongoing = true;
        for (sector_num = 0; sector_num < end; ) {
            int nb_sectors = MIN(end - sector_num,
                QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -641,13 +573,11 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            mirror_throttle(s);

            if (block_job_is_cancelled(&s->common)) {
-                s->initial_zeroing_ongoing = false;
                return 0;
            }

            if (s->in_flight >= MAX_IN_FLIGHT) {
-                trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
-                                   s->in_flight);
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
                mirror_wait_for_io(s);
                continue;
            }
@@ -657,7 +587,6 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
        }

        mirror_wait_for_all_io(s);
-        s->initial_zeroing_ongoing = false;
    }

    /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -704,7 +633,7 @@ static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
-    BlockDriverState *bs = s->source;
+    BlockDriverState *bs = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
    bool need_drain = true;
    int64_t length;
@@ -722,28 +651,7 @@ static void coroutine_fn mirror_run(void *opaque)
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
-    }
-
-    /* Active commit must resize the base image if its size differs from the
-     * active layer. */
-    if (s->base == blk_bs(s->target)) {
-        int64_t base_length;
-
-        base_length = blk_getlength(s->target);
-        if (base_length < 0) {
-            ret = base_length;
-            goto immediate_exit;
-        }
-
-        if (s->bdev_length > base_length) {
-            ret = blk_truncate(s->target, s->bdev_length, NULL);
-            if (ret < 0) {
-                goto immediate_exit;
-            }
-        }
-    }
-
-    if (s->bdev_length == 0) {
+    } else if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
@@ -822,7 +730,7 @@ static void coroutine_fn mirror_run(void *opaque)
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
@@ -911,6 +819,7 @@ immediate_exit:
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_dirty_iter_free(s->dbi);
+    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);

    data = g_malloc(sizeof(*data));
    data->ret = ret;
@@ -935,8 +844,9 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void mirror_complete(BlockJob *job, Error **errp)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    BlockDriverState *target;
+    BlockDriverState *src, *target;

+    src = blk_bs(job->blk);
    target = blk_bs(s->target);

    if (!s->synced) {
@@ -968,10 +878,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);

-        /* TODO Translate this into permission system. Current definition of
-         * GRAPH_MOD would require to request it for the parents; they might
-         * not even be BlockDriverStates, however, so a BdrvChild can't address
-         * them. May need redefinition of GRAPH_MOD. */
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -980,6 +886,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
        aio_context_release(replace_aio_context);
    }

+    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+        BlockDriverState *backing = s->is_none_mode ? src : s->base;
+        if (backing_bs(target) != backing) {
+            bdrv_set_backing_hd(target, backing);
+        }
+    }
+
    s->should_complete = true;
    block_job_enter(&s->common);
 }
@@ -1035,85 +948,6 @@ static const BlockJobDriver commit_active_job_driver = {
    .drain                  = mirror_drain,
 };

-static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
-}
-
-static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
-}
-
-static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
-{
-    return bdrv_co_flush(bs->backing->bs);
-}
-
-static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
-    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
-    BlockDriverState **file)
-{
-    *pnum = nb_sectors;
-    *file = bs->backing->bs;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
-           (sector_num << BDRV_SECTOR_BITS);
-}
-
-static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags)
-{
-    return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
-}
-
-static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int count)
-{
-    return bdrv_co_pdiscard(bs->backing->bs, offset, count);
-}
-
-static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
-{
-    bdrv_refresh_filename(bs->backing->bs);
-    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-            bs->backing->bs->filename);
-}
-
-static void bdrv_mirror_top_close(BlockDriverState *bs)
-{
-}
-
-static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                       const BdrvChildRole *role,
-                                       uint64_t perm, uint64_t shared,
-                                       uint64_t *nperm, uint64_t *nshared)
-{
-    /* Must be able to forward guest writes to the real image */
-    *nperm = 0;
-    if (perm & BLK_PERM_WRITE) {
-        *nperm |= BLK_PERM_WRITE;
-    }
-
-    *nshared = BLK_PERM_ALL;
-}
-
-/* Dummy node that provides consistent read to its users without requiring it
- * from its backing file and that allows writes on the backing file chain. */
-static BlockDriver bdrv_mirror_top = {
-    .format_name                = "mirror_top",
-    .bdrv_co_preadv             = bdrv_mirror_top_preadv,
-    .bdrv_co_pwritev            = bdrv_mirror_top_pwritev,
-    .bdrv_co_pwrite_zeroes      = bdrv_mirror_top_pwrite_zeroes,
-    .bdrv_co_pdiscard           = bdrv_mirror_top_pdiscard,
-    .bdrv_co_flush              = bdrv_mirror_top_flush,
-    .bdrv_co_get_block_status   = bdrv_mirror_top_get_block_status,
-    .bdrv_refresh_filename      = bdrv_mirror_top_refresh_filename,
-    .bdrv_close                 = bdrv_mirror_top_close,
-    .bdrv_child_perm            = bdrv_mirror_top_child_perm,
-};
-
 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             int creation_flags, BlockDriverState *target,
                             const char *replaces, int64_t speed,
@@ -1123,18 +957,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             BlockdevOnError on_target_error,
                             bool unmap,
                             BlockCompletionFunc *cb,
-                             void *opaque,
+                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete, const char *filter_node_name,
-                             Error **errp)
+                             bool auto_complete)
 {
    MirrorBlockJob *s;
-    BlockDriverState *mirror_top_bs;
-    bool target_graph_mod;
-    bool target_is_backing;
-    Error *local_err = NULL;
-    int ret;

    if (granularity == 0) {
        granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1151,65 +979,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }

-    /* In the case of active commit, add dummy driver to provide consistent
-     * reads on the top, while disabling it in the intermediate nodes, and make
-     * the backing chain writable. */
-    mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
-                                         BDRV_O_RDWR, errp);
-    if (mirror_top_bs == NULL) {
-        return;
-    }
-    mirror_top_bs->total_sectors = bs->total_sectors;
-    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
-
-    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
-     * it alive until block_job_create() succeeds even if bs has no parent. */
-    bdrv_ref(mirror_top_bs);
-    bdrv_drained_begin(bs);
-    bdrv_append(mirror_top_bs, bs, &local_err);
-    bdrv_drained_end(bs);
-
-    if (local_err) {
-        bdrv_unref(mirror_top_bs);
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    /* Make sure that the source is not resized while the job is running */
-    s = block_job_create(job_id, driver, mirror_top_bs,
-                         BLK_PERM_CONSISTENT_READ,
-                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
-                         creation_flags, cb, opaque, errp);
+    s = block_job_create(job_id, driver, bs, speed, creation_flags,
+                         cb, opaque, errp);
    if (!s) {
-        goto fail;
+        return;
    }
-    /* The block job now has a reference to this node */
-    bdrv_unref(mirror_top_bs);

-    s->source = bs;
-    s->mirror_top_bs = mirror_top_bs;
-
-    /* No resize for the target either; while the mirror is still running, a
-     * consistent read isn't necessarily possible. We could possibly allow
-     * writes and graph modifications, though it would likely defeat the
-     * purpose of a mirror, so leave them blocked for now.
-     *
-     * In the case of active commit, things look a bit different, though,
-     * because the target is an already populated backing file in active use.
-     * We can allow anything except resize there.*/
-    target_is_backing = bdrv_chain_contains(bs, target);
-    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
-    s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
-                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
-                        BLK_PERM_WRITE_UNCHANGED |
-                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
-                                             BLK_PERM_WRITE |
-                                             BLK_PERM_GRAPH_MOD : 0));
-    ret = blk_insert_bs(s->target, target, errp);
-    if (ret < 0) {
-        goto fail;
-    }
+    s->target = blk_new();
+    blk_insert_bs(s->target, target);

    s->replaces = g_strdup(replaces);
    s->on_source_error = on_source_error;
@@ -1226,51 +1003,24 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,

    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
    if (!s->dirty_bitmap) {
-        goto fail;
+        g_free(s->replaces);
+        blk_unref(s->target);
+        block_job_unref(&s->common);
+        return;
    }

-    /* Required permissions are already taken with blk_new() */
-    block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
-                       &error_abort);
-
+    block_job_add_bdrv(&s->common, target);
    /* In commit_active_start() all intermediate nodes disappear, so
     * any jobs in them must be blocked */
-    if (target_is_backing) {
+    if (bdrv_chain_contains(bs, target)) {
        BlockDriverState *iter;
        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
-            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
-             * ourselves at s->base (if writes are blocked for a node, they are
-             * also blocked for its backing file). The other options would be a
-             * second filter driver above s->base (== target). */
-            ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
-                                     errp);
-            if (ret < 0) {
-                goto fail;
-            }
+            block_job_add_bdrv(&s->common, iter);
        }
    }

    trace_mirror_start(bs, s, opaque);
    block_job_start(&s->common);
-    return;
-
-fail:
-    if (s) {
-        /* Make sure this BDS does not go away until we have completed the graph
-         * changes below */
-        bdrv_ref(mirror_top_bs);
-
-        g_free(s->replaces);
-        blk_unref(s->target);
-        block_job_early_fail(&s->common);
-    }
-
-    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
-                            &error_abort);
-    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
-
-    bdrv_unref(mirror_top_bs);
 }

 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1279,7 +1029,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  bool unmap, const char *filter_node_name, Error **errp)
+                  bool unmap, Error **errp)
 {
    bool is_none_mode;
    BlockDriverState *base;
@@ -1292,19 +1042,19 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, NULL, NULL,
-                     &mirror_job_driver, is_none_mode, base, false,
-                     filter_node_name, errp);
+                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
+                     &mirror_job_driver, is_none_mode, base, false);
 }

 void commit_active_start(const char *job_id, BlockDriverState *bs,
                         BlockDriverState *base, int creation_flags,
                         int64_t speed, BlockdevOnError on_error,
-                         const char *filter_node_name,
-                         BlockCompletionFunc *cb, void *opaque,
-                         bool auto_complete, Error **errp)
+                         BlockCompletionFunc *cb, void *opaque, Error **errp,
+                         bool auto_complete)
 {
+    int64_t length, base_length;
    int orig_base_flags;
+    int ret;
    Error *local_err = NULL;

    orig_base_flags = bdrv_get_flags(base);
@@ -1313,11 +1063,35 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
        return;
    }

+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_setg_errno(errp, -length,
+                         "Unable to determine length of %s", bs->filename);
+        goto error_restore_flags;
+    }
+
+    base_length = bdrv_getlength(base);
+    if (base_length < 0) {
+        error_setg_errno(errp, -base_length,
+                         "Unable to determine length of %s", base->filename);
+        goto error_restore_flags;
+    }
+
+    if (length > base_length) {
+        ret = bdrv_truncate(base, length);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                            "Top image %s is larger than base image %s, and "
+                             "resize of base image failed",
+                             bs->filename, base->filename);
+            goto error_restore_flags;
+        }
+    }
+
    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, true, cb, opaque,
-                     &commit_active_job_driver, false, base, auto_complete,
-                     filter_node_name, &local_err);
+                     on_error, on_error, true, cb, opaque, &local_err,
+                     &commit_active_job_driver, false, base, auto_complete);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -28,7 +28,6 @@
 */

 #include "qemu/osdep.h"
-#include "qapi/error.h"
 #include "nbd-client.h"

 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
@@ -40,7 +39,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i]) {
-            aio_co_wake(s->recv_coroutine[i]);
+            qemu_coroutine_enter(s->recv_coroutine[i]);
        }
    }
 }
@@ -57,7 +56,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    qio_channel_shutdown(client->ioc,
                         QIO_CHANNEL_SHUTDOWN_BOTH,
                         NULL);
-    BDRV_POLL_WHILE(bs, client->read_reply_co);
+    nbd_recv_coroutines_enter_all(client);

    nbd_client_detach_aio_context(bs);
    object_unref(OBJECT(client->sioc));
@@ -66,49 +65,54 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    client->ioc = NULL;
 }

-static coroutine_fn void nbd_read_reply_entry(void *opaque)
+static void nbd_reply_ready(void *opaque)
 {
-    NBDClientSession *s = opaque;
+    BlockDriverState *bs = opaque;
+    NBDClientSession *s = nbd_get_client_session(bs);
    uint64_t i;
    int ret;
-    Error *local_err = NULL;

-    for (;;) {
-        assert(s->reply.handle == 0);
-        ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
-        if (ret < 0) {
-            error_report_err(local_err);
-        }
-        if (ret <= 0) {
-            break;
-        }
-
-        /* There's no need for a mutex on the receive side, because the
-         * handler acts as a synchronization point and ensures that only
-         * one coroutine is called until the reply finishes.
-         */
-        i = HANDLE_TO_INDEX(s, s->reply.handle);
-        if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
-            break;
-        }
-
-        /* We're woken up by the recv_coroutine itself.  Note that there
-         * is no race between yielding and reentering read_reply_co.  This
-         * is because:
-         *
-         * - if recv_coroutine[i] runs on the same AioContext, it is only
-         *   entered after we yield
-         *
-         * - if recv_coroutine[i] runs on a different AioContext, reentering
-         *   read_reply_co happens through a bottom half, which can only
-         *   run after we yield.
-         */
-        aio_co_wake(s->recv_coroutine[i]);
-        qemu_coroutine_yield();
+    if (!s->ioc) { /* Already closed */
+        return;
    }

-    nbd_recv_coroutines_enter_all(s);
-    s->read_reply_co = NULL;
+    if (s->reply.handle == 0) {
+        /* No reply already in flight.  Fetch a header.  It is possible
+         * that another thread has done the same thing in parallel, so
+         * the socket is not readable anymore.
+         */
+        ret = nbd_receive_reply(s->ioc, &s->reply);
+        if (ret == -EAGAIN) {
+            return;
+        }
+        if (ret < 0) {
+            s->reply.handle = 0;
+            goto fail;
+        }
+    }
+
+    /* There's no need for a mutex on the receive side, because the
+     * handler acts as a synchronization point and ensures that only
+     * one coroutine is called until the reply finishes.  */
+    i = HANDLE_TO_INDEX(s, s->reply.handle);
+    if (i >= MAX_NBD_REQUESTS) {
+        goto fail;
+    }
+
+    if (s->recv_coroutine[i]) {
+        qemu_coroutine_enter(s->recv_coroutine[i]);
+        return;
+    }
+
+fail:
+    nbd_teardown_connection(bs);
+}
+
+static void nbd_restart_write(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
 }

 static int nbd_co_send_request(BlockDriverState *bs,
@@ -116,13 +120,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
                               QEMUIOVector *qiov)
 {
    NBDClientSession *s = nbd_get_client_session(bs);
+    AioContext *aio_context;
    int rc, ret, i;

    qemu_co_mutex_lock(&s->send_mutex);
-    while (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
-    }
-    s->in_flight++;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i] == NULL) {
@@ -140,12 +141,17 @@ static int nbd_co_send_request(BlockDriverState *bs,
        return -EPIPE;
    }

+    s->send_coroutine = qemu_coroutine_self();
+    aio_context = bdrv_get_aio_context(bs);
+
+    aio_set_fd_handler(aio_context, s->sioc->fd, false,
+                       nbd_reply_ready, nbd_restart_write, bs);
    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
        if (rc >= 0) {
-            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
-                          NULL);
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               false);
            if (ret != request->len) {
                rc = -EIO;
            }
@@ -154,6 +160,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
    } else {
        rc = nbd_send_request(s->ioc, request);
    }
+    aio_set_fd_handler(aio_context, s->sioc->fd, false,
+                       nbd_reply_ready, NULL, bs);
+    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
 }
@@ -165,7 +174,8 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 {
    int ret;

-    /* Wait until we're woken up by nbd_read_reply_entry.  */
+    /* Wait until we're woken up by the read handler.  TODO: perhaps
+     * peek at the next reply and avoid yielding if it's ours?  */
    qemu_coroutine_yield();
    *reply = s->reply;
    if (reply->handle != request->handle ||
@@ -173,8 +183,8 @@ static void nbd_co_receive_reply(NBDClientSession *s,
        reply->error = EIO;
    } else {
        if (qiov && reply->error == 0) {
-            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
-                          NULL);
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               true);
            if (ret != request->len) {
                reply->error = EIO;
            }
@@ -185,23 +195,28 @@ static void nbd_co_receive_reply(NBDClientSession *s,
    }
 }

-static void nbd_coroutine_end(BlockDriverState *bs,
+static void nbd_coroutine_start(NBDClientSession *s,
+                                NBDRequest *request)
+{
+    /* Poor man semaphore.  The free_sema is locked when no other request
+     * can be accepted, and unlocked after receiving one reply.  */
+    if (s->in_flight == MAX_NBD_REQUESTS) {
+        qemu_co_queue_wait(&s->free_sema);
+        assert(s->in_flight < MAX_NBD_REQUESTS);
+    }
+    s->in_flight++;
+
+    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
+}
+
+static void nbd_coroutine_end(NBDClientSession *s,
                              NBDRequest *request)
 {
-    NBDClientSession *s = nbd_get_client_session(bs);
    int i = HANDLE_TO_INDEX(s, request->handle);
-
    s->recv_coroutine[i] = NULL;
-
-    /* Kick the read_reply_co to get the next reply.  */
-    if (s->read_reply_co) {
-        aio_co_wake(s->read_reply_co);
+    if (s->in_flight-- == MAX_NBD_REQUESTS) {
+        qemu_co_queue_next(&s->free_sema);
    }
-
-    qemu_co_mutex_lock(&s->send_mutex);
-    s->in_flight--;
-    qemu_co_queue_next(&s->free_sema);
-    qemu_co_mutex_unlock(&s->send_mutex);
 }

 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
@@ -219,13 +234,14 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    assert(!flags);

+    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, qiov);
    }
-    nbd_coroutine_end(bs, &request);
+    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

@@ -248,13 +264,14 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,

    assert(bytes <= NBD_MAX_BUFFER_SIZE);

+    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, qiov);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(bs, &request);
+    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

@@ -282,13 +299,14 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
        request.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

+    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(bs, &request);
+    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

@@ -306,13 +324,14 @@ int nbd_client_co_flush(BlockDriverState *bs)
    request.from = 0;
    request.len = 0;

+    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(bs, &request);
+    nbd_coroutine_end(client, &request);
    return -reply.error;
 }

@@ -331,29 +350,30 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
        return 0;
    }

+    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(bs, &request);
+    nbd_coroutine_end(client, &request);
    return -reply.error;

 }

 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
+    aio_set_fd_handler(bdrv_get_aio_context(bs),
+                       nbd_get_client_session(bs)->sioc->fd,
+                       false, NULL, NULL, NULL);
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
-    aio_co_schedule(new_context, client->read_reply_co);
+    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
+                       false, nbd_reply_ready, NULL, bs);
 }

 void nbd_client_close(BlockDriverState *bs)
@@ -414,7 +434,7 @@ int nbd_client_init(BlockDriverState *bs,
    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
+
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -25,11 +25,13 @@ typedef struct NBDClientSession {

    CoMutex send_mutex;
    CoQueue free_sema;
-    Coroutine *read_reply_co;
+    Coroutine *send_coroutine;
    int in_flight;

    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    NBDReply reply;
+
+    bool is_unix;
 } NBDClientSession;

 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -79,7 +79,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    p = uri->path ? uri->path : "/";
    p += strspn(p, "/");
    if (p[0]) {
-        qdict_put_str(options, "export", p);
+        qdict_put(options, "export", qstring_from_str(p));
    }

    qp = query_params_parse(uri->query);
@@ -94,8 +94,9 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put_str(options, "server.type", "unix");
-        qdict_put_str(options, "server.path", qp->p[0].value);
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path",
+                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
        char *port_str;
@@ -114,11 +115,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }

-        qdict_put_str(options, "server.type", "inet");
-        qdict_put(options, "server.host", host);
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put_str(options, "server.port", port_str);
+        qdict_put(options, "server.data.port", qstring_from_str(port_str));
        g_free(port_str);
    }

@@ -180,7 +181,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        export_name[0] = 0; /* truncate 'file' */
        export_name += strlen(EN_OPTSTR);

-        qdict_put_str(options, "export", export_name);
+        qdict_put(options, "export", qstring_from_str(export_name));
    }

    /* extract the host_spec - fail if it's not nbd:... */
@@ -195,19 +196,19 @@ static void nbd_parse_filename(const char *filename, QDict *options,

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put_str(options, "server.type", "unix");
-        qdict_put_str(options, "server.path", unixpath);
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
    } else {
-        InetSocketAddress *addr = g_new(InetSocketAddress, 1);
+        InetSocketAddress *addr = NULL;

-        if (inet_parse(addr, host_spec, errp)) {
-            goto out_inet;
+        addr = inet_parse(host_spec, errp);
+        if (!addr) {
+            goto out;
        }

-        qdict_put_str(options, "server.type", "inet");
-        qdict_put_str(options, "server.host", addr->host);
-        qdict_put_str(options, "server.port", addr->port);
-    out_inet:
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }

@@ -246,20 +247,19 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
            return false;
        }

-        qdict_put_str(output_options, "server.type", "unix");
-        qdict_put_str(output_options, "server.path", path);
+        qdict_put(output_options, "server.type", qstring_from_str("unix"));
+        qdict_put(output_options, "server.data.path", qstring_from_str(path));
    } else if (host) {
-        qdict_put_str(output_options, "server.type", "inet");
-        qdict_put_str(output_options, "server.host", host);
-        qdict_put_str(output_options, "server.port",
-                      port ?: stringify(NBD_DEFAULT_PORT));
+        qdict_put(output_options, "server.type", qstring_from_str("inet"));
+        qdict_put(output_options, "server.data.host", qstring_from_str(host));
+        qdict_put(output_options, "server.data.port",
+                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
    }

    return true;
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
-                                 Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
 {
    SocketAddress *saddr = NULL;
    QDict *addr = NULL;
@@ -278,21 +278,15 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
        goto done;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
+    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
    }

+    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
+
 done:
    QDECREF(addr);
    qobject_decref(crumpled_addr);
@@ -319,7 +313,6 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
                                    saddr,
                                    &local_err);
    if (local_err) {
-        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -430,12 +423,11 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }

-        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
-        if (s->saddr->type != SOCKET_ADDRESS_TYPE_INET) {
+        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = s->saddr->u.inet.host;
+        hostname = s->saddr->u.inet.data->host;
    }

    /* establish TCP connection, return error if it fails
@@ -515,17 +507,17 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;

-    if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) {
-        const InetSocketAddress *inet = &s->saddr->u.inet;
+    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
+        const InetSocketAddress *inet = s->saddr->u.inet.data;
        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
            host = inet->host;
            port = inet->port;
        }
-    } else if (s->saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
-        path = s->saddr->u.q_unix.path;
-    } /* else can't represent as pseudo-filename */
+    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
+        path = s->saddr->u.q_unix.data->path;
+    }

-    qdict_put_str(opts, "driver", "nbd");
+    qdict_put(opts, "driver", qstring_from_str("nbd"));

    if (path && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -545,13 +537,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
+    assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
+
    qdict_put_obj(opts, "server", saddr_qdict);

    if (s->export) {
-        qdict_put_str(opts, "export", s->export);
+        qdict_put(opts, "export", qstring_from_str(s->export));
    }
    if (s->tlscredsid) {
-        qdict_put_str(opts, "tls-creds", s->tlscredsid);
+        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
    }

    qdict_flatten(opts);
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -54,7 +54,6 @@ typedef struct NFSClient {
    int events;
    bool has_zero_init;
    AioContext *aio_context;
-    QemuMutex mutex;
    blkcnt_t st_blocks;
    bool cache_used;
    NFSServer *server;
@@ -104,9 +103,9 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        goto out;
    }

-    qdict_put_str(options, "server.host", uri->server);
-    qdict_put_str(options, "server.type", "inet");
-    qdict_put_str(options, "path", uri->path);
+    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put(options, "server.type", qstring_from_str("inet"));
+    qdict_put(options, "path", qstring_from_str(uri->path));

    for (i = 0; i < qp->n; i++) {
        unsigned long long val;
@@ -121,17 +120,23 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
            goto out;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
-            qdict_put_str(options, "user", qp->p[i].value);
+            qdict_put(options, "user",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "gid")) {
-            qdict_put_str(options, "group", qp->p[i].value);
+            qdict_put(options, "group",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
+            qdict_put(options, "tcp-syn-count",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            qdict_put_str(options, "readahead-size", qp->p[i].value);
+            qdict_put(options, "readahead-size",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
-            qdict_put_str(options, "page-cache-size", qp->p[i].value);
+            qdict_put(options, "page-cache-size",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "debug")) {
-            qdict_put_str(options, "debug", qp->p[i].value);
+            qdict_put(options, "debug",
+                      qstring_from_str(qp->p[i].value));
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
@@ -186,7 +191,6 @@ static void nfs_parse_filename(const char *filename, QDict *options,
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);

-/* Called with QemuMutex held.  */
 static void nfs_set_events(NFSClient *client)
 {
    int ev = nfs_which_events(client->context);
@@ -194,8 +198,7 @@ static void nfs_set_events(NFSClient *client)
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
                           false,
                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           NULL, client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL, client);

    }
    client->events = ev;
@@ -204,21 +207,15 @@ static void nfs_set_events(NFSClient *client)
 static void nfs_process_read(void *arg)
 {
    NFSClient *client = arg;
-
-    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLIN);
    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_process_write(void *arg)
 {
    NFSClient *client = arg;
-
-    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLOUT);
    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -233,12 +230,10 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 static void nfs_co_generic_bh_cb(void *opaque)
 {
    NFSRPC *task = opaque;
-
    task->complete = 1;
-    aio_co_wake(task->co);
+    qemu_coroutine_enter(task->co);
 }

-/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                  void *private_data)
@@ -260,9 +255,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                            nfs_co_generic_bh_cb, task);
 }

-static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                      uint64_t bytes, QEMUIOVector *iov,
-                                      int flags)
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
@@ -270,15 +265,14 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
    nfs_co_init_task(bs, &task);
    task.iov = iov;

-    qemu_mutex_lock(&client->mutex);
    if (nfs_pread_async(client->context, client->fh,
-                        offset, bytes, nfs_co_generic_cb, &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
+                        sector_num * BDRV_SECTOR_SIZE,
+                        nb_sectors * BDRV_SECTOR_SIZE,
+                        nfs_co_generic_cb, &task) != 0) {
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -295,50 +289,39 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
    return 0;
 }

-static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *iov,
-                                       int flags)
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
    char *buf = NULL;
-    bool my_buffer = false;

    nfs_co_init_task(bs, &task);

-    if (iov->niov != 1) {
-        buf = g_try_malloc(bytes);
-        if (bytes && buf == NULL) {
-            return -ENOMEM;
-        }
-        qemu_iovec_to_buf(iov, 0, buf, bytes);
-        my_buffer = true;
-    } else {
-        buf = iov->iov[0].iov_base;
+    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    if (nb_sectors && buf == NULL) {
+        return -ENOMEM;
    }

-    qemu_mutex_lock(&client->mutex);
+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+
    if (nfs_pwrite_async(client->context, client->fh,
-                         offset, bytes, buf,
-                         nfs_co_generic_cb, &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
-        if (my_buffer) {
-            g_free(buf);
-        }
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         buf, nfs_co_generic_cb, &task) != 0) {
+        g_free(buf);
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }

-    if (my_buffer) {
-        g_free(buf);
-    }
+    g_free(buf);

-    if (task.ret != bytes) {
+    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
        return task.ret < 0 ? task.ret : -EIO;
    }

@@ -352,15 +335,12 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)

    nfs_co_init_task(bs, &task);

-    qemu_mutex_lock(&client->mutex);
    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -416,7 +396,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
    NFSClient *client = bs->opaque;

    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL);
    client->events = 0;
 }

@@ -436,7 +416,7 @@ static void nfs_client_close(NFSClient *client)
            nfs_close(client->context, client->fh);
        }
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL, NULL);
+                           false, NULL, NULL, NULL);
        nfs_destroy_context(client->context);
    }
    memset(client, 0, sizeof(NFSClient));
@@ -446,7 +426,6 @@ static void nfs_file_close(BlockDriverState *bs)
 {
    NFSClient *client = bs->opaque;
    nfs_client_close(client);
-    qemu_mutex_destroy(&client->mutex);
 }

 static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -468,14 +447,7 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
        goto out;
    }

-    /*
-     * Caution: this works only because all scalar members of
-     * NFSServer are QString in @crumpled_addr.  The visitor expects
-     * @crumpled_addr to be typed according to the QAPI schema.  It
-     * is when @options come from -blockdev or blockdev_add.  But when
-     * they come from -drive, they're all QString.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
+    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_NFSServer(iv, NULL, &server, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
@@ -491,7 +463,7 @@ out:


 static int64_t nfs_client_open(NFSClient *client, QDict *options,
-                               int flags, int open_flags, Error **errp)
+                               int flags, Error **errp, int open_flags)
 {
    int ret = -EINVAL;
    QemuOpts *opts = NULL;
@@ -657,11 +629,10 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,

    ret = nfs_client_open(client, options,
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          bs->open_flags, errp);
+                          errp, bs->open_flags);
    if (ret < 0) {
        return ret;
    }
-    qemu_mutex_init(&client->mutex);
    bs->total_sectors = ret;
    ret = 0;
    return ret;
@@ -699,7 +670,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
        goto out;
    }

-    ret = nfs_client_open(client, options, O_CREAT, 0, errp);
+    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
    if (ret < 0) {
        goto out;
    }
@@ -717,7 +688,6 @@ static int nfs_has_zero_init(BlockDriverState *bs)
    return client->has_zero_init;
 }

-/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
                               void *private_data)
@@ -730,9 +700,7 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
    if (task->ret < 0) {
        error_report("NFS Error: %s", nfs_get_error(nfs));
    }
-
-    /* Set task->complete before reading bs->wakeup.  */
-    atomic_mb_set(&task->complete, 1);
+    task->complete = 1;
    bdrv_wakeup(task->bs);
 }

@@ -760,18 +728,10 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
 {
    NFSClient *client = bs->opaque;
-    int ret;
-
-    ret = nfs_ftruncate(client->context, client->fh, offset);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to truncate file");
-        return ret;
-    }
-
-    return 0;
+    return nfs_ftruncate(client->context, client->fh, offset);
 }

 /* Note that this will not re-establish a connection with the NFS server
@@ -815,7 +775,7 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    QObject *server_qdict;
    Visitor *ov;

-    qdict_put_str(opts, "driver", "nfs");
+    qdict_put(opts, "driver", qstring_from_str("nfs"));

    if (client->uid && !client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -837,26 +797,31 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    ov = qobject_output_visitor_new(&server_qdict);
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
+    assert(qobject_type(server_qdict) == QTYPE_QDICT);
+
    qdict_put_obj(opts, "server", server_qdict);
-    qdict_put_str(opts, "path", client->path);
+    qdict_put(opts, "path", qstring_from_str(client->path));

    if (client->uid) {
-        qdict_put_int(opts, "user", client->uid);
+        qdict_put(opts, "user", qint_from_int(client->uid));
    }
    if (client->gid) {
-        qdict_put_int(opts, "group", client->gid);
+        qdict_put(opts, "group", qint_from_int(client->gid));
    }
    if (client->tcp_syncnt) {
-        qdict_put_int(opts, "tcp-syn-cnt", client->tcp_syncnt);
+        qdict_put(opts, "tcp-syn-cnt",
+                  qint_from_int(client->tcp_syncnt));
    }
    if (client->readahead) {
-        qdict_put_int(opts, "readahead-size", client->readahead);
+        qdict_put(opts, "readahead-size",
+                  qint_from_int(client->readahead));
    }
    if (client->pagecache) {
-        qdict_put_int(opts, "page-cache-size", client->pagecache);
+        qdict_put(opts, "page-cache-size",
+                  qint_from_int(client->pagecache));
    }
    if (client->debug) {
-        qdict_put_int(opts, "debug", client->debug);
+        qdict_put(opts, "debug", qint_from_int(client->debug));
    }

    visit_free(ov);
@@ -890,8 +855,8 @@ static BlockDriver bdrv_nfs = {
    .bdrv_create                    = nfs_file_create,
    .bdrv_reopen_prepare            = nfs_reopen_prepare,

-    .bdrv_co_preadv                 = nfs_co_preadv,
-    .bdrv_co_pwritev                = nfs_co_pwritev,
+    .bdrv_co_readv                  = nfs_co_readv,
+    .bdrv_co_writev                 = nfs_co_writev,
    .bdrv_co_flush_to_disk          = nfs_co_flush,

    .bdrv_detach_aio_context        = nfs_detach_aio_context,
--- a/block/null.c
+++ b/block/null.c
@@ -232,7 +232,7 @@ static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
                 bs->drv->format_name);
    }

-    qdict_put_str(opts, "driver", bs->drv->format_name);
+    qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
    bs->full_open_options = opts;
 }

--- a/block/parallels.c
+++ b/block/parallels.c
@@ -114,7 +114,7 @@ static QemuOptsList parallels_runtime_opts = {
            .name = PARALLELS_OPT_PREALLOC_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Preallocation size on image expansion",
-            .def_value_str = "128M",
+            .def_value_str = "128MiB",
        },
        {
            .name = PARALLELS_OPT_PREALLOC_MODE,
@@ -192,7 +192,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors, int *pnum)
 {
    BDRVParallelsState *s = bs->opaque;
-    int64_t pos, space, idx, to_allocate, i;
+    uint32_t idx, to_allocate, i;
+    int64_t pos, space;

    pos = block_status(s, sector_num, nb_sectors, pnum);
    if (pos > 0) {
@@ -200,19 +201,11 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
    }

    idx = sector_num / s->tracks;
+    if (idx >= s->bat_size) {
+        return -EINVAL;
+    }
+
    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
-
-    /* This function is called only by parallels_co_writev(), which will never
-     * pass a sector_num at or beyond the end of the image (because the block
-     * layer never passes such a sector_num to that function). Therefore, idx
-     * is always below s->bat_size.
-     * block_status() will limit *pnum so that sector_num + *pnum will not
-     * exceed the image end. Therefore, idx + to_allocate cannot exceed
-     * s->bat_size.
-     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
-     * will always fit into a uint32_t. */
-    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
-
    space = to_allocate * s->tracks;
    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
        int ret;
@@ -222,9 +215,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                     s->data_end << BDRV_SECTOR_BITS,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
-            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                NULL);
+            ret = bdrv_truncate(bs->file->bs,
+                                (s->data_end + space) << BDRV_SECTOR_BITS);
        }
        if (ret < 0) {
            return ret;
@@ -457,10 +449,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            Error *local_err = NULL;
-            ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
+            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
            if (ret < 0) {
-                error_report_err(local_err);
                res->check_errors++;
                return ret;
            }
@@ -498,8 +488,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    file = blk_new_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                        &local_err);
+                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (file == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -507,7 +496,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0, errp);
+    ret = blk_truncate(file, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -592,12 +581,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    char *buf;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
@@ -697,9 +680,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    if (local_err != NULL) {
        goto fail_options;
    }
-
-    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
+    if (!bdrv_has_zero_init(bs->file->bs) ||
+            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -742,7 +724,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
+        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
    }

    g_free(s->bat_dirty_bmap);
@@ -774,7 +756,6 @@ static BlockDriver bdrv_parallels = {
    .bdrv_probe		= parallels_probe,
    .bdrv_open		= parallels_open,
    .bdrv_close		= parallels_close,
-    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_co_get_block_status = parallels_co_get_block_status,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -237,8 +237,8 @@ void bdrv_query_image_info(BlockDriverState *bs,

    size = bdrv_getlength(bs);
    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get image size '%s'",
-                         bs->exact_filename);
+        error_setg_errno(errp, -size, "Can't get size of device '%s'",
+                         bdrv_get_device_name(bs));
        goto out;
    }

@@ -357,6 +357,10 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing);
+
 static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
 {
    BlockAcctStats *stats = blk_get_stats(blk);
@@ -424,33 +428,44 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
    }
 }

-static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
                                 bool query_backing)
 {
-    BlockStats *s = NULL;
-
-    s = g_malloc0(sizeof(*s));
-    s->stats = g_malloc0(sizeof(*s->stats));
-
-    if (!bs) {
-        return s;
-    }
-
    if (bdrv_get_node_name(bs)[0]) {
        s->has_node_name = true;
        s->node_name = g_strdup(bdrv_get_node_name(bs));
    }

-    s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
+    s->stats->wr_highest_offset = bs->wr_highest_offset;

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_bds_stats(bs->file->bs, query_backing);
+        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
    }

    if (query_backing && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_bds_stats(bs->backing->bs, query_backing);
+        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+    }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing)
+{
+    BlockStats *s;
+
+    s = g_malloc0(sizeof(*s));
+    s->stats = g_malloc0(sizeof(*s->stats));
+
+    if (blk) {
+        s->has_device = true;
+        s->device = g_strdup(blk_name(blk));
+        bdrv_query_blk_stats(s->stats, blk);
+    }
+    if (bs) {
+        bdrv_query_bds_stats(s, bs, query_backing);
    }

    return s;
@@ -479,44 +494,42 @@ BlockInfoList *qmp_query_block(Error **errp)
    return head;
 }

+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+                           bool query_nodes)
+{
+    if (query_nodes) {
+        *bs = bdrv_next_node(*bs);
+        return !!*bs;
+    }
+
+    *blk = blk_next(*blk);
+    *bs = *blk ? blk_bs(*blk) : NULL;
+
+    return !!*blk;
+}
+
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                     bool query_nodes,
                                     Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
-    BlockBackend *blk;
-    BlockDriverState *bs;
+    BlockBackend *blk = NULL;
+    BlockDriverState *bs = NULL;

    /* Just to be safe if query_nodes is not always initialized */
-    if (has_query_nodes && query_nodes) {
-        for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
-            BlockStatsList *info = g_malloc0(sizeof(*info));
-            AioContext *ctx = bdrv_get_aio_context(bs);
+    query_nodes = has_query_nodes && query_nodes;

-            aio_context_acquire(ctx);
-            info->value = bdrv_query_bds_stats(bs, false);
-            aio_context_release(ctx);
+    while (next_query_bds(&blk, &bs, query_nodes)) {
+        BlockStatsList *info = g_malloc0(sizeof(*info));
+        AioContext *ctx = blk ? blk_get_aio_context(blk)
+                              : bdrv_get_aio_context(bs);

-            *p_next = info;
-            p_next = &info->next;
-        }
-    } else {
-        for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-            BlockStatsList *info = g_malloc0(sizeof(*info));
-            AioContext *ctx = blk_get_aio_context(blk);
-            BlockStats *s;
+        aio_context_acquire(ctx);
+        info->value = bdrv_query_stats(blk, bs, !query_nodes);
+        aio_context_release(ctx);

-            aio_context_acquire(ctx);
-            s = bdrv_query_bds_stats(blk_bs(blk), true);
-            s->has_device = true;
-            s->device = g_strdup(blk_name(blk));
-            bdrv_query_blk_stats(s->stats, blk);
-            aio_context_release(ctx);
-
-            info->value = s;
-            *p_next = info;
-            p_next = &info->next;
-        }
+        *p_next = info;
+        p_next = &info->next;
    }

    return head;
@@ -682,6 +695,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,

    visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
    visit_complete(v, &obj);
+    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
    qobject_decref(obj);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -32,7 +32,7 @@
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
 #include "crypto/cipher.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -104,13 +104,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    unsigned int len, i, shift;
    int ret;
    QCowHeader header;
-    Error *local_err = NULL;
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
@@ -259,12 +252,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail;
-    }
+    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
    return 0;
@@ -473,7 +461,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
+                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
@@ -823,8 +811,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    qcow_blk = blk_new_open(filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                            &local_err);
+                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (qcow_blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -833,7 +820,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0, errp);
+    ret = blk_truncate(qcow_blk, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -852,7 +839,6 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
            header_size += backing_filename_len;
        } else {
            /* special backing file for vvfat */
-            g_free(backing_file);
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
@@ -917,7 +903,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
+    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

@@ -1054,7 +1040,6 @@ static BlockDriver bdrv_qcow = {
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
-    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_reopen_prepare    = qcow_reopen_prepare,
    .bdrv_create            = qcow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -309,19 +309,14 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
 {
    int i;
-    QCow2ClusterType first_cluster_type;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

-    if (!offset) {
+    if (!offset)
        return 0;
-    }

-    /* must be allocated */
-    first_cluster_type = qcow2_get_cluster_type(first_entry);
-    assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
-           first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
+    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -333,21 +328,14 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
 	return i;
 }

-/*
- * Checks how many consecutive unallocated clusters in a given L2
- * table have the same cluster type.
- */
-static int count_contiguous_clusters_unallocated(int nb_clusters,
-                                                 uint64_t *l2_table,
-                                                 QCow2ClusterType wanted_type)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+                                             uint64_t *l2_table,
+                                             int wanted_type)
 {
    int i;

-    assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
-           wanted_type == QCOW2_CLUSTER_UNALLOCATED);
    for (i = 0; i < nb_clusters; i++) {
-        uint64_t entry = be64_to_cpu(l2_table[i]);
-        QCow2ClusterType type = qcow2_get_cluster_type(entry);
+        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));

        if (type != wanted_type) {
            break;
@@ -499,7 +487,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int l1_bits, c;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
-    QCow2ClusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
@@ -522,13 +509,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
-        type = QCOW2_CLUSTER_UNALLOCATED;
+        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
-        type = QCOW2_CLUSTER_UNALLOCATED;
+        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

@@ -557,37 +544,38 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     * true */
    assert(nb_clusters <= INT_MAX);

-    type = qcow2_get_cluster_type(*cluster_offset);
-    if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
-                                type == QCOW2_CLUSTER_ZERO_ALLOC)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                " in pre-v3 image (L2 offset: %#" PRIx64
-                                ", L2 index: %#x)", l2_offset, l2_index);
-        ret = -EIO;
-        goto fail;
-    }
-    switch (type) {
+    ret = qcow2_get_cluster_type(*cluster_offset);
+    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
-    case QCOW2_CLUSTER_ZERO_PLAIN:
-    case QCOW2_CLUSTER_UNALLOCATED:
-        /* how many empty clusters ? */
-        c = count_contiguous_clusters_unallocated(nb_clusters,
-                                                  &l2_table[l2_index], type);
+    case QCOW2_CLUSTER_ZERO:
+        if (s->qcow_version < 3) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+                                    " in pre-v3 image (L2 offset: %#" PRIx64
+                                    ", L2 index: %#x)", l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_ZERO);
+        *cluster_offset = 0;
+        break;
+    case QCOW2_CLUSTER_UNALLOCATED:
+        /* how many empty clusters ? */
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_UNALLOCATED);
        *cluster_offset = 0;
        break;
-    case QCOW2_CLUSTER_ZERO_ALLOC:
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
+                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1,
-                                    "Cluster allocation offset %#"
+            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
@@ -614,7 +602,7 @@ out:
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

-    return type;
+    return ret;

 fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
@@ -847,7 +835,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
-    if (!m->keep_old_clusters && j != 0) {
+    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
@@ -872,7 +860,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
-        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+        int cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
@@ -882,8 +870,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
-        case QCOW2_CLUSTER_ZERO_PLAIN:
-        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_ZERO:
            break;
        default:
            abort();
@@ -945,7 +932,9 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
-                qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
                return -EAGAIN;
            }
        }
@@ -1145,9 +1134,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t entry;
    uint64_t nb_clusters;
    int ret;
-    bool keep_old_clusters = false;

-    uint64_t alloc_cluster_offset = 0;
+    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
@@ -1184,54 +1172,31 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     * wrong with our code. */
    assert(nb_clusters > 0);

-    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
-        (entry & QCOW_OFLAG_COPIED) &&
-        (!*host_offset ||
-         start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
-    {
-        /* Try to reuse preallocated zero clusters; contiguous normal clusters
-         * would be fine, too, but count_cow_clusters() above has limited
-         * nb_clusters already to a range of COW clusters */
-        int preallocated_nb_clusters =
-            count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_COPIED);
-        assert(preallocated_nb_clusters > 0);
-
-        nb_clusters = preallocated_nb_clusters;
-        alloc_cluster_offset = entry & L2E_OFFSET_MASK;
-
-        /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2()
-         * should not free them. */
-        keep_old_clusters = true;
-    }
-
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

+    /* Allocate, if necessary at a given offset in the image file */
+    alloc_cluster_offset = start_of_cluster(s, *host_offset);
+    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+                                  &nb_clusters);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Can't extend contiguous allocation */
+    if (nb_clusters == 0) {
+        *bytes = 0;
+        return 0;
+    }
+
+    /* !*host_offset would overwrite the image header and is reserved for "no
+     * host offset preferred". If 0 was a valid host offset, it'd trigger the
+     * following overlap check; do that now to avoid having an invalid value in
+     * *host_offset. */
    if (!alloc_cluster_offset) {
-        /* Allocate, if necessary at a given offset in the image file */
-        alloc_cluster_offset = start_of_cluster(s, *host_offset);
-        ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
-                                      &nb_clusters);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        /* Can't extend contiguous allocation */
-        if (nb_clusters == 0) {
-            *bytes = 0;
-            return 0;
-        }
-
-        /* !*host_offset would overwrite the image header and is reserved for
-         * "no host offset preferred". If 0 was a valid host offset, it'd
-         * trigger the following overlap check; do that now to avoid having an
-         * invalid value in *host_offset. */
-        if (!alloc_cluster_offset) {
-            ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                                nb_clusters * s->cluster_size);
-            assert(ret < 0);
-            goto fail;
-        }
+        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+                                            nb_clusters * s->cluster_size);
+        assert(ret < 0);
+        goto fail;
    }

    /*
@@ -1262,8 +1227,6 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
        .offset         = start_of_cluster(s, guest_offset),
        .nb_clusters    = nb_clusters,

-        .keep_old_clusters  = keep_old_clusters,
-
        .cow_start = {
            .offset     = 0,
            .nb_bytes   = offset_into_cluster(s, guest_offset),
@@ -1511,25 +1474,24 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         * but rather fall through to the backing file.
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
-        case QCOW2_CLUSTER_UNALLOCATED:
-            if (full_discard || !bs->backing) {
-                continue;
-            }
-            break;
+            case QCOW2_CLUSTER_UNALLOCATED:
+                if (full_discard || !bs->backing) {
+                    continue;
+                }
+                break;

-        case QCOW2_CLUSTER_ZERO_PLAIN:
-            if (!full_discard) {
-                continue;
-            }
-            break;
+            case QCOW2_CLUSTER_ZERO:
+                if (!full_discard) {
+                    continue;
+                }
+                break;

-        case QCOW2_CLUSTER_ZERO_ALLOC:
-        case QCOW2_CLUSTER_NORMAL:
-        case QCOW2_CLUSTER_COMPRESSED:
-            break;
+            case QCOW2_CLUSTER_NORMAL:
+            case QCOW2_CLUSTER_COMPRESSED:
+                break;

-        default:
-            abort();
+            default:
+                abort();
        }

        /* First remove L2 entries */
@@ -1549,36 +1511,37 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, enum qcow2_discard_type type,
-                          bool full_discard)
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset = offset + bytes;
+    uint64_t end_offset;
    uint64_t nb_clusters;
-    int64_t cleared;
    int ret;

-    /* Caller must pass aligned values, except at image end */
-    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
-           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

-    nb_clusters = size_to_clusters(s, bytes);
+    /* Round start up and end down */
+    offset = align_offset(offset, s->cluster_size);
+    end_offset = start_of_cluster(s, end_offset);
+
+    if (offset > end_offset) {
+        return 0;
+    }
+
+    nb_clusters = size_to_clusters(s, end_offset - offset);

    s->cache_discards = true;

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
-        cleared = discard_single_l2(bs, offset, nb_clusters, type,
-                                    full_discard);
-        if (cleared < 0) {
-            ret = cleared;
+        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
+        if (ret < 0) {
            goto fail;
        }

-        nb_clusters -= cleared;
-        offset += (cleared * s->cluster_size);
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
    }

    ret = 0;
@@ -1602,7 +1565,6 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    int l2_index;
    int ret;
    int i;
-    bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
@@ -1615,22 +1577,12 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_offset;
-        QCow2ClusterType cluster_type;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);

-        /*
-         * Minimize L2 changes if the cluster already reads back as
-         * zeroes with correct allocation.
-         */
-        cluster_type = qcow2_get_cluster_type(old_offset);
-        if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
-            (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
-            continue;
-        }
-
+        /* Update L2 entries */
        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
+        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
        } else {
@@ -1643,39 +1595,31 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, int flags)
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
+                        int flags)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
-    int64_t cleared;
    int ret;

-    /* Caller must pass aligned values, except at image end */
-    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
-           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
-
    /* The zero flag is only supported by version 3 and newer */
    if (s->qcow_version < 3) {
        return -ENOTSUP;
    }

    /* Each L2 table is handled by its own loop iteration */
-    nb_clusters = size_to_clusters(s, bytes);
+    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);

    s->cache_discards = true;

    while (nb_clusters > 0) {
-        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
-        if (cleared < 0) {
-            ret = cleared;
+        ret = zero_single_l2(bs, offset, nb_clusters, flags);
+        if (ret < 0) {
            goto fail;
        }

-        nb_clusters -= cleared;
-        offset += (cleared * s->cluster_size);
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
    }

    ret = 0;
@@ -1759,14 +1703,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+            int cluster_type = qcow2_get_cluster_type(l2_entry);
+            bool preallocated = offset != 0;

-            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
-                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO) {
                continue;
            }

-            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+            if (!preallocated) {
                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
@@ -1797,12 +1741,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
            }

            if (offset_into_cluster(s, offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1,
-                                        "Cluster allocation offset "
+                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
                                        "%#" PRIx64 " unaligned (L2 offset: %#"
                                        PRIx64 ", L2 index: %#x)", offset,
                                        l2_offset, j);
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1812,7 +1755,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1821,7 +1764,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -83,16 +83,6 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
 /*********************************************************/
 /* refcount handling */

-static void update_max_refcount_table_index(BDRVQcow2State *s)
-{
-    unsigned i = s->refcount_table_size - 1;
-    while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
-        i--;
-    }
-    /* Set s->max_refcount_table_index to the index of the last used entry */
-    s->max_refcount_table_index = i;
-}
-
 int qcow2_refcount_init(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
@@ -121,7 +111,6 @@ int qcow2_refcount_init(BlockDriverState *bs)
        }
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
-        update_max_refcount_table_index(s);
    }
    return 0;
 fail:
@@ -450,10 +439,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
        }

        s->refcount_table[refcount_table_index] = new_block;
-        /* If there's a hole in s->refcount_table then it can happen
-         * that refcount_table_index < s->max_refcount_table_index */
-        s->max_refcount_table_index =
-            MAX(s->max_refcount_table_index, refcount_table_index);

        /* The new refcount block may be where the caller intended to put its
         * data, so let it restart the search. */
@@ -595,7 +580,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
    s->refcount_table = new_table;
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;
-    update_max_refcount_table_index(s);

    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
@@ -1028,17 +1012,18 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
-    case QCOW2_CLUSTER_ZERO_ALLOC:
-        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
-            qcow2_signal_corruption(bs, false, -1, -1,
-                                    "Cannot free unaligned cluster %#llx",
-                                    l2_entry & L2E_OFFSET_MASK);
-        } else {
-            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                nb_clusters << s->cluster_bits, type);
+    case QCOW2_CLUSTER_ZERO:
+        if (l2_entry & L2E_OFFSET_MASK) {
+            if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+                qcow2_signal_corruption(bs, false, -1, -1,
+                                        "Cannot free unaligned cluster %#llx",
+                                        l2_entry & L2E_OFFSET_MASK);
+            } else {
+                qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                                    nb_clusters << s->cluster_bits, type);
+            }
        }
        break;
-    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        break;
    default:
@@ -1058,9 +1043,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
    bool l1_allocated = false;
-    int64_t old_entry, old_l2_offset;
+    int64_t old_offset, old_l2_offset;
    int i, j, l1_modified = 0, nb_csectors;
    int ret;

@@ -1088,16 +1073,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
            goto fail;
        }

-        for (i = 0; i < l1_size; i++) {
+        for(i = 0;i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
-        }
    } else {
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = false;
    }

-    for (i = 0; i < l1_size; i++) {
+    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
@@ -1117,79 +1101,81 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                goto fail;
            }

-            for (j = 0; j < s->l2_size; j++) {
+            for(j = 0; j < s->l2_size; j++) {
                uint64_t cluster_index;
-                uint64_t offset;

-                entry = be64_to_cpu(l2_table[j]);
-                old_entry = entry;
-                entry &= ~QCOW_OFLAG_COPIED;
-                offset = entry & L2E_OFFSET_MASK;
+                offset = be64_to_cpu(l2_table[j]);
+                old_offset = offset;
+                offset &= ~QCOW_OFLAG_COPIED;

-                switch (qcow2_get_cluster_type(entry)) {
-                case QCOW2_CLUSTER_COMPRESSED:
-                    nb_csectors = ((entry >> s->csize_shift) &
-                                   s->csize_mask) + 1;
-                    if (addend != 0) {
-                        ret = update_refcount(bs,
-                                (entry & s->cluster_offset_mask) & ~511,
+                switch (qcow2_get_cluster_type(offset)) {
+                    case QCOW2_CLUSTER_COMPRESSED:
+                        nb_csectors = ((offset >> s->csize_shift) &
+                                       s->csize_mask) + 1;
+                        if (addend != 0) {
+                            ret = update_refcount(bs,
+                                (offset & s->cluster_offset_mask) & ~511,
                                nb_csectors * 512, abs(addend), addend < 0,
                                QCOW2_DISCARD_SNAPSHOT);
-                        if (ret < 0) {
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+                        /* compressed clusters are never modified */
+                        refcount = 2;
+                        break;
+
+                    case QCOW2_CLUSTER_NORMAL:
+                    case QCOW2_CLUSTER_ZERO:
+                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
+                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
+                                                    "cluster offset %#llx "
+                                                    "unaligned (L2 offset: %#"
+                                                    PRIx64 ", L2 index: %#x)",
+                                                    offset & L2E_OFFSET_MASK,
+                                                    l2_offset, j);
+                            ret = -EIO;
                            goto fail;
                        }
-                    }
-                    /* compressed clusters are never modified */
-                    refcount = 2;
-                    break;

-                case QCOW2_CLUSTER_NORMAL:
-                case QCOW2_CLUSTER_ZERO_ALLOC:
-                    if (offset_into_cluster(s, offset)) {
-                        qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
-                                                "allocation offset %#" PRIx64
-                                                " unaligned (L2 offset: %#"
-                                                PRIx64 ", L2 index: %#x)",
-                                                offset, l2_offset, j);
-                        ret = -EIO;
-                        goto fail;
-                    }
-
-                    cluster_index = offset >> s->cluster_bits;
-                    assert(cluster_index);
-                    if (addend != 0) {
-                        ret = qcow2_update_cluster_refcount(bs,
+                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
+                        if (!cluster_index) {
+                            /* unallocated */
+                            refcount = 0;
+                            break;
+                        }
+                        if (addend != 0) {
+                            ret = qcow2_update_cluster_refcount(bs,
                                    cluster_index, abs(addend), addend < 0,
                                    QCOW2_DISCARD_SNAPSHOT);
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+
+                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                        if (ret < 0) {
                            goto fail;
                        }
-                    }
+                        break;

-                    ret = qcow2_get_refcount(bs, cluster_index, &refcount);
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                    break;
+                    case QCOW2_CLUSTER_UNALLOCATED:
+                        refcount = 0;
+                        break;

-                case QCOW2_CLUSTER_ZERO_PLAIN:
-                case QCOW2_CLUSTER_UNALLOCATED:
-                    refcount = 0;
-                    break;
-
-                default:
-                    abort();
+                    default:
+                        abort();
                }

                if (refcount == 1) {
-                    entry |= QCOW_OFLAG_COPIED;
+                    offset |= QCOW_OFLAG_COPIED;
                }
-                if (entry != old_entry) {
+                if (offset != old_offset) {
                    if (addend > 0) {
                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                            s->refcount_block_cache);
                    }
-                    l2_table[j] = cpu_to_be64(entry);
+                    l2_table[j] = cpu_to_be64(offset);
                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
                                                 l2_table);
                }
@@ -1439,7 +1425,12 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }
            break;

-        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_ZERO:
+            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+                break;
+            }
+            /* fall through */
+
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;
@@ -1469,7 +1460,6 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            break;
        }

-        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_UNALLOCATED:
            break;

@@ -1632,10 +1622,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
-            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+            int cluster_type = qcow2_get_cluster_type(l2_entry);

-            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
-                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
+            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
+                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
                ret = qcow2_get_refcount(bs,
                                         data_offset >> s->cluster_bits,
                                         &refcount);
@@ -1722,17 +1712,14 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
-                Error *local_err = NULL;

                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
-                                    &local_err);
+                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
                if (ret < 0) {
-                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
@@ -2184,7 +2171,6 @@ write_refblocks:
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
    s->refcount_table_size = reftable_size;
-    update_max_refcount_table_index(s);

    return 0;

@@ -2397,11 +2383,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
    }

    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
-        unsigned last_entry = s->max_refcount_table_index;
-        assert(last_entry < s->refcount_table_size);
-        assert(last_entry + 1 == s->refcount_table_size ||
-               (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
-        for (i = 0; i <= last_entry; i++) {
+        for (i = 0; i < s->refcount_table_size; i++) {
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
                s->cluster_size)) {
@@ -2889,7 +2871,6 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
    /* Now update the rest of the in-memory information */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
-    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -440,9 +440,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
-    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
-                          align_offset(sn->vm_state_size, s->cluster_size),
-                          QCOW2_DISCARD_NEVER, false);
+    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
+                           align_offset(sn->vm_state_size, s->cluster_size)
+                                >> BDRV_SECTOR_BITS,
+                           QCOW2_DISCARD_NEVER, false);

 #ifdef DEBUG_ALLOC
    {
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options,
    return ret;
 }

-static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
+                      Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
@@ -1205,18 +1205,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
    return ret;
 }

-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    return qcow2_do_open(bs, options, flags, errp);
-}
-
 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
@@ -1385,7 +1373,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
        *file = bs->file->bs;
        status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
    }
-    if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
+    if (ret == QCOW2_CLUSTER_ZERO) {
        status |= BDRV_BLOCK_ZERO;
    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
        status |= BDRV_BLOCK_DATA;
@@ -1482,8 +1470,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
            }
            break;

-        case QCOW2_CLUSTER_ZERO_PLAIN:
-        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_ZERO:
            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            break;

@@ -1798,7 +1785,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
    options = qdict_clone_shallow(bs->options);

    flags &= ~BDRV_O_INACTIVE;
-    ret = qcow2_do_open(bs, options, flags, &local_err);
+    ret = qcow2_open(bs, options, flags, &local_err);
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -2140,7 +2127,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         * too, as long as the bulk is allocated here). Therefore, using
         * floating point arithmetic is fine. */
        int64_t meta_size = 0;
-        uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count;
+        uint64_t nreftablee, nrefblocke, nl1e, nl2e;
        int64_t aligned_total_size = align_offset(total_size, cluster_size);
        int refblock_bits, refblock_size;
        /* refcount entry size in bytes */
@@ -2183,12 +2170,11 @@ static int qcow2_create2(const char *filename, int64_t total_size,
        nrefblocke = (aligned_total_size + meta_size + cluster_size)
                   / (cluster_size - rces - rces * sizeof(uint64_t)
                                                 / cluster_size);
-        refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size);
-        meta_size += refblock_count * cluster_size;
+        meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;

        /* total size of refcount tables */
-        nreftablee = align_offset(refblock_count,
-                                  cluster_size / sizeof(uint64_t));
+        nreftablee = nrefblocke / refblock_size;
+        nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
        meta_size += nreftablee * sizeof(uint64_t);

        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
@@ -2204,8 +2190,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -2267,10 +2252,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     * table)
     */
    options = qdict_new();
-    qdict_put_str(options, "driver", "qcow2");
+    qdict_put(options, "driver", qstring_from_str("qcow2"));
    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -2296,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    /* Okay, now that we have a valid image, let's give it the right size */
-    ret = blk_truncate(blk, total_size, errp);
+    ret = blk_truncate(blk, total_size);
    if (ret < 0) {
-        error_prepend(errp, "Could not resize image: ");
+        error_setg_errno(errp, -ret, "Could not resize image");
        goto out;
    }

@@ -2329,7 +2313,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    options = qdict_new();
-    qdict_put_str(options, "driver", "qcow2");
+    qdict_put(options, "driver", qstring_from_str("qcow2"));
    blk = blk_new_open(filename, NULL, options,
                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
    if (blk == NULL) {
@@ -2451,10 +2435,6 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
    BlockDriverState *file;
    int64_t res;

-    if (start + count > bs->total_sectors) {
-        count = bs->total_sectors - start;
-    }
-
    if (!count) {
        return true;
    }
@@ -2473,9 +2453,6 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    uint32_t tail = (offset + count) % s->cluster_size;

    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
-    if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) {
-        tail = 0;
-    }

    if (head || tail) {
        int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
@@ -2499,9 +2476,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
        count = s->cluster_size;
        nr = s->cluster_size;
        ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
-        if (ret != QCOW2_CLUSTER_UNALLOCATED &&
-            ret != QCOW2_CLUSTER_ZERO_PLAIN &&
-            ret != QCOW2_CLUSTER_ZERO_ALLOC) {
+        if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
            qemu_co_mutex_unlock(&s->lock);
            return -ENOTSUP;
        }
@@ -2512,7 +2487,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);

    /* Whatever is left can use real zero clusters */
-    ret = qcow2_cluster_zeroize(bs, offset, count, flags);
+    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
    qemu_co_mutex_unlock(&s->lock);

    return ret;
@@ -2526,48 +2501,42 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,

    if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) {
        assert(count < s->cluster_size);
-        /* Ignore partial clusters, except for the special case of the
-         * complete partial cluster at the end of an unaligned file */
-        if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
-            offset + count != bs->total_sectors * BDRV_SECTOR_SIZE) {
-            return -ENOTSUP;
-        }
+        return -ENOTSUP;
    }

    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST,
-                                false);
+    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
+                                 QCOW2_DISCARD_REQUEST, false);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
 }

-static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVQcow2State *s = bs->opaque;
    int64_t new_l1_size;
    int ret;

    if (offset & 511) {
-        error_setg(errp, "The new size must be a multiple of 512");
+        error_report("The new size must be a multiple of 512");
        return -EINVAL;
    }

    /* cannot proceed if image has snapshots */
    if (s->nb_snapshots) {
-        error_setg(errp, "Can't resize an image which has snapshots");
+        error_report("Can't resize an image which has snapshots");
        return -ENOTSUP;
    }

    /* shrinking is currently not supported */
    if (offset < bs->total_sectors * 512) {
-        error_setg(errp, "qcow2 doesn't support shrinking images yet");
+        error_report("qcow2 doesn't support shrinking images yet");
        return -ENOTSUP;
    }

    new_l1_size = size_to_l1(s, offset);
    ret = qcow2_grow_l1_table(bs, new_l1_size, true);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to grow the L1 table");
        return ret;
    }

@@ -2576,7 +2545,6 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to update the image size");
        return ret;
    }

@@ -2602,7 +2570,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file->bs);
-        return bdrv_truncate(bs->file, cluster_offset, NULL);
+        return bdrv_truncate(bs->file->bs, cluster_offset);
    }

    buf = qemu_blockalign(bs, s->cluster_size);
@@ -2692,7 +2660,6 @@ fail:
 static int make_completely_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
-    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
@@ -2776,7 +2743,6 @@ static int make_completely_empty(BlockDriverState *bs)

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
-    s->max_refcount_table_index = 0;

    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
@@ -2817,10 +2783,8 @@ static int make_completely_empty(BlockDriverState *bs)
        goto fail;
    }

-    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
-                        &local_err);
+    ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
    if (ret < 0) {
-        error_report_err(local_err);
        goto fail;
    }

@@ -2843,8 +2807,9 @@ fail:
 static int qcow2_make_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t offset, end_offset;
-    int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
+    uint64_t start_sector;
+    int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) /
+                       BDRV_SECTOR_SIZE);
    int l1_clusters, ret = 0;

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
@@ -2861,15 +2826,18 @@ static int qcow2_make_empty(BlockDriverState *bs)

    /* This fallback code simply discards every active cluster; this is slow,
     * but works in all cases */
-    end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
-    for (offset = 0; offset < end_offset; offset += step) {
+    for (start_sector = 0; start_sector < bs->total_sectors;
+         start_sector += sector_step)
+    {
        /* As this function is generally used after committing an external
         * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
         * default action for this kind of discard is to pass the discard,
         * which will ideally result in an actually smaller image file, as
         * is probably desired. */
-        ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
-                                    QCOW2_DISCARD_SNAPSHOT, true);
+        ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
+                                     MIN(sector_step,
+                                         bs->total_sectors - start_sector),
+                                     QCOW2_DISCARD_SNAPSHOT, true);
        if (ret < 0) {
            break;
        }
@@ -3132,7 +3100,6 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    uint64_t cluster_size = s->cluster_size;
    bool encrypt;
    int refcount_bits = s->refcount_bits;
-    Error *local_err = NULL;
    int ret;
    QemuOptDesc *desc = opts->list->desc;
    Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3222,6 +3189,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,

    if (s->refcount_bits != refcount_bits) {
        int refcount_order = ctz32(refcount_bits);
+        Error *local_error = NULL;

        if (new_version < 3 && refcount_bits != 16) {
            error_report("Different refcount widths than 16 bits require "
@@ -3233,9 +3201,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
        ret = qcow2_change_refcount_order(bs, refcount_order,
                                          &qcow2_amend_helper_cb,
-                                          &helper_cb_info, &local_err);
+                                          &helper_cb_info, &local_error);
        if (ret < 0) {
-            error_report_err(local_err);
+            error_report_err(local_error);
            return ret;
        }
    }
@@ -3281,18 +3249,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    }

    if (new_size) {
-        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
-        ret = blk_insert_bs(blk, bs, &local_err);
+        ret = bdrv_truncate(bs, new_size);
        if (ret < 0) {
-            error_report_err(local_err);
-            blk_unref(blk);
-            return ret;
-        }
-
-        ret = blk_truncate(blk, new_size, &local_err);
-        blk_unref(blk);
-        if (ret < 0) {
-            error_report_err(local_err);
            return ret;
        }
    }
@@ -3428,7 +3386,6 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_reopen_commit   = qcow2_reopen_commit,
    .bdrv_reopen_abort    = qcow2_reopen_abort,
    .bdrv_join_options    = qcow2_join_options,
-    .bdrv_child_perm      = bdrv_format_default_perms,
    .bdrv_create        = qcow2_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = qcow2_co_get_block_status,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -251,7 +251,6 @@ typedef struct BDRVQcow2State {
    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;
-    uint32_t max_refcount_table_index; /* Last used entry in refcount_table */
    uint64_t free_cluster_index;
    uint64_t free_byte_offset;

@@ -322,9 +321,6 @@ typedef struct QCowL2Meta
    /** Number of newly allocated clusters */
    int nb_clusters;

-    /** Do not free the old clusters */
-    bool keep_old_clusters;
-
    /**
     * Requests that overlap with this allocation and wait to be restarted
     * when the allocating request has completed.
@@ -349,13 +345,12 @@ typedef struct QCowL2Meta
    QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;

-typedef enum QCow2ClusterType {
+enum {
    QCOW2_CLUSTER_UNALLOCATED,
-    QCOW2_CLUSTER_ZERO_PLAIN,
-    QCOW2_CLUSTER_ZERO_ALLOC,
    QCOW2_CLUSTER_NORMAL,
    QCOW2_CLUSTER_COMPRESSED,
-} QCow2ClusterType;
+    QCOW2_CLUSTER_ZERO
+};

 typedef enum QCow2MetadataOverlap {
    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
@@ -444,15 +439,12 @@ static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
    return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
 }

-static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry)
+static inline int qcow2_get_cluster_type(uint64_t l2_entry)
 {
    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
        return QCOW2_CLUSTER_COMPRESSED;
    } else if (l2_entry & QCOW_OFLAG_ZERO) {
-        if (l2_entry & L2E_OFFSET_MASK) {
-            return QCOW2_CLUSTER_ZERO_ALLOC;
-        }
-        return QCOW2_CLUSTER_ZERO_PLAIN;
+        return QCOW2_CLUSTER_ZERO;
    } else if (!(l2_entry & L2E_OFFSET_MASK)) {
        return QCOW2_CLUSTER_UNALLOCATED;
    } else {
@@ -551,11 +543,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         int compressed_size);

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, enum qcow2_discard_type type,
-                          bool full_discard);
-int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, int flags);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
+                        int flags);

 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -83,7 +83,6 @@ static void qed_find_cluster_cb(void *opaque, int ret)
    unsigned int index;
    unsigned int n;

-    qed_acquire(s);
    if (ret) {
        goto out;
    }
@@ -110,7 +109,6 @@ static void qed_find_cluster_cb(void *opaque, int ret)

 out:
    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
-    qed_release(s);
    g_free(find_cluster_cb);
 }

--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -31,7 +31,6 @@ static void qed_read_table_cb(void *opaque, int ret)
 {
    QEDReadTableCB *read_table_cb = opaque;
    QEDTable *table = read_table_cb->table;
-    BDRVQEDState *s = read_table_cb->s;
    int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
    int i;

@@ -41,15 +40,13 @@ static void qed_read_table_cb(void *opaque, int ret)
    }

    /* Byteswap offsets */
-    qed_acquire(s);
    for (i = 0; i < noffsets; i++) {
        table->offsets[i] = le64_to_cpu(table->offsets[i]);
    }
-    qed_release(s);

 out:
    /* Completion */
-    trace_qed_read_table_cb(s, read_table_cb->table, ret);
+    trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
    gencb_complete(&read_table_cb->gencb, ret);
 }

@@ -87,9 +84,8 @@ typedef struct {
 static void qed_write_table_cb(void *opaque, int ret)
 {
    QEDWriteTableCB *write_table_cb = opaque;
-    BDRVQEDState *s = write_table_cb->s;

-    trace_qed_write_table_cb(s,
+    trace_qed_write_table_cb(write_table_cb->s,
                             write_table_cb->orig_table,
                             write_table_cb->flush,
                             ret);
@@ -101,10 +97,8 @@ static void qed_write_table_cb(void *opaque, int ret)
    if (write_table_cb->flush) {
        /* We still need to flush first */
        write_table_cb->flush = false;
-        qed_acquire(s);
        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
                       write_table_cb);
-        qed_release(s);
        return;
    }

@@ -219,7 +213,6 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
    CachedL2Table *l2_table = request->l2_table;
    uint64_t l2_offset = read_l2_table_cb->l2_offset;

-    qed_acquire(s);
    if (ret) {
        /* can't trust loaded L2 table anymore */
        qed_unref_l2_cache_entry(l2_table);
@@ -235,7 +228,6 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
        assert(request->l2_table != NULL);
    }
-    qed_release(s);

    gencb_complete(&read_l2_table_cb->gencb, ret);
 }
--- a/block/qed.c
+++ b/block/qed.c
@@ -19,6 +19,7 @@
 #include "trace.h"
 #include "qed.h"
 #include "qapi/qmp/qerror.h"
+#include "migration/migration.h"
 #include "sysemu/block-backend.h"

 static const AIOCBInfo qed_aiocb_info = {
@@ -272,19 +273,7 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
    return l2_table;
 }

-static void qed_aio_next_io(QEDAIOCB *acb, int ret);
-
-static void qed_aio_start_io(QEDAIOCB *acb)
-{
-    qed_aio_next_io(acb, 0);
-}
-
-static void qed_aio_next_io_cb(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-
-    qed_aio_next_io(acb, ret);
-}
+static void qed_aio_next_io(void *opaque, int ret);

 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
@@ -303,7 +292,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)

    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
    if (acb) {
-        qed_aio_start_io(acb);
+        qed_aio_next_io(acb, 0);
    }
 }

@@ -344,22 +333,10 @@ static void qed_need_check_timer_cb(void *opaque)

    trace_qed_need_check_timer_cb(s);

-    qed_acquire(s);
    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
    bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
-    qed_release(s);
-}
-
-void qed_acquire(BDRVQEDState *s)
-{
-    aio_context_acquire(bdrv_get_aio_context(s->bs));
-}
-
-void qed_release(BDRVQEDState *s)
-{
-    aio_context_release(bdrv_get_aio_context(s->bs));
 }

 static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -414,8 +391,8 @@ static void bdrv_qed_drain(BlockDriverState *bs)
    }
 }

-static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
-                            Error **errp)
+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
@@ -549,18 +526,6 @@ out:
    return ret;
 }

-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
-{
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    return bdrv_qed_do_open(bs, options, flags, errp);
-}
-
 static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
@@ -624,8 +589,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -634,7 +598,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    blk_set_allow_write_beyond_eof(blk, true);

    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0, errp);
+    ret = blk_truncate(blk, 0);
    if (ret < 0) {
        goto out;
    }
@@ -757,7 +721,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
    }

    if (cb->co) {
-        aio_co_wake(cb->co);
+        qemu_coroutine_enter(cb->co);
    }
 }

@@ -954,7 +918,6 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
 static void qed_aio_complete_bh(void *opaque)
 {
    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
    BlockCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;
@@ -962,9 +925,7 @@ static void qed_aio_complete_bh(void *opaque)
    qemu_aio_unref(acb);

    /* Invoke callback */
-    qed_acquire(s);
    cb(user_opaque, ret);
-    qed_release(s);
 }

 static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -998,7 +959,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
        if (acb) {
-            qed_aio_start_io(acb);
+            qed_aio_next_io(acb, 0);
        } else if (s->header.features & QED_F_NEED_CHECK) {
            qed_start_need_check_timer(s);
        }
@@ -1023,7 +984,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
    assert(acb->request.l2_table != NULL);

-    qed_aio_next_io(acb, ret);
+    qed_aio_next_io(opaque, ret);
 }

 /**
@@ -1071,11 +1032,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
    if (need_alloc) {
        /* Write out the whole new L2 table */
        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
-                           qed_aio_write_l1_update, acb);
+                            qed_aio_write_l1_update, acb);
    } else {
        /* Write out only the updated part of the L2 table */
        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
-                           qed_aio_next_io_cb, acb);
+                            qed_aio_next_io, acb);
    }
    return;

@@ -1127,7 +1088,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    }

    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-        next_fn = qed_aio_next_io_cb;
+        next_fn = qed_aio_next_io;
    } else {
        if (s->bs->backing) {
            next_fn = qed_aio_write_flush_before_l2_update;
@@ -1240,7 +1201,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
    if (acb->flags & QED_AIOCB_ZERO) {
        /* Skip ahead if the clusters are already zero */
        if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-            qed_aio_start_io(acb);
+            qed_aio_next_io(acb, 0);
            return;
        }

@@ -1360,18 +1321,18 @@ static void qed_aio_read_data(void *opaque, int ret,
    /* Handle zero cluster and backing file reads */
    if (ret == QED_CLUSTER_ZERO) {
        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        qed_aio_start_io(acb);
+        qed_aio_next_io(acb, 0);
        return;
    } else if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              &acb->backing_qiov, qed_aio_next_io_cb, acb);
+                              &acb->backing_qiov, qed_aio_next_io, acb);
        return;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                   qed_aio_next_io_cb, acb);
+                   qed_aio_next_io, acb);
    return;

 err:
@@ -1381,8 +1342,9 @@ err:
 /**
 * Begin next I/O or complete the request
 */
-static void qed_aio_next_io(QEDAIOCB *acb, int ret)
+static void qed_aio_next_io(void *opaque, int ret)
 {
+    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
                                qed_aio_write_data : qed_aio_read_data;
@@ -1438,7 +1400,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);

    /* Start request */
-    qed_aio_start_io(acb);
+    qed_aio_next_io(acb, 0);
    return &acb->common;
 }

@@ -1474,7 +1436,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
    cb->done = true;
    cb->ret = ret;
    if (cb->co) {
-        aio_co_wake(cb->co);
+        qemu_coroutine_enter(cb->co);
    }
 }

@@ -1517,7 +1479,7 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
    return cb.ret;
 }

-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVQEDState *s = bs->opaque;
    uint64_t old_image_size;
@@ -1525,12 +1487,11 @@ static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)

    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
                                 s->header.table_size)) {
-        error_setg(errp, "Invalid image size specified");
        return -EINVAL;
    }

+    /* Shrinking is currently not supported */
    if ((uint64_t)offset < s->header.image_size) {
-        error_setg(errp, "Shrinking images is currently not supported");
        return -ENOTSUP;
    }

@@ -1539,7 +1500,6 @@ static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
    ret = qed_write_header_sync(s);
    if (ret < 0) {
        s->header.image_size = old_image_size;
-        error_setg_errno(errp, -ret, "Failed to update the image size");
    }
    return ret;
 }
@@ -1643,7 +1603,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
    bdrv_qed_close(bs);

    memset(s, 0, sizeof(BDRVQEDState));
-    ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_prepend(errp, "Could not reopen qed layer: ");
@@ -1706,7 +1666,6 @@ static BlockDriver bdrv_qed = {
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
-    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_create              = bdrv_qed_create,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
--- a/block/qed.h
+++ b/block/qed.h
@@ -198,9 +198,6 @@ enum {
 */
 typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);

-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
 /**
 * Generic callback for chaining async callbacks
 */
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockDriverState *bs;
+    BlockAIOCB *aiocb;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -110,12 +110,11 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockDriverState *bs;
-    Coroutine *co;
+    BlockAIOCB common;

    /* Request metadata */
-    uint64_t offset;
-    uint64_t bytes;
+    uint64_t sector_num;
+    int nb_sectors;

    QEMUIOVector *qiov;         /* calling IOV */

@@ -134,15 +133,32 @@ struct QuorumAIOCB {
    int children_read;          /* how many children have been read from */
 };

-typedef struct QuorumCo {
-    QuorumAIOCB *acb;
-    int idx;
-} QuorumCo;
+static bool quorum_vote(QuorumAIOCB *acb);
+
+static void quorum_aio_cancel(BlockAIOCB *blockacb)
+{
+    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;
+
+    /* cancel all callbacks */
+    for (i = 0; i < s->num_children; i++) {
+        if (acb->qcrs[i].aiocb) {
+            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
+        }
+    }
+}
+
+static AIOCBInfo quorum_aiocb_info = {
+    .aiocb_size         = sizeof(QuorumAIOCB),
+    .cancel_async       = quorum_aio_cancel,
+};

 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
+    acb->common.cb(acb->common.opaque, acb->vote_ret);
    g_free(acb->qcrs);
-    g_free(acb);
+    qemu_aio_unref(acb);
 }

 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -155,26 +171,30 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
    return a->l == b->l;
 }

-static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+                                   BlockDriverState *bs,
                                   QEMUIOVector *qiov,
-                                   uint64_t offset,
-                                   uint64_t bytes)
+                                   uint64_t sector_num,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb,
+                                   void *opaque)
 {
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
+    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
    int i;

-    *acb = (QuorumAIOCB) {
-        .co                 = qemu_coroutine_self(),
-        .bs                 = bs,
-        .offset             = offset,
-        .bytes              = bytes,
-        .qiov               = qiov,
-        .votes.compare      = quorum_sha256_compare,
-        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
-    };
-
+    acb->common.bs->opaque = s;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->qiov = qiov;
    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
+    acb->count = 0;
+    acb->success_count = 0;
+    acb->rewrite_count = 0;
+    acb->votes.compare = quorum_sha256_compare;
+    QLIST_INIT(&acb->votes.vote_list);
+    acb->is_read = false;
+    acb->vote_ret = 0;
+
    for (i = 0; i < s->num_children; i++) {
        acb->qcrs[i].buf = NULL;
        acb->qcrs[i].ret = 0;
@@ -184,37 +204,30 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
    return acb;
 }

-static void quorum_report_bad(QuorumOpType type, uint64_t offset,
-                              uint64_t bytes, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+                              int nb_sectors, char *node_name, int ret)
 {
    const char *msg = NULL;
-    int64_t start_sector = offset / BDRV_SECTOR_SIZE;
-    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
-
    if (ret < 0) {
        msg = strerror(-ret);
    }

-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
-                                      end_sector - start_sector, &error_abort);
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+                                      sector_num, nb_sectors, &error_abort);
 }

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->bs);
-    int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
-    int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
-                                      BDRV_SECTOR_SIZE);
-
-    qapi_event_send_quorum_failure(reference, start_sector,
-                                   end_sector - start_sector, &error_abort);
+    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
+    qapi_event_send_quorum_failure(reference, acb->sector_num,
+                                   acb->nb_sectors, &error_abort);
 }

 static int quorum_vote_error(QuorumAIOCB *acb);

 static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;

    if (acb->success_count < s->threshold) {
        acb->vote_ret = quorum_vote_error(acb);
@@ -225,7 +238,22 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
    return false;
 }

-static int read_fifo_child(QuorumAIOCB *acb);
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);

 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -244,7 +272,70 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
 {
    QuorumAIOCB *acb = sacb->parent;
    QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-    quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
+    quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+                      sacb->aiocb->bs->node_name, ret);
+}
+
+static void quorum_fifo_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
+
+    if (ret < 0) {
+        quorum_report_bad_acb(sacb, ret);
+
+        /* We try to read next child in FIFO order if we fail to read */
+        if (acb->children_read < s->num_children) {
+            read_fifo_child(acb);
+            return;
+        }
+    }
+
+    acb->vote_ret = ret;
+
+    /* FIXME: rewrite failed children if acb->children_read > 1? */
+    quorum_aio_finalize(acb);
+}
+
+static void quorum_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;
+    int i;
+
+    sacb->ret = ret;
+    if (ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad_acb(sacb, ret);
+    }
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+    if (acb->count < s->num_children) {
+        return;
+    }
+
+    /* Do the vote on read */
+    if (acb->is_read) {
+        rewrite = quorum_vote(acb);
+        for (i = 0; i < s->num_children; i++) {
+            qemu_vfree(acb->qcrs[i].buf);
+            qemu_iovec_destroy(&acb->qcrs[i].qiov);
+        }
+    } else {
+        quorum_has_too_much_io_failed(acb);
+    }
+
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }

 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -259,31 +350,14 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+                              acb->nb_sectors,
                              s->children[item->index]->bs->node_name, 0);
        }
    }
 }

-static void quorum_rewrite_entry(void *opaque)
-{
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-
-    /* Ignore any errors, it's just a correction attempt for already
-     * corrupted data. */
-    bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
-                    acb->qiov, 0);
-
-    /* Wake up the caller after the last rewrite */
-    acb->rewrite_count--;
-    if (!acb->rewrite_count) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
                                        QuorumVoteValue *value)
 {
    QuorumVoteVersion *version;
@@ -302,7 +376,7 @@ static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
        }
    }

-    /* quorum_rewrite_entry will count down this to zero */
+    /* quorum_rewrite_aio_cb will count down this to zero */
    acb->rewrite_count = count;

    /* now fire the correcting rewrites */
@@ -311,14 +385,9 @@ static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            Coroutine *co;
-            QuorumCo data = {
-                .acb = acb,
-                .idx = item->index,
-            };
-
-            co = qemu_coroutine_create(quorum_rewrite_entry, &data);
-            qemu_coroutine_enter(co);
+            bdrv_aio_writev(s->children[item->index], acb->sector_num,
+                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+                            acb);
        }
    }

@@ -438,8 +507,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
-            acb->offset, acb->bytes);
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -450,15 +519,16 @@ static bool quorum_compare(QuorumAIOCB *acb,
                           QEMUIOVector *a,
                           QEMUIOVector *b)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    ssize_t offset;

    /* This driver will replace blkverify in this particular case */
    if (s->is_blkverify) {
        offset = qemu_iovec_compare(a, b);
        if (offset != -1) {
-            quorum_err(acb, "contents mismatch at offset %" PRIu64,
-                       acb->offset + offset);
+            quorum_err(acb, "contents mismatch in sector %" PRId64,
+                       acb->sector_num +
+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
        }
        return true;
    }
@@ -469,7 +539,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
 /* Do a vote to get the error code */
 static int quorum_vote_error(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    QuorumVoteVersion *winner = NULL;
    QuorumVotes error_votes;
    QuorumVoteValue result_value;
@@ -498,16 +568,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
    return ret;
 }

-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
    bool quorum = true;
+    bool rewrite = false;
    int i, j, ret;
    QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    QuorumVoteVersion *winner;

    if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
    }

    /* get the index of the first successful read */
@@ -535,7 +606,7 @@ static void quorum_vote(QuorumAIOCB *acb)
    /* Every successful read agrees */
    if (quorum) {
        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
    }

    /* compute hashes for each successful read, also store indexes */
@@ -570,46 +641,19 @@ static void quorum_vote(QuorumAIOCB *acb)

    /* corruption correction is enabled */
    if (s->rewrite_corrupted) {
-        quorum_rewrite_bad_versions(acb, &winner->value);
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
    }

 free_exit:
    /* free lists */
    quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }

-static void read_quorum_children_entry(void *opaque)
+static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
 {
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i = co->idx;
-    QuorumChildRequest *sacb = &acb->qcrs[i];
-
-    sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
-                               &acb->qcrs[i].qiov, 0);
-
-    if (sacb->ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, sacb->ret);
-    }
-
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-
-    /* Wake up the caller after the last read */
-    if (acb->count == s->num_children) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static int read_quorum_children(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i, ret;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;

    acb->children_read = s->num_children;
    for (i = 0; i < s->num_children; i++) {
@@ -619,131 +663,65 @@ static int read_quorum_children(QuorumAIOCB *acb)
    }

    for (i = 0; i < s->num_children; i++) {
-        Coroutine *co;
-        QuorumCo data = {
-            .acb = acb,
-            .idx = i,
-        };
-
-        co = qemu_coroutine_create(read_quorum_children_entry, &data);
-        qemu_coroutine_enter(co);
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
+                                            &acb->qcrs[i].qiov, acb->nb_sectors,
+                                            quorum_aio_cb, &acb->qcrs[i]);
    }

-    while (acb->count < s->num_children) {
-        qemu_coroutine_yield();
-    }
-
-    /* Do the vote on read */
-    quorum_vote(acb);
-    for (i = 0; i < s->num_children; i++) {
-        qemu_vfree(acb->qcrs[i].buf);
-        qemu_iovec_destroy(&acb->qcrs[i].qiov);
-    }
-
-    while (acb->rewrite_count) {
-        qemu_coroutine_yield();
-    }
-
-    ret = acb->vote_ret;
-
-    return ret;
+    return &acb->common;
 }

-static int read_fifo_child(QuorumAIOCB *acb)
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
-    int n, ret;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int n = acb->children_read++;

-    /* We try to read the next child in FIFO order if we failed to read */
-    do {
-        n = acb->children_read++;
-        acb->qcrs[n].bs = s->children[n]->bs;
-        ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
-                             acb->qiov, 0);
-        if (ret < 0) {
-            quorum_report_bad_acb(&acb->qcrs[n], ret);
-        }
-    } while (ret < 0 && acb->children_read < s->num_children);
+    acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
+                                        acb->qiov, acb->nb_sectors,
+                                        quorum_fifo_aio_cb, &acb->qcrs[n]);

-    /* FIXME: rewrite failed children if acb->children_read > 1? */
-
-    return ret;
+    return &acb->common;
 }

-static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
-                            uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                    int64_t sector_num,
+                                    QEMUIOVector *qiov,
+                                    int nb_sectors,
+                                    BlockCompletionFunc *cb,
+                                    void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-    int ret;
-
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
    acb->is_read = true;
    acb->children_read = 0;

    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        ret = read_quorum_children(acb);
-    } else {
-        ret = read_fifo_child(acb);
+        return read_quorum_children(acb);
    }
-    quorum_aio_finalize(acb);

-    return ret;
+    return read_fifo_child(acb);
 }

-static void write_quorum_entry(void *opaque)
-{
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i = co->idx;
-    QuorumChildRequest *sacb = &acb->qcrs[i];
-
-    sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-                                acb->qiov, 0);
-    if (sacb->ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, sacb->ret);
-    }
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-
-    /* Wake up the caller after the last write */
-    if (acb->count == s->num_children) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                             uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                     int64_t sector_num,
+                                     QEMUIOVector *qiov,
+                                     int nb_sectors,
+                                     BlockCompletionFunc *cb,
+                                     void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-    int i, ret;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+                                      cb, opaque);
+    int i;

    for (i = 0; i < s->num_children; i++) {
-        Coroutine *co;
-        QuorumCo data = {
-            .acb = acb,
-            .idx = i,
-        };
-
-        co = qemu_coroutine_create(write_quorum_entry, &data);
-        qemu_coroutine_enter(co);
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
+                                             qiov, nb_sectors, &quorum_aio_cb,
+                                             &acb->qcrs[i]);
    }

-    while (acb->count < s->num_children) {
-        qemu_coroutine_yield();
-    }
-
-    quorum_has_too_much_io_failed(acb);
-
-    ret = acb->vote_ret;
-    quorum_aio_finalize(acb);
-
-    return ret;
+    return &acb->common;
 }

 static int64_t quorum_getlength(BlockDriverState *bs)
@@ -787,7 +765,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
        result = bdrv_co_flush(s->children[i]->bs);
        if (result) {
            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
-                              bdrv_getlength(s->children[i]->bs),
+                              bdrv_nb_sectors(s->children[i]->bs),
                              s->children[i]->bs->node_name, result);
            result_value.l = result;
            quorum_count_vote(&error_votes, &result_value, i);
@@ -1032,17 +1010,10 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,

    /* We can safely add the child now */
    bdrv_ref(child_bs);
-
-    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
-    if (child == NULL) {
-        s->next_child_index--;
-        bdrv_unref(child_bs);
-        goto out;
-    }
+    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
    s->children[s->num_children++] = child;

-out:
    bdrv_drained_end(bs);
 }

@@ -1096,15 +1067,19 @@ static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append(children, s->children[i]->bs->full_open_options);
+        qlist_append_obj(children,
+                         QOBJECT(s->children[i]->bs->full_open_options));
    }

    opts = qdict_new();
-    qdict_put_str(opts, "driver", "quorum");
-    qdict_put_int(opts, QUORUM_OPT_VOTE_THRESHOLD, s->threshold);
-    qdict_put_bool(opts, QUORUM_OPT_BLKVERIFY, s->is_blkverify);
-    qdict_put_bool(opts, QUORUM_OPT_REWRITE, s->rewrite_corrupted);
-    qdict_put(opts, "children", children);
+    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
+    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
+                  QOBJECT(qint_from_int(s->threshold)));
+    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
+                  QOBJECT(qbool_from_bool(s->is_blkverify)));
+    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
+                  QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
+    qdict_put_obj(opts, "children", QOBJECT(children));

    bs->full_open_options = opts;
 }
@@ -1123,14 +1098,12 @@ static BlockDriver bdrv_quorum = {

    .bdrv_getlength                     = quorum_getlength,

-    .bdrv_co_preadv                     = quorum_co_preadv,
-    .bdrv_co_pwritev                    = quorum_co_pwritev,
+    .bdrv_aio_readv                     = quorum_aio_readv,
+    .bdrv_aio_writev                    = quorum_aio_writev,

    .bdrv_add_child                     = quorum_add_child,
    .bdrv_del_child                     = quorum_del_child,

-    .bdrv_child_perm                    = bdrv_filter_default_perms,
-
    .is_filter                          = true,
    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -25,6 +25,8 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "trace.h"
@@ -129,23 +131,12 @@ do { \

 #define MAX_BLOCKSIZE	4096

-/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
- * leaving a few more bytes for its future use. */
-#define RAW_LOCK_PERM_BASE             100
-#define RAW_LOCK_SHARED_BASE           200
-
 typedef struct BDRVRawState {
    int fd;
-    int lock_fd;
-    bool use_lock;
    int type;
    int open_flags;
    size_t buf_align;

-    /* The current permissions. */
-    uint64_t perm;
-    uint64_t shared_perm;
-
 #ifdef CONFIG_XFS
    bool is_xfs:1;
 #endif
@@ -153,7 +144,6 @@ typedef struct BDRVRawState {
    bool has_write_zeroes:1;
    bool discard_zeroes:1;
    bool use_linux_aio:1;
-    bool page_cache_inconsistent:1;
    bool has_fallocate;
    bool needs_alignment;
 } BDRVRawState;
@@ -229,28 +219,28 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
 {
    unsigned int sector_size;
    bool success = false;
-    int i;

    errno = ENOTSUP;
-    static const unsigned long ioctl_list[] = {
-#ifdef BLKSSZGET
-        BLKSSZGET,
-#endif
-#ifdef DKIOCGETBLOCKSIZE
-        DKIOCGETBLOCKSIZE,
-#endif
-#ifdef DIOCGSECTORSIZE
-        DIOCGSECTORSIZE,
-#endif
-    };

    /* Try a few ioctls to get the right size */
-    for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) {
-        if (ioctl(fd, ioctl_list[i], &sector_size) >= 0) {
-            *sector_size_p = sector_size;
-            success = true;
-        }
+#ifdef BLKSSZGET
+    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
    }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
+    }
+#endif
+#ifdef DIOCGSECTORSIZE
+    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
+    }
+#endif

    return success ? 0 : -errno;
 }
@@ -381,7 +371,12 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
 static void raw_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "file:", options);
+    /* The filename does not have to be prefixed by the protocol name, since
+     * "file" is the default protocol; therefore, the return value of this
+     * function call can be ignored. */
+    strstart(filename, "file:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static QemuOptsList raw_runtime_opts = {
@@ -398,11 +393,6 @@ static QemuOptsList raw_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "host AIO implementation (threads, native)",
        },
-        {
-            .name = "locking",
-            .type = QEMU_OPT_STRING,
-            .help = "file locking mode (on/off/auto, default: auto)",
-        },
        { /* end of list */ }
    },
 };
@@ -417,7 +407,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    BlockdevAioOptions aio, aio_default;
    int fd, ret;
    struct stat st;
-    OnOffAuto locking;

    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -447,37 +436,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);

-    locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"),
-                              ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-    switch (locking) {
-    case ON_OFF_AUTO_ON:
-        s->use_lock = true;
-#ifndef F_OFD_SETLK
-        fprintf(stderr,
-                "File lock requested but OFD locking syscall is unavailable, "
-                "falling back to POSIX file locks.\n"
-                "Due to the implementation, locks can be lost unexpectedly.\n");
-#endif
-        break;
-    case ON_OFF_AUTO_OFF:
-        s->use_lock = false;
-        break;
-    case ON_OFF_AUTO_AUTO:
-#ifdef F_OFD_SETLK
-        s->use_lock = true;
-#else
-        s->use_lock = false;
-#endif
-        break;
-    default:
-        abort();
-    }
-
    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

@@ -493,21 +451,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->fd = fd;

-    s->lock_fd = -1;
-    if (s->use_lock) {
-        fd = qemu_open(filename, s->open_flags);
-        if (fd < 0) {
-            ret = -errno;
-            error_setg_errno(errp, errno, "Could not open '%s' for locking",
-                             filename);
-            qemu_close(s->fd);
-            goto fail;
-        }
-        s->lock_fd = fd;
-    }
-    s->perm = 0;
-    s->shared_perm = BLK_PERM_ALL;
-
 #ifdef CONFIG_LINUX_AIO
     /* Currently Linux does AIO only for files opened with O_DIRECT */
    if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -595,161 +538,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    return raw_open_common(bs, options, flags, 0, errp);
 }

-typedef enum {
-    RAW_PL_PREPARE,
-    RAW_PL_COMMIT,
-    RAW_PL_ABORT,
-} RawPermLockOp;
-
-#define PERM_FOREACH(i) \
-    for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++)
-
-/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the
- * file; if @unlock == true, also unlock the unneeded bytes.
- * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
- */
-static int raw_apply_lock_bytes(BDRVRawState *s,
-                                uint64_t perm_lock_bits,
-                                uint64_t shared_perm_lock_bits,
-                                bool unlock, Error **errp)
-{
-    int ret;
-    int i;
-
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_PERM_BASE + i;
-        if (perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
-            if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
-                return ret;
-            }
-        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
-            if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
-                return ret;
-            }
-        }
-    }
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_SHARED_BASE + i;
-        if (shared_perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
-            if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
-                return ret;
-            }
-        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
-            if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
-                return ret;
-            }
-        }
-    }
-    return 0;
-}
-
-/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
-static int raw_check_lock_bytes(BDRVRawState *s,
-                                uint64_t perm, uint64_t shared_perm,
-                                Error **errp)
-{
-    int ret;
-    int i;
-
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_SHARED_BASE + i;
-        uint64_t p = 1ULL << i;
-        if (perm & p) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
-            if (ret) {
-                char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get \"%s\" lock",
-                           perm_name);
-                g_free(perm_name);
-                error_append_hint(errp,
-                                  "Is another process using the image?\n");
-                return ret;
-            }
-        }
-    }
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_PERM_BASE + i;
-        uint64_t p = 1ULL << i;
-        if (!(shared_perm & p)) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
-            if (ret) {
-                char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get shared \"%s\" lock",
-                           perm_name);
-                g_free(perm_name);
-                error_append_hint(errp,
-                                  "Is another process using the image?\n");
-                return ret;
-            }
-        }
-    }
-    return 0;
-}
-
-static int raw_handle_perm_lock(BlockDriverState *bs,
-                                RawPermLockOp op,
-                                uint64_t new_perm, uint64_t new_shared,
-                                Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret = 0;
-    Error *local_err = NULL;
-
-    if (!s->use_lock) {
-        return 0;
-    }
-
-    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
-        return 0;
-    }
-
-    assert(s->lock_fd > 0);
-
-    switch (op) {
-    case RAW_PL_PREPARE:
-        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
-                                   ~s->shared_perm | ~new_shared,
-                                   false, errp);
-        if (!ret) {
-            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
-            if (!ret) {
-                return 0;
-            }
-        }
-        op = RAW_PL_ABORT;
-        /* fall through to unlock bytes. */
-    case RAW_PL_ABORT:
-        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
-        if (local_err) {
-            /* Theoretically the above call only unlocks bytes and it cannot
-             * fail. Something weird happened, report it.
-             */
-            error_report_err(local_err);
-        }
-        break;
-    case RAW_PL_COMMIT:
-        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
-        if (local_err) {
-            /* Theoretically the above call only unlocks bytes and it cannot
-             * fail. Something weird happened, report it.
-             */
-            error_report_err(local_err);
-        }
-        break;
-    }
-    return ret;
-}
-
 static int raw_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
 {
@@ -863,15 +651,12 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

-static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
+static int hdev_get_max_transfer_length(int fd)
 {
 #ifdef BLKSECTGET
-    int max_bytes = 0;
-    short max_sectors = 0;
-    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-        return max_bytes;
-    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
-        return max_sectors << BDRV_SECTOR_BITS;
+    int max_sectors = 0;
+    if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+        return max_sectors;
    } else {
        return -errno;
    }
@@ -880,66 +665,16 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
 #endif
 }

-static int hdev_get_max_segments(const struct stat *st)
-{
-#ifdef CONFIG_LINUX
-    char buf[32];
-    const char *end;
-    char *sysfspath;
-    int ret;
-    int fd = -1;
-    long max_segments;
-
-    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st->st_rdev), minor(st->st_rdev));
-    fd = open(sysfspath, O_RDONLY);
-    if (fd == -1) {
-        ret = -errno;
-        goto out;
-    }
-    do {
-        ret = read(fd, buf, sizeof(buf) - 1);
-    } while (ret == -1 && errno == EINTR);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    } else if (ret == 0) {
-        ret = -EIO;
-        goto out;
-    }
-    buf[ret] = 0;
-    /* The file is ended with '\n', pass 'end' to accept that. */
-    ret = qemu_strtol(buf, &end, 10, &max_segments);
-    if (ret == 0 && end && *end == '\n') {
-        ret = max_segments;
-    }
-
-out:
-    if (fd != -1) {
-        close(fd);
-    }
-    g_free(sysfspath);
-    return ret;
-#else
-    return -ENOTSUP;
-#endif
-}
-
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;

    if (!fstat(s->fd, &st)) {
-        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
-            int ret = hdev_get_max_transfer_length(bs, s->fd);
-            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-                bs->bl.max_transfer = pow2floor(ret);
-            }
-            ret = hdev_get_max_segments(&st);
-            if (ret > 0) {
-                bs->bl.max_transfer = MIN(bs->bl.max_transfer,
-                                          ret * getpagesize());
+        if (S_ISBLK(st.st_mode)) {
+            int ret = hdev_get_max_transfer_length(s->fd);
+            if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+                bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
            }
        }
    }
@@ -1036,31 +771,10 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)

 static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
 {
-    BDRVRawState *s = aiocb->bs->opaque;
    int ret;

-    if (s->page_cache_inconsistent) {
-        return -EIO;
-    }
-
    ret = qemu_fdatasync(aiocb->aio_fildes);
    if (ret == -1) {
-        /* There is no clear definition of the semantics of a failing fsync(),
-         * so we may have to assume the worst. The sad truth is that this
-         * assumption is correct for Linux. Some pages are now probably marked
-         * clean in the page cache even though they are inconsistent with the
-         * on-disk contents. The next fdatasync() call would succeed, but no
-         * further writeback attempt will be made. We can't get back to a state
-         * in which we know what is on disk (we would have to rewrite
-         * everything that was touched since the last fdatasync() at least), so
-         * make bdrv_flush() fail permanently. Given that the behaviour isn't
-         * really defined, I have little hope that other OSes are doing better.
-         *
-         * Obviously, this doesn't affect O_DIRECT, which bypasses the page
-         * cache. */
-        if ((s->open_flags & O_DIRECT) == 0) {
-            s->page_cache_inconsistent = true;
-        }
        return -errno;
    }
    return 0;
@@ -1618,37 +1332,26 @@ static void raw_close(BlockDriverState *bs)
        qemu_close(s->fd);
        s->fd = -1;
    }
-    if (s->lock_fd >= 0) {
-        qemu_close(s->lock_fd);
-        s->lock_fd = -1;
-    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;
-    int ret;

    if (fstat(s->fd, &st)) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Failed to fstat() the file");
-        return ret;
+        return -errno;
    }

    if (S_ISREG(st.st_mode)) {
        if (ftruncate(s->fd, offset) < 0) {
-            ret = -errno;
-            error_setg_errno(errp, -ret, "Failed to resize the file");
-            return ret;
+            return -errno;
        }
    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-        if (offset > raw_getlength(bs)) {
-            error_setg(errp, "Cannot grow device files");
-            return -EINVAL;
-        }
+       if (offset > raw_getlength(bs)) {
+           return -EINVAL;
+       }
    } else {
-        error_setg(errp, "Resizing this file is not supported");
        return -ENOTSUP;
    }

@@ -1885,17 +1588,18 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    }

+    if (ftruncate(fd, total_size) != 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not resize file");
+        goto out_close;
+    }
+
    switch (prealloc) {
 #ifdef CONFIG_POSIX_FALLOCATE
    case PREALLOC_MODE_FALLOC:
-        /*
-         * Truncating before posix_fallocate() makes it about twice slower on
-         * file systems that do not support fallocate(), trying to check if a
-         * block is allocated before allocating it, so don't do that here.
-         */
+        /* posix_fallocate() doesn't set errno. */
        result = -posix_fallocate(fd, 0, total_size);
        if (result != 0) {
-            /* posix_fallocate() doesn't set errno. */
            error_setg_errno(errp, -result,
                             "Could not preallocate data for the new file");
        }
@@ -1903,17 +1607,6 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    case PREALLOC_MODE_FULL:
    {
-        /*
-         * Knowing the final size from the beginning could allow the file
-         * system driver to do less allocations and possibly avoid
-         * fragmentation of the file.
-         */
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-            goto out_close;
-        }
-
        int64_t num = 0, left = total_size;
        buf = g_malloc0(65536);

@@ -1940,10 +1633,6 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        break;
    }
    case PREALLOC_MODE_OFF:
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-        }
        break;
    default:
        result = -EINVAL;
@@ -2166,25 +1855,6 @@ static QemuOptsList raw_create_opts = {
    }
 };

-static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
-                          Error **errp)
-{
-    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
-}
-
-static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
-{
-    BDRVRawState *s = bs->opaque;
-    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
-    s->perm = perm;
-    s->shared_perm = shared;
-}
-
-static void raw_abort_perm_update(BlockDriverState *bs)
-{
-    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
-}
-
 BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
@@ -2215,9 +1885,7 @@ BlockDriver bdrv_file = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
-    .bdrv_check_perm = raw_check_perm,
-    .bdrv_set_perm   = raw_set_perm,
-    .bdrv_abort_perm_update = raw_abort_perm_update,
+
    .create_opts = &raw_create_opts,
 };

@@ -2390,7 +2058,10 @@ static int check_hdev_writable(BDRVRawState *s)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_device:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static bool hdev_is_sg(BlockDriverState *bs)
@@ -2433,12 +2104,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;

 #if defined(__APPLE__) && defined(__MACH__)
-    /*
-     * Caution: while qdict_get_str() is fine, getting non-string types
-     * would require more care.  When @options come from -blockdev or
-     * blockdev_add, its members are typed according to the QAPI
-     * schema, but when they come from -drive, they're all QString.
-     */
    const char *filename = qdict_get_str(options, "filename");
    char bsd_path[MAXPATHLEN] = "";
    bool error_occurred = false;
@@ -2479,7 +2144,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
            goto hdev_open_Mac_error;
        }

-        qdict_put_str(options, "filename", bsd_path);
+        qdict_put(options, "filename", qstring_from_str(bsd_path));

 hdev_open_Mac_error:
        g_free(mediaType);
@@ -2673,9 +2338,6 @@ static BlockDriver bdrv_host_device = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
-    .bdrv_check_perm = raw_check_perm,
-    .bdrv_set_perm   = raw_set_perm,
-    .bdrv_abort_perm_update = raw_abort_perm_update,
    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
    .bdrv_probe_geometry = hdev_probe_geometry,

@@ -2689,7 +2351,10 @@ static BlockDriver bdrv_host_device = {
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                 Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_cdrom:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }
 #endif

--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "block/raw-aio.h"
@@ -276,7 +277,12 @@ static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
 static void raw_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "file:", options);
+    /* The filename does not have to be prefixed by the protocol name, since
+     * "file" is the default protocol; therefore, the return value of this
+     * function call can be ignored. */
+    strstart(filename, "file:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static QemuOptsList raw_runtime_opts = {
@@ -339,12 +345,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    if (qdict_get_try_bool(options, "locking", false)) {
-        error_setg(errp, "locking=on is not supported on Windows");
-        ret = -EINVAL;
-        goto fail;
-    }
-
    filename = qemu_opt_get(opts, "filename");

    use_aio = get_aio_option(opts, flags, &local_err);
@@ -461,7 +461,7 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;
    LONG low, high;
@@ -476,11 +476,11 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
     */
    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-        error_setg_win32(errp, GetLastError(), "SetFilePointer error");
+        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
        return -EIO;
    }
    if (SetEndOfFile(s->hfile) == 0) {
-        error_setg_win32(errp, GetLastError(), "SetEndOfFile error");
+        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
        return -EIO;
    }
    return 0;
@@ -666,7 +666,10 @@ static int hdev_probe_device(const char *filename)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_device:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw" format driver
+/* BlockDriver implementation for "raw"
 *
 * Copyright (C) 2010-2016 Red Hat, Inc.
 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
@@ -327,23 +327,21 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;

    if (s->has_size) {
-        error_setg(errp, "Cannot resize fixed-size raw disks");
        return -ENOTSUP;
    }

    if (INT64_MAX - offset < s->offset) {
-        error_setg(errp, "Disk size too large for the chosen offset");
        return -EINVAL;
    }

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static int raw_media_changed(BlockDriverState *bs)
@@ -386,12 +384,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVRawState *s = bs->opaque;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    bs->sg = bs->file->bs->sg;
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
@@ -469,7 +461,6 @@ BlockDriver bdrv_raw = {
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
-    .bdrv_child_perm      = bdrv_filter_default_perms,
    .bdrv_create          = &raw_create,
    .bdrv_co_preadv       = &raw_co_preadv,
    .bdrv_co_pwritev      = &raw_co_pwritev,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,14 +13,13 @@

 #include "qemu/osdep.h"

-#include <rbd/librbd.h>
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi/qmp/qjson.h"
+
+#include <rbd/librbd.h>

 /*
 * When specifying the image filename use:
@@ -56,15 +55,13 @@

 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)

+#define RBD_MAX_CONF_NAME_SIZE 128
+#define RBD_MAX_CONF_VAL_SIZE 512
+#define RBD_MAX_CONF_SIZE 1024
+#define RBD_MAX_POOL_NAME_SIZE 128
+#define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100

-/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
-#ifdef LIBRBD_SUPPORTS_IOVEC
-#define LIBRBD_USE_IOVEC 1
-#else
-#define LIBRBD_USE_IOVEC 0
-#endif
-
 typedef enum {
    RBD_AIO_READ,
    RBD_AIO_WRITE,
@@ -94,29 +91,46 @@ typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
-    char *image_name;
+    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char *snap;
 } BDRVRBDState;

-static char *qemu_rbd_next_tok(char *src, char delim, char **p)
+static int qemu_rbd_next_tok(char *dst, int dst_len,
+                             char *src, char delim,
+                             const char *name,
+                             char **p, Error **errp)
 {
+    int l;
    char *end;

    *p = NULL;

-    for (end = src; *end; ++end) {
+    if (delim != '\0') {
+        for (end = src; *end; ++end) {
+            if (*end == delim) {
+                break;
+            }
+            if (*end == '\\' && end[1] != '\0') {
+                end++;
+            }
+        }
        if (*end == delim) {
-            break;
-        }
-        if (*end == '\\' && end[1] != '\0') {
-            end++;
+            *p = end + 1;
+            *end = '\0';
        }
    }
-    if (*end == delim) {
-        *p = end + 1;
-        *end = '\0';
+    l = strlen(src);
+    if (l >= dst_len) {
+        error_setg(errp, "%s too long", name);
+        return -EINVAL;
+    } else if (l == 0) {
+        error_setg(errp, "%s too short", name);
+        return -EINVAL;
    }
-    return src;
+
+    pstrcpy(dst, dst_len, src);
+
+    return 0;
 }

 static void qemu_rbd_unescape(char *src)
@@ -132,92 +146,87 @@ static void qemu_rbd_unescape(char *src)
    *p = '\0';
 }

-static void qemu_rbd_parse_filename(const char *filename, QDict *options,
-                                    Error **errp)
+static int qemu_rbd_parsename(const char *filename,
+                              char *pool, int pool_len,
+                              char *snap, int snap_len,
+                              char *name, int name_len,
+                              char *conf, int conf_len,
+                              Error **errp)
 {
    const char *start;
    char *p, *buf;
-    QList *keypairs = NULL;
-    char *found_str;
+    int ret;

    if (!strstart(filename, "rbd:", &start)) {
        error_setg(errp, "File name must start with 'rbd:'");
-        return;
+        return -EINVAL;
    }

    buf = g_strdup(start);
    p = buf;
+    *snap = '\0';
+    *conf = '\0';

-    found_str = qemu_rbd_next_tok(p, '/', &p);
-    if (!p) {
-        error_setg(errp, "Pool name is required");
+    ret = qemu_rbd_next_tok(pool, pool_len, p,
+                            '/', "pool name", &p, errp);
+    if (ret < 0 || !p) {
+        ret = -EINVAL;
        goto done;
    }
-    qemu_rbd_unescape(found_str);
-    qdict_put_str(options, "pool", found_str);
+    qemu_rbd_unescape(pool);

    if (strchr(p, '@')) {
-        found_str = qemu_rbd_next_tok(p, '@', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "image", found_str);
-
-        found_str = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "snapshot", found_str);
+        ret = qemu_rbd_next_tok(name, name_len, p,
+                                '@', "object name", &p, errp);
+        if (ret < 0) {
+            goto done;
+        }
+        ret = qemu_rbd_next_tok(snap, snap_len, p,
+                                ':', "snap name", &p, errp);
+        qemu_rbd_unescape(snap);
    } else {
-        found_str = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "image", found_str);
+        ret = qemu_rbd_next_tok(name, name_len, p,
+                                ':', "object name", &p, errp);
    }
-    if (!p) {
+    qemu_rbd_unescape(name);
+    if (ret < 0 || !p) {
        goto done;
    }

-    /* The following are essentially all key/value pairs, and we treat
-     * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
-    while (p) {
-        char *name, *value;
-        name = qemu_rbd_next_tok(p, '=', &p);
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
-            break;
-        }
-
-        qemu_rbd_unescape(name);
-
-        value = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(value);
-
-        if (!strcmp(name, "conf")) {
-            qdict_put_str(options, "conf", value);
-        } else if (!strcmp(name, "id")) {
-            qdict_put_str(options, "user", value);
-        } else {
-            /*
-             * We pass these internally to qemu_rbd_set_keypairs(), so
-             * we can get away with the simpler list of [ "key1",
-             * "value1", "key2", "value2" ] rather than a raw dict
-             * { "key1": "value1", "key2": "value2" } where we can't
-             * guarantee order, or even a more correct but complex
-             * [ { "key1": "value1" }, { "key2": "value2" } ]
-             */
-            if (!keypairs) {
-                keypairs = qlist_new();
-            }
-            qlist_append_str(keypairs, name);
-            qlist_append_str(keypairs, value);
-        }
-    }
-
-    if (keypairs) {
-        qdict_put(options, "=keyvalue-pairs",
-                  qobject_to_json(QOBJECT(keypairs)));
-    }
+    ret = qemu_rbd_next_tok(conf, conf_len, p,
+                            '\0', "configuration", &p, errp);

 done:
    g_free(buf);
-    QDECREF(keypairs);
-    return;
+    return ret;
+}
+
+static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
+{
+    const char *p = conf;
+
+    while (*p) {
+        int len;
+        const char *end = strchr(p, ':');
+
+        if (end) {
+            len = end - p;
+        } else {
+            len = strlen(p);
+        }
+
+        if (strncmp(p, "id=", 3) == 0) {
+            len -= 3;
+            strncpy(clientname, p + 3, len);
+            clientname[len] = '\0';
+            return clientname;
+        }
+        if (end == NULL) {
+            break;
+        }
+        p = end + 1;
+    }
+    return NULL;
 }


@@ -240,129 +249,94 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
    return 0;
 }

-static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
-                                 Error **errp)
+
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
+                             bool only_read_conf_file,
+                             Error **errp)
 {
-    QList *keypairs;
-    QString *name;
-    QString *value;
-    const char *key;
-    size_t remaining;
+    char *p, *buf;
+    char name[RBD_MAX_CONF_NAME_SIZE];
+    char value[RBD_MAX_CONF_VAL_SIZE];
    int ret = 0;

-    if (!keypairs_json) {
-        return ret;
-    }
-    keypairs = qobject_to_qlist(qobject_from_json(keypairs_json,
-                                                  &error_abort));
-    remaining = qlist_size(keypairs) / 2;
-    assert(remaining);
+    buf = g_strdup(conf);
+    p = buf;

-    while (remaining--) {
-        name = qobject_to_qstring(qlist_pop(keypairs));
-        value = qobject_to_qstring(qlist_pop(keypairs));
-        assert(name && value);
-        key = qstring_get_str(name);
-
-        ret = rados_conf_set(cluster, key, qstring_get_str(value));
-        QDECREF(name);
-        QDECREF(value);
+    while (p) {
+        ret = qemu_rbd_next_tok(name, sizeof(name), p,
+                                '=', "conf option name", &p, errp);
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "invalid conf option %s", key);
+            break;
+        }
+        qemu_rbd_unescape(name);
+
+        if (!p) {
+            error_setg(errp, "conf option %s has no value", name);
            ret = -EINVAL;
            break;
        }
+
+        ret = qemu_rbd_next_tok(value, sizeof(value), p,
+                                ':', "conf option value", &p, errp);
+        if (ret < 0) {
+            break;
+        }
+        qemu_rbd_unescape(value);
+
+        if (strcmp(name, "conf") == 0) {
+            /* read the conf file alone, so it doesn't override more
+               specific settings for a particular device */
+            if (only_read_conf_file) {
+                ret = rados_conf_read_file(cluster, value);
+                if (ret < 0) {
+                    error_setg_errno(errp, -ret, "error reading conf file %s",
+                                     value);
+                    break;
+                }
+            }
+        } else if (strcmp(name, "id") == 0) {
+            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
+        } else if (!only_read_conf_file) {
+            ret = rados_conf_set(cluster, name, value);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "invalid conf option %s", name);
+                ret = -EINVAL;
+                break;
+            }
+        }
    }

-    QDECREF(keypairs);
+    g_free(buf);
    return ret;
 }

-static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
-{
-    if (LIBRBD_USE_IOVEC) {
-        RBDAIOCB *acb = rcb->acb;
-        iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
-                   acb->qiov->size - offs);
-    } else {
-        memset(rcb->buf + offs, 0, rcb->size - offs);
-    }
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "rbd",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "pool",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados pool name",
-        },
-        {
-            .name = "image",
-            .type = QEMU_OPT_STRING,
-            .help = "Image name in the pool",
-        },
-        {
-            .name = "conf",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados config file location",
-        },
-        {
-            .name = "snapshot",
-            .type = QEMU_OPT_STRING,
-            .help = "Ceph snapshot name",
-        },
-        {
-            /* maps to 'id' in rados_create() */
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados id name",
-        },
-        /*
-         * server.* extracted manually, see qemu_rbd_mon_host()
-         */
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-
-        /*
-         * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
-         */
-        {
-            /*
-             * HACK: name starts with '=' so that qemu_opts_parse()
-             * can't set it
-             */
-            .name = "=keyvalue-pairs",
-            .type = QEMU_OPT_STRING,
-            .help = "Legacy rados key/value option parameters",
-        },
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
 static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 {
    Error *local_err = NULL;
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
-    const char *pool, *image_name, *conf, *user, *keypairs;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char name[RBD_MAX_IMAGE_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_SIZE];
+    char *clientname;
    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
-    QDict *options = NULL;
-    int ret = 0;
+    int ret;

    secretid = qemu_opt_get(opts, "password-secret");

+    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
+                           snap_buf, sizeof(snap_buf),
+                           name, sizeof(name),
+                           conf, sizeof(conf), &local_err) < 0) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
+    }
+
    /* Read out options */
    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                     BDRV_SECTOR_SIZE);
@@ -370,53 +344,35 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    if (objsize) {
        if ((objsize - 1) & objsize) {    /* not a power of 2? */
            error_setg(errp, "obj size needs to be power of 2");
-            ret = -EINVAL;
-            goto exit;
+            return -EINVAL;
        }
        if (objsize < 4096) {
            error_setg(errp, "obj size too small");
-            ret = -EINVAL;
-            goto exit;
+            return -EINVAL;
        }
        obj_order = ctz32(objsize);
    }

-    options = qdict_new();
-    qemu_rbd_parse_filename(filename, options, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    /*
-     * Caution: while qdict_get_try_str() is fine, getting non-string
-     * types would require more care.  When @options come from -blockdev
-     * or blockdev_add, its members are typed according to the QAPI
-     * schema, but when they come from -drive, they're all QString.
-     */
-    pool       = qdict_get_try_str(options, "pool");
-    conf       = qdict_get_try_str(options, "conf");
-    user       = qdict_get_try_str(options, "user");
-    image_name = qdict_get_try_str(options, "image");
-    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");
-
-    ret = rados_create(&cluster, user);
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    ret = rados_create(&cluster, clientname);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error initializing");
-        goto exit;
+        return ret;
    }

-    /* try default location when conf=NULL, but ignore failure */
-    ret = rados_conf_read_file(cluster, conf);
-    if (conf && ret < 0) {
-        error_setg_errno(errp, -ret, "error reading conf file %s", conf);
+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(cluster, NULL);
+    } else if (conf[0] != '\0' &&
+               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
+        error_propagate(errp, local_err);
        ret = -EIO;
        goto shutdown;
    }

-    ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
-    if (ret < 0) {
+    if (conf[0] != '\0' &&
+        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
+        error_propagate(errp, local_err);
        ret = -EIO;
        goto shutdown;
    }
@@ -438,7 +394,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        goto shutdown;
    }

-    ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
+    ret = rbd_create(io_ctx, name, bytes, &obj_order);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
    }
@@ -447,9 +403,6 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)

 shutdown:
    rados_shutdown(cluster);
-
-exit:
-    QDECREF(options);
    return ret;
 }

@@ -473,11 +426,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
        }
    } else {
        if (r < 0) {
-            qemu_rbd_memset(rcb, 0);
+            memset(rcb->buf, 0, rcb->size);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
-            qemu_rbd_memset(rcb, r);
+            memset(rcb->buf + r, 0, rcb->size - r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
@@ -488,137 +441,92 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)

    g_free(rcb);

-    if (!LIBRBD_USE_IOVEC) {
-        if (acb->cmd == RBD_AIO_READ) {
-            qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-        }
-        qemu_vfree(acb->bounce);
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
-
+    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));

    qemu_aio_unref(acb);
 }

-static char *qemu_rbd_mon_host(QDict *options, Error **errp)
-{
-    const char **vals = g_new(const char *, qdict_size(options) + 1);
-    char keybuf[32];
-    const char *host, *port;
-    char *rados_str;
-    int i;
-
-    for (i = 0;; i++) {
-        sprintf(keybuf, "server.%d.host", i);
-        host = qdict_get_try_str(options, keybuf);
-        qdict_del(options, keybuf);
-        sprintf(keybuf, "server.%d.port", i);
-        port = qdict_get_try_str(options, keybuf);
-        qdict_del(options, keybuf);
-        if (!host && !port) {
-            break;
-        }
-        if (!host) {
-            error_setg(errp, "Parameter server.%d.host is missing", i);
-            rados_str = NULL;
-            goto out;
-        }
-
-        if (strchr(host, ':')) {
-            vals[i] = port ? g_strdup_printf("[%s]:%s", host, port)
-                : g_strdup_printf("[%s]", host);
-        } else {
-            vals[i] = port ? g_strdup_printf("%s:%s", host, port)
-                : g_strdup(host);
-        }
-    }
-    vals[i] = NULL;
-
-    rados_str = i ? g_strjoinv(";", (char **)vals) : NULL;
-out:
-    g_strfreev((char **)vals);
-    return rados_str;
-}
+/* TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "rbd",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "Specification of the rbd image",
+        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
+        { /* end of list */ }
+    },
+};

 static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
-    const char *pool, *snap, *conf, *user, *image_name, *keypairs;
-    const char *secretid, *filename;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_SIZE];
+    char *clientname;
+    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
-    char *mon_host = NULL;
+    const char *filename;
    int r;

-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
-        qemu_rbd_parse_filename(filename, options, &local_err);
-        if (local_err) {
-            r = -EINVAL;
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
-
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
-        r = -EINVAL;
-        goto failed_opts;
-    }
-
-    mon_host = qemu_rbd_mon_host(options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        r = -EINVAL;
-        goto failed_opts;
+        qemu_opts_del(opts);
+        return -EINVAL;
    }

+    filename = qemu_opt_get(opts, "filename");
    secretid = qemu_opt_get(opts, "password-secret");

-    pool           = qemu_opt_get(opts, "pool");
-    conf           = qemu_opt_get(opts, "conf");
-    snap           = qemu_opt_get(opts, "snapshot");
-    user           = qemu_opt_get(opts, "user");
-    image_name     = qemu_opt_get(opts, "image");
-    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");
-
-    if (!pool || !image_name) {
-        error_setg(errp, "Parameters 'pool' and 'image' are required");
+    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
+                           snap_buf, sizeof(snap_buf),
+                           s->name, sizeof(s->name),
+                           conf, sizeof(conf), errp) < 0) {
        r = -EINVAL;
        goto failed_opts;
    }

-    r = rados_create(&s->cluster, user);
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
        error_setg_errno(errp, -r, "error initializing");
        goto failed_opts;
    }

-    s->snap = g_strdup(snap);
-    s->image_name = g_strdup(image_name);
-
-    /* try default location when conf=NULL, but ignore failure */
-    r = rados_conf_read_file(s->cluster, conf);
-    if (conf && r < 0) {
-        error_setg_errno(errp, -r, "error reading conf file %s", conf);
-        goto failed_shutdown;
+    s->snap = NULL;
+    if (snap_buf[0] != '\0') {
+        s->snap = g_strdup(snap_buf);
    }

-    r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
-    if (r < 0) {
-        goto failed_shutdown;
+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(s->cluster, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+        if (r < 0) {
+            goto failed_shutdown;
+        }
    }

-    if (mon_host) {
-        r = rados_conf_set(s->cluster, "mon_host", mon_host);
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
        if (r < 0) {
            goto failed_shutdown;
        }
@@ -654,23 +562,13 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto failed_shutdown;
    }

-    /* rbd_open is always r/w */
-    r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
+    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
-        error_setg_errno(errp, -r, "error reading header from %s",
-                         s->image_name);
+        error_setg_errno(errp, -r, "error reading header from %s", s->name);
        goto failed_open;
    }

-    /* If we are using an rbd snapshot, we must be r/o, otherwise
-     * leave as-is */
-    if (s->snap != NULL) {
-        r = bdrv_set_read_only(bs, true, &local_err);
-        if (r < 0) {
-            error_propagate(errp, local_err);
-            goto failed_open;
-        }
-    }
+    bs->read_only = (s->snap != NULL);

    qemu_opts_del(opts);
    return 0;
@@ -680,34 +578,11 @@ failed_open:
 failed_shutdown:
    rados_shutdown(s->cluster);
    g_free(s->snap);
-    g_free(s->image_name);
 failed_opts:
    qemu_opts_del(opts);
-    g_free(mon_host);
-exit:
    return r;
 }

-
-/* Since RBD is currently always opened R/W via the API,
- * we just need to check if we are using a snapshot or not, in
- * order to determine if we will allow it to be R/W */
-static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
-                                   BlockReopenQueue *queue, Error **errp)
-{
-    BDRVRBDState *s = state->bs->opaque;
-    int ret = 0;
-
-    if (s->snap && state->flags & BDRV_O_RDWR) {
-        error_setg(errp,
-                   "Cannot change node '%s' to r/w when using RBD snapshot",
-                   bdrv_get_device_or_node_name(state->bs));
-        ret = -EINVAL;
-    }
-
-    return ret;
-}
-
 static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;
@@ -715,7 +590,6 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
-    g_free(s->image_name);
    rados_shutdown(s->cluster);
 }

@@ -781,6 +655,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
+    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;
@@ -789,29 +664,27 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb->cmd = cmd;
    acb->qiov = qiov;
    assert(!qiov || qiov->size == size);
-
-    rcb = g_new(RADOSCB, 1);
-
-    if (!LIBRBD_USE_IOVEC) {
-        if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-            acb->bounce = NULL;
-        } else {
-            acb->bounce = qemu_try_blockalign(bs, qiov->size);
-            if (acb->bounce == NULL) {
-                goto failed;
-            }
+    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
+        acb->bounce = NULL;
+    } else {
+        acb->bounce = qemu_try_blockalign(bs, qiov->size);
+        if (acb->bounce == NULL) {
+            goto failed;
        }
-        if (cmd == RBD_AIO_WRITE) {
-            qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-        }
-        rcb->buf = acb->bounce;
    }
-
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;

+    if (cmd == RBD_AIO_WRITE) {
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+    }
+
+    buf = acb->bounce;
+
+    rcb = g_new(RADOSCB, 1);
    rcb->acb = acb;
+    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -821,18 +694,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

    switch (cmd) {
    case RBD_AIO_WRITE:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_write(s->image, off, size, rcb->buf, c);
-#endif
+        r = rbd_aio_write(s->image, off, size, buf, c);
        break;
    case RBD_AIO_READ:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_read(s->image, off, size, rcb->buf, c);
-#endif
+        r = rbd_aio_read(s->image, off, size, buf, c);
        break;
    case RBD_AIO_DISCARD:
        r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -847,16 +712,14 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    if (r < 0) {
        goto failed_completion;
    }
+
    return &acb->common;

 failed_completion:
    rbd_aio_release(c);
 failed:
    g_free(rcb);
-    if (!LIBRBD_USE_IOVEC) {
-        qemu_vfree(acb->bounce);
-    }
-
+    qemu_vfree(acb->bounce);
    qemu_aio_unref(acb);
    return NULL;
 }
@@ -936,14 +799,13 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
    return info.size;
 }

-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRBDState *s = bs->opaque;
    int r;

    r = rbd_resize(s->image, offset);
    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to resize file");
        return r;
    }

@@ -1110,19 +972,18 @@ static QemuOptsList qemu_rbd_create_opts = {
 };

 static BlockDriver bdrv_rbd = {
-    .format_name            = "rbd",
-    .instance_size          = sizeof(BDRVRBDState),
-    .bdrv_parse_filename    = qemu_rbd_parse_filename,
-    .bdrv_file_open         = qemu_rbd_open,
-    .bdrv_close             = qemu_rbd_close,
-    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
-    .bdrv_create            = qemu_rbd_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .bdrv_get_info          = qemu_rbd_getinfo,
-    .create_opts            = &qemu_rbd_create_opts,
-    .bdrv_getlength         = qemu_rbd_getlength,
-    .bdrv_truncate          = qemu_rbd_truncate,
-    .protocol_name          = "rbd",
+    .format_name        = "rbd",
+    .instance_size      = sizeof(BDRVRBDState),
+    .bdrv_needs_filename = true,
+    .bdrv_file_open     = qemu_rbd_open,
+    .bdrv_close         = qemu_rbd_close,
+    .bdrv_create        = qemu_rbd_create,
+    .bdrv_has_zero_init = bdrv_has_zero_init_1,
+    .bdrv_get_info      = qemu_rbd_getinfo,
+    .create_opts        = &qemu_rbd_create_opts,
+    .bdrv_getlength     = qemu_rbd_getlength,
+    .bdrv_truncate      = qemu_rbd_truncate,
+    .protocol_name      = "rbd",

    .bdrv_aio_readv         = qemu_rbd_aio_readv,
    .bdrv_aio_writev        = qemu_rbd_aio_writev,
--- a/block/replication.c
+++ b/block/replication.c
@@ -22,17 +22,9 @@
 #include "qapi/error.h"
 #include "replication.h"

-typedef enum {
-    BLOCK_REPLICATION_NONE,             /* block replication is not started */
-    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
-    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
-    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
-    BLOCK_REPLICATION_DONE,             /* block replication is done */
-} ReplicationStage;
-
 typedef struct BDRVReplicationState {
    ReplicationMode mode;
-    ReplicationStage stage;
+    int replication_state;
    BdrvChild *active_disk;
    BdrvChild *hidden_disk;
    BdrvChild *secondary_disk;
@@ -44,6 +36,14 @@ typedef struct BDRVReplicationState {
    int error;
 } BDRVReplicationState;

+enum {
+    BLOCK_REPLICATION_NONE,             /* block replication is not started */
+    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
+    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
+    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+    BLOCK_REPLICATION_DONE,             /* block replication is done */
+};
+
 static void replication_start(ReplicationState *rs, ReplicationMode mode,
                              Error **errp);
 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
@@ -86,12 +86,6 @@ static int replication_open(BlockDriverState *bs, QDict *options,
    const char *mode;
    const char *top_id;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    ret = -EINVAL;
    opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -141,10 +135,10 @@ static void replication_close(BlockDriverState *bs)
 {
    BDRVReplicationState *s = bs->opaque;

-    if (s->stage == BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }
-    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
+    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
        block_job_cancel_sync(s->active_disk->bs->job);
    }

@@ -155,18 +149,6 @@ static void replication_close(BlockDriverState *bs)
    replication_remove(s->rs);
 }

-static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                   const BdrvChildRole *role,
-                                   uint64_t perm, uint64_t shared,
-                                   uint64_t *nperm, uint64_t *nshared)
-{
-    *nperm = *nshared = BLK_PERM_CONSISTENT_READ \
-                        | BLK_PERM_WRITE \
-                        | BLK_PERM_WRITE_UNCHANGED;
-
-    return;
-}
-
 static int64_t replication_getlength(BlockDriverState *bs)
 {
    return bdrv_getlength(bs->file->bs);
@@ -174,7 +156,7 @@ static int64_t replication_getlength(BlockDriverState *bs)

 static int replication_get_io_status(BDRVReplicationState *s)
 {
-    switch (s->stage) {
+    switch (s->replication_state) {
    case BLOCK_REPLICATION_NONE:
        return -EIO;
    case BLOCK_REPLICATION_RUNNING:
@@ -403,7 +385,7 @@ static void backup_job_completed(void *opaque, int ret)
    BlockDriverState *bs = opaque;
    BDRVReplicationState *s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_FAILOVER) {
+    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
        /* The backup job is cancelled unexpectedly */
        s->error = -EIO;
    }
@@ -445,7 +427,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_NONE) {
+    if (s->replication_state != BLOCK_REPLICATION_NONE) {
        error_setg(errp, "Block replication is running or done");
        aio_context_release(aio_context);
        return;
@@ -545,7 +527,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        abort();
    }

-    s->stage = BLOCK_REPLICATION_RUNNING;
+    s->replication_state = BLOCK_REPLICATION_RUNNING;

    if (s->mode == REPLICATION_MODE_SECONDARY) {
        secondary_do_checkpoint(s, errp);
@@ -581,7 +563,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -601,7 +583,7 @@ static void replication_done(void *opaque, int ret)
    BDRVReplicationState *s = bs->opaque;

    if (ret == 0) {
-        s->stage = BLOCK_REPLICATION_DONE;
+        s->replication_state = BLOCK_REPLICATION_DONE;

        /* refresh top bs's filename */
        bdrv_refresh_filename(bs);
@@ -610,7 +592,7 @@ static void replication_done(void *opaque, int ret)
        s->hidden_disk = NULL;
        s->error = 0;
    } else {
-        s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
+        s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED;
        s->error = -EIO;
    }
 }
@@ -625,7 +607,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -633,7 +615,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

    switch (s->mode) {
    case REPLICATION_MODE_PRIMARY:
-        s->stage = BLOCK_REPLICATION_DONE;
+        s->replication_state = BLOCK_REPLICATION_DONE;
        s->error = 0;
        break;
    case REPLICATION_MODE_SECONDARY:
@@ -648,15 +630,15 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

        if (!failover) {
            secondary_do_checkpoint(s, errp);
-            s->stage = BLOCK_REPLICATION_DONE;
+            s->replication_state = BLOCK_REPLICATION_DONE;
            aio_context_release(aio_context);
            return;
        }

-        s->stage = BLOCK_REPLICATION_FAILOVER;
+        s->replication_state = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            NULL, replication_done, bs, true, errp);
+                            replication_done, bs, errp, true);
        break;
    default:
        aio_context_release(aio_context);
@@ -672,7 +654,6 @@ BlockDriver bdrv_replication = {

    .bdrv_open                  = replication_open,
    .bdrv_close                 = replication_close,
-    .bdrv_child_perm            = replication_child_perm,

    .bdrv_getlength             = replication_getlength,
    .bdrv_co_readv              = replication_co_readv,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -27,7 +27,6 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qstring.h"

 QemuOptsList internal_snapshot_opts = {
    .name = "snapshot",
@@ -190,33 +189,14 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
    }

    if (bs->file) {
-        BlockDriverState *file;
-        QDict *options = qdict_clone_shallow(bs->options);
-        QDict *file_options;
-
-        file = bs->file->bs;
-        /* Prevent it from getting deleted when detached from bs */
-        bdrv_ref(file);
-
-        qdict_extract_subqdict(options, &file_options, "file.");
-        QDECREF(file_options);
-        qdict_put_str(options, "file", bdrv_get_node_name(file));
-
        drv->bdrv_close(bs);
-        bdrv_unref_child(bs, bs->file);
-        bs->file = NULL;
-
-        ret = bdrv_snapshot_goto(file, snapshot_id);
-        open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL);
-        QDECREF(options);
+        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
+        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
        if (open_ret < 0) {
-            bdrv_unref(file);
+            bdrv_unref(bs->file->bs);
            bs->drv = NULL;
            return open_ret;
        }
-
-        assert(bs->file->bs == file);
-        bdrv_unref(file);
        return ret;
    }

--- a/block/ssh.c
+++ b/block/ssh.c
@@ -227,23 +227,24 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    }

    if(uri->user && strcmp(uri->user, "") != 0) {
-        qdict_put_str(options, "user", uri->user);
+        qdict_put(options, "user", qstring_from_str(uri->user));
    }

-    qdict_put_str(options, "server.host", uri->server);
+    qdict_put(options, "server.host", qstring_from_str(uri->server));

    port_str = g_strdup_printf("%d", uri->port ?: 22);
-    qdict_put_str(options, "server.port", port_str);
+    qdict_put(options, "server.port", qstring_from_str(port_str));
    g_free(port_str);

-    qdict_put_str(options, "path", uri->path);
+    qdict_put(options, "path", qstring_from_str(uri->path));

    /* Pick out any query parameters that we understand, and ignore
     * the rest.
     */
    for (i = 0; i < qp->n; ++i) {
        if (strcmp(qp->p[i].name, "host_key_check") == 0) {
-            qdict_put_str(options, "host_key_check", qp->p[i].value);
+            qdict_put(options, "host_key_check",
+                      qstring_from_str(qp->p[i].value));
        }
    }

@@ -573,8 +574,9 @@ static bool ssh_process_legacy_socket_options(QDict *output_opts,
    }

    if (host) {
-        qdict_put_str(output_opts, "server.host", host);
-        qdict_put_str(output_opts, "server.port", port ?: stringify(22));
+        qdict_put(output_opts, "server.host", qstring_from_str(host));
+        qdict_put(output_opts, "server.port",
+                  qstring_from_str(port ?: stringify(22)));
    }

    return true;
@@ -599,15 +601,7 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp)
        goto out;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
-     * .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
+    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
@@ -679,7 +673,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }

    /* Open the socket and connect. */
-    s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp);
+    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
    if (s->sock < 0) {
        ret = -EIO;
        goto err;
@@ -895,14 +889,10 @@ static void restart_coroutine(void *opaque)

    DPRINTF("co=%p", co);

-    aio_co_wake(co);
+    qemu_coroutine_enter(co);
 }

-/* A non-blocking call returned EAGAIN, so yield, ensuring the
- * handlers are set up so that we'll be rescheduled when there is an
- * interesting event on the socket.
- */
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
+static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
 {
    int r;
    IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -921,11 +911,26 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
            rd_handler, wr_handler);

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, co);
+                       false, rd_handler, wr_handler, co);
+}
+
+static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
+                                          BlockDriverState *bs)
+{
+    DPRINTF("s->sock=%d", s->sock);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+                       false, NULL, NULL, NULL);
+}
+
+/* A non-blocking call returned EAGAIN, so yield, ensuring the
+ * handlers are set up so that we'll be rescheduled when there is an
+ * interesting event on the socket.
+ */
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
+{
+    set_fd_handler(s, bs);
    qemu_coroutine_yield();
-    DPRINTF("s->sock=%d - back", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
-                       NULL, NULL, NULL, NULL);
+    clear_fd_handler(s, bs);
 }

 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
--- a/block/stream.c
+++ b/block/stream.c
@@ -68,7 +68,6 @@ static void stream_complete(BlockJob *job, void *opaque)
    StreamCompleteData *data = opaque;
    BlockDriverState *bs = blk_bs(job->blk);
    BlockDriverState *base = s->base;
-    Error *local_err = NULL;

    if (!block_job_is_cancelled(&s->common) && data->reached_end &&
        data->ret == 0) {
@@ -80,19 +79,11 @@ static void stream_complete(BlockJob *job, void *opaque)
            }
        }
        data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-        bdrv_set_backing_hd(bs, base, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            data->ret = -EPERM;
-            goto out;
-        }
+        bdrv_set_backing_hd(bs, base);
    }

-out:
    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_flags != bdrv_get_flags(bs)) {
-        /* Give up write permissions before making it read-only */
-        blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
        bdrv_reopen(bs, s->bs_flags, NULL);
    }

@@ -238,35 +229,25 @@ void stream_start(const char *job_id, BlockDriverState *bs,
    BlockDriverState *iter;
    int orig_bs_flags;

+    s = block_job_create(job_id, &stream_job_driver, bs, speed,
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    if (!s) {
+        return;
+    }
+
    /* Make sure that the image is opened in read-write mode */
    orig_bs_flags = bdrv_get_flags(bs);
    if (!(orig_bs_flags & BDRV_O_RDWR)) {
        if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
+            block_job_unref(&s->common);
            return;
        }
    }

-    /* Prevent concurrent jobs trying to modify the graph structure here, we
-     * already have our own plans. Also don't allow resize as the image size is
-     * queried only at the job start and then cached. */
-    s = block_job_create(job_id, &stream_job_driver, bs,
-                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                         BLK_PERM_GRAPH_MOD,
-                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                         BLK_PERM_WRITE,
-                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
-    if (!s) {
-        goto fail;
-    }
-
-    /* Block all intermediate nodes between bs and base, because they will
-     * disappear from the chain after this operation. The streaming job reads
-     * every block only once, assuming that it doesn't change, so block writes
-     * and resizes. */
+    /* Block all intermediate nodes between bs and base, because they
+     * will disappear from the chain after this operation */
    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
-        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
-                           &error_abort);
+        block_job_add_bdrv(&s->common, iter);
    }

    s->base = base;
@@ -276,10 +257,4 @@ void stream_start(const char *job_id, BlockDriverState *bs,
    s->on_error = on_error;
    trace_stream_start(bs, base, s);
    block_job_start(&s->common);
-    return;
-
-fail:
-    if (orig_bs_flags != bdrv_get_flags(bs)) {
-        bdrv_reopen(bs, orig_bs_flags, NULL);
-    }
 }
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -240,7 +240,7 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
    bool must_wait;

-    if (atomic_read(&blkp->io_limits_disabled)) {
+    if (blkp->io_limits_disabled) {
        return false;
    }

@@ -260,25 +260,6 @@ static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
    return must_wait;
 }

-/* Start the next pending I/O request for a BlockBackend.  Return whether
- * any request was actually pending.
- *
- * @blk:       the current BlockBackend
- * @is_write:  the type of operation (read/write)
- */
-static bool coroutine_fn throttle_group_co_restart_queue(BlockBackend *blk,
-                                                         bool is_write)
-{
-    BlockBackendPublic *blkp = blk_get_public(blk);
-    bool ret;
-
-    qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
-    ret = qemu_co_queue_next(&blkp->throttled_reqs[is_write]);
-    qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
-
-    return ret;
-}
-
 /* Look for the next pending I/O request and schedule it.
 *
 * This assumes that tg->lock is held.
@@ -306,12 +287,12 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
    if (!must_wait) {
        /* Give preference to requests from the current blk */
        if (qemu_in_coroutine() &&
-            throttle_group_co_restart_queue(blk, is_write)) {
+            qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
            token = blk;
        } else {
            ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
            int64_t now = qemu_clock_get_ns(tt->clock_type);
-            timer_mod(tt->timers[is_write], now);
+            timer_mod(tt->timers[is_write], now + 1);
            tg->any_timer_armed[is_write] = true;
        }
        tg->tokens[is_write] = token;
@@ -345,10 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
    if (must_wait || blkp->pending_reqs[is_write]) {
        blkp->pending_reqs[is_write]++;
        qemu_mutex_unlock(&tg->lock);
-        qemu_co_mutex_lock(&blkp->throttled_reqs_lock);
-        qemu_co_queue_wait(&blkp->throttled_reqs[is_write],
-                           &blkp->throttled_reqs_lock);
-        qemu_co_mutex_unlock(&blkp->throttled_reqs_lock);
+        qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
        qemu_mutex_lock(&tg->lock);
        blkp->pending_reqs[is_write]--;
    }
@@ -362,50 +340,15 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
    qemu_mutex_unlock(&tg->lock);
 }

-typedef struct {
-    BlockBackend *blk;
-    bool is_write;
-} RestartData;
-
-static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
-{
-    RestartData *data = opaque;
-    BlockBackend *blk = data->blk;
-    bool is_write = data->is_write;
-    BlockBackendPublic *blkp = blk_get_public(blk);
-    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
-    bool empty_queue;
-
-    empty_queue = !throttle_group_co_restart_queue(blk, is_write);
-
-    /* If the request queue was empty then we have to take care of
-     * scheduling the next one */
-    if (empty_queue) {
-        qemu_mutex_lock(&tg->lock);
-        schedule_next_request(blk, is_write);
-        qemu_mutex_unlock(&tg->lock);
-    }
-}
-
-static void throttle_group_restart_queue(BlockBackend *blk, bool is_write)
-{
-    Coroutine *co;
-    RestartData rd = {
-        .blk = blk,
-        .is_write = is_write
-    };
-
-    co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd);
-    aio_co_enter(blk_get_aio_context(blk), co);
-}
-
 void throttle_group_restart_blk(BlockBackend *blk)
 {
    BlockBackendPublic *blkp = blk_get_public(blk);
+    int i;

-    if (blkp->throttle_state) {
-        throttle_group_restart_queue(blk, 0);
-        throttle_group_restart_queue(blk, 1);
+    for (i = 0; i < 2; i++) {
+        while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
+            ;
+        }
    }
 }

@@ -433,7 +376,8 @@ void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
    throttle_config(ts, tt, cfg);
    qemu_mutex_unlock(&tg->lock);

-    throttle_group_restart_blk(blk);
+    qemu_co_enter_next(&blkp->throttled_reqs[0]);
+    qemu_co_enter_next(&blkp->throttled_reqs[1]);
 }

 /* Get the throttle configuration from a particular group. Similar to
@@ -464,6 +408,7 @@ static void timer_cb(BlockBackend *blk, bool is_write)
    BlockBackendPublic *blkp = blk_get_public(blk);
    ThrottleState *ts = blkp->throttle_state;
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+    bool empty_queue;

    /* The timer has just been fired, so we can update the flag */
    qemu_mutex_lock(&tg->lock);
@@ -471,7 +416,15 @@ static void timer_cb(BlockBackend *blk, bool is_write)
    qemu_mutex_unlock(&tg->lock);

    /* Run the request that was waiting for this timer */
-    throttle_group_restart_queue(blk, is_write);
+    empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
+
+    /* If the request queue was empty then we have to take care of
+     * scheduling the next one */
+    if (empty_queue) {
+        qemu_mutex_lock(&tg->lock);
+        schedule_next_request(blk, is_write);
+        qemu_mutex_unlock(&tg->lock);
+    }
 }

 static void read_timer_cb(void *opaque)
--- a/block/trace-events
+++ b/block/trace-events
@@ -35,6 +35,8 @@ mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_n
 mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
 mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
 mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"

 # block/backup.c
 backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
@@ -51,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"

-# block/file-win32.c
-# block/file-posix.c
+# block/raw-win32.c
+# block/raw-posix.c
 paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
 paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"

@@ -110,20 +112,3 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
-
-# block/vxhs.c
-vxhs_iio_callback(int error) "ctx is NULL: error %d"
-vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
-vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
-vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
-vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. Error = %d, errno = %d"
-vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
-vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64
-vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64
-vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
-vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s"
-vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
-vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s"
-vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d"
-vxhs_close(char *vdisk_guid) "Closing vdisk %s"
-vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -55,7 +55,7 @@
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"
 #include "qemu/coroutine.h"
 #include "qemu/cutils.h"
 #include "qemu/uuid.h"
@@ -361,13 +361,6 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    VdiHeader header;
    size_t bmap_size;
    int ret;
-    Error *local_err = NULL;
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }

    logout("\n");

@@ -478,12 +471,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail_free_bmap;
-    }
+    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->write_lock);

@@ -763,8 +751,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -832,9 +819,9 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size, errp);
+        ret = blk_truncate(blk, offset + blocks * block_size);
        if (ret < 0) {
-            error_prepend(errp, "Failed to statically allocate %s", filename);
+            error_setg(errp, "Failed to statically allocate %s", filename);
            goto exit;
        }
    }
@@ -892,7 +879,6 @@ static BlockDriver bdrv_vdi = {
    .bdrv_open = vdi_open,
    .bdrv_close = vdi_close,
    .bdrv_reopen_prepare = vdi_reopen_prepare,
-    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_create = vdi_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = vdi_co_get_block_status,
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size, NULL);
+                bdrv_truncate(bs->file->bs, new_file_size);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -24,7 +24,7 @@
 #include "qemu/crc32c.h"
 #include "qemu/bswap.h"
 #include "block/vhdx.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"
 #include "qemu/uuid.h"

 /* Options for VHDX creation */
@@ -898,12 +898,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    uint64_t signature;
    Error *local_err = NULL;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    s->bat = NULL;
    s->first_visible_write = true;

@@ -997,17 +991,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

-    /* Disable migration when VHDX images are used */
-    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail;
-    }
-
    if (flags & BDRV_O_RDWR) {
        ret = vhdx_update_headers(bs, s, false, NULL);
        if (ret < 0) {
@@ -1017,6 +1000,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,

    /* TODO: differencing files */

+    /* Disable migration when VHDX images are used */
+    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
+    migrate_add_blocker(s->migration_blocker);
+
    return 0;
 fail:
    vhdx_close(bs);
@@ -1171,7 +1160,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
+    return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
 }

 /*
@@ -1586,7 +1575,7 @@ exit:
 static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
                           uint64_t image_size, VHDXImageType type,
                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length, Error **errp)
+                           uint32_t length)
 {
    int ret = 0;
    uint64_t data_file_offset;
@@ -1607,17 +1596,16 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset, errp);
+        ret = blk_truncate(blk, data_file_offset);
        if (ret < 0) {
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size, errp);
+        ret = blk_truncate(blk, data_file_offset + image_size);
        if (ret < 0) {
            goto exit;
        }
    } else {
-        error_setg(errp, "Unsupported image type");
        ret = -ENOTSUP;
        goto exit;
    }
@@ -1628,7 +1616,6 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        /* for a fixed file, the default BAT entry is not zero */
        s->bat = g_try_malloc0(length);
        if (length && s->bat == NULL) {
-            error_setg(errp, "Failed to allocate memory for the BAT");
            ret = -ENOMEM;
            goto exit;
        }
@@ -1648,7 +1635,6 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
        }
        ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to write the BAT");
            goto exit;
        }
    }
@@ -1674,8 +1660,7 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
                                        uint32_t log_size,
                                        bool use_zero_blocks,
                                        VHDXImageType type,
-                                        uint64_t *metadata_offset,
-                                        Error **errp)
+                                        uint64_t *metadata_offset)
 {
    int ret = 0;
    uint32_t offset = 0;
@@ -1744,7 +1729,7 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    /* The region table gives us the data we need to create the BAT,
     * so do that now */
    ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
-                          bat_file_offset, bat_length, errp);
+                          bat_file_offset, bat_length);
    if (ret < 0) {
        goto exit;
    }
@@ -1753,14 +1738,12 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
    ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to write first region table");
        goto exit;
    }

    ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE, 0);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to write second region table");
        goto exit;
    }

@@ -1831,7 +1814,6 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
        ret = -ENOTSUP;
        goto exit;
    } else {
-        error_setg(errp, "Invalid subformat '%s'", type);
        ret = -EINVAL;
        goto exit;
    }
@@ -1866,8 +1848,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -1886,14 +1867,12 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
                     0);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to write file signature");
        goto delete_and_exit;
    }
    if (creator) {
        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
                         creator, creator_items * sizeof(gunichar2), 0);
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to write creator field");
            goto delete_and_exit;
        }
    }
@@ -1902,14 +1881,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    /* Creates (B),(C) */
    ret = vhdx_create_new_headers(blk, image_size, log_size);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to write image headers");
        goto delete_and_exit;
    }

    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
    ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
                                       log_size, use_zero_blocks, image_type,
-                                       &metadata_offset, errp);
+                                       &metadata_offset);
    if (ret < 0) {
        goto delete_and_exit;
    }
@@ -1918,7 +1896,6 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
                                   metadata_offset, image_type);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to initialize metadata");
        goto delete_and_exit;
    }

@@ -1995,7 +1972,6 @@ static BlockDriver bdrv_vhdx = {
    .bdrv_open              = vhdx_open,
    .bdrv_close             = vhdx_close,
    .bdrv_reopen_prepare    = vhdx_reopen_prepare,
-    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_co_readv          = vhdx_co_readv,
    .bdrv_co_writev         = vhdx_co_writev,
    .bdrv_create            = vhdx_create,
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -31,7 +31,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"
 #include "qemu/cutils.h"
 #include <zlib.h>

@@ -941,13 +941,6 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
-    Error *local_err = NULL;
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }

    buf = vmdk_read_desc(bs->file, 0, errp);
    if (!buf) {
@@ -983,13 +976,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail;
-    }
-
+    migrate_add_blocker(s->migration_blocker);
    g_free(buf);
    return 0;

@@ -1703,8 +1690,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -1714,7 +1700,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    blk_set_allow_write_beyond_eof(blk, true);

    if (flat) {
-        ret = blk_truncate(blk, filesize, errp);
+        ret = blk_truncate(blk, filesize);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not truncate file");
+        }
        goto exit;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1777,8 +1766,9 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Could not truncate file");
        goto exit;
    }

@@ -2068,8 +2058,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    new_blk = blk_new_open(filename, NULL, NULL,
-                           BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                           &local_err);
+                           BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (new_blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -2086,7 +2075,10 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len, errp);
+        ret = blk_truncate(new_blk, desc_len);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not truncate file");
+        }
    }
 exit:
    if (new_blk) {
@@ -2354,7 +2346,6 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_open                    = vmdk_open,
    .bdrv_check                   = vmdk_check,
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
-    .bdrv_child_perm              = bdrv_format_default_perms,
    .bdrv_co_preadv               = vmdk_co_preadv,
    .bdrv_co_pwritev              = vmdk_co_pwritev,
    .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -28,7 +28,7 @@
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"
 #include "qemu/bswap.h"
 #include "qemu/uuid.h"

@@ -220,12 +220,6 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    int disk_type = VHD_DYNAMIC;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -428,18 +422,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 #endif
    }

+    qemu_co_mutex_init(&s->lock);
+
    /* Disable migration when VHD images are used */
    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail;
-    }
-
-    qemu_co_mutex_init(&s->lock);
+    migrate_add_blocker(s->migration_blocker);

    return 0;

@@ -851,21 +840,20 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 }

 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
-                             int64_t total_size, Error **errp)
+                             int64_t total_size)
 {
    int ret;

    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = blk_truncate(blk, total_size, errp);
+    ret = blk_truncate(blk, total_size);
    if (ret < 0) {
        return ret;
    }

    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Unable to write VHD header");
        return ret;
    }

@@ -916,8 +904,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -997,11 +984,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)

    if (disk_type == VHD_DYNAMIC) {
        ret = create_dynamic_disk(blk, buf, total_sectors);
-        if (ret < 0) {
-            error_setg(errp, "Unable to create or write VHD header");
-        }
    } else {
-        ret = create_fixed_disk(blk, buf, total_size, errp);
+        ret = create_fixed_disk(blk, buf, total_size);
+    }
+    if (ret < 0) {
+        error_setg(errp, "Unable to create or write VHD header");
    }

 out:
@@ -1069,7 +1056,6 @@ static BlockDriver bdrv_vpc = {
    .bdrv_open              = vpc_open,
    .bdrv_close             = vpc_close,
    .bdrv_reopen_prepare    = vpc_reopen_prepare,
-    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_create            = vpc_create,

    .bdrv_co_preadv             = vpc_co_preadv,
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -28,7 +28,7 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "migration/blocker.h"
+#include "migration/migration.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qstring.h"
@@ -1057,10 +1057,10 @@ static void vvfat_parse_filename(const char *filename, QDict *options,
    }

    /* Fill in the options QDict */
-    qdict_put_str(options, "dir", filename);
-    qdict_put_int(options, "fat-type", fat_type);
-    qdict_put_bool(options, "floppy", floppy);
-    qdict_put_bool(options, "rw", rw);
+    qdict_put(options, "dir", qstring_from_str(filename));
+    qdict_put(options, "fat-type", qint_from_int(fat_type));
+    qdict_put(options, "floppy", qbool_from_bool(floppy));
+    qdict_put(options, "rw", qbool_from_bool(rw));
 }

 static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
@@ -1156,6 +1156,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    s->current_cluster=0xffffffff;

+    /* read only is the default for safety */
+    bs->read_only = true;
    s->qcow = NULL;
    s->qcow_filename = NULL;
    s->fat2 = NULL;
@@ -1167,24 +1169,11 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);

    if (qemu_opt_get_bool(opts, "rw", false)) {
-        if (!bdrv_is_read_only(bs)) {
-            ret = enable_write_target(bs, errp);
-            if (ret < 0) {
-                goto fail;
-            }
-        } else {
-            ret = -EPERM;
-            error_setg(errp,
-                       "Unable to set VVFAT to 'rw' when drive is read-only");
-            goto fail;
-        }
-    } else  {
-        /* read only is the default for safety */
-        ret = bdrv_set_read_only(bs, true, &local_err);
+        ret = enable_write_target(bs, errp);
        if (ret < 0) {
-            error_propagate(errp, local_err);
            goto fail;
        }
+        bs->read_only = false;
    }

    bs->total_sectors = cyls * heads * secs;
@@ -1196,26 +1185,22 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;

+    if (s->first_sectors_number == 0x40) {
+        init_mbr(s, cyls, heads, secs);
+    }
+
+    //    assert(is_consistent(s));
+    qemu_co_mutex_init(&s->lock);
+
    /* Disable migration when vvfat is used rw */
    if (s->qcow) {
        error_setg(&s->migration_blocker,
                   "The vvfat (rw) format used by node '%s' "
                   "does not support live migration",
                   bdrv_get_device_or_node_name(bs));
-        ret = migrate_add_blocker(s->migration_blocker, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            error_free(s->migration_blocker);
-            goto fail;
-        }
+        migrate_add_blocker(s->migration_blocker);
    }

-    if (s->first_sectors_number == 0x40) {
-        init_mbr(s, cyls, heads, secs);
-    }
-
-    qemu_co_mutex_init(&s->lock);
-
    ret = 0;
 fail:
    qemu_opts_del(opts);
@@ -1405,13 +1390,7 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
 	   return -1;
 	if (s->qcow) {
 	    int n;
-            int ret;
-            ret = bdrv_is_allocated(s->qcow->bs, sector_num,
-                                    nb_sectors - i, &n);
-            if (ret < 0) {
-                return ret;
-            }
-            if (ret) {
+            if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
                DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
                             (int)sector_num, n));
                if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
@@ -1685,8 +1664,7 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
    }
 }

-static inline bool cluster_was_modified(BDRVVVFATState *s,
-                                        uint32_t cluster_num)
+static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
 {
    int was_modified = 0;
    int i, dummy;
@@ -1701,13 +1679,7 @@ static inline bool cluster_was_modified(BDRVVVFATState *s,
                                         1, &dummy);
    }

-    /*
-     * Note that this treats failures to learn allocation status the
-     * same as if an allocation has occurred.  It's as safe as
-     * anything else, given that a failure to learn allocation status
-     * will probably result in more failures.
-     */
-    return !!was_modified;
+    return was_modified;
 }

 static const char* get_basename(const char* path)
@@ -1857,9 +1829,6 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
                    int res;

                    res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
-                    if (res < 0) {
-                        return -1;
-                    }
                    if (!res) {
                        res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
                        if (res) {
@@ -2995,7 +2964,6 @@ static void write_target_close(BlockDriverState *bs) {

 static BlockDriver vvfat_write_target = {
    .format_name        = "vvfat_write_target",
-    .instance_size      = sizeof(void*),
    .bdrv_co_pwritev    = write_target_commit,
    .bdrv_close         = write_target_close,
 };
@@ -3051,7 +3019,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    }

    options = qdict_new();
-    qdict_put_str(options, "write-target.driver", "qcow");
+    qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
    s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
                              &child_vvfat_qcow, false, errp);
    QDECREF(options);
@@ -3064,13 +3032,14 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    unlink(s->qcow_filename);
 #endif

-    backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
-                                   &error_abort);
-    *(void**) backing->opaque = s;
-
-    bdrv_set_backing_hd(s->bs, backing, &error_abort);
+    backing = bdrv_new();
+    bdrv_set_backing_hd(s->bs, backing);
    bdrv_unref(backing);

+    s->bs->backing->bs->drv = &vvfat_write_target;
+    s->bs->backing->bs->opaque = g_new(void *, 1);
+    *(void**)s->bs->backing->bs->opaque = s;
+
    return 0;

 err:
@@ -3079,27 +3048,6 @@ err:
    return ret;
 }

-static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
-                             const BdrvChildRole *role,
-                             uint64_t perm, uint64_t shared,
-                             uint64_t *nperm, uint64_t *nshared)
-{
-    BDRVVVFATState *s = bs->opaque;
-
-    assert(c == s->qcow || role == &child_backing);
-
-    if (c == s->qcow) {
-        /* This is a private node, nobody should try to attach to it */
-        *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
-        *nshared = BLK_PERM_WRITE_UNCHANGED;
-    } else {
-        /* The backing file is there so 'commit' can use it. vvfat doesn't
-         * access it in any way. */
-        *nperm = 0;
-        *nshared = BLK_PERM_ALL;
-    }
-}
-
 static void vvfat_close(BlockDriverState *bs)
 {
    BDRVVVFATState *s = bs->opaque;
@@ -3125,7 +3073,6 @@ static BlockDriver bdrv_vvfat = {
    .bdrv_file_open         = vvfat_open,
    .bdrv_refresh_limits    = vvfat_refresh_limits,
    .bdrv_close             = vvfat_close,
-    .bdrv_child_perm        = vvfat_child_perm,

    .bdrv_co_preadv         = vvfat_co_preadv,
    .bdrv_co_pwritev        = vvfat_co_pwritev,
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -1,575 +0,0 @@
-/*
- * QEMU Block driver for Veritas HyperScale (VxHS)
- *
- * Copyright (c) 2017 Veritas Technologies LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include <qnio/qnio_api.h>
-#include <sys/param.h>
-#include "block/block_int.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
-#include "trace.h"
-#include "qemu/uri.h"
-#include "qapi/error.h"
-#include "qemu/uuid.h"
-#include "crypto/tlscredsx509.h"
-
-#define VXHS_OPT_FILENAME           "filename"
-#define VXHS_OPT_VDISK_ID           "vdisk-id"
-#define VXHS_OPT_SERVER             "server"
-#define VXHS_OPT_HOST               "host"
-#define VXHS_OPT_PORT               "port"
-
-/* Only accessed under QEMU global mutex */
-static uint32_t vxhs_ref;
-
-typedef enum {
-    VDISK_AIO_READ,
-    VDISK_AIO_WRITE,
-} VDISKAIOCmd;
-
-/*
- * HyperScale AIO callbacks structure
- */
-typedef struct VXHSAIOCB {
-    BlockAIOCB common;
-    int err;
-} VXHSAIOCB;
-
-typedef struct VXHSvDiskHostsInfo {
-    void *dev_handle; /* Device handle */
-    char *host; /* Host name or IP */
-    int port; /* Host's port number */
-} VXHSvDiskHostsInfo;
-
-/*
- * Structure per vDisk maintained for state
- */
-typedef struct BDRVVXHSState {
-    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
-    char *vdisk_guid;
-    char *tlscredsid; /* tlscredsid */
-} BDRVVXHSState;
-
-static void vxhs_complete_aio_bh(void *opaque)
-{
-    VXHSAIOCB *acb = opaque;
-    BlockCompletionFunc *cb = acb->common.cb;
-    void *cb_opaque = acb->common.opaque;
-    int ret = 0;
-
-    if (acb->err != 0) {
-        trace_vxhs_complete_aio(acb, acb->err);
-        ret = (-EIO);
-    }
-
-    qemu_aio_unref(acb);
-    cb(cb_opaque, ret);
-}
-
-/*
- * Called from a libqnio thread
- */
-static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
-{
-    VXHSAIOCB *acb = NULL;
-
-    switch (opcode) {
-    case IRP_READ_REQUEST:
-    case IRP_WRITE_REQUEST:
-
-        /*
-         * ctx is VXHSAIOCB*
-         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
-         */
-        if (ctx) {
-            acb = ctx;
-        } else {
-            trace_vxhs_iio_callback(error);
-            goto out;
-        }
-
-        if (error) {
-            if (!acb->err) {
-                acb->err = error;
-            }
-            trace_vxhs_iio_callback(error);
-        }
-
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
-                                vxhs_complete_aio_bh, acb);
-        break;
-
-    default:
-        if (error == QNIOERROR_HUP) {
-            /*
-             * Channel failed, spontaneous notification,
-             * not in response to I/O
-             */
-            trace_vxhs_iio_callback_chnfail(error, errno);
-        } else {
-            trace_vxhs_iio_callback_unknwn(opcode, error);
-        }
-        break;
-    }
-out:
-    return;
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "vxhs",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = VXHS_OPT_FILENAME,
-            .type = QEMU_OPT_STRING,
-            .help = "URI to the Veritas HyperScale image",
-        },
-        {
-            .name = VXHS_OPT_VDISK_ID,
-            .type = QEMU_OPT_STRING,
-            .help = "UUID of the VxHS vdisk",
-        },
-        {
-            .name = "tls-creds",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the TLS/SSL credentials to use",
-        },
-        { /* end of list */ }
-    },
-};
-
-static QemuOptsList runtime_tcp_opts = {
-    .name = "vxhs_tcp",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
-    .desc = {
-        {
-            .name = VXHS_OPT_HOST,
-            .type = QEMU_OPT_STRING,
-            .help = "host address (ipv4 addresses)",
-        },
-        {
-            .name = VXHS_OPT_PORT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "port number on which VxHSD is listening (default 9999)",
-            .def_value_str = "9999"
-        },
-        { /* end of list */ }
-    },
-};
-
-/*
- * Parse incoming URI and populate *options with the host
- * and device information
- */
-static int vxhs_parse_uri(const char *filename, QDict *options)
-{
-    URI *uri = NULL;
-    char *port;
-    int ret = 0;
-
-    trace_vxhs_parse_uri_filename(filename);
-    uri = uri_parse(filename);
-    if (!uri || !uri->server || !uri->path) {
-        uri_free(uri);
-        return -EINVAL;
-    }
-
-    qdict_put_str(options, VXHS_OPT_SERVER ".host", uri->server);
-
-    if (uri->port) {
-        port = g_strdup_printf("%d", uri->port);
-        qdict_put_str(options, VXHS_OPT_SERVER ".port", port);
-        g_free(port);
-    }
-
-    qdict_put_str(options, "vdisk-id", uri->path);
-
-    trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
-    uri_free(uri);
-
-    return ret;
-}
-
-static void vxhs_parse_filename(const char *filename, QDict *options,
-                                Error **errp)
-{
-    if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
-        error_setg(errp, "vdisk-id/server and a file name may not be specified "
-                         "at the same time");
-        return;
-    }
-
-    if (strstr(filename, "://")) {
-        int ret = vxhs_parse_uri(filename, options);
-        if (ret < 0) {
-            error_setg(errp, "Invalid URI. URI should be of the form "
-                       "  vxhs://<host_ip>:<port>/<vdisk-id>");
-        }
-    }
-}
-
-static int vxhs_init_and_ref(void)
-{
-    if (vxhs_ref++ == 0) {
-        if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
-            return -ENODEV;
-        }
-    }
-    return 0;
-}
-
-static void vxhs_unref(void)
-{
-    if (--vxhs_ref == 0) {
-        iio_fini();
-    }
-}
-
-static void vxhs_get_tls_creds(const char *id, char **cacert,
-                               char **key, char **cert, Error **errp)
-{
-    Object *obj;
-    QCryptoTLSCreds *creds;
-    QCryptoTLSCredsX509 *creds_x509;
-
-    obj = object_resolve_path_component(
-        object_get_objects_root(), id);
-
-    if (!obj) {
-        error_setg(errp, "No TLS credentials with id '%s'",
-                   id);
-        return;
-    }
-
-    creds_x509 = (QCryptoTLSCredsX509 *)
-        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
-
-    if (!creds_x509) {
-        error_setg(errp, "Object with id '%s' is not TLS credentials",
-                   id);
-        return;
-    }
-
-    creds = &creds_x509->parent_obj;
-
-    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
-        error_setg(errp,
-                   "Expecting TLS credentials with a client endpoint");
-        return;
-    }
-
-    /*
-     * Get the cacert, client_cert and client_key file names.
-     */
-    if (!creds->dir) {
-        error_setg(errp, "TLS object missing 'dir' property value");
-        return;
-    }
-
-    *cacert = g_strdup_printf("%s/%s", creds->dir,
-                              QCRYPTO_TLS_CREDS_X509_CA_CERT);
-    *cert = g_strdup_printf("%s/%s", creds->dir,
-                            QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
-    *key = g_strdup_printf("%s/%s", creds->dir,
-                           QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
-}
-
-static int vxhs_open(BlockDriverState *bs, QDict *options,
-                     int bdrv_flags, Error **errp)
-{
-    BDRVVXHSState *s = bs->opaque;
-    void *dev_handlep;
-    QDict *backing_options = NULL;
-    QemuOpts *opts = NULL;
-    QemuOpts *tcp_opts = NULL;
-    char *of_vsa_addr = NULL;
-    Error *local_err = NULL;
-    const char *vdisk_id_opt;
-    const char *server_host_opt;
-    int ret = 0;
-    char *cacert = NULL;
-    char *client_key = NULL;
-    char *client_cert = NULL;
-
-    ret = vxhs_init_and_ref();
-    if (ret < 0) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* Create opts info from runtime_opts and runtime_tcp_opts list */
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
-
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* vdisk-id is the disk UUID */
-    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
-    if (!vdisk_id_opt) {
-        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* vdisk-id may contain a leading '/' */
-    if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
-        error_setg(&local_err, "vdisk-id cannot be more than %d characters",
-                   UUID_FMT_LEN);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    s->vdisk_guid = g_strdup(vdisk_id_opt);
-    trace_vxhs_open_vdiskid(vdisk_id_opt);
-
-    /* get the 'server.' arguments */
-    qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
-
-    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
-    if (local_err != NULL) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
-    if (!server_host_opt) {
-        error_setg(&local_err, QERR_MISSING_PARAMETER,
-                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
-        error_setg(&local_err, "server.host cannot be more than %d characters",
-                   MAXHOSTNAMELEN);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* check if we got tls-creds via the --object argument */
-    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
-    if (s->tlscredsid) {
-        vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
-                           &client_cert, &local_err);
-        if (local_err != NULL) {
-            ret = -EINVAL;
-            goto out;
-        }
-        trace_vxhs_get_creds(cacert, client_key, client_cert);
-    }
-
-    s->vdisk_hostinfo.host = g_strdup(server_host_opt);
-    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
-                                                          VXHS_OPT_PORT),
-                                                          NULL, 0);
-
-    trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
-                             s->vdisk_hostinfo.port);
-
-    of_vsa_addr = g_strdup_printf("of://%s:%d",
-                                  s->vdisk_hostinfo.host,
-                                  s->vdisk_hostinfo.port);
-
-    /*
-     * Open qnio channel to storage agent if not opened before
-     */
-    dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
-                           cacert, client_key, client_cert);
-    if (dev_handlep == NULL) {
-        trace_vxhs_open_iio_open(of_vsa_addr);
-        ret = -ENODEV;
-        goto out;
-    }
-    s->vdisk_hostinfo.dev_handle = dev_handlep;
-
-out:
-    g_free(of_vsa_addr);
-    QDECREF(backing_options);
-    qemu_opts_del(tcp_opts);
-    qemu_opts_del(opts);
-    g_free(cacert);
-    g_free(client_key);
-    g_free(client_cert);
-
-    if (ret < 0) {
-        vxhs_unref();
-        error_propagate(errp, local_err);
-        g_free(s->vdisk_hostinfo.host);
-        g_free(s->vdisk_guid);
-        g_free(s->tlscredsid);
-        s->vdisk_guid = NULL;
-    }
-
-    return ret;
-}
-
-static const AIOCBInfo vxhs_aiocb_info = {
-    .aiocb_size = sizeof(VXHSAIOCB)
-};
-
-/*
- * This allocates QEMU-VXHS callback for each IO
- * and is passed to QNIO. When QNIO completes the work,
- * it will be passed back through the callback.
- */
-static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
-                               QEMUIOVector *qiov, int nb_sectors,
-                               BlockCompletionFunc *cb, void *opaque,
-                               VDISKAIOCmd iodir)
-{
-    VXHSAIOCB *acb = NULL;
-    BDRVVXHSState *s = bs->opaque;
-    size_t size;
-    uint64_t offset;
-    int iio_flags = 0;
-    int ret = 0;
-    void *dev_handle = s->vdisk_hostinfo.dev_handle;
-
-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
-
-    /*
-     * Initialize VXHSAIOCB.
-     */
-    acb->err = 0;
-
-    iio_flags = IIO_FLAG_ASYNC;
-
-    switch (iodir) {
-    case VDISK_AIO_WRITE:
-            ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
-                             offset, (uint64_t)size, iio_flags);
-            break;
-    case VDISK_AIO_READ:
-            ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
-                            offset, (uint64_t)size, iio_flags);
-            break;
-    default:
-            trace_vxhs_aio_rw_invalid(iodir);
-            goto errout;
-    }
-
-    if (ret != 0) {
-        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
-                                acb, ret, errno);
-        goto errout;
-    }
-    return &acb->common;
-
-errout:
-    qemu_aio_unref(acb);
-    return NULL;
-}
-
-static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb, void *opaque)
-{
-    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
-                       opaque, VDISK_AIO_READ);
-}
-
-static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb, void *opaque)
-{
-    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
-                       cb, opaque, VDISK_AIO_WRITE);
-}
-
-static void vxhs_close(BlockDriverState *bs)
-{
-    BDRVVXHSState *s = bs->opaque;
-
-    trace_vxhs_close(s->vdisk_guid);
-
-    g_free(s->vdisk_guid);
-    s->vdisk_guid = NULL;
-
-    /*
-     * Close vDisk device
-     */
-    if (s->vdisk_hostinfo.dev_handle) {
-        iio_close(s->vdisk_hostinfo.dev_handle);
-        s->vdisk_hostinfo.dev_handle = NULL;
-    }
-
-    vxhs_unref();
-
-    /*
-     * Free the dynamically allocated host string etc
-     */
-    g_free(s->vdisk_hostinfo.host);
-    g_free(s->tlscredsid);
-    s->tlscredsid = NULL;
-    s->vdisk_hostinfo.host = NULL;
-    s->vdisk_hostinfo.port = 0;
-}
-
-static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
-{
-    int64_t vdisk_size = -1;
-    int ret = 0;
-    void *dev_handle = s->vdisk_hostinfo.dev_handle;
-
-    ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
-    if (ret < 0) {
-        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
-        return -EIO;
-    }
-
-    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
-    return vdisk_size;
-}
-
-/*
- * Returns the size of vDisk in bytes. This is required
- * by QEMU block upper block layer so that it is visible
- * to guest.
- */
-static int64_t vxhs_getlength(BlockDriverState *bs)
-{
-    BDRVVXHSState *s = bs->opaque;
-    int64_t vdisk_size;
-
-    vdisk_size = vxhs_get_vdisk_stat(s);
-    if (vdisk_size < 0) {
-        return -EIO;
-    }
-
-    return vdisk_size;
-}
-
-static BlockDriver bdrv_vxhs = {
-    .format_name                  = "vxhs",
-    .protocol_name                = "vxhs",
-    .instance_size                = sizeof(BDRVVXHSState),
-    .bdrv_file_open               = vxhs_open,
-    .bdrv_parse_filename          = vxhs_parse_filename,
-    .bdrv_close                   = vxhs_close,
-    .bdrv_getlength               = vxhs_getlength,
-    .bdrv_aio_readv               = vxhs_aio_readv,
-    .bdrv_aio_writev              = vxhs_aio_writev,
-};
-
-static void bdrv_vxhs_init(void)
-{
-    bdrv_register(&bdrv_vxhs);
-}
-
-block_init(bdrv_vxhs_init);
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
    HANDLE hIOCP;
    EventNotifier e;
    int count;
-    AioContext *aio_ctx;
+    bool is_aio_context_attached;
 };

 typedef struct QEMUWin32AIOCB {
@@ -87,6 +87,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
        qemu_vfree(waiocb->buf);
    }

+
    waiocb->common.cb(waiocb->common.opaque, ret);
    qemu_aio_unref(waiocb);
 }
@@ -174,16 +175,16 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
-    aio->aio_ctx = NULL;
+    aio_set_event_notifier(old_context, &aio->e, false, NULL);
+    aio->is_aio_context_attached = false;
 }

 void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *new_context)
 {
-    aio->aio_ctx = new_context;
+    aio->is_aio_context_attached = true;
    aio_set_event_notifier(new_context, &aio->e, false,
-                           win32_aio_completion_cb, NULL);
+                           win32_aio_completion_cb);
 }

 QEMUWin32AIOState *win32_aio_init(void)
@@ -211,7 +212,7 @@ out_free_state:

 void win32_aio_cleanup(QEMUWin32AIOState *aio)
 {
-    assert(!aio->aio_ctx);
+    assert(!aio->is_aio_context_attached);
    CloseHandle(aio->hIOCP);
    event_notifier_cleanup(&aio->e);
    g_free(aio);
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .9.50
 .8.1
				`@@ -1 +0,0 @@`
				`obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o`