cirrus: add blit_is_unsafe call to cirrus_bitblt_cputovideo (CVE-2017-2620)

CIRRUS_BLTMODE_MEMSYSSRC blits do NOT check blit destination and blit width, at all. Oops. Fix it. Security impact: high. The missing blit destination check allows writing to host memory. Basically the same as CVE-2014-8106 for the other blit variants. Cc: qemu-stable@nongnu.org Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> (cherry picked from commit 92f2b88cea) [BR: BSC#1024972] Signed-off-by: Bruce Rogers <brogers@suse.com>
cirrus: fix patterncopy checks
2017-02-28 20:35:39 -07:00 · 2017-02-28 20:35:35 -07:00 · 2017-02-28 20:27:39 -07:00 · 2017-02-28 20:27:39 -07:00 · 2017-02-28 20:27:39 -07:00 · 2017-02-28 20:27:39 -07:00
2741 changed files with 101072 additions and 206716 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,15 +0,0 @@
-# http://editorconfig.org
-root = true
-
-[*]
-end_of_line = lf
-insert_final_newline = true
-charset = utf-8
-
-[Makefile*]
-indent_style = tab
-indent_size = 8
-
-[*.{c,h}]
-indent_style = space
-indent_size = 4
--- a/.gdbinit
+++ b/.gdbinit
@@ -1,8 +0,0 @@
-# GDB may have ./.gdbinit loading disabled by default.  In that case you can
-# follow the instructions it prints.  They boil down to adding the following to
-# your home directory's ~/.gdbinit file:
-#
-#   add-auto-load-safe-path /path/to/qemu/.gdbinit
-
-# Load QEMU-specific sub-commands and settings
-source scripts/qemu-gdb.py
--- a/.gitignore
+++ b/.gitignore
@@ -6,12 +6,18 @@
 /config.status
 /config-temp
 /trace-events-all
+/trace/generated-tracers.h
+/trace/generated-tracers.c
+/trace/generated-tracers-dtrace.h
+/trace/generated-tracers.dtrace
 /trace/generated-events.h
 /trace/generated-events.c
 /trace/generated-helpers-wrappers.h
 /trace/generated-helpers.h
 /trace/generated-helpers.c
 /trace/generated-tcg-tracers.h
+/trace/generated-ust-provider.h
+/trace/generated-ust.c
 /ui/shader/texture-blit-frag.h
 /ui/shader/texture-blit-vert.h
 *-timestamp
@@ -34,7 +40,6 @@
 /qmp-marshal.c
 /qemu-doc.html
 /qemu-doc.info
-/qemu-doc.txt
 /qemu-img
 /qemu-nbd
 /qemu-options.def
@@ -50,12 +55,12 @@
 /qemu-version.h.tmp
 /module_block.h
 /vscclient
-/vhost-user-scsi
 /fsdev/virtfs-proxy-helper
 *.[1-9]
 *.a
 *.aux
 *.cp
+*.dvi
 *.exe
 *.msi
 *.dll
@@ -77,6 +82,10 @@
 *.d
 !/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
+*.lo
+*.la
+*.pc
+.libs
 .sdk
 *.gcda
 *.gcno
@@ -100,37 +109,9 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
-/docs/interop/qemu-ga-qapi.texi
-/docs/interop/qemu-ga-ref.html
-/docs/interop/qemu-ga-ref.info*
-/docs/interop/qemu-ga-ref.txt
-/docs/interop/qemu-qmp-qapi.texi
-/docs/interop/qemu-qmp-ref.html
-/docs/interop/qemu-qmp-ref.info*
-/docs/interop/qemu-qmp-ref.txt
-/docs/version.texi
-*.tps
 .stgit-*
 cscope.*
 tags
 TAGS
 docker-src.*
 *~
-*.ast_raw
-*.depend_raw
-trace.h
-trace.c
-trace-ust.h
-trace-ust.h
-trace-dtrace.h
-trace-dtrace.dtrace
-trace-root.h
-trace-root.c
-trace-ust-root.h
-trace-ust-root.h
-trace-ust-all.h
-trace-ust-all.c
-trace-dtrace-root.h
-trace-dtrace-root.dtrace
-trace-ust-all.h
-trace-ust-all.c
--- a/.gitmodules
+++ b/.gitmodules
@@ -22,6 +22,9 @@
 [submodule "roms/sgabios"]
 	path = roms/sgabios
 	url = git://git.qemu-project.org/sgabios.git
+[submodule "pixman"]
+	path = pixman
+	url = git://anongit.freedesktop.org/pixman
 [submodule "dtc"]
 	path = dtc
 	url = git://git.qemu-project.org/dtc.git
@@ -31,6 +34,3 @@
 [submodule "roms/skiboot"]
 	path = roms/skiboot
 	url = git://git.qemu.org/skiboot.git
-[submodule "roms/QemuMacDrivers"]
-	path = roms/QemuMacDrivers
-	url = git://git.qemu.org/QemuMacDrivers.git
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -1,49 +0,0 @@
-language: c
-git:
-   submodules: false
-env:
-  global:
-    - LC_ALL=C
-  matrix:
-    - IMAGE=debian-amd64
-      TARGET_LIST=x86_64-softmmu,x86_64-linux-user
-    - IMAGE=debian-win32-cross
-      TARGET_LIST=arm-softmmu,i386-softmmu,lm32-softmmu
-    - IMAGE=debian-win64-cross
-      TARGET_LIST=aarch64-softmmu,sparc64-softmmu,x86_64-softmmu
-    - IMAGE=debian-armel-cross
-      TARGET_LIST=arm-softmmu,arm-linux-user,armeb-linux-user
-    - IMAGE=debian-armhf-cross
-      TARGET_LIST=arm-softmmu,arm-linux-user,armeb-linux-user
-    - IMAGE=debian-arm64-cross
-      TARGET_LIST=aarch64-softmmu,aarch64-linux-user
-    - IMAGE=debian-s390x-cross
-      TARGET_LIST=s390x-softmmu,s390x-linux-user
-    - IMAGE=debian-mips-cross
-      TARGET_LIST=mips-softmmu,mipsel-linux-user
-    - IMAGE=debian-mips64el-cross
-      TARGET_LIST=mips64el-softmmu,mips64el-linux-user
-    - IMAGE=debian-powerpc-cross
-      TARGET_LIST=ppc-softmmu,ppcemb-softmmu,ppc-linux-user
-    - IMAGE=debian-ppc64el-cross
-      TARGET_LIST=ppc64-softmmu,ppc64-linux-user,ppc64abi32-linux-user
-build:
-  pre_ci:
-    - make docker-image-${IMAGE} V=1
-  pre_ci_boot:
-    image_name: qemu
-    image_tag: ${IMAGE}
-    pull: false
-    options: "-e HOME=/root"
-  ci:
-    - unset CC
-    # some targets require newer up to date packages, for example TARGET_LIST matching
-    # aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
-    # see the configure script:
-    #    error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:"
-    #    "  (1) Preferred: Install the DTC (libfdt) devel package"
-    #    "  (2) Fetch the DTC submodule, using:"
-    #    "      git submodule update --init dtc"
-    - dpkg --compare-versions `dpkg-query --showformat='${Version}' --show libfdt-dev` ge 1.4.2 || git submodule update --init dtc
-    - ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
-    - make -j$(($(getconf _NPROCESSORS_ONLN) + 1))
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@ python:
  - "2.4"
 compiler:
  - gcc
+  - clang
 cache: ccache
 addons:
  apt:
@@ -46,7 +47,6 @@ notifications:
 env:
  global:
    - TEST_CMD="make check"
-    - MAKEFLAGS="-j3"
  matrix:
    - CONFIG=""
    - CONFIG="--enable-debug --enable-debug-tcg --enable-trace-backends=log"
@@ -65,12 +65,9 @@ before_install:
 before_script:
  - ./configure ${CONFIG}
 script:
-  - make ${MAKEFLAGS} && ${TEST_CMD}
+  - make -j3 && ${TEST_CMD}
 matrix:
  include:
-    # Test with CLang for compile portability
-    - env: CONFIG=""
-      compiler: clang
    # gprof/gcov are GCC features
    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
      compiler: gcc
@@ -87,14 +84,14 @@ matrix:
    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
-    - env: CONFIG="--disable-tcg"
+    - env: CONFIG="--with-coroutine=gthread"
           TEST_CMD=""
      compiler: gcc
    - env: CONFIG=""
      os: osx
      compiler: clang
-    # Plain Trusty System Build
-    - env: CONFIG="--disable-linux-user"
+    # Plain Trusty Build
+    - env: CONFIG=""
      sudo: required
      addons:
      dist: trusty
@@ -104,55 +101,6 @@ matrix:
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
-    # Plain Trusty Linux User Build
-    - env: CONFIG="--disable-system"
-      sudo: required
-      addons:
-      dist: trusty
-      compiler: gcc
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-    # Trusty System build with latest stable clang
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
-    # Trusty Linux User build with latest stable clang
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
    # Using newer GCC with sanitizers
    - addons:
        apt:
@@ -192,7 +140,7 @@ matrix:
      compiler: none
      env:
        - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5
-        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user"
+        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread"
        - TEST_CMD=""
      before_script:
        - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -116,45 +116,3 @@ if (a == 1) {
 Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
 Besides, good compilers already warn users when '==' is mis-typed as '=',
 even when the constant is on the right.
-
-7. Comment style
-
-We use traditional C-style /* */ comments and avoid // comments.
-
-Rationale: The // form is valid in C99, so this is purely a matter of
-consistency of style. The checkpatch script will warn you about this.
-
-8. trace-events style
-
-8.1 0x prefix
-
-In trace-events files, use a '0x' prefix to specify hex numbers, as in:
-
-some_trace(unsigned x, uint64_t y) "x 0x%x y 0x" PRIx64
-
-An exception is made for groups of numbers that are hexadecimal by
-convention and separated by the symbols '.', '/', ':', or ' ' (such as
-PCI bus id):
-
-another_trace(int cssid, int ssid, int dev_num) "bus id: %x.%x.%04x"
-
-However, you can use '0x' for such groups if you want. Anyway, be sure that
-it is obvious that numbers are in hex, ex.:
-
-data_dump(uint8_t c1, uint8_t c2, uint8_t c3) "bytes (in hex): %02x %02x %02x"
-
-Rationale: hex numbers are hard to read in logs when there is no 0x prefix,
-especially when (occasionally) the representation doesn't contain any letters
-and especially in one line with other decimal numbers. Number groups are allowed
-to not use '0x' because for some things notations like %x.%x.%x are used not
-only in Qemu. Also dumping raw data bytes with '0x' is less readable.
-
-8.2 '#' printf flag
-
-Do not use printf flag '#', like '%#x'.
-
-Rationale: there are two ways to add a '0x' prefix to printed number: '0x%...'
-and '%#...'. For consistency the only one way should be used. Arguments for
-'0x%' are:
- - it is more popular
- - '%#' omits the 0x for the value 0 which makes output inconsistent
--- a/COPYING.PYTHON
+++ b/COPYING.PYTHON
@@ -1,270 +0,0 @@
-A. HISTORY OF THE SOFTWARE
-==========================
-
-Python was created in the early 1990s by Guido van Rossum at Stichting
-Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
-as a successor of a language called ABC.  Guido remains Python's
-principal author, although it includes many contributions from others.
-
-In 1995, Guido continued his work on Python at the Corporation for
-National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
-in Reston, Virginia where he released several versions of the
-software.
-
-In May 2000, Guido and the Python core development team moved to
-BeOpen.com to form the BeOpen PythonLabs team.  In October of the same
-year, the PythonLabs team moved to Digital Creations (now Zope
-Corporation, see http://www.zope.com).  In 2001, the Python Software
-Foundation (PSF, see http://www.python.org/psf/) was formed, a
-non-profit organization created specifically to own Python-related
-Intellectual Property.  Zope Corporation is a sponsoring member of
-the PSF.
-
-All Python releases are Open Source (see http://www.opensource.org for
-the Open Source Definition).  Historically, most, but not all, Python
-releases have also been GPL-compatible; the table below summarizes
-the various releases.
-
-    Release         Derived     Year        Owner       GPL-
-                    from                                compatible? (1)
-
-    0.9.0 thru 1.2              1991-1995   CWI         yes
-    1.3 thru 1.5.2  1.2         1995-1999   CNRI        yes
-    1.6             1.5.2       2000        CNRI        no
-    2.0             1.6         2000        BeOpen.com  no
-    1.6.1           1.6         2001        CNRI        yes (2)
-    2.1             2.0+1.6.1   2001        PSF         no
-    2.0.1           2.0+1.6.1   2001        PSF         yes
-    2.1.1           2.1+2.0.1   2001        PSF         yes
-    2.2             2.1.1       2001        PSF         yes
-    2.1.2           2.1.1       2002        PSF         yes
-    2.1.3           2.1.2       2002        PSF         yes
-    2.2.1           2.2         2002        PSF         yes
-    2.2.2           2.2.1       2002        PSF         yes
-    2.2.3           2.2.2       2003        PSF         yes
-    2.3             2.2.2       2002-2003   PSF         yes
-    2.3.1           2.3         2002-2003   PSF         yes
-    2.3.2           2.3.1       2002-2003   PSF         yes
-    2.3.3           2.3.2       2002-2003   PSF         yes
-    2.3.4           2.3.3       2004        PSF         yes
-    2.3.5           2.3.4       2005        PSF         yes
-    2.4             2.3         2004        PSF         yes
-    2.4.1           2.4         2005        PSF         yes
-    2.4.2           2.4.1       2005        PSF         yes
-    2.4.3           2.4.2       2006        PSF         yes
-    2.5             2.4         2006        PSF         yes
-    2.7             2.6         2010        PSF         yes
-
-Footnotes:
-
-(1) GPL-compatible doesn't mean that we're distributing Python under
-    the GPL.  All Python licenses, unlike the GPL, let you distribute
-    a modified version without making your changes open source.  The
-    GPL-compatible licenses make it possible to combine Python with
-    other software that is released under the GPL; the others don't.
-
-(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
-    because its license has a choice of law clause.  According to
-    CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
-    is "not incompatible" with the GPL.
-
-Thanks to the many outside volunteers who have worked under Guido's
-direction to make these releases possible.
-
-
-B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
-===============================================================
-
-PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
-
-1. This LICENSE AGREEMENT is between the Python Software Foundation
-("PSF"), and the Individual or Organization ("Licensee") accessing and
-otherwise using this software ("Python") in source or binary form and
-its associated documentation.
-
-2. Subject to the terms and conditions of this License Agreement, PSF
-hereby grants Licensee a nonexclusive, royalty-free, world-wide
-license to reproduce, analyze, test, perform and/or display publicly,
-prepare derivative works, distribute, and otherwise use Python
-alone or in any derivative version, provided, however, that PSF's
-License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
-2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
-Reserved" are retained in Python alone or in any derivative version 
-prepared by Licensee.
-
-3. In the event Licensee prepares a derivative work that is based on
-or incorporates Python or any part thereof, and wants to make
-the derivative work available to others as provided herein, then
-Licensee hereby agrees to include in any such work a brief summary of
-the changes made to Python.
-
-4. PSF is making Python available to Licensee on an "AS IS"
-basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
-OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-6. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-7. Nothing in this License Agreement shall be deemed to create any
-relationship of agency, partnership, or joint venture between PSF and
-Licensee.  This License Agreement does not grant permission to use PSF
-trademarks or trade name in a trademark sense to endorse or promote
-products or services of Licensee, or any third party.
-
-8. By copying, installing or otherwise using Python, Licensee
-agrees to be bound by the terms and conditions of this License
-Agreement.
-
-
-BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
-
-BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
-
-1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
-office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
-Individual or Organization ("Licensee") accessing and otherwise using
-this software in source or binary form and its associated
-documentation ("the Software").
-
-2. Subject to the terms and conditions of this BeOpen Python License
-Agreement, BeOpen hereby grants Licensee a non-exclusive,
-royalty-free, world-wide license to reproduce, analyze, test, perform
-and/or display publicly, prepare derivative works, distribute, and
-otherwise use the Software alone or in any derivative version,
-provided, however, that the BeOpen Python License is retained in the
-Software, alone or in any derivative version prepared by Licensee.
-
-3. BeOpen is making the Software available to Licensee on an "AS IS"
-basis.  BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
-SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
-AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
-DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-5. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-6. This License Agreement shall be governed by and interpreted in all
-respects by the law of the State of California, excluding conflict of
-law provisions.  Nothing in this License Agreement shall be deemed to
-create any relationship of agency, partnership, or joint venture
-between BeOpen and Licensee.  This License Agreement does not grant
-permission to use BeOpen trademarks or trade names in a trademark
-sense to endorse or promote products or services of Licensee, or any
-third party.  As an exception, the "BeOpen Python" logos available at
-http://www.pythonlabs.com/logos.html may be used according to the
-permissions granted on that web page.
-
-7. By copying, installing or otherwise using the software, Licensee
-agrees to be bound by the terms and conditions of this License
-Agreement.
-
-
-CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
-
-1. This LICENSE AGREEMENT is between the Corporation for National
-Research Initiatives, having an office at 1895 Preston White Drive,
-Reston, VA 20191 ("CNRI"), and the Individual or Organization
-("Licensee") accessing and otherwise using Python 1.6.1 software in
-source or binary form and its associated documentation.
-
-2. Subject to the terms and conditions of this License Agreement, CNRI
-hereby grants Licensee a nonexclusive, royalty-free, world-wide
-license to reproduce, analyze, test, perform and/or display publicly,
-prepare derivative works, distribute, and otherwise use Python 1.6.1
-alone or in any derivative version, provided, however, that CNRI's
-License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
-1995-2001 Corporation for National Research Initiatives; All Rights
-Reserved" are retained in Python 1.6.1 alone or in any derivative
-version prepared by Licensee.  Alternately, in lieu of CNRI's License
-Agreement, Licensee may substitute the following text (omitting the
-quotes): "Python 1.6.1 is made available subject to the terms and
-conditions in CNRI's License Agreement.  This Agreement together with
-Python 1.6.1 may be located on the Internet using the following
-unique, persistent identifier (known as a handle): 1895.22/1013.  This
-Agreement may also be obtained from a proxy server on the Internet
-using the following URL: http://hdl.handle.net/1895.22/1013".
-
-3. In the event Licensee prepares a derivative work that is based on
-or incorporates Python 1.6.1 or any part thereof, and wants to make
-the derivative work available to others as provided herein, then
-Licensee hereby agrees to include in any such work a brief summary of
-the changes made to Python 1.6.1.
-
-4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
-basis.  CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
-OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-6. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-7. This License Agreement shall be governed by the federal
-intellectual property law of the United States, including without
-limitation the federal copyright law, and, to the extent such
-U.S. federal law does not apply, by the law of the Commonwealth of
-Virginia, excluding Virginia's conflict of law provisions.
-Notwithstanding the foregoing, with regard to derivative works based
-on Python 1.6.1 that incorporate non-separable material that was
-previously distributed under the GNU General Public License (GPL), the
-law of the Commonwealth of Virginia shall govern this License
-Agreement only as to issues arising under or with respect to
-Paragraphs 4, 5, and 7 of this License Agreement.  Nothing in this
-License Agreement shall be deemed to create any relationship of
-agency, partnership, or joint venture between CNRI and Licensee.  This
-License Agreement does not grant permission to use CNRI trademarks or
-trade name in a trademark sense to endorse or promote products or
-services of Licensee, or any third party.
-
-8. By clicking on the "ACCEPT" button where indicated, or by copying,
-installing or otherwise using Python 1.6.1, Licensee agrees to be
-bound by the terms and conditions of this License Agreement.
-
-        ACCEPT
-
-
-CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
-
-Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
-The Netherlands.  All rights reserved.
-
-Permission to use, copy, modify, and distribute this software and its
-documentation for any purpose and without fee is hereby granted,
-provided that the above copyright notice appear in all copies and that
-both that copyright notice and this permission notice appear in
-supporting documentation, and that the name of Stichting Mathematisch
-Centrum or CWI not be used in advertising or publicity pertaining to
-distribution of the software without specific, written prior
-permission.
-
-STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
-THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
-FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--- a/HACKING
+++ b/HACKING
@@ -1,28 +1,10 @@
 1. Preprocessor

-1.1. Variadic macros
-
 For variadic macros, stick with this C99-like syntax:

 #define DPRINTF(fmt, ...)                                       \
    do { printf("IRQ: " fmt, ## __VA_ARGS__); } while (0)

-1.2. Include directives
-
-Order include directives as follows:
-
-#include "qemu/osdep.h"  /* Always first... */
-#include <...>           /* then system headers... */
-#include "..."           /* and finally QEMU headers. */
-
-The "qemu/osdep.h" header contains preprocessor macros that affect the behavior
-of core system headers like <stdint.h>.  It must be the first include so that
-core system headers included by external libraries get the preprocessor macros
-that QEMU depends on.
-
-Do not include "qemu/osdep.h" from header files since the .c file will have
-already included it.
-
 2. C types

 It should be common sense to use the right type, but we have collected
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12,8 +12,6 @@ consult qemu-devel and not any specific individual privately.
 Descriptions of section entries:

 	M: Mail patches to: FullName <address@domain>
-	R: Designated reviewer: FullName <address@domain>
-	   These reviewers should be CCed on patches.
 	L: Mailing list that is relevant to this area
 	W: Web-page with status/info
 	Q: Patchwork web based patch tracking system site
@@ -84,9 +82,14 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
+F: cpu-exec.c
+F: cpu-exec-common.c
 F: cpus.c
+F: cputlb.c
 F: exec.c
-F: accel/tcg/
+F: softmmu_template.h
+F: translate-all.*
+F: translate-common.c
 F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/helper*.h
@@ -103,7 +106,7 @@ F: include/fpu/
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: target/alpha/
+F: target-alpha/
 F: hw/alpha/
 F: tests/tcg/alpha/
 F: disas/alpha.c
@@ -112,7 +115,7 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target/arm/
+F: target-arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
 F: include/hw/cpu/a*mpcore.h
@@ -123,22 +126,16 @@ F: disas/libvixl/
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target/cris/
+F: target-cris/
 F: hw/cris/
 F: include/hw/cris/
 F: tests/tcg/cris/
 F: disas/cris.c

-HPPA (PA-RISC)
-M: Richard Henderson <rth@twiddle.net>
-S: Maintained
-F: target/hppa/
-F: disas/hppa.c
-
 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
-F: target/lm32/
+F: target-lm32/
 F: disas/lm32.c
 F: hw/lm32/
 F: hw/*/lm32_*
@@ -150,13 +147,13 @@ F: tests/tcg/lm32/
 M68K
 M: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
-F: target/m68k/
+F: target-m68k/
 F: disas/m68k.c

 MicroBlaze
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target/microblaze/
+F: target-microblaze/
 F: hw/microblaze/
 F: disas/microblaze.c

@@ -164,7 +161,7 @@ MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
 M: Yongbok Kim <yongbok.kim@imgtec.com>
 S: Maintained
-F: target/mips/
+F: target-mips/
 F: hw/mips/
 F: hw/misc/mips_*
 F: hw/intc/mips_gic.c
@@ -179,23 +176,15 @@ F: disas/mips.c
 Moxie
 M: Anthony Green <green@moxielogic.com>
 S: Maintained
-F: target/moxie/
+F: target-moxie/
 F: disas/moxie.c
 F: hw/moxie/
 F: default-configs/moxie-softmmu.mak

-NiosII
-M: Chris Wulff <crwulff@gmail.com>
-M: Marek Vasut <marex@denx.de>
-S: Maintained
-F: target/nios2/
-F: hw/nios2/
-F: disas/nios2.c
-
 OpenRISC
-M: Stafford Horne <shorne@gmail.com>
-S: Odd Fixes
-F: target/openrisc/
+M: Jia Liu <proljc@gmail.com>
+S: Maintained
+F: target-openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/

@@ -204,7 +193,7 @@ M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
-F: target/ppc/
+F: target-ppc/
 F: hw/ppc/
 F: include/hw/ppc/
 F: disas/ppc.c
@@ -213,14 +202,14 @@ S390
 M: Richard Henderson <rth@twiddle.net>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/s390x/
+F: target-s390x/
 F: hw/s390x/
 F: disas/s390.c

 SH4
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
-F: target/sh4/
+F: target-sh4/
 F: hw/sh4/
 F: disas/sh4.c
 F: include/hw/sh4/
@@ -229,7 +218,7 @@ SPARC
 M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 M: Artyom Tarasenko <atar4qemu@gmail.com>
 S: Maintained
-F: target/sparc/
+F: target-sparc/
 F: hw/sparc/
 F: hw/sparc64/
 F: disas/sparc.c
@@ -237,7 +226,7 @@ F: disas/sparc.c
 UniCore32
 M: Guan Xuetao <gxt@mprc.pku.edu.cn>
 S: Maintained
-F: target/unicore32/
+F: target-unicore32/
 F: hw/unicore32/
 F: include/hw/unicore32/

@@ -246,23 +235,22 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Richard Henderson <rth@twiddle.net>
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
-F: target/i386/
+F: target-i386/
 F: hw/i386/
 F: disas/i386.c
-T: git git://github.com/ehabkost/qemu.git x86-next

 Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
 W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
-F: target/xtensa/
+F: target-xtensa/
 F: hw/xtensa/
 F: tests/tcg/xtensa/

 TriCore
 M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
 S: Maintained
-F: target/tricore/
+F: target-tricore/
 F: hw/tricore/
 F: include/hw/tricore/

@@ -273,34 +261,34 @@ Overall
 M: Paolo Bonzini <pbonzini@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
+F: kvm-*
 F: */kvm.*
-F: accel/kvm/
 F: include/sysemu/kvm*.h

 ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target/arm/kvm.c
+F: target-arm/kvm.c

 MIPS
 M: James Hogan <james.hogan@imgtec.com>
 S: Maintained
-F: target/mips/kvm.c
+F: target-mips/kvm.c

 PPC
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/ppc/kvm.c
+F: target-ppc/kvm.c

 S390
 M: Christian Borntraeger <borntraeger@de.ibm.com>
-M: Cornelia Huck <cohuck@redhat.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target/s390x/kvm.c
-F: target/s390x/ioinst.[ch]
-F: target/s390x/machine.c
+F: target-s390x/kvm.c
+F: target-s390x/ioinst.[ch]
+F: target-s390x/machine.c
 F: hw/intc/s390_flic.c
 F: hw/intc/s390_flic_kvm.c
 F: include/hw/s390x/s390_flic.h
@@ -313,7 +301,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: target/i386/kvm.c
+F: target-i386/kvm.c

 Guest CPU Cores (Xen):
 ----------------------
@@ -321,10 +309,10 @@ Guest CPU Cores (Xen):
 X86
 M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
-L: xen-devel@lists.xenproject.org
+L: xen-devel@lists.xensource.com
 S: Supported
+F: xen-*
 F: */xen*
-F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
@@ -349,12 +337,6 @@ L: qemu-devel@nongnu.org
 S: Maintained
 F: *posix*

-NETBSD
-L: qemu-devel@nongnu.org
-M: Kamil Rytarowski <kamil@netbsd.org>
-S: Maintained
-K: ^Subject:.*(?i)NetBSD
-
 W32, W64
 L: qemu-devel@nongnu.org
 M: Stefan Weil <sw@weilnetz.de>
@@ -375,7 +357,7 @@ F: hw/*/allwinner*
 F: include/hw/*/allwinner*
 F: hw/arm/cubieboard.c

-ARM PrimeCell and CMSDK devices
+ARM PrimeCell
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
@@ -389,10 +371,6 @@ F: hw/intc/pl190.c
 F: hw/sd/pl181.c
 F: hw/timer/pl031.c
 F: include/hw/arm/primecell.h
-F: hw/timer/cmsdk-apb-timer.c
-F: include/hw/timer/cmsdk-apb-timer.h
-F: hw/char/cmsdk-apb-uart.c
-F: include/hw/char/cmsdk-apb-uart.h

 ARM cores
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -454,14 +432,6 @@ S: Maintained
 F: hw/arm/integratorcp.c
 F: hw/misc/arm_integrator_debug.c

-MPS2
-M: Peter Maydell <peter.maydell@linaro.org>
-L: qemu-arm@nongnu.org
-S: Maintained
-F: hw/arm/mps2.c
-F: hw/misc/mps2-scc.c
-F: include/hw/misc/mps2-scc.h
-
 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
 L: qemu-arm@nongnu.org
@@ -538,6 +508,7 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/virt-acpi-build.c
+F: include/hw/arm/virt-acpi-build.h

 STM32F205
 M: Alistair Francis <alistair@alistair23.me>
@@ -577,19 +548,20 @@ F: hw/lm32/milkymist.c
 M68K Machines
 -------------
 an5206
-M: Thomas Huth <huth@tuxfamily.org>
-S: Odd Fixes
+S: Orphan
 F: hw/m68k/an5206.c
 F: hw/m68k/mcf5206.c

+dummy_m68k
+S: Orphan
+F: hw/m68k/dummy_m68k.c
+
 mcf5208
-M: Thomas Huth <huth@tuxfamily.org>
-S: Odd Fixes
+S: Orphan
 F: hw/m68k/mcf5208.c
 F: hw/m68k/mcf_intc.c
 F: hw/char/mcf_uart.c
 F: hw/net/mcf_fec.c
-F: include/hw/m68k/mcf*.h

 MicroBlaze Machines
 -------------------
@@ -616,28 +588,15 @@ S: Maintained
 F: hw/mips/mips_malta.c

 Mipssim
-M: Yongbok Kim <yongbok.kim@imgtec.com>
-S: Odd Fixes
+L: qemu-devel@nongnu.org
+S: Orphan
 F: hw/mips/mips_mipssim.c
-F: hw/net/mipsnet.c

 R4000
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: hw/mips/mips_r4k.c

-Fulong 2E
-M: Yongbok Kim <yongbok.kim@imgtec.com>
-S: Odd Fixes
-F: hw/mips/mips_fulong2e.c
-
-Boston
-M: Paul Burton <paul.burton@imgtec.com>
-S: Maintained
-F: hw/core/loader-fit.c
-F: hw/mips/boston.c
-F: hw/pci-host/xilinx-pcie.c
-
 OpenRISC Machines
 -----------------
 or1k-sim
@@ -661,6 +620,7 @@ F: hw/ppc/ppc440_bamboo.c

 e500
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500.[hc]
@@ -671,6 +631,7 @@ F: pc-bios/u-boot.e500

 mpc8544ds
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/mpc8544ds.c
@@ -697,13 +658,10 @@ F: hw/misc/macio/
 F: hw/intc/heathrow_pic.c

 PReP
-M: Hervé Poussineau <hpoussin@reactos.org>
 L: qemu-devel@nongnu.org
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc/prep.c
-F: hw/ppc/prep_systemio.c
-F: hw/ppc/rs6000_mc.c
 F: hw/pci-host/prep.[hc]
 F: hw/isa/pc87312.[hc]
 F: pc-bios/ppc_rom.bin
@@ -768,13 +726,6 @@ S: Maintained
 F: hw/sparc64/sun4u.c
 F: pc-bios/openbios-sparc64

-Sun4v
-M: Artyom Tarasenko <atar4qemu@gmail.com>
-S: Maintained
-F: hw/sparc64/sun4v.c
-F: hw/timer/sun4v-rtc.c
-F: include/hw/timer/sun4v-rtc.h
-
 Leon3
 M: Fabien Chouteau <chouteau@adacore.com>
 S: Maintained
@@ -785,12 +736,11 @@ F: include/hw/sparc/grlib.h
 S390 Machines
 -------------
 S390 Virtio-ccw
-M: Cornelia Huck <cohuck@redhat.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Supported
 F: hw/char/sclp*.[hc]
-F: hw/char/terminal3270.c
 F: hw/s390x/
 F: include/hw/s390x/
 F: pc-bios/s390-ccw/
@@ -857,9 +807,7 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 M: Marcel Apfelbaum <marcel@redhat.com>
 S: Supported
 F: hw/core/machine.c
-F: hw/core/null-machine.c
 F: include/hw/boards.h
-T: git git://github.com/ehabkost/qemu.git machine-next

 Xtensa Machines
 ---------------
@@ -937,8 +885,7 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
-F: tests/bios-tables-test.c
-F: tests/acpi-utils.[hc]
+F: include/hw/arm/virt-acpi-build.h

 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -949,6 +896,7 @@ F: include/hw/ppc/ppc4xx.h

 ppce500
 M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500*
@@ -971,9 +919,8 @@ SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
 F: include/hw/scsi/*
-F: include/scsi/*
 F: hw/scsi/*
-F: util/scsi*
+F: tests/scsi-disk-test.c
 F: tests/virtio-scsi-test.c
 T: git git://github.com/bonzini/qemu.git scsi-next

@@ -1003,7 +950,6 @@ F: docs/usb2.txt
 F: docs/usb-storage.txt
 F: include/hw/usb.h
 F: include/hw/usb/
-F: default-configs/usb.mak

 USB (serial adapter)
 M: Gerd Hoffmann <kraxel@redhat.com>
@@ -1017,14 +963,6 @@ S: Supported
 F: hw/vfio/*
 F: include/hw/vfio/

-vfio-ccw
-M: Cornelia Huck <cohuck@redhat.com>
-S: Supported
-F: hw/vfio/ccw.c
-F: hw/s390x/s390-ccw.c
-F: include/hw/s390x/s390-ccw.h
-T: git git://github.com/cohuck/qemu.git s390-next
-
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -1060,7 +998,7 @@ F: tests/virtio-blk-test.c
 T: git git://github.com/stefanha/qemu.git block

 virtio-ccw
-M: Cornelia Huck <cohuck@redhat.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 S: Supported
 F: hw/s390x/virtio-ccw.[hc]
@@ -1074,7 +1012,7 @@ F: hw/input/virtio-input*.c
 F: include/hw/virtio/virtio-input.h

 virtio-serial
-M: Amit Shah <amit@kernel.org>
+M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1083,7 +1021,7 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c

 virtio-rng
-M: Amit Shah <amit@kernel.org>
+M: Amit Shah <amit.shah@redhat.com>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
@@ -1106,12 +1044,11 @@ F: hw/block/nvme*
 F: tests/nvme-test.c

 megasas
-M: Hannes Reinecke <hare@suse.com>
+M: Hannes Reinecke <hare@suse.de>
 L: qemu-block@nongnu.org
 S: Supported
 F: hw/scsi/megasas.c
 F: hw/scsi/mfi.h
-F: tests/megasas-test.c

 Network packet abstractions
 M: Dmitry Fleytman <dmitry@daynix.com>
@@ -1135,7 +1072,7 @@ F: tests/rocker/
 F: docs/specs/rocker.txt

 NVDIMM
-M: Xiao Guangrong <xiaoguangrong.eric@gmail.com>
+M: Xiao Guangrong <guangrong.xiao@linux.intel.com>
 S: Maintained
 F: hw/acpi/nvdimm.c
 F: hw/mem/nvdimm.c
@@ -1164,22 +1101,6 @@ F: hw/nvram/chrp_nvram.c
 F: include/hw/nvram/chrp_nvram.h
 F: tests/prom-env-test.c

-VM Generation ID
-M: Ben Warren <ben@skyportsystems.com>
-S: Maintained
-F: hw/acpi/vmgenid.c
-F: include/hw/acpi/vmgenid.h
-F: docs/specs/vmgenid.txt
-F: tests/vmgenid-test.c
-F: stubs/vmgenid.c
-
-Unimplemented device
-M: Peter Maydell <peter.maydell@linaro.org>
-R: Philippe Mathieu-Daudé <f4bug@amsat.org>
-S: Maintained
-F: include/hw/misc/unimp.h
-F: hw/misc/unimp.c
-
 Subsystems
 ----------
 Audio
@@ -1204,7 +1125,6 @@ F: include/block/
 F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
-F: util/qemu-progress.c
 T: git git://repo.or.cz/qemu/kevin.git block

 Block I/O path
@@ -1212,20 +1132,13 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Fam Zheng <famz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
-F: util/async.c
-F: util/aio-*.c
+F: async.c
+F: aio-*.c
 F: block/io.c
 F: migration/block*
 F: include/block/aio.h
 T: git git://github.com/stefanha/qemu.git block

-Block SCSI subsystem
-M: Paolo Bonzini <pbonzini@redhat.com>
-L: qemu-block@nongnu.org
-S: Supported
-F: include/scsi/*
-F: scsi/*
-
 Block Jobs
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
@@ -1244,7 +1157,6 @@ S: Supported
 F: blockdev.c
 F: block/qapi.c
 F: qapi/block*.json
-F: qapi/transaction.json
 T: git git://repo.or.cz/qemu/armbru.git block-next

 Dirty Bitmaps
@@ -1263,25 +1175,15 @@ T: git git://github.com/jnsnow/qemu.git bitmaps

 Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
-M: Marc-André Lureau <marcandre.lureau@redhat.com>
 S: Maintained
-F: chardev/
-F: include/chardev/
-F: qapi/char.json
+F: qemu-char.c
+F: backends/msmouse.c
+F: backends/testdev.c

 Character Devices (Braille)
 M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 S: Maintained
-F: chardev/baum.c
-
-Command line option argument parsing
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
-F: include/qemu/option.h
-F: tests/test-keyval.c
-F: tests/test-qemu-opts.c
-F: util/keyval.c
-F: util/qemu-option.c
+F: backends/baum.c

 Coverity model
 M: Markus Armbruster <armbru@redhat.com>
@@ -1334,14 +1236,12 @@ F: include/ui/spice-display.h
 F: ui/spice-*.c
 F: audio/spiceaudio.c
 F: hw/display/qxl*
-F: qapi/ui.json

 Graphics
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Odd Fixes
 F: ui/
 F: include/ui/
-F: qapi/ui.json

 Cocoa graphics
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -1352,10 +1252,9 @@ Main loop
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: cpus.c
-F: util/main-loop.c
-F: util/qemu-timer.c
+F: main-loop.c
+F: qemu-timer.c
 F: vl.c
-F: qapi/run-state.json

 Human Monitor (HMP)
 M: Dr. David Alan Gilbert <dgilbert@redhat.com>
@@ -1364,7 +1263,6 @@ F: monitor.c
 F: hmp.[ch]
 F: hmp-commands*.hx
 F: include/monitor/hmp-target.h
-F: tests/test-hmp.c

 Network device backends
 M: Jason Wang <jasowang@redhat.com>
@@ -1372,7 +1270,6 @@ S: Maintained
 F: net/
 F: include/net/
 T: git git://github.com/jasowang/qemu.git net
-F: qapi/net.json

 Netmap network backend
 M: Luigi Rizzo <rizzo@iet.unipi.it>
@@ -1382,12 +1279,21 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c

+Network Block Device (NBD)
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Odd Fixes
+F: block/nbd*
+F: nbd/
+F: include/block/nbd*
+F: qemu-nbd.c
+T: git git://github.com/bonzini/qemu.git nbd-next
+
 NUMA
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
 F: numa.c
 F: include/sysemu/numa.h
-T: git git://github.com/ehabkost/qemu.git machine-next
+T: git git://github.com/ehabkost/qemu.git numa

 Host Memory Backends
 M: Eduardo Habkost <ehabkost@redhat.com>
@@ -1395,7 +1301,6 @@ M: Igor Mammedov <imammedo@redhat.com>
 S: Maintained
 F: backends/hostmem*.c
 F: include/sysemu/hostmem.h
-T: git git://github.com/ehabkost/qemu.git machine-next

 Cryptodev Backends
 M: Gonglei <arei.gonglei@huawei.com>
@@ -1403,14 +1308,6 @@ S: Maintained
 F: include/sysemu/cryptodev*.h
 F: backends/cryptodev*.c

-Python scripts
-M: Eduardo Habkost <ehabkost@redhat.com>
-M: Cleber Rosa <crosa@redhat.com>
-S: Odd fixes
-F: scripts/qmp/*
-F: scripts/*.py
-F: tests/*.py
-
 QAPI
 M: Markus Armbruster <armbru@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1422,9 +1319,7 @@ X: include/qapi/qmp/
 F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
 F: tests/test-*-visitor.c
-F: tests/test-qapi-*.c
 F: tests/test-qmp-*.c
-F: tests/test-visitor-serialization.c
 F: scripts/qapi*
 F: docs/qapi*
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1443,9 +1338,9 @@ S: Supported
 F: qobject/
 F: include/qapi/qmp/
 X: include/qapi/qmp/dispatch.h
-F: scripts/coccinelle/qobject.cocci
 F: tests/check-qdict.c
-F: tests/check-qnum.c
+F: tests/check-qfloat.c
+F: tests/check-qint.c
 F: tests/check-qjson.c
 F: tests/check-qlist.c
 F: tests/check-qstring.c
@@ -1467,6 +1362,7 @@ F: qom/
 X: qom/cpu.c
 F: tests/check-qom-interface.c
 F: tests/check-qom-proplist.c
+F: tests/check-qom-props.c
 F: tests/qom-test.c

 QMP
@@ -1476,7 +1372,6 @@ F: qmp.c
 F: monitor.c
 F: docs/*qmp-*
 F: scripts/qmp/
-F: tests/qmp-test.c
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

 Register API
@@ -1508,31 +1403,22 @@ F: scripts/tracetool/
 F: docs/tracing.txt
 T: git git://github.com/stefanha/qemu.git tracing

-TPM
-S: Orphan
-F: tpm.c
-F: hw/tpm/*
-F: include/hw/acpi/tpm.h
-F: include/sysemu/tpm*
-F: qapi/tpm.json
-
 Checkpatch
 S: Odd Fixes
 F: scripts/checkpatch.pl

 Migration
 M: Juan Quintela <quintela@redhat.com>
-M: Dr. David Alan Gilbert <dgilbert@redhat.com>
+M: Amit Shah <amit.shah@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
 F: scripts/vmstate-static-checker.py
 F: tests/vmstate-static-checker-data/
 F: docs/migration.txt
-F: qapi/migration.json

 Seccomp
-M: Eduardo Otubo <otubo@redhat.com>
+M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
 S: Supported
 F: qemu-seccomp.c
 F: include/sysemu/seccomp.h
@@ -1543,7 +1429,6 @@ S: Maintained
 F: crypto/
 F: include/crypto/
 F: tests/test-crypto-*
-F: qemu.sasl

 Coroutines
 M: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1572,7 +1457,6 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: include/qemu/sockets.h
 F: util/qemu-sockets.c
-F: qapi/sockets.json

 Throttling infrastructure
 M: Alberto Garcia <berto@igalia.com>
@@ -1599,7 +1483,7 @@ F: include/migration/failover.h
 F: docs/COLO-FT.txt

 COLO Proxy
-M: Zhang Chen <zhangckid@gmail.com>
+M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
 M: Li Zhijian <lizhijian@cn.fujitsu.com>
 S: Supported
 F: docs/colo-proxy.txt
@@ -1607,25 +1491,13 @@ F: net/colo*
 F: net/filter-rewriter.c
 F: net/filter-mirror.c

-Record/replay
-M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
-R: Paolo Bonzini <pbonzini@redhat.com>
-W: http://wiki.qemu.org/Features/record-replay
-S: Supported
-F: replay/*
-F: block/blkreplay.c
-F: net/filter-replay.c
-F: include/sysemu/replay.h
-F: docs/replay.txt
-F: stubs/replay.c
-
 Usermode Emulation
 ------------------
 Overall
 M: Riku Voipio <riku.voipio@iki.fi>
 S: Maintained
 F: thunk.c
-F: accel/tcg/user-exec*.c
+F: user-exec.c

 BSD user
 S: Orphan
@@ -1634,7 +1506,6 @@ F: default-configs/*-bsd-user.mak

 Linux user
 M: Riku Voipio <riku.voipio@iki.fi>
-R: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
 F: linux-user/
 F: default-configs/*-linux-user.mak
@@ -1668,6 +1539,12 @@ S: Maintained
 F: tcg/i386/
 F: disas/i386.c

+IA64 target
+M: Aurelien Jarno <aurelien@aurel32.net>
+S: Maintained
+F: tcg/ia64/
+F: disas/ia64.c
+
 MIPS target
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
@@ -1696,7 +1573,7 @@ TCI target
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: tcg/tci/
-F: tcg/tci.c
+F: tci.c
 F: disas/tci.c

 Block drivers
@@ -1745,19 +1622,6 @@ M: Peter Lieven <pl@kamp.de>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/iscsi.c
-F: block/iscsi-opts.c
-
-Network Block Device (NBD)
-M: Eric Blake <eblake@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
-L: qemu-block@nongnu.org
-S: Maintained
-F: block/nbd*
-F: nbd/
-F: include/block/nbd*
-F: qemu-nbd.*
-F: blockdev-nbd.c
-T: git git://repo.or.cz/qemu/ericb.git nbd

 NFS
 M: Jeff Cody <jcody@redhat.com>
@@ -1775,6 +1639,14 @@ S: Supported
 F: block/ssh.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block

+ARCHIPELAGO
+M: Chrysostomos Nanakos <chris@include.gr>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/archipelago.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
 CURL
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
@@ -1850,9 +1722,9 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/linux-aio.c
 F: include/block/raw-aio.h
-F: block/raw-format.c
-F: block/file-posix.c
-F: block/file-win32.c
+F: block/raw-posix.c
+F: block/raw-win32.c
+F: block/raw_bsd.c
 F: block/win32-aio.c

 qcow2
@@ -1894,8 +1766,8 @@ S: Supported
 F: tests/image-fuzzer/

 Replication
-M: Wen Congyang <wencongyang2@huawei.com>
-M: Xie Changlong <xiechanglong.d@gmail.com>
+M: Wen Congyang <wency@cn.fujitsu.com>
+M: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
 S: Supported
 F: replication*
 F: block/replication.c
@@ -1905,16 +1777,9 @@ F: docs/block-replication.txt
 Build and test automation
 -------------------------
 M: Alex Bennée <alex.bennee@linaro.org>
-M: Fam Zheng <famz@redhat.com>
-R: Philippe Mathieu-Daudé <f4bug@amsat.org>
 L: qemu-devel@nongnu.org
-S: Maintained
+S: Supported
 F: .travis.yml
-F: .shippable.yml
-F: tests/docker/
-W: https://travis-ci.org/qemu/qemu
-W: https://app.shippable.com/github/qemu/qemu
-W: http://patchew.org/QEMU/

 Documentation
 -------------
@@ -1923,3 +1788,9 @@ M: Daniel P. Berrange <berrange@redhat.com>
 S: Odd Fixes
 F: docs/build-system.txt

+Docker testing
+--------------
+Docker based testing framework and cases
+M: Fam Zheng <famz@redhat.com>
+S: Maintained
+F: tests/docker/
--- a/364
+++ b/364
@@ -26,7 +26,6 @@ endif

 CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
 CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
-CONFIG_XEN := $(CONFIG_XEN_BACKEND)
 CONFIG_ALL=y
 -include config-all-devices.mak
 -include config-all-disas.mak
@@ -51,153 +50,38 @@ endif

 include $(SRC_PATH)/rules.mak

-GENERATED_FILES = qemu-version.h config-host.h qemu-options.def
-GENERATED_FILES += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
-GENERATED_FILES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
-GENERATED_FILES += qmp-introspect.h
-GENERATED_FILES += qmp-introspect.c
+GENERATED_HEADERS = qemu-version.h config-host.h qemu-options.def
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
+GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
+GENERATED_HEADERS += qmp-introspect.h
+GENERATED_SOURCES += qmp-introspect.c

-GENERATED_FILES += trace/generated-tcg-tracers.h
+GENERATED_HEADERS += trace/generated-tracers.h
+ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
+GENERATED_HEADERS += trace/generated-tracers-dtrace.h
+endif
+GENERATED_SOURCES += trace/generated-tracers.c

-GENERATED_FILES += trace/generated-helpers-wrappers.h
-GENERATED_FILES += trace/generated-helpers.h
-GENERATED_FILES += trace/generated-helpers.c
+GENERATED_HEADERS += trace/generated-tcg-tracers.h

-ifdef CONFIG_TRACE_UST
-GENERATED_FILES += trace-ust-all.h
-GENERATED_FILES += trace-ust-all.c
+GENERATED_HEADERS += trace/generated-helpers-wrappers.h
+GENERATED_HEADERS += trace/generated-helpers.h
+GENERATED_SOURCES += trace/generated-helpers.c
+
+ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
+GENERATED_HEADERS += trace/generated-ust-provider.h
+GENERATED_SOURCES += trace/generated-ust.c
 endif

-GENERATED_FILES += module_block.h
-
-TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
-TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
-TRACE_DTRACE =
-ifdef CONFIG_TRACE_DTRACE
-TRACE_HEADERS += trace-dtrace-root.h $(trace-events-subdirs:%=%/trace-dtrace.h)
-TRACE_DTRACE += trace-dtrace-root.dtrace $(trace-events-subdirs:%=%/trace-dtrace.dtrace)
-endif
-ifdef CONFIG_TRACE_UST
-TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
-endif
-
-GENERATED_FILES += $(TRACE_HEADERS)
-GENERATED_FILES += $(TRACE_SOURCES)
-GENERATED_FILES += $(BUILD_DIR)/trace-events-all
-
-trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
-
-tracetool-y = $(SRC_PATH)/scripts/tracetool.py
-tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
-
-%/trace.h: %/trace.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace.c: %/trace.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace.c-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=c \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-ust.h: %/trace-ust.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace-ust.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-dtrace.dtrace: %/trace-dtrace.dtrace-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-%/trace-dtrace.dtrace-timestamp: $(SRC_PATH)/%/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=$(call trace-group-name,$@) \
-		--format=d \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-%/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y)
-	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
-
-%/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y)
-
-
-trace-root.h: trace-root.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-root.c: trace-root.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-root.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=c \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-root.h: trace-ust-root.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-all.h: trace-ust-all.h-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-all.h-timestamp: $(trace-events-files) $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
-		--format=ust-events-h \
-		--backends=$(TRACE_BACKENDS) \
-		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-ust-all.c: trace-ust-all.c-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-ust-all.c-timestamp: $(trace-events-files) $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
-		--format=ust-events-c \
-		--backends=$(TRACE_BACKENDS) \
-		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-dtrace-root.dtrace: trace-dtrace-root.dtrace-timestamp
-	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
-	$(call quiet-command,$(TRACETOOL) \
-		--group=root \
-		--format=d \
-		--backends=$(TRACE_BACKENDS) \
-		$< > $@,"GEN","$(@:%-timestamp=%)")
-
-trace-dtrace-root.h: trace-dtrace-root.dtrace
-	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
-
-trace-dtrace-root.o: trace-dtrace-root.dtrace
+GENERATED_HEADERS += module_block.h

 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
 configure: ;

-.PHONY: all clean cscope distclean html info install install-doc \
-	pdf txt recurse-all speed test dist msi FORCE
+.PHONY: all clean cscope distclean dvi html info install install-doc \
+	pdf recurse-all speed test dist msi FORCE

 $(call set-vpath, $(SRC_PATH))

@@ -206,9 +90,7 @@ LIBS+=-lz $(LIBS_TOOLS)
 HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
-DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
-DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
+DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -263,13 +145,10 @@ endif

 dummy := $(call unnest-vars,, \
                stub-obj-y \
-                chardev-obj-y \
                util-obj-y \
                qga-obj-y \
                ivshmem-client-obj-y \
                ivshmem-server-obj-y \
-                libvhost-user-obj-y \
-                vhost-user-scsi-obj-y \
                qga-vss-dll-obj-y \
                block-obj-y \
                block-obj-m \
@@ -278,10 +157,11 @@ dummy := $(call unnest-vars,, \
                qom-obj-y \
                io-obj-y \
                common-obj-y \
-                common-obj-m \
-                trace-obj-y)
+                common-obj-m)

+ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile.include
+endif

 all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules

@@ -303,11 +183,7 @@ qemu-version.h: FORCE
 				printf '""\n'; \
 			fi; \
 		fi) > $@.tmp)
-	$(call quiet-command, if ! cmp -s $@ $@.tmp; then \
-	  mv $@.tmp $@; \
-	 else \
-	  rm $@.tmp; \
-	 fi)
+	$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -325,6 +201,15 @@ $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
 subdir-%:
 	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $* V="$(V)" TARGET_DIR="$*/" all,)

+subdir-pixman: pixman/Makefile
+	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pixman V="$(V)" all,)
+
+pixman/Makefile: $(SRC_PATH)/pixman/configure
+	(cd pixman; CFLAGS="$(CFLAGS) -fPIC $(extra_cflags) $(extra_ldflags)" $(SRC_PATH)/pixman/configure $(AUTOCONF_HOST) --disable-gtk --disable-shared --enable-static)
+
+$(SRC_PATH)/pixman/configure:
+	(cd $(SRC_PATH)/pixman; autoreconf -v --install)
+
 DTC_MAKE_ARGS=-I$(SRC_PATH)/dtc VPATH=$(SRC_PATH)/dtc -C dtc V="$(V)" LIBFDT_srcdir=$(SRC_PATH)/dtc/libfdt
 DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS)
 DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
@@ -335,8 +220,7 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a $(common-obj-y) $(chardev-obj-y) \
-	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -347,29 +231,30 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o")
+$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
+	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo")

-Makefile: $(version-obj-y)
+Makefile: $(version-obj-y) $(version-lobj-y)

 ######################################################################
 # Build libraries

-libqemuutil.a: $(util-obj-y) $(trace-obj-y) $(stub-obj-y)
+libqemustub.a: $(stub-obj-y)
+libqemuutil.a: $(util-obj-y)

 ######################################################################

-COMMON_LDADDS = libqemuutil.a
-
 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a

-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap

 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
@@ -400,18 +285,9 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)

 qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
               $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
-               $(SRC_PATH)/qapi/char.json \
-               $(SRC_PATH)/qapi/crypto.json \
-               $(SRC_PATH)/qapi/introspect.json \
-               $(SRC_PATH)/qapi/migration.json \
-               $(SRC_PATH)/qapi/net.json \
-               $(SRC_PATH)/qapi/rocker.json \
-               $(SRC_PATH)/qapi/run-state.json \
-               $(SRC_PATH)/qapi/sockets.json \
-               $(SRC_PATH)/qapi/tpm.json \
-               $(SRC_PATH)/qapi/trace.json \
-               $(SRC_PATH)/qapi/transaction.json \
-               $(SRC_PATH)/qapi/ui.json
+               $(SRC_PATH)/qapi/event.json $(SRC_PATH)/qapi/introspect.json \
+               $(SRC_PATH)/qapi/crypto.json $(SRC_PATH)/qapi/rocker.json \
+               $(SRC_PATH)/qapi/trace.json

 qapi-types.c qapi-types.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -440,9 +316,9 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
 		"GEN","$@")

 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
-$(qga-obj-y): $(QGALIB_GEN)
+$(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)

-qemu-ga$(EXESUF): $(qga-obj-y) $(COMMON_LDADDS)
+qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)

 ifdef QEMU_GA_MSI_ENABLED
@@ -467,13 +343,9 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif

-ifdef CONFIG_IVSHMEM
-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)
-ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
-	$(call LINK, $^)
-endif
-vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)

 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -486,15 +358,17 @@ clean:
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
 	rm -f *.msi
-	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
+	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
-	@# May not be present in GENERATED_FILES
+	@# May not be present in GENERATED_HEADERS
 	rm -f trace/generated-tracers-dtrace.dtrace*
 	rm -f trace/generated-tracers-dtrace.h*
-	rm -f $(foreach f,$(GENERATED_FILES),$(f) $(f)-timestamp)
+	rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp)
+	rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
 	rm -rf qapi-generated
 	rm -rf qga/qapi-generated
 	for d in $(ALL_SUBDIRS); do \
@@ -515,22 +389,17 @@ distclean: clean
 	rm -f config-all-devices.mak config-all-disas.mak config.status
 	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
-	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps
+	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
 	rm -f qemu-doc.log qemu-doc.pdf qemu-doc.pg qemu-doc.toc qemu-doc.tp
-	rm -f qemu-doc.vr qemu-doc.txt
+	rm -f qemu-doc.vr
 	rm -f config.log
 	rm -f linux-headers/asm
-	rm -f docs/version.texi
-	rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi
-	rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7
-	rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
-	rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
-	rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
        done
 	rm -Rf .sdk
+	if test -f pixman/config.log; then $(MAKE) -C pixman distclean; fi
 	if test -f dtc/version_gen.h; then $(MAKE) $(DTC_MAKE_ARGS) clean; fi

 KEYMAPS=da     en-gb  et  fr     fr-ch  is  lt  modifiers  no  pt-br  sv \
@@ -551,11 +420,10 @@ efi-e1000e.rom efi-vmxnet3.rom \
 qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
-s390-ccw.img s390-netboot.img \
+s390-ccw.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
-u-boot.e500 \
-qemu_vga.ndrv
+u-boot.e500
 else
 BLOBS=
 endif
@@ -563,14 +431,10 @@ endif
 install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
-	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
-	$(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
 ifneq ($(TOOLS),)
 	$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -578,9 +442,6 @@ ifneq ($(TOOLS),)
 endif
 ifneq (,$(findstring qemu-ga,$(TOOLS)))
 	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
 endif
 endif
 ifdef CONFIG_VIRTFS
@@ -599,7 +460,8 @@ endif
 endif


-install: all $(if $(BUILD_DOCS),install-doc) install-datadir install-localstatedir
+install: all $(if $(BUILD_DOCS),install-doc) \
+install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
 endif
@@ -668,27 +530,20 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \

 # documentation
 MAKEINFO=makeinfo
-MAKEINFOINCLUDES= -I docs -I $(<D) -I $(@D)
-MAKEINFOFLAGS=--no-split --number-sections $(MAKEINFOINCLUDES)
-TEXI2PODFLAGS=$(MAKEINFOINCLUDES) "-DVERSION=$(VERSION)"
-TEXI2PDFFLAGS=$(if $(V),,--quiet) -I $(SRC_PATH) $(MAKEINFOINCLUDES)
+MAKEINFOFLAGS=--no-headers --no-split --number-sections
+TEXIFLAG=$(if $(V),,--quiet)
+%.dvi: %.texi
+	$(call quiet-command,texi2dvi $(TEXIFLAG) -I . $<,"GEN","$@")

-docs/version.texi: $(SRC_PATH)/VERSION
-	$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")
+%.html: %.texi
+	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --html $< -o $@, \
+	"GEN","$@")

-%.html: %.texi docs/version.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
-	--html $< -o $@,"GEN","$@")
+%.info: %.texi
+	$(call quiet-command,$(MAKEINFO) $< -o $@,"GEN","$@")

-%.info: %.texi docs/version.texi
-	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
-
-%.txt: %.texi docs/version.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
-	--plaintext $< -o $@,"GEN","$@")
-
-%.pdf: %.texi docs/version.texi
-	$(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@")
+%.pdf: %.texi
+	$(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<,"GEN","$@")

 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -702,42 +557,47 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")

-docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
-
-docs/interop/qemu-qmp-qapi.texi: $(qapi-modules)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
-
-docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
-
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
+	  "GEN","$@")
 qemu.1: qemu-option-trace.texi
+
 qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
+	  "GEN","$@")
+
 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
+	  $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
+	  "GEN","$@")
+
 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
+	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
+	  "GEN","$@")
+
 qemu-ga.8: qemu-ga.texi
+	$(call quiet-command, \
+	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
+	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
+	  "GEN","$@")

-html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
-info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
-pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
-txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
+dvi: qemu-doc.dvi
+html: qemu-doc.html
+info: qemu-doc.info
+pdf: qemu-doc.pdf

-qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
+qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi

-docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
-    docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
-    docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \
-	docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi
-
-docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \
-    docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \
-    docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \
-	docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi
-
-
 ifdef CONFIG_WIN32

 INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
@@ -796,15 +656,9 @@ endif # CONFIG_WIN

 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
-ifneq ($(wildcard config-host.mak),)
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
-Makefile: $(GENERATED_FILES)
+Makefile: $(GENERATED_HEADERS)
 endif
-endif
-
-.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
-	$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
-	$(TRACE_DTRACE) $(TRACE_DTRACE:%=%-timestamp)

 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
@@ -836,7 +690,7 @@ help:
 	@echo  '  docker          - Help about targets running tests inside Docker containers'
 	@echo  ''
 	@echo  'Documentation targets:'
-	@echo  '  html info pdf txt'
+	@echo  '  dvi html info pdf'
 	@echo  '                  - Build documentation in specified format'
 	@echo  ''
 ifdef CONFIG_WIN32
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -4,14 +4,16 @@ stub-obj-y = stubs/ crypto/
 util-obj-y = util/ qobject/ qapi/
 util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

-chardev-obj-y = chardev/
-
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

+block-obj-y = async.o thread-pool.o
 block-obj-y += nbd/
 block-obj-y += block.o blockjob.o
-block-obj-y += block/ scsi/
+block-obj-y += main-loop.o iohandler.o qemu-timer.o
+block-obj-$(CONFIG_POSIX) += aio-posix.o
+block-obj-$(CONFIG_WIN32) += aio-win32.o
+block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o

@@ -40,7 +42,7 @@ io-obj-y = io/

 ifeq ($(CONFIG_SOFTMMU),y)
 common-obj-y = blockdev.o blockdev-nbd.o block/
-common-obj-y += bootdevice.o iothread.o
+common-obj-y += iothread.o
 common-obj-y += net/
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
@@ -49,9 +51,14 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration/
+common-obj-y += qemu-char.o #aio.o
+common-obj-y += page_cache.o
+
+common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

 common-obj-y += audio/
 common-obj-y += hw/
+common-obj-y += accel.o

 common-obj-y += replay/

@@ -67,11 +74,8 @@ common-obj-y += tpm.o
 common-obj-$(CONFIG_SLIRP) += slirp/

 common-obj-y += backends/
-common-obj-y += chardev/

 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
-qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS)
-qemu-seccomp.o-libs := $(SECCOMP_LIBS)

 common-obj-$(CONFIG_FDT) += device_tree.o

@@ -93,6 +97,7 @@ common-obj-y += disas/
 ######################################################################
 # Resource file for Windows executables
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
+version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo

 ######################################################################
 # tracing
@@ -109,73 +114,52 @@ qga-vss-dll-obj-y = qga/

 ######################################################################
 # contrib
-ivshmem-client-obj-$(CONFIG_IVSHMEM) = contrib/ivshmem-client/
-ivshmem-server-obj-$(CONFIG_IVSHMEM) = contrib/ivshmem-server/
-libvhost-user-obj-y = contrib/libvhost-user/
-vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
-vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
-vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
-vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
+ivshmem-client-obj-y = contrib/ivshmem-client/
+ivshmem-server-obj-y = contrib/ivshmem-server/
+

 ######################################################################
-trace-events-subdirs =
-trace-events-subdirs += util
-trace-events-subdirs += crypto
-trace-events-subdirs += io
-trace-events-subdirs += migration
-trace-events-subdirs += block
-trace-events-subdirs += chardev
-trace-events-subdirs += hw/block
-trace-events-subdirs += hw/block/dataplane
-trace-events-subdirs += hw/char
-trace-events-subdirs += hw/intc
-trace-events-subdirs += hw/net
-trace-events-subdirs += hw/virtio
-trace-events-subdirs += hw/audio
-trace-events-subdirs += hw/misc
-trace-events-subdirs += hw/usb
-trace-events-subdirs += hw/scsi
-trace-events-subdirs += hw/nvram
-trace-events-subdirs += hw/display
-trace-events-subdirs += hw/input
-trace-events-subdirs += hw/timer
-trace-events-subdirs += hw/dma
-trace-events-subdirs += hw/sparc
-trace-events-subdirs += hw/sd
-trace-events-subdirs += hw/isa
-trace-events-subdirs += hw/mem
-trace-events-subdirs += hw/i386
-trace-events-subdirs += hw/i386/xen
-trace-events-subdirs += hw/9pfs
-trace-events-subdirs += hw/ppc
-trace-events-subdirs += hw/pci
-trace-events-subdirs += hw/s390x
-trace-events-subdirs += hw/vfio
-trace-events-subdirs += hw/acpi
-trace-events-subdirs += hw/arm
-trace-events-subdirs += hw/alpha
-trace-events-subdirs += hw/xen
-trace-events-subdirs += hw/ide
-trace-events-subdirs += ui
-trace-events-subdirs += audio
-trace-events-subdirs += net
-trace-events-subdirs += target/arm
-trace-events-subdirs += target/i386
-trace-events-subdirs += target/mips
-trace-events-subdirs += target/sparc
-trace-events-subdirs += target/s390x
-trace-events-subdirs += target/ppc
-trace-events-subdirs += qom
-trace-events-subdirs += linux-user
-trace-events-subdirs += qapi
-trace-events-subdirs += accel/tcg
-trace-events-subdirs += accel/kvm
-trace-events-subdirs += nbd
-
-trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
-
-trace-obj-y = trace-root.o
-trace-obj-y += $(trace-events-subdirs:%=%/trace.o)
-trace-obj-$(CONFIG_TRACE_UST) += trace-ust-all.o
-trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace-root.o
-trace-obj-$(CONFIG_TRACE_DTRACE) += $(trace-events-subdirs:%=%/trace-dtrace.o)
+trace-events-y = trace-events
+trace-events-y += util/trace-events
+trace-events-y += crypto/trace-events
+trace-events-y += io/trace-events
+trace-events-y += migration/trace-events
+trace-events-y += block/trace-events
+trace-events-y += hw/block/trace-events
+trace-events-y += hw/char/trace-events
+trace-events-y += hw/intc/trace-events
+trace-events-y += hw/net/trace-events
+trace-events-y += hw/virtio/trace-events
+trace-events-y += hw/audio/trace-events
+trace-events-y += hw/misc/trace-events
+trace-events-y += hw/usb/trace-events
+trace-events-y += hw/scsi/trace-events
+trace-events-y += hw/nvram/trace-events
+trace-events-y += hw/display/trace-events
+trace-events-y += hw/input/trace-events
+trace-events-y += hw/timer/trace-events
+trace-events-y += hw/dma/trace-events
+trace-events-y += hw/sparc/trace-events
+trace-events-y += hw/sd/trace-events
+trace-events-y += hw/isa/trace-events
+trace-events-y += hw/mem/trace-events
+trace-events-y += hw/i386/trace-events
+trace-events-y += hw/9pfs/trace-events
+trace-events-y += hw/ppc/trace-events
+trace-events-y += hw/pci/trace-events
+trace-events-y += hw/s390x/trace-events
+trace-events-y += hw/vfio/trace-events
+trace-events-y += hw/acpi/trace-events
+trace-events-y += hw/arm/trace-events
+trace-events-y += hw/alpha/trace-events
+trace-events-y += ui/trace-events
+trace-events-y += audio/trace-events
+trace-events-y += net/trace-events
+trace-events-y += target-arm/trace-events
+trace-events-y += target-i386/trace-events
+trace-events-y += target-sparc/trace-events
+trace-events-y += target-s390x/trace-events
+trace-events-y += target-ppc/trace-events
+trace-events-y += qom/trace-events
+trace-events-y += linux-user/trace-events
+trace-events-y += qapi/trace-events
--- a/Makefile.target
+++ b/Makefile.target
@@ -11,7 +11,7 @@ $(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
-QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target/$(TARGET_BASE_ARCH) -DNEED_CPU_H
+QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -DNEED_CPU_H

 QEMU_CFLAGS+=-I$(SRC_PATH)/include

@@ -36,6 +36,10 @@ endif
 PROGS=$(QEMU_PROG) $(QEMU_PROGW)
 STPFILES=

+ifdef CONFIG_LINUX_USER
+PROGS+=$(QEMU_PROG)-binfmt
+endif
+
 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

@@ -48,12 +52,8 @@ else
 TARGET_TYPE=system
 endif

-tracetool-y = $(SRC_PATH)/scripts/tracetool.py
-tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
-
-$(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all $(tracetool-y)
+$(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(bindir)/$(QEMU_PROG) \
@@ -61,9 +61,8 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all $(tracetool-y)
 		--target-type=$(TARGET_TYPE) \
 		$< > $@,"GEN","$(TARGET_DIR)$(QEMU_PROG).stp-installed")

-$(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all $(tracetool-y)
+$(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(realpath .)/$(QEMU_PROG) \
@@ -71,9 +70,8 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all $(tracetool-y)
 		--target-type=$(TARGET_TYPE) \
 		$< > $@,"GEN","$(TARGET_DIR)$(QEMU_PROG).stp")

-$(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all $(tracetool-y)
+$(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
 		--format=simpletrace-stap \
 		--backends=$(TRACE_BACKENDS) \
 		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
@@ -82,7 +80,6 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all $(tracetool-y)
 else
 stap:
 endif
-.PHONY: stap

 all: $(PROGS) stap

@@ -91,16 +88,19 @@ all: $(PROGS) stap

 #########################################################
 # cpu emulator library
-obj-y += exec.o
-obj-y += accel/
-obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-$(CONFIG_TCG) += tcg/tcg-common.o
-obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
+obj-y = exec.o translate-all.o cpu-exec.o
+obj-y += translate-common.o
+obj-y += cpu-exec-common.o
+obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
+obj-$(CONFIG_TCG_INTERPRETER) += tci.o
+obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
-obj-y += target/$(TARGET_BASE_ARCH)/
+obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
+obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
+obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
@@ -118,7 +118,9 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
             -I$(SRC_PATH)/linux-user

 obj-y += linux-user/
-obj-y += gdbstub.o thunk.o
+obj-y += gdbstub.o thunk.o user-exec.o
+
+obj-binfmt-y += linux-user/

 endif #CONFIG_LINUX_USER

@@ -131,7 +133,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
 			 -I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)

 obj-y += bsd-user/
-obj-y += gdbstub.o
+obj-y += gdbstub.o user-exec.o

 endif #CONFIG_BSD_USER

@@ -139,14 +141,21 @@ endif #CONFIG_BSD_USER
 # System emulator target
 ifdef CONFIG_SOFTMMU
 obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
-obj-y += qtest.o
+obj-y += qtest.o bootdevice.o
 obj-y += hw/
-obj-y += memory.o
+obj-$(CONFIG_KVM) += kvm-all.o
+obj-y += memory.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
-obj-y += migration/ram.o
+obj-y += migration/ram.o migration/savevm.o
 LIBS := $(libs_softmmu) $(LIBS)

+# xen support
+obj-$(CONFIG_XEN) += xen-common.o
+obj-$(CONFIG_XEN_I386) += xen-hvm.o xen-mapcache.o
+obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
+obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
+
 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
 obj-y += hw/sparc64/
@@ -154,27 +163,29 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-GENERATED_FILES += hmp-commands.h hmp-commands-info.h
+GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h

 endif # CONFIG_SOFTMMU

 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

+ifdef CONFIG_LINUX_USER
+dummy := $(call unnest-vars,,obj-y obj-binfmt-y)
+else
 dummy := $(call unnest-vars,,obj-y)
+endif
 all-obj-y := $(obj-y)

 target-obj-y :=
 block-obj-y :=
 common-obj-y :=
-chardev-obj-y :=
 include $(SRC_PATH)/Makefile.objs
 dummy := $(call unnest-vars,,target-obj-y)
 target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
-               chardev-obj-y \
               crypto-obj-y \
               crypto-aes-obj-y \
               qom-obj-y \
@@ -185,23 +196,24 @@ target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
 all-obj-y += $(qom-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) $(chardev-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
 all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

-COMMON_LDADDS = ../libqemuutil.a
-
 # build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
+$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
 	$(call LINK, $(filter-out %.mak, $^))
 ifdef CONFIG_DARWIN
 	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@")
 	$(call quiet-command,SetFile -a C $@,"SETFILE","$(TARGET_DIR)$@")
 endif

+$(QEMU_PROG)-binfmt: $(obj-binfmt-y)
+	$(call LINK,$^)
+
 gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 	$(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"GEN","$(TARGET_DIR)$@")

@@ -229,5 +241,5 @@ ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
 endif

-GENERATED_FILES += config-target.h
-Makefile: $(GENERATED_FILES)
+GENERATED_HEADERS += config-target.h
+Makefile: $(GENERATED_HEADERS)
--- a/1
+++ b/1
@@ -45,7 +45,6 @@ of other UNIX targets. The simple steps to build QEMU are:
 Additional information can also be found online via the QEMU website:

  http://qemu-project.org/Hosts/Linux
-  http://qemu-project.org/Hosts/Mac
  http://qemu-project.org/Hosts/W32


--- a/2
+++ b/2
@@ -1 +1 @@
-2.10.50
+2.8.0
--- a/accel/accel.c
+++ b/accel/accel.c
@@ -33,7 +33,15 @@
 #include "sysemu/qtest.h"
 #include "hw/xen/xen.h"
 #include "qom/object.h"
-#include "qemu/error-report.h"
+
+int tcg_tb_size;
+static bool tcg_allowed = true;
+
+static int tcg_init(MachineState *ms)
+{
+    tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    return 0;
+}

 static const TypeInfo accel_type = {
    .name = TYPE_ACCEL,
@@ -70,20 +78,19 @@ static int accel_init_machine(AccelClass *acc, MachineState *ms)

 void configure_accelerator(MachineState *ms)
 {
-    const char *accel, *p;
+    const char *p;
    char buf[10];
    int ret;
    bool accel_initialised = false;
    bool init_failed = false;
    AccelClass *acc = NULL;

-    accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
-    if (accel == NULL) {
+    p = qemu_opt_get(qemu_get_machine_opts(), "accel");
+    if (p == NULL) {
        /* Use the default "accelerator", tcg */
-        accel = "tcg";
+        p = "tcg";
    }

-    p = accel;
    while (!accel_initialised && *p != '\0') {
        if (*p == ':') {
            p++;
@@ -91,6 +98,7 @@ void configure_accelerator(MachineState *ms)
        p = get_opt_name(buf, sizeof(buf), p, ':');
        acc = accel_find(buf);
        if (!acc) {
+            fprintf(stderr, "\"%s\" accelerator not found.\n", buf);
            continue;
        }
        if (acc->available && !acc->available()) {
@@ -101,8 +109,9 @@ void configure_accelerator(MachineState *ms)
        ret = accel_init_machine(acc, ms);
        if (ret < 0) {
            init_failed = true;
-            error_report("failed to initialize %s: %s",
-                         acc->name, strerror(-ret));
+            fprintf(stderr, "failed to initialize %s: %s\n",
+                    acc->name,
+                    strerror(-ret));
        } else {
            accel_initialised = true;
        }
@@ -110,25 +119,37 @@ void configure_accelerator(MachineState *ms)

    if (!accel_initialised) {
        if (!init_failed) {
-            error_report("-machine accel=%s: No accelerator found", accel);
+            fprintf(stderr, "No accelerator found!\n");
        }
        exit(1);
    }

    if (init_failed) {
-        error_report("Back to %s accelerator", acc->name);
+        fprintf(stderr, "Back to %s accelerator.\n", acc->name);
    }
 }

-void accel_register_compat_props(AccelState *accel)
+
+static void tcg_accel_class_init(ObjectClass *oc, void *data)
 {
-    AccelClass *class = ACCEL_GET_CLASS(accel);
-    register_compat_props_array(class->global_props);
+    AccelClass *ac = ACCEL_CLASS(oc);
+    ac->name = "tcg";
+    ac->init_machine = tcg_init;
+    ac->allowed = &tcg_allowed;
 }

+#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
+
+static const TypeInfo tcg_accel_type = {
+    .name = TYPE_TCG_ACCEL,
+    .parent = TYPE_ACCEL,
+    .class_init = tcg_accel_class_init,
+};
+
 static void register_accel_types(void)
 {
    type_register_static(&accel_type);
+    type_register_static(&tcg_accel_type);
 }

 type_init(register_accel_types);
--- a/accel/Makefile.objs
+++ b/accel/Makefile.objs
@@ -1,4 +0,0 @@
-obj-$(CONFIG_SOFTMMU) += accel.o
-obj-y += kvm/
-obj-$(CONFIG_TCG) += tcg/
-obj-y += stubs/
--- a/accel/kvm/Makefile.objs
+++ b/accel/kvm/Makefile.objs
@@ -1 +0,0 @@
-obj-$(CONFIG_KVM) += kvm-all.o
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -1,15 +0,0 @@
-# Trace events for debugging and performance instrumentation
-
-# kvm-all.c
-kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
-kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
-kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
-kvm_irqchip_commit_routes(void) ""
-kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
-kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
-kvm_irqchip_release_virq(int virq) "virq %d"
-
--- a/accel/stubs/Makefile.objs
+++ b/accel/stubs/Makefile.objs
@@ -1,3 +0,0 @@
-obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
-obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
-obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o
--- a/accel/stubs/hax-stub.c
+++ b/accel/stubs/hax-stub.c
@@ -1,34 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright (c) 2015, Intel Corporation
- *
- * Copyright 2016 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "cpu.h"
-#include "sysemu/hax.h"
-
-int hax_sync_vcpus(void)
-{
-    return 0;
-}
-
-int hax_init_vcpu(CPUState *cpu)
-{
-    return -ENOSYS;
-}
-
-int hax_smp_cpu_exec(CPUState *cpu)
-{
-    return -ENOSYS;
-}
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -1,22 +0,0 @@
-/*
- * QEMU TCG accelerator stub
- *
- * Copyright Red Hat, Inc. 2013
- *
- * Author: Paolo Bonzini     <pbonzini@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "cpu.h"
-#include "tcg/tcg.h"
-#include "exec/cpu-common.h"
-#include "exec/exec-all.h"
-
-void tb_flush(CPUState *cpu)
-{
-}
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -1,8 +0,0 @@
-obj-$(CONFIG_SOFTMMU) += tcg-all.o
-obj-$(CONFIG_SOFTMMU) += cputlb.o
-obj-y += tcg-runtime.o
-obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
-obj-y += translator.o
-
-obj-$(CONFIG_USER_ONLY) += user-exec.o
-obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -1,92 +0,0 @@
-/*
- * QEMU System Emulator, accelerator interfaces
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "sysemu/accel.h"
-#include "sysemu/sysemu.h"
-#include "qom/object.h"
-#include "qemu-common.h"
-#include "qom/cpu.h"
-#include "sysemu/cpus.h"
-#include "qemu/main-loop.h"
-
-unsigned long tcg_tb_size;
-
-#ifndef CONFIG_USER_ONLY
-/* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUState *cpu, int mask)
-{
-    int old_mask;
-    g_assert(qemu_mutex_iothread_locked());
-
-    old_mask = cpu->interrupt_request;
-    cpu->interrupt_request |= mask;
-
-    /*
-     * If called from iothread context, wake the target cpu in
-     * case its halted.
-     */
-    if (!qemu_cpu_is_self(cpu)) {
-        qemu_cpu_kick(cpu);
-    } else {
-        cpu->icount_decr.u16.high = -1;
-        if (use_icount &&
-            !cpu->can_do_io
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(cpu, "Raised interrupt while not in I/O function");
-        }
-    }
-}
-#endif
-
-static int tcg_init(MachineState *ms)
-{
-    tcg_exec_init(tcg_tb_size * 1024 * 1024);
-    cpu_interrupt_handler = tcg_handle_interrupt;
-    return 0;
-}
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "tcg";
-    ac->init_machine = tcg_init;
-    ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
-    .name = TYPE_TCG_ACCEL,
-    .parent = TYPE_ACCEL,
-    .class_init = tcg_accel_class_init,
-};
-
-static void register_accel_types(void)
-{
-    type_register_static(&tcg_accel_type);
-}
-
-type_init(register_accel_types);
--- a/accel/tcg/trace-events
+++ b/accel/tcg/trace-events
@@ -1,10 +0,0 @@
-# Trace events for debugging and performance instrumentation
-
-# TCG related tracing (mostly disabled by default)
-# cpu-exec.c
-disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
-disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=0x%x"
-
-# translate-all.c
-translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -1,138 +0,0 @@
-/*
- * Generic intermediate code generation.
- *
- * Copyright (C) 2016-2017 Lluís Vilanova <vilanova@ac.upc.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "cpu.h"
-#include "tcg/tcg.h"
-#include "tcg/tcg-op.h"
-#include "exec/exec-all.h"
-#include "exec/gen-icount.h"
-#include "exec/log.h"
-#include "exec/translator.h"
-
-/* Pairs with tcg_clear_temp_count.
-   To be called by #TranslatorOps.{translate_insn,tb_stop} if
-   (1) the target is sufficiently clean to support reporting,
-   (2) as and when all temporaries are known to be consumed.
-   For most targets, (2) is at the end of translate_insn.  */
-void translator_loop_temp_check(DisasContextBase *db)
-{
-    if (tcg_check_temp_count()) {
-        qemu_log("warning: TCG temporary leaks before "
-                 TARGET_FMT_lx "\n", db->pc_next);
-    }
-}
-
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
-                     CPUState *cpu, TranslationBlock *tb)
-{
-    int max_insns;
-
-    /* Initialize DisasContext */
-    db->tb = tb;
-    db->pc_first = tb->pc;
-    db->pc_next = db->pc_first;
-    db->is_jmp = DISAS_NEXT;
-    db->num_insns = 0;
-    db->singlestep_enabled = cpu->singlestep_enabled;
-
-    /* Instruction counting */
-    max_insns = db->tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0) {
-        max_insns = CF_COUNT_MASK;
-    }
-    if (max_insns > TCG_MAX_INSNS) {
-        max_insns = TCG_MAX_INSNS;
-    }
-    if (db->singlestep_enabled || singlestep) {
-        max_insns = 1;
-    }
-
-    max_insns = ops->init_disas_context(db, cpu, max_insns);
-    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
-
-    /* Reset the temp count so that we can identify leaks */
-    tcg_clear_temp_count();
-
-    /* Start translating.  */
-    gen_tb_start(db->tb);
-    ops->tb_start(db, cpu);
-    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
-
-    while (true) {
-        db->num_insns++;
-        ops->insn_start(db, cpu);
-        tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
-
-        /* Pass breakpoint hits to target for further processing */
-        if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
-            CPUBreakpoint *bp;
-            QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
-                if (bp->pc == db->pc_next) {
-                    if (ops->breakpoint_check(db, cpu, bp)) {
-                        break;
-                    }
-                }
-            }
-            /* The breakpoint_check hook may use DISAS_TOO_MANY to indicate
-               that only one more instruction is to be executed.  Otherwise
-               it should use DISAS_NORETURN when generating an exception,
-               but may use a DISAS_TARGET_* value for Something Else.  */
-            if (db->is_jmp > DISAS_TOO_MANY) {
-                break;
-            }
-        }
-
-        /* Disassemble one instruction.  The translate_insn hook should
-           update db->pc_next and db->is_jmp to indicate what should be
-           done next -- either exiting this loop or locate the start of
-           the next instruction.  */
-        if (db->num_insns == max_insns && (db->tb->cflags & CF_LAST_IO)) {
-            /* Accept I/O on the last instruction.  */
-            gen_io_start();
-            ops->translate_insn(db, cpu);
-            gen_io_end();
-        } else {
-            ops->translate_insn(db, cpu);
-        }
-
-        /* Stop translation if translate_insn so indicated.  */
-        if (db->is_jmp != DISAS_NEXT) {
-            break;
-        }
-
-        /* Stop translation if the output buffer is full,
-           or we have executed all of the allowed instructions.  */
-        if (tcg_op_buf_full() || db->num_insns >= max_insns) {
-            db->is_jmp = DISAS_TOO_MANY;
-            break;
-        }
-    }
-
-    /* Emit code to exit the TB, as indicated by db->is_jmp.  */
-    ops->tb_stop(db, cpu);
-    gen_tb_end(db->tb, db->num_insns);
-
-    /* The disas_log hook may use these values rather than recompute.  */
-    db->tb->size = db->pc_next - db->pc_first;
-    db->tb->icount = db->num_insns;
-
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
-        && qemu_log_in_addr_range(db->pc_first)) {
-        qemu_log_lock();
-        qemu_log("----------------\n");
-        ops->disas_log(db, cpu);
-        qemu_log("\n");
-        qemu_log_unlock();
-    }
-#endif
-}
--- a/accel/tcg/user-exec-stub.c
+++ b/accel/tcg/user-exec-stub.c
@@ -1,34 +0,0 @@
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qom/cpu.h"
-#include "sysemu/replay.h"
-
-void cpu_resume(CPUState *cpu)
-{
-}
-
-void qemu_init_vcpu(CPUState *cpu)
-{
-}
-
-/* User mode emulation does not support record/replay yet.  */
-
-bool replay_exception(void)
-{
-    return true;
-}
-
-bool replay_has_exception(void)
-{
-    return false;
-}
-
-bool replay_interrupt(void)
-{
-    return true;
-}
-
-bool replay_has_interrupt(void)
-{
-    return false;
-}
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -16,10 +16,8 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
-#include "qemu/rcu_queue.h"
+#include "qemu/queue.h"
 #include "qemu/sockets.h"
-#include "qemu/cutils.h"
-#include "trace.h"
 #ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif
@@ -29,9 +27,6 @@ struct AioHandler
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
-    AioPollFn *io_poll;
-    IOHandler *io_poll_begin;
-    IOHandler *io_poll_end;
    int deleted;
    void *opaque;
    bool is_external;
@@ -66,7 +61,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
    AioHandler *node;
    struct epoll_event event;

-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int r;
        if (node->deleted || !node->pfd.events) {
            continue;
@@ -205,61 +200,47 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
-                        AioPollFn *io_poll,
                        void *opaque)
 {
    AioHandler *node;
    bool is_new = false;
    bool deleted = false;

-    qemu_lockcnt_lock(&ctx->list_lock);
-
    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write && !io_poll) {
+    if (!io_read && !io_write) {
        if (node == NULL) {
-            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }

        g_source_remove_poll(&ctx->source, &node->pfd);

        /* If the lock is held, just mark the node as deleted */
-        if (qemu_lockcnt_count(&ctx->list_lock)) {
+        if (ctx->walking_handlers) {
            node->deleted = 1;
            node->pfd.revents = 0;
        } else {
            /* Otherwise, delete it for real.  We can't just mark it as
-             * deleted because deleted nodes are only cleaned up while
-             * no one is walking the handlers list.
+             * deleted because deleted nodes are only cleaned up after
+             * releasing the walking_handlers lock.
             */
            QLIST_REMOVE(node, node);
            deleted = true;
        }
-
-        if (!node->io_poll) {
-            ctx->poll_disable_cnt--;
-        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
            is_new = true;
-
-            ctx->poll_disable_cnt += !io_poll;
-        } else {
-            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
        }
-
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
-        node->io_poll = io_poll;
        node->opaque = opaque;
        node->is_external = is_external;

@@ -268,127 +249,72 @@ void aio_set_fd_handler(AioContext *ctx,
    }

    aio_epoll_update(ctx, node, is_new);
-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
-
    if (deleted) {
        g_free(node);
    }
 }

-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
-{
-    AioHandler *node = find_aio_handler(ctx, fd);
-
-    if (!node) {
-        return;
-    }
-
-    node->io_poll_begin = io_poll_begin;
-    node->io_poll_end = io_poll_end;
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
-                            EventNotifierHandler *io_read,
-                            AioPollFn *io_poll)
+                            EventNotifierHandler *io_read)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
-                       (IOHandler *)io_read, NULL, io_poll, notifier);
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       is_external, (IOHandler *)io_read, NULL, notifier);
 }

-void aio_set_event_notifier_poll(AioContext *ctx,
-                                 EventNotifier *notifier,
-                                 EventNotifierHandler *io_poll_begin,
-                                 EventNotifierHandler *io_poll_end)
-{
-    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
-                    (IOHandler *)io_poll_begin,
-                    (IOHandler *)io_poll_end);
-}
-
-static void poll_set_started(AioContext *ctx, bool started)
-{
-    AioHandler *node;
-
-    if (started == ctx->poll_started) {
-        return;
-    }
-
-    ctx->poll_started = started;
-
-    qemu_lockcnt_inc(&ctx->list_lock);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        IOHandler *fn;
-
-        if (node->deleted) {
-            continue;
-        }
-
-        if (started) {
-            fn = node->io_poll_begin;
-        } else {
-            fn = node->io_poll_end;
-        }
-
-        if (fn) {
-            fn(node->opaque);
-        }
-    }
-    qemu_lockcnt_dec(&ctx->list_lock);
-}
-
-
 bool aio_prepare(AioContext *ctx)
 {
-    /* Poll mode cannot be used with glib's event loop, disable it. */
-    poll_set_started(ctx, false);
-
    return false;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-    bool result = false;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
-            result = true;
-            break;
+            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
-            result = true;
-            break;
+            return true;
        }
    }
-    qemu_lockcnt_dec(&ctx->list_lock);

-    return result;
+    return false;
 }

-static bool aio_dispatch_handlers(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
-    AioHandler *node, *tmp;
+    AioHandler *node;
    bool progress = false;

-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        progress = true;
+    }
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
        int revents;

+        ctx->walking_handlers++;
+
        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

@@ -411,28 +337,23 @@ static bool aio_dispatch_handlers(AioContext *ctx)
            progress = true;
        }

-        if (node->deleted) {
-            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-                QLIST_REMOVE(node, node);
-                g_free(node);
-                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-            }
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
        }
    }

+    /* Run our timers */
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
    return progress;
 }

-void aio_dispatch(AioContext *ctx)
-{
-    qemu_lockcnt_inc(&ctx->list_lock);
-    aio_bh_poll(ctx);
-    aio_dispatch_handlers(ctx);
-    qemu_lockcnt_dec(&ctx->list_lock);
-
-    timerlistgroup_run_timers(&ctx->tlg);
-}
-
 /* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
@@ -479,101 +400,15 @@ static void add_pollfd(AioHandler *node)
    npfd++;
 }

-static bool run_poll_handlers_once(AioContext *ctx)
-{
-    bool progress = false;
-    AioHandler *node;
-
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_poll &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_poll(node->opaque)) {
-            progress = true;
-        }
-
-        /* Caller handles freeing deleted nodes.  Don't do it here. */
-    }
-
-    return progress;
-}
-
-/* run_poll_handlers:
- * @ctx: the AioContext
- * @max_ns: maximum time to poll for, in nanoseconds
- *
- * Polls for a given time.
- *
- * Note that ctx->notify_me must be non-zero so this function can detect
- * aio_notify().
- *
- * Note that the caller must have incremented ctx->list_lock.
- *
- * Returns: true if progress was made, false otherwise
- */
-static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
-{
-    bool progress;
-    int64_t end_time;
-
-    assert(ctx->notify_me);
-    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
-    assert(ctx->poll_disable_cnt == 0);
-
-    trace_run_poll_handlers_begin(ctx, max_ns);
-
-    end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
-
-    do {
-        progress = run_poll_handlers_once(ctx);
-    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
-
-    trace_run_poll_handlers_end(ctx, progress);
-
-    return progress;
-}
-
-/* try_poll_mode:
- * @ctx: the AioContext
- * @blocking: busy polling is only attempted when blocking is true
- *
- * ctx->notify_me must be non-zero so this function can detect aio_notify().
- *
- * Note that the caller must have incremented ctx->list_lock.
- *
- * Returns: true if progress was made, false otherwise
- */
-static bool try_poll_mode(AioContext *ctx, bool blocking)
-{
-    if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
-        /* See qemu_soonest_timeout() uint64_t hack */
-        int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
-                             (uint64_t)ctx->poll_ns);
-
-        if (max_ns) {
-            poll_set_started(ctx, true);
-
-            if (run_poll_handlers(ctx, max_ns)) {
-                return true;
-            }
-        }
-    }
-
-    poll_set_started(ctx, false);
-
-    /* Even if we don't run busy polling, try polling once in case it can make
-     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
-     */
-    return run_poll_handlers_once(ctx);
-}
-
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    int i;
-    int ret = 0;
+    int i, ret;
    bool progress;
    int64_t timeout;
-    int64_t start = 0;
+
+    aio_context_acquire(ctx);
+    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
@@ -586,86 +421,43 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

-    qemu_lockcnt_inc(&ctx->list_lock);
+    ctx->walking_handlers++;

-    if (ctx->poll_max_ns) {
-        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    }
+    assert(npfd == 0);

-    progress = try_poll_mode(ctx, blocking);
-    if (!progress) {
-        assert(npfd == 0);
+    /* fill pollfds */

-        /* fill pollfds */
-
-        if (!aio_epoll_enabled(ctx)) {
-            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-                if (!node->deleted && node->pfd.events
-                    && aio_node_check(ctx, node->is_external)) {
-                    add_pollfd(node);
-                }
+    if (!aio_epoll_enabled(ctx)) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            if (!node->deleted && node->pfd.events
+                && aio_node_check(ctx, node->is_external)) {
+                add_pollfd(node);
            }
        }
-
-        timeout = blocking ? aio_compute_timeout(ctx) : 0;
-
-        /* wait until next event */
-        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
-            AioHandler epoll_handler;
-
-            epoll_handler.pfd.fd = ctx->epollfd;
-            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
-            npfd = 0;
-            add_pollfd(&epoll_handler);
-            ret = aio_epoll(ctx, pollfds, npfd, timeout);
-        } else  {
-            ret = qemu_poll_ns(pollfds, npfd, timeout);
-        }
    }

+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+
+    /* wait until next event */
+    if (timeout) {
+        aio_context_release(ctx);
+    }
+    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+        AioHandler epoll_handler;
+
+        epoll_handler.pfd.fd = ctx->epollfd;
+        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+        npfd = 0;
+        add_pollfd(&epoll_handler);
+        ret = aio_epoll(ctx, pollfds, npfd, timeout);
+    } else  {
+        ret = qemu_poll_ns(pollfds, npfd, timeout);
+    }
    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }
-
-    /* Adjust polling time */
-    if (ctx->poll_max_ns) {
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll_ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll_ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
-            } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll_ns);
-        }
+    if (timeout) {
+        aio_context_acquire(ctx);
    }

    aio_notify_accept(ctx);
@@ -678,29 +470,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    npfd = 0;
+    ctx->walking_handlers--;

-    progress |= aio_bh_poll(ctx);
-
-    if (ret > 0) {
-        progress |= aio_dispatch_handlers(ctx);
+    /* Run dispatch even if there were no readable fds to run timers */
+    if (aio_dispatch(ctx)) {
+        progress = true;
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
-
-    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    aio_context_release(ctx);

    return progress;
 }

 void aio_context_setup(AioContext *ctx)
 {
-    /* TODO remove this in final patch submission */
-    if (getenv("QEMU_AIO_POLL_MAX_NS")) {
-        fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
-                "been replaced with -object iothread,poll-max-ns=NUM\n");
-        exit(1);
-    }
-
 #ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
@@ -712,17 +495,3 @@ void aio_context_setup(AioContext *ctx)
    }
 #endif
 }
-
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
-                                 int64_t grow, int64_t shrink, Error **errp)
-{
-    /* No thread synchronization here, it doesn't matter if an incorrect value
-     * is used once.
-     */
-    ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
-    ctx->poll_grow = grow;
-    ctx->poll_shrink = shrink;
-
-    aio_notify(ctx);
-}
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -20,8 +20,6 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
-#include "qapi/error.h"
-#include "qemu/rcu_queue.h"

 struct AioHandler {
    EventNotifier *e;
@@ -40,13 +38,11 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
-                        AioPollFn *io_poll,
                        void *opaque)
 {
    /* fd is a SOCKET in our case */
    AioHandler *node;

-    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
@@ -56,14 +52,14 @@ void aio_set_fd_handler(AioContext *ctx,
    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
-            /* If aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
+                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -71,13 +67,12 @@ void aio_set_fd_handler(AioContext *ctx,
        }
    } else {
        HANDLE event;
-        long bitmask = 0;

        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
        }

        node->pfd.events = 0;
@@ -96,38 +91,22 @@ void aio_set_fd_handler(AioContext *ctx,
        node->io_write = io_write;
        node->is_external = is_external;

-        if (io_read) {
-            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
-        }
-
-        if (io_write) {
-            bitmask |= FD_WRITE | FD_CONNECT;
-        }
-
        event = event_notifier_get_handle(&ctx->notifier);
-        WSAEventSelect(node->pfd.fd, event, bitmask);
+        WSAEventSelect(node->pfd.fd, event,
+                       FD_READ | FD_ACCEPT | FD_CLOSE |
+                       FD_CONNECT | FD_WRITE | FD_OOB);
    }

-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
-{
-    /* Not implemented */
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
-                            EventNotifierHandler *io_notify,
-                            AioPollFn *io_poll)
+                            EventNotifierHandler *io_notify)
 {
    AioHandler *node;

-    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
@@ -139,14 +118,14 @@ void aio_set_event_notifier(AioContext *ctx,
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

-            /* aio_poll is in progress, just mark the node as deleted */
-            if (qemu_lockcnt_count(&ctx->list_lock)) {
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the list_lock.
+                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -160,7 +139,7 @@ void aio_set_event_notifier(AioContext *ctx,
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
-            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
@@ -168,18 +147,9 @@ void aio_set_event_notifier(AioContext *ctx,
        node->io_notify = io_notify;
    }

-    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

-void aio_set_event_notifier_poll(AioContext *ctx,
-                                 EventNotifier *notifier,
-                                 EventNotifierHandler *io_poll_begin,
-                                 EventNotifierHandler *io_poll_end)
-{
-    /* Not implemented */
-}
-
 bool aio_prepare(AioContext *ctx)
 {
    static struct timeval tv0;
@@ -187,16 +157,10 @@ bool aio_prepare(AioContext *ctx)
    bool have_select_revents = false;
    fd_set rfds, wfds;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-
    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
@@ -206,7 +170,7 @@ bool aio_prepare(AioContext *ctx)
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
@@ -220,53 +184,45 @@ bool aio_prepare(AioContext *ctx)
        }
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-    bool result = false;

-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
-     * called while we're walking.
-     */
-    qemu_lockcnt_inc(&ctx->list_lock);
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
-            result = true;
-            break;
+            return true;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            result = true;
-            break;
+            return true;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            result = true;
-            break;
+            return true;
        }
    }

-    qemu_lockcnt_dec(&ctx->list_lock);
-    return result;
+    return false;
 }

 static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
 {
    AioHandler *node;
    bool progress = false;
-    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
        int revents = node->pfd.revents;

+        ctx->walking_handlers++;
+
        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
@@ -301,25 +257,28 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }

-        if (node->deleted) {
-            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-                QLIST_REMOVE(node, node);
-                g_free(node);
-                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-            }
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
        }
    }

    return progress;
 }

-void aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx)
 {
-    qemu_lockcnt_inc(&ctx->list_lock);
-    aio_bh_poll(ctx);
-    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
-    qemu_lockcnt_dec(&ctx->list_lock);
-    timerlistgroup_run_timers(&ctx->tlg);
+    bool progress;
+
+    progress = aio_bh_poll(ctx);
+    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+    return progress;
 }

 bool aio_poll(AioContext *ctx, bool blocking)
@@ -330,6 +289,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
    int count;
    int timeout;

+    aio_context_acquire(ctx);
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
@@ -343,18 +303,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

-    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

+    ctx->walking_handlers++;
+
    /* fill fd sets */
    count = 0;
-    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

+    ctx->walking_handlers--;
    first = true;

    /* ctx->notifier is always registered.  */
@@ -370,11 +332,17 @@ bool aio_poll(AioContext *ctx, bool blocking)

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
+        if (timeout) {
+            aio_context_release(ctx);
+        }
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            atomic_sub(&ctx->notify_me, 2);
        }
+        if (timeout) {
+            aio_context_acquire(ctx);
+        }

        if (first) {
            aio_notify_accept(ctx);
@@ -397,18 +365,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

-    qemu_lockcnt_dec(&ctx->list_lock);
-
    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
+    aio_context_release(ctx);
    return progress;
 }

 void aio_context_setup(AioContext *ctx)
 {
 }
-
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
-                                 int64_t grow, int64_t shrink, Error **errp)
-{
-    error_setg(errp, "AioContext polling is not implemented on Windows");
-}
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,7 +27,8 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/arch_init.h"
 #include "hw/pci/pci.h"
-#include "hw/audio/soundhw.h"
+#include "hw/audio/audio.h"
+#include "hw/smbios/smbios.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
@@ -63,8 +64,6 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_MIPS
 #elif defined(TARGET_MOXIE)
 #define QEMU_ARCH QEMU_ARCH_MOXIE
-#elif defined(TARGET_NIOS2)
-#define QEMU_ARCH QEMU_ARCH_NIOS2
 #elif defined(TARGET_OPENRISC)
 #define QEMU_ARCH QEMU_ARCH_OPENRISC
 #elif defined(TARGET_PPC)
@@ -85,6 +84,177 @@ int graphic_depth = 32;

 const uint32_t arch_type = QEMU_ARCH;

+static struct defconfig_file {
+    const char *filename;
+    /* Indicates it is an user config file (disabled by -no-user-config) */
+    bool userconfig;
+} default_config_files[] = {
+    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
+    { NULL }, /* end of list */
+};
+
+int qemu_read_default_config_files(bool userconfig)
+{
+    int ret;
+    struct defconfig_file *f;
+
+    for (f = default_config_files; f->filename; f++) {
+        if (!userconfig && f->userconfig) {
+            continue;
+        }
+        ret = qemu_read_config_file(f->filename);
+        if (ret < 0 && ret != -ENOENT) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+struct soundhw {
+    const char *name;
+    const char *descr;
+    int enabled;
+    int isa;
+    union {
+        int (*init_isa) (ISABus *bus);
+        int (*init_pci) (PCIBus *bus);
+    } init;
+};
+
+static struct soundhw soundhw[9];
+static int soundhw_count;
+
+void isa_register_soundhw(const char *name, const char *descr,
+                          int (*init_isa)(ISABus *bus))
+{
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
+    soundhw[soundhw_count].name = name;
+    soundhw[soundhw_count].descr = descr;
+    soundhw[soundhw_count].isa = 1;
+    soundhw[soundhw_count].init.init_isa = init_isa;
+    soundhw_count++;
+}
+
+void pci_register_soundhw(const char *name, const char *descr,
+                          int (*init_pci)(PCIBus *bus))
+{
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
+    soundhw[soundhw_count].name = name;
+    soundhw[soundhw_count].descr = descr;
+    soundhw[soundhw_count].isa = 0;
+    soundhw[soundhw_count].init.init_pci = init_pci;
+    soundhw_count++;
+}
+
+void select_soundhw(const char *optarg)
+{
+    struct soundhw *c;
+
+    if (is_help_option(optarg)) {
+    show_valid_cards:
+
+        if (soundhw_count) {
+             printf("Valid sound card names (comma separated):\n");
+             for (c = soundhw; c->name; ++c) {
+                 printf ("%-11s %s\n", c->name, c->descr);
+             }
+             printf("\n-soundhw all will enable all of the above\n");
+        } else {
+             printf("Machine has no user-selectable audio hardware "
+                    "(it may or may not have always-present audio hardware).\n");
+        }
+        exit(!is_help_option(optarg));
+    }
+    else {
+        size_t l;
+        const char *p;
+        char *e;
+        int bad_card = 0;
+
+        if (!strcmp(optarg, "all")) {
+            for (c = soundhw; c->name; ++c) {
+                c->enabled = 1;
+            }
+            return;
+        }
+
+        p = optarg;
+        while (*p) {
+            e = strchr(p, ',');
+            l = !e ? strlen(p) : (size_t) (e - p);
+
+            for (c = soundhw; c->name; ++c) {
+                if (!strncmp(c->name, p, l) && !c->name[l]) {
+                    c->enabled = 1;
+                    break;
+                }
+            }
+
+            if (!c->name) {
+                if (l > 80) {
+                    error_report("Unknown sound card name (too big to show)");
+                }
+                else {
+                    error_report("Unknown sound card name `%.*s'",
+                                 (int) l, p);
+                }
+                bad_card = 1;
+            }
+            p += l + (e != NULL);
+        }
+
+        if (bad_card) {
+            goto show_valid_cards;
+        }
+    }
+}
+
+void audio_init(void)
+{
+    struct soundhw *c;
+    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
+    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
+
+    for (c = soundhw; c->name; ++c) {
+        if (c->enabled) {
+            if (c->isa) {
+                if (!isa_bus) {
+                    error_report("ISA bus not available for %s", c->name);
+                    exit(1);
+                }
+                c->init.init_isa(isa_bus);
+            } else {
+                if (!pci_bus) {
+                    error_report("PCI bus not available for %s", c->name);
+                    exit(1);
+                }
+                c->init.init_pci(pci_bus);
+            }
+        }
+    }
+}
+
+void do_acpitable_option(const QemuOpts *opts)
+{
+#ifdef TARGET_I386
+    Error *err = NULL;
+
+    acpi_table_add(opts, &err);
+    if (err) {
+        error_reportf_err(err, "Wrong acpi table provided: ");
+        exit(1);
+    }
+#endif
+}
+
+void do_smbios_option(QemuOpts *opts)
+{
+#ifdef TARGET_I386
+    smbios_entry_add(opts);
+#endif
+}
+
 int kvm_available(void)
 {
 #ifdef CONFIG_KVM
--- a/util/async.c
+++ b/util/async.c
@@ -1,8 +1,7 @@
 /*
- * Data plane event loop
+ * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -31,8 +30,6 @@
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "block/raw-aio.h"
-#include "qemu/coroutine_int.h"
-#include "trace.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -56,14 +53,14 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_lockcnt_lock(&ctx->list_lock);
+    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);
    aio_notify(ctx);
 }

@@ -76,12 +73,12 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_lockcnt_lock(&ctx->list_lock);
+    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);
    return bh;
 }

@@ -90,19 +87,19 @@ void aio_bh_call(QEMUBH *bh)
    bh->cb(bh->opaque);
 }

-/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
- * The count in ctx->list_lock is incremented before the call, and is
- * not affected by the call.
- */
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
 int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
    int ret;
-    bool deleted = false;
+
+    ctx->walking_bh++;

    ret = 0;
-    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
-        next = atomic_rcu_read(&bh->next);
+    for (bh = ctx->first_bh; bh; bh = next) {
+        /* Make sure that fetching bh happens before accessing its members */
+        smp_read_barrier_depends();
+        next = bh->next;
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
@@ -117,17 +114,13 @@ int aio_bh_poll(AioContext *ctx)
            bh->idle = 0;
            aio_bh_call(bh);
        }
-        if (bh->deleted) {
-            deleted = true;
-        }
    }

+    ctx->walking_bh--;
+
    /* remove deleted bhs */
-    if (!deleted) {
-        return ret;
-    }
-
-    if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+    if (!ctx->walking_bh) {
+        qemu_mutex_lock(&ctx->bh_lock);
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
@@ -138,8 +131,9 @@ int aio_bh_poll(AioContext *ctx)
                bhp = &bh->next;
            }
        }
-        qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+        qemu_mutex_unlock(&ctx->bh_lock);
    }
+
    return ret;
 }

@@ -193,8 +187,7 @@ aio_compute_timeout(AioContext *ctx)
    int timeout = -1;
    QEMUBH *bh;

-    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
-         bh = atomic_rcu_read(&bh->next)) {
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
@@ -277,11 +270,7 @@ aio_ctx_finalize(GSource     *source)
    }
 #endif

-    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
-    qemu_bh_delete(ctx->co_schedule_bh);
-
-    qemu_lockcnt_lock(&ctx->list_lock);
-    assert(!qemu_lockcnt_count(&ctx->list_lock));
+    qemu_mutex_lock(&ctx->bh_lock);
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

@@ -291,12 +280,12 @@ aio_ctx_finalize(GSource     *source)
        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
-    qemu_lockcnt_unlock(&ctx->list_lock);
+    qemu_mutex_unlock(&ctx->bh_lock);

-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
-    qemu_lockcnt_destroy(&ctx->list_lock);
+    qemu_mutex_destroy(&ctx->bh_lock);
    timerlistgroup_deinit(&ctx->tlg);
 }

@@ -351,7 +340,7 @@ void aio_notify_accept(AioContext *ctx)
    }
 }

-static void aio_timerlist_notify(void *opaque, QEMUClockType type)
+static void aio_timerlist_notify(void *opaque)
 {
    aio_notify(opaque);
 }
@@ -360,39 +349,6 @@ static void event_notifier_dummy_cb(EventNotifier *e)
 {
 }

-/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
-static bool event_notifier_poll(void *opaque)
-{
-    EventNotifier *e = opaque;
-    AioContext *ctx = container_of(e, AioContext, notifier);
-
-    return atomic_read(&ctx->notified);
-}
-
-static void co_schedule_bh_cb(void *opaque)
-{
-    AioContext *ctx = opaque;
-    QSLIST_HEAD(, Coroutine) straight, reversed;
-
-    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
-    QSLIST_INIT(&straight);
-
-    while (!QSLIST_EMPTY(&reversed)) {
-        Coroutine *co = QSLIST_FIRST(&reversed);
-        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
-        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
-    }
-
-    while (!QSLIST_EMPTY(&straight)) {
-        Coroutine *co = QSLIST_FIRST(&straight);
-        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
-        trace_aio_co_schedule_bh_cb(ctx, co);
-        aio_context_acquire(ctx);
-        qemu_coroutine_enter(co);
-        aio_context_release(ctx);
-    }
-}
-
 AioContext *aio_context_new(Error **errp)
 {
    int ret;
@@ -407,73 +363,24 @@ AioContext *aio_context_new(Error **errp)
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
-    qemu_lockcnt_init(&ctx->list_lock);
-
-    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
-    QSLIST_INIT(&ctx->scheduled_coroutines);
-
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
-                           event_notifier_dummy_cb,
-                           event_notifier_poll);
+                           event_notifier_dummy_cb);
 #ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
 #endif
    ctx->thread_pool = NULL;
+    qemu_mutex_init(&ctx->bh_lock);
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->poll_ns = 0;
-    ctx->poll_max_ns = 0;
-    ctx->poll_grow = 0;
-    ctx->poll_shrink = 0;
-
    return ctx;
 fail:
    g_source_destroy(&ctx->source);
    return NULL;
 }

-void aio_co_schedule(AioContext *ctx, Coroutine *co)
-{
-    trace_aio_co_schedule(ctx, co);
-    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
-                              co, co_scheduled_next);
-    qemu_bh_schedule(ctx->co_schedule_bh);
-}
-
-void aio_co_wake(struct Coroutine *co)
-{
-    AioContext *ctx;
-
-    /* Read coroutine before co->ctx.  Matches smp_wmb in
-     * qemu_coroutine_enter.
-     */
-    smp_read_barrier_depends();
-    ctx = atomic_read(&co->ctx);
-
-    aio_co_enter(ctx, co);
-}
-
-void aio_co_enter(AioContext *ctx, struct Coroutine *co)
-{
-    if (ctx != qemu_get_current_aio_context()) {
-        aio_co_schedule(ctx, co);
-        return;
-    }
-
-    if (qemu_in_coroutine()) {
-        Coroutine *self = qemu_coroutine_self();
-        assert(self != co);
-        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
-    } else {
-        aio_context_acquire(ctx);
-        qemu_aio_coroutine_enter(ctx, co);
-        aio_context_release(ctx);
-    }
-}
-
 void aio_context_ref(AioContext *ctx)
 {
    g_source_ref(&ctx->source);
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -28,7 +28,6 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "qemu/cutils.h"
-#include "sysemu/replay.h"

 #define AUDIO_CAP "audio"
 #include "audio_int.h"
@@ -1113,7 +1112,7 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
    if (audio_is_timer_needed ()) {
-        timer_mod_anticipate_ns(s->ts,
+        timer_mod (s->ts,
            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
    }
    else {
@@ -1388,7 +1387,6 @@ static void audio_run_out (AudioState *s)

        prev_rpos = hw->rpos;
        played = hw->pcm_ops->run_out (hw, live);
-        replay_audio_out(&played);
        if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
            dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
                   hw->rpos, hw->samples, played);
@@ -1452,12 +1450,9 @@ static void audio_run_in (AudioState *s)

    while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
        SWVoiceIn *sw;
-        int captured = 0, min;
+        int captured, min;

-        if (replay_mode != REPLAY_MODE_PLAY) {
-            captured = hw->pcm_ops->run_in(hw);
-        }
-        replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
+        captured = hw->pcm_ops->run_in (hw);

        min = audio_pcm_hw_find_min_in (hw);
        hw->total_samples_captured += captured - min;
@@ -2028,8 +2023,6 @@ void AUD_del_capture (CaptureVoiceOut *cap, void *cb_opaque)
                    sw = sw1;
                }
                QLIST_REMOVE (cap, entries);
-                g_free (cap->hw.mix_buf);
-                g_free (cap->buf);
                g_free (cap);
            }
            return;
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -166,9 +166,4 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
 bool audio_is_cleaning_up(void);
 void audio_cleanup(void);

-void audio_sample_to_uint64(void *samples, int pos,
-                            uint64_t *left, uint64_t *right);
-void audio_sample_from_uint64(void *samples, int pos,
-                            uint64_t left, uint64_t right);
-
 #endif /* QEMU_AUDIO_H */
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -25,7 +25,6 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/bswap.h"
-#include "qemu/error-report.h"
 #include "audio.h"

 #define AUDIO_CAP "mixeng"
@@ -268,37 +267,6 @@ f_sample *mixeng_clip[2][2][2][3] = {
    }
 };

-
-void audio_sample_to_uint64(void *samples, int pos,
-                            uint64_t *left, uint64_t *right)
-{
-    struct st_sample *sample = samples;
-    sample += pos;
-#ifdef FLOAT_MIXENG
-    error_report(
-        "Coreaudio and floating point samples are not supported by replay yet");
-    abort();
-#else
-    *left = sample->l;
-    *right = sample->r;
-#endif
-}
-
-void audio_sample_from_uint64(void *samples, int pos,
-                            uint64_t left, uint64_t right)
-{
-    struct st_sample *sample = samples;
-    sample += pos;
-#ifdef FLOAT_MIXENG
-    error_report(
-        "Coreaudio and floating point samples are not supported by replay yet");
-    abort();
-#else
-    sample->l = left;
-    sample->r = right;
-#endif
-}
-
 /*
 * August 21, 1998
 * Copyright 1998 Fabrice Bellard.
--- a/audio/rate_template.h
+++ b/audio/rate_template.h
@@ -71,12 +71,6 @@ void NAME (void *opaque, struct st_sample *ibuf, struct st_sample *obuf,
        while (rate->ipos <= (rate->opos >> 32)) {
            ilast = *ibuf++;
            rate->ipos++;
-
-            /* if ipos overflow, there is  a infinite loop */
-            if (rate->ipos == 0xffffffff) {
-                rate->ipos = 1;
-                rate->opos = rate->opos & 0xffffffff;
-            }
            /* See if we finished the input buffer yet */
            if (ibuf >= iend) {
                goto the_end;
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -38,14 +38,10 @@
 #define AUDIO_CAP "sdl"
 #include "audio_int.h"

-#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
-
 typedef struct SDLVoiceOut {
    HWVoiceOut hw;
    int live;
-#if USE_SEMAPHORE
    int rpos;
-#endif
    int decr;
 } SDLVoiceOut;

@@ -57,10 +53,8 @@ static struct {

 static struct SDLAudioState {
    int exit;
-#if USE_SEMAPHORE
    SDL_mutex *mutex;
    SDL_sem *sem;
-#endif
    int initialized;
    bool driver_created;
 } glob_sdl;
@@ -79,45 +73,31 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)

 static int sdl_lock (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_LockMutex (s->mutex)) {
        sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
        return -1;
    }
-#else
-    SDL_LockAudio();
-#endif
-
    return 0;
 }

 static int sdl_unlock (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_UnlockMutex (s->mutex)) {
        sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
        return -1;
    }
-#else
-    SDL_UnlockAudio();
-#endif
-
    return 0;
 }

 static int sdl_post (SDLAudioState *s, const char *forfn)
 {
-#if USE_SEMAPHORE
    if (SDL_SemPost (s->sem)) {
        sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
        return -1;
    }
-#endif
-
    return 0;
 }

-#if USE_SEMAPHORE
 static int sdl_wait (SDLAudioState *s, const char *forfn)
 {
    if (SDL_SemWait (s->sem)) {
@@ -126,7 +106,6 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
    }
    return 0;
 }
-#endif

 static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
 {
@@ -267,7 +246,6 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        int to_mix, decr;

        /* dolog ("in callback samples=%d\n", samples); */
-#if USE_SEMAPHORE
        sdl_wait (s, "sdl_callback");
        if (s->exit) {
            return;
@@ -286,11 +264,6 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        if (!sdl->live) {
            goto again;
        }
-#else
-        if (s->exit || !sdl->live) {
-            break;
-        }
-#endif

        /* dolog ("in callback live=%d\n", live); */
        to_mix = audio_MIN (samples, sdl->live);
@@ -301,11 +274,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)

            /* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
            hw->clip (buf, src, chunk);
-#if USE_SEMAPHORE
            sdl->rpos = (sdl->rpos + chunk) % hw->samples;
-#else
-            hw->rpos = (hw->rpos + chunk) % hw->samples;
-#endif
            to_mix -= chunk;
            buf += chunk << hw->info.shift;
        }
@@ -313,21 +282,12 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        sdl->live -= decr;
        sdl->decr += decr;

-#if USE_SEMAPHORE
    again:
        if (sdl_unlock (s, "sdl_callback")) {
            return;
        }
-#endif
    }
    /* dolog ("done len=%d\n", len); */
-
-#if (SDL_MAJOR_VERSION >= 2)
-    /* SDL2 does not clear the remaining buffer for us, so do it on our own */
-    if (samples) {
-        memset(buf, 0, samples << hw->info.shift);
-    }
-#endif
 }

 static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -355,12 +315,8 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
    decr = audio_MIN (sdl->decr, live);
    sdl->decr -= decr;

-#if USE_SEMAPHORE
    sdl->live = live - decr;
    hw->rpos = sdl->rpos;
-#else
-    sdl->live = live;
-#endif

    if (sdl->live > 0) {
        sdl_unlock_and_post (s, "sdl_run_out");
@@ -449,7 +405,6 @@ static void *sdl_audio_init (void)
        return NULL;
    }

-#if USE_SEMAPHORE
    s->mutex = SDL_CreateMutex ();
    if (!s->mutex) {
        sdl_logerr ("Failed to create SDL mutex\n");
@@ -464,7 +419,6 @@ static void *sdl_audio_init (void)
        SDL_QuitSubSystem (SDL_INIT_AUDIO);
        return NULL;
    }
-#endif

    s->driver_created = true;
    return s;
@@ -474,10 +428,8 @@ static void sdl_audio_fini (void *opaque)
 {
    SDLAudioState *s = opaque;
    sdl_close (s);
-#if USE_SEMAPHORE
    SDL_DestroySemaphore (s->sem);
    SDL_DestroyMutex (s->mutex);
-#endif
    SDL_QuitSubSystem (SDL_INIT_AUDIO);
    s->driver_created = false;
 }
--- a/audio/trace-events
+++ b/audio/trace-events
@@ -1,9 +1,9 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/tracing.txt for syntax documentation.

 # audio/alsaaudio.c
 alsa_revents(int revents) "revents = %d"
 alsa_pollout(int i, int fd) "i = %d fd = %d"
-alsa_set_handler(int events, int index, int fd, int err) "events=0x%x index=%d fd=%d err=%d"
+alsa_set_handler(int events, int index, int fd, int err) "events=%#x index=%d fd=%d err=%d"
 alsa_wrote_zero(int len) "Failed to write %d frames (wrote zero)"
 alsa_read_zero(long len) "Failed to read %ld frames (read zero)"
 alsa_xrun_out(void) "Recovering from playback xrun"
@@ -13,5 +13,5 @@ alsa_resume_in(void) "Resuming suspended input stream"
 alsa_no_frames(int state) "No frames available and ALSA state is %d"

 # audio/ossaudio.c
-oss_version(int version) "OSS version = 0x%x"
+oss_version(int version) "OSS version = %#x"
 oss_invalid_available_size(int size, int bufsize) "Invalid available size, size=%d bufsize=%d"
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -88,7 +88,6 @@ static void wav_capture_destroy (void *opaque)
    WAVState *wav = opaque;

    AUD_del_capture (wav->cap, wav);
-    g_free (wav);
 }

 static void wav_capture_info (void *opaque)
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -1,6 +1,10 @@
 common-obj-y += rng.o rng-egd.o
 common-obj-$(CONFIG_POSIX) += rng-random.o

+common-obj-y += msmouse.o testdev.o
+common-obj-$(CONFIG_BRLAPI) += baum.o
+baum.o-cflags := $(SDL_CFLAGS)
+
 common-obj-$(CONFIG_TPM) += tpm.o

 common-obj-y += hostmem.o hostmem-ram.o
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -24,13 +24,15 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"
 #include "qemu/timer.h"
 #include "hw/usb.h"
-#include "ui/console.h"
 #include <brlapi.h>
 #include <brlapi_constants.h>
 #include <brlapi_keycodes.h>
+#ifdef CONFIG_SDL
+#include <SDL_syswm.h>
+#endif

 #if 0
 #define DPRINTF(fmt, ...) \
@@ -85,7 +87,7 @@
 #define BUF_SIZE 256

 typedef struct {
-    Chardev parent;
+    CharDriverState *chr;

    brlapi_handle_t *brlapi;
    int brlapi_fd;
@@ -98,10 +100,7 @@ typedef struct {
    uint8_t out_buf_used, out_buf_ptr;

    QEMUTimer *cellCount_timer;
-} BaumChardev;
-
-#define TYPE_CHARDEV_BRAILLE "chardev-braille"
-#define BAUM_CHARDEV(obj) OBJECT_CHECK(BaumChardev, (obj), TYPE_CHARDEV_BRAILLE)
+} BaumDriverState;

 /* Let's assume NABCC by default */
 enum way {
@@ -226,10 +225,14 @@ static const uint8_t nabcc_translation[2][256] = {
 };

 /* The guest OS has started discussing with us, finish initializing BrlAPI */
-static int baum_deferred_init(BaumChardev *baum)
+static int baum_deferred_init(BaumDriverState *baum)
 {
-    int tty = BRLAPI_TTY_DEFAULT;
-    QemuConsole *con;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+    SDL_SysWMinfo info;
+#endif
+#endif
+    int tty;

    if (baum->deferred_init) {
        return 1;
@@ -240,12 +243,21 @@ static int baum_deferred_init(BaumChardev *baum)
        return 0;
    }

-    con = qemu_console_lookup_by_index(0);
-    if (con && qemu_console_is_graphic(con)) {
-        tty = qemu_console_get_window_id(con);
-        if (tty == -1)
-            tty = BRLAPI_TTY_DEFAULT;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+    memset(&info, 0, sizeof(info));
+    SDL_VERSION(&info.version);
+    if (SDL_GetWMInfo(&info)) {
+        tty = info.info.x11.wmwindow;
+    } else {
+#endif
+#endif
+        tty = BRLAPI_TTY_DEFAULT;
+#if defined(CONFIG_SDL)
+#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    }
+#endif
+#endif

    if (brlapi__enterTtyMode(baum->brlapi, tty, NULL) == -1) {
        brlapi_perror("baum: brlapi__enterTtyMode");
@@ -256,9 +268,9 @@ static int baum_deferred_init(BaumChardev *baum)
 }

 /* The serial port can receive more of our data */
-static void baum_chr_accept_input(struct Chardev *chr)
+static void baum_accept_input(struct CharDriverState *chr)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    BaumDriverState *baum = chr->opaque;
    int room, first;

    if (!baum->out_buf_used)
@@ -282,25 +294,24 @@ static void baum_chr_accept_input(struct Chardev *chr)
 }

 /* We want to send a packet */
-static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
+static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 {
-    Chardev *chr = CHARDEV(baum);
    uint8_t io_buf[1 + 2 * len], *cur = io_buf;
    int room;
    *cur++ = ESC;
    while (len--)
        if ((*cur++ = *buf++) == ESC)
            *cur++ = ESC;
-    room = qemu_chr_be_can_write(chr);
+    room = qemu_chr_be_can_write(baum->chr);
    len = cur - io_buf;
    if (len <= room) {
        /* Fits */
-        qemu_chr_be_write(chr, io_buf, len);
+        qemu_chr_be_write(baum->chr, io_buf, len);
    } else {
        int first;
        uint8_t out;
        /* Can't fit all, send what can be, and store the rest. */
-        qemu_chr_be_write(chr, io_buf, room);
+        qemu_chr_be_write(baum->chr, io_buf, room);
        len -= room;
        cur = io_buf + room;
        if (len > BUF_SIZE - baum->out_buf_used) {
@@ -325,14 +336,14 @@ static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
 /* Called when the other end seems to have a wrong idea of our display size */
 static void baum_cellCount_timer_cb(void *opaque)
 {
-    BaumChardev *baum = BAUM_CHARDEV(opaque);
+    BaumDriverState *baum = opaque;
    uint8_t cell_count[] = { BAUM_RSP_CellCount, baum->x * baum->y };
    DPRINTF("Timeout waiting for DisplayData, sending cell count\n");
    baum_write_packet(baum, cell_count, sizeof(cell_count));
 }

 /* Try to interpret a whole incoming packet */
-static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
+static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 {
    const uint8_t *cur = buf;
    uint8_t req = 0;
@@ -473,9 +484,9 @@ static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
+static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    BaumDriverState *baum = chr->opaque;
    int tocopy, cur, eaten, orig_len = len;

    if (!len)
@@ -514,16 +525,14 @@ static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
 }

 /* Send the key code to the other end */
-static void baum_send_key(BaumChardev *baum, uint8_t type, uint8_t value)
-{
+static void baum_send_key(BaumDriverState *baum, uint8_t type, uint8_t value) {
    uint8_t packet[] = { type, value };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
 }

-static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
-                           uint8_t value2)
-{
+static void baum_send_key2(BaumDriverState *baum, uint8_t type, uint8_t value,
+                           uint8_t value2) {
    uint8_t packet[] = { type, value, value2 };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
@@ -532,7 +541,7 @@ static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
 /* We got some data on the BrlAPI socket */
 static void baum_chr_read(void *opaque)
 {
-    BaumChardev *baum = BAUM_CHARDEV(opaque);
+    BaumDriverState *baum = opaque;
    brlapi_keyCode_t code;
    int ret;
    if (!baum->brlapi)
@@ -616,25 +625,41 @@ static void baum_chr_read(void *opaque)
    }
 }

-static void char_braille_finalize(Object *obj)
+static void baum_free(struct CharDriverState *chr)
 {
-    BaumChardev *baum = BAUM_CHARDEV(obj);
+    BaumDriverState *baum = chr->opaque;

    timer_free(baum->cellCount_timer);
    if (baum->brlapi) {
        brlapi__closeConnection(baum->brlapi);
        g_free(baum->brlapi);
    }
+    g_free(baum);
 }

-static void baum_chr_open(Chardev *chr,
-                          ChardevBackend *backend,
-                          bool *be_opened,
-                          Error **errp)
+static CharDriverState *chr_baum_init(const char *id,
+                                      ChardevBackend *backend,
+                                      ChardevReturn *ret,
+                                      bool *be_opened,
+                                      Error **errp)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    ChardevCommon *common = backend->u.braille.data;
+    BaumDriverState *baum;
+    CharDriverState *chr;
    brlapi_handle_t *handle;

+    chr = qemu_chr_alloc(common, errp);
+    if (!chr) {
+        return NULL;
+    }
+    baum = g_malloc0(sizeof(BaumDriverState));
+    baum->chr = chr;
+
+    chr->opaque = baum;
+    chr->chr_write = baum_write;
+    chr->chr_accept_input = baum_accept_input;
+    chr->chr_free = baum_free;
+
    handle = g_malloc0(brlapi_getHandleSize());
    baum->brlapi = handle;

@@ -642,36 +667,27 @@ static void baum_chr_open(Chardev *chr,
    if (baum->brlapi_fd == -1) {
        error_setg(errp, "brlapi__openConnection: %s",
                   brlapi_strerror(brlapi_error_location()));
-        g_free(handle);
-        return;
+        goto fail_handle;
    }
    baum->deferred_init = 0;

    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);

    qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum);
+
+    return chr;
+
+fail_handle:
+    g_free(handle);
+    g_free(chr);
+    g_free(baum);
+    return NULL;
 }

-static void char_braille_class_init(ObjectClass *oc, void *data)
-{
-    ChardevClass *cc = CHARDEV_CLASS(oc);
-
-    cc->open = baum_chr_open;
-    cc->chr_write = baum_chr_write;
-    cc->chr_accept_input = baum_chr_accept_input;
-}
-
-static const TypeInfo char_braille_type_info = {
-    .name = TYPE_CHARDEV_BRAILLE,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(BaumChardev),
-    .instance_finalize = char_braille_finalize,
-    .class_init = char_braille_class_init,
-};
-
 static void register_types(void)
 {
-    type_register_static(&char_braille_type_info);
+    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
+                         chr_baum_init);
 }

 type_init(register_types);
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -94,8 +94,6 @@ static void cryptodev_builtin_init(
    backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo);
    backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
    backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
-
-    cryptodev_backend_set_ready(backend, true);
 }

 static int
@@ -113,42 +111,23 @@ cryptodev_builtin_get_unused_session_index(
    return -1;
 }

-#define AES_KEYSIZE_128 16
-#define AES_KEYSIZE_192 24
-#define AES_KEYSIZE_256 32
-#define AES_KEYSIZE_128_XTS AES_KEYSIZE_256
-#define AES_KEYSIZE_256_XTS 64
-
 static int
-cryptodev_builtin_get_aes_algo(uint32_t key_len, int mode, Error **errp)
+cryptodev_builtin_get_aes_algo(uint32_t key_len, Error **errp)
 {
    int algo;

-    if (key_len == AES_KEYSIZE_128) {
+    if (key_len == 128 / 8) {
        algo = QCRYPTO_CIPHER_ALG_AES_128;
-    } else if (key_len == AES_KEYSIZE_192) {
+    } else if (key_len == 192 / 8) {
        algo = QCRYPTO_CIPHER_ALG_AES_192;
-    } else if (key_len == AES_KEYSIZE_256) { /* equals AES_KEYSIZE_128_XTS */
-        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-            algo = QCRYPTO_CIPHER_ALG_AES_128;
-        } else {
-            algo = QCRYPTO_CIPHER_ALG_AES_256;
-        }
-    } else if (key_len == AES_KEYSIZE_256_XTS) {
-        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-            algo = QCRYPTO_CIPHER_ALG_AES_256;
-        } else {
-            goto err;
-        }
+    } else if (key_len == 256 / 8) {
+        algo = QCRYPTO_CIPHER_ALG_AES_256;
    } else {
-        goto err;
+        error_setg(errp, "Unsupported key length :%u", key_len);
+        return -1;
    }

    return algo;
-
-err:
-   error_setg(errp, "Unsupported key length :%u", key_len);
-   return -1;
 }

 static int cryptodev_builtin_create_cipher_session(
@@ -176,48 +155,32 @@ static int cryptodev_builtin_create_cipher_session(

    switch (sess_info->cipher_alg) {
    case VIRTIO_CRYPTO_CIPHER_AES_ECB:
-        mode = QCRYPTO_CIPHER_MODE_ECB;
        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
+                                                          errp);
        if (algo < 0)  {
            return -1;
        }
+        mode = QCRYPTO_CIPHER_MODE_ECB;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CBC:
-        mode = QCRYPTO_CIPHER_MODE_CBC;
        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
+                                                          errp);
        if (algo < 0)  {
            return -1;
        }
+        mode = QCRYPTO_CIPHER_MODE_CBC;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CTR:
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                          errp);
+        if (algo < 0)  {
+            return -1;
+        }
        mode = QCRYPTO_CIPHER_MODE_CTR;
-        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
-        if (algo < 0)  {
-            return -1;
-        }
        break;
-    case VIRTIO_CRYPTO_CIPHER_AES_XTS:
-        mode = QCRYPTO_CIPHER_MODE_XTS;
-        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
-                                                    mode, errp);
-        if (algo < 0)  {
-            return -1;
-        }
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
+    case VIRTIO_CRYPTO_CIPHER_DES_ECB:
+        algo = QCRYPTO_CIPHER_ALG_DES_RFB;
        mode = QCRYPTO_CIPHER_MODE_ECB;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
-        mode = QCRYPTO_CIPHER_MODE_CBC;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
-        break;
-    case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
-        mode = QCRYPTO_CIPHER_MODE_CTR;
-        algo = QCRYPTO_CIPHER_ALG_3DES;
        break;
    default:
        error_setg(errp, "Unsupported cipher alg :%u",
@@ -320,12 +283,10 @@ static int cryptodev_builtin_sym_operation(

    sess = builtin->sessions[op_info->session_id];

-    if (op_info->iv_len > 0) {
-        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
-                                   op_info->iv_len, errp);
-        if (ret < 0) {
-            return -VIRTIO_CRYPTO_ERR;
-        }
+    ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+                               op_info->iv_len, errp);
+    if (ret < 0) {
+        return -VIRTIO_CRYPTO_ERR;
    }

    if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
@@ -361,6 +322,8 @@ static void cryptodev_builtin_cleanup(
        }
    }

+    assert(queues == 1);
+
    for (i = 0; i < queues; i++) {
        cc = backend->conf.peers.ccs[i];
        if (cc) {
@@ -368,8 +331,6 @@ static void cryptodev_builtin_cleanup(
            backend->conf.peers.ccs[i] = NULL;
        }
    }
-
-    cryptodev_backend_set_ready(backend, false);
 }

 static void
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -73,6 +73,8 @@ void cryptodev_backend_cleanup(
    if (bc->cleanup) {
        bc->cleanup(backend, errp);
    }
+
+    backend->ready = false;
 }

 int64_t cryptodev_backend_sym_create_session(
@@ -187,42 +189,17 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
            goto out;
        }
    }
-
+    backend->ready = true;
    return;

 out:
+    backend->ready = false;
    error_propagate(errp, local_err);
 }

-void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
-{
-    backend->is_used = used;
-}
-
-bool cryptodev_backend_is_used(CryptoDevBackend *backend)
-{
-    return backend->is_used;
-}
-
-void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready)
-{
-    backend->ready = ready;
-}
-
-bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
-{
-    return backend->ready;
-}
-
-static bool
-cryptodev_backend_can_be_deleted(UserCreatable *uc)
-{
-    return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
-}
-
 static void cryptodev_backend_instance_init(Object *obj)
 {
-    object_property_add(obj, "queues", "uint32",
+    object_property_add(obj, "queues", "int",
                          cryptodev_backend_get_queues,
                          cryptodev_backend_set_queues,
                          NULL, NULL, NULL);
@@ -232,9 +209,7 @@ static void cryptodev_backend_instance_init(Object *obj)

 static void cryptodev_backend_finalize(Object *obj)
 {
-    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);

-    cryptodev_backend_cleanup(backend, NULL);
 }

 static void
@@ -243,7 +218,6 @@ cryptodev_backend_class_init(ObjectClass *oc, void *data)
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = cryptodev_backend_complete;
-    ucc->can_be_deleted = cryptodev_backend_can_be_deleted;

    QTAILQ_INIT(&crypto_clients);
 }
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -32,7 +32,6 @@ struct HostMemoryBackendFile {
    HostMemoryBackend parent_obj;

    bool share;
-    bool discard_data;
    char *mem_path;
 };

@@ -52,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #ifndef CONFIG_LINUX
    error_setg(errp, "-mem-path not supported on this host");
 #else
-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        gchar *path;
        backend->force_prealloc = mem_prealloc;
        path = object_get_canonical_path(OBJECT(backend));
@@ -77,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(errp, "cannot change property value");
        return;
    }
@@ -97,51 +96,23 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    fb->share = value;
 }

-static bool file_memory_backend_get_discard_data(Object *o, Error **errp)
-{
-    return MEMORY_BACKEND_FILE(o)->discard_data;
-}
-
-static void file_memory_backend_set_discard_data(Object *o, bool value,
-                                               Error **errp)
-{
-    MEMORY_BACKEND_FILE(o)->discard_data = value;
-}
-
-static void file_backend_unparent(Object *obj)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
-
-    if (host_memory_backend_mr_inited(backend) && fb->discard_data) {
-        void *ptr = memory_region_get_ram_ptr(&backend->mr);
-        uint64_t sz = memory_region_size(&backend->mr);
-
-        qemu_madvise(ptr, sz, QEMU_MADV_REMOVE);
-    }
-}
-
 static void
 file_backend_class_init(ObjectClass *oc, void *data)
 {
    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);

    bc->alloc = file_backend_memory_alloc;
-    oc->unparent = file_backend_unparent;

    object_class_property_add_bool(oc, "share",
        file_memory_backend_get_share, file_memory_backend_set_share,
        &error_abort);
-    object_class_property_add_bool(oc, "discard-data",
-        file_memory_backend_get_discard_data, file_memory_backend_set_discard_data,
-        &error_abort);
    object_class_property_add_str(oc, "mem-path",
        get_mem_path, set_mem_path,
        &error_abort);
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    }

    path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
+    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
                           backend->size, errp);
    g_free(path);
 }
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
    Error *local_err = NULL;
    uint64_t value;

-    if (host_memory_backend_mr_inited(backend)) {
+    if (memory_region_size(&backend->mr)) {
        error_setg(&local_err, "cannot change property value");
        goto out;
    }
@@ -64,6 +64,14 @@ out:
    error_propagate(errp, local_err);
 }

+static uint16List **host_memory_append_node(uint16List **node,
+                                            unsigned long value)
+{
+     *node = g_malloc0(sizeof(**node));
+     (*node)->value = value;
+     return &(*node)->next;
+}
+
 static void
 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -74,13 +82,12 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
-    if (value == MAX_NODES) {
-        return;
-    }

-    *node = g_malloc0(sizeof(**node));
-    (*node)->value = value;
-    node = &(*node)->next;
+    node = host_memory_append_node(node, value);
+
+    if (value == MAX_NODES) {
+        goto out;
+    }

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
@@ -88,11 +95,10 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
            break;
        }

-        *node = g_malloc0(sizeof(**node));
-        (*node)->value = value;
-        node = &(*node)->next;
+        node = host_memory_append_node(node, value);
    } while (true);

+out:
    visit_type_uint16List(v, name, &host_nodes, errp);
 }

@@ -146,7 +152,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->merge = value;
        return;
    }
@@ -172,7 +178,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->dump = value;
        return;
    }
@@ -208,7 +214,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        }
    }

-    if (!host_memory_backend_mr_inited(backend)) {
+    if (!memory_region_size(&backend->mr)) {
        backend->prealloc = value;
        return;
    }
@@ -218,7 +224,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

-        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
+        os_mem_prealloc(fd, ptr, sz, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
@@ -237,19 +243,10 @@ static void host_memory_backend_init(Object *obj)
    backend->prealloc = mem_prealloc;
 }

-bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
-{
-    /*
-     * NOTE: We forbid zero-length memory backend, so here zero means
-     * "we haven't inited the backend memory region yet".
-     */
-    return memory_region_size(&backend->mr) != 0;
-}
-
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
-    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
+    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
 }

 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
@@ -304,7 +301,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
-                       HostMemPolicy_str(backend->policy));
+                       HostMemPolicy_lookup[backend->policy]);
            return;
        }

@@ -331,7 +328,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
-                            smp_cpus, &local_err);
+                            &local_err);
            if (local_err) {
                goto out;
            }
@@ -342,7 +339,7 @@ out:
 }

 static bool
-host_memory_backend_can_be_deleted(UserCreatable *uc)
+host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
 {
    if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
        return false;
@@ -351,24 +348,6 @@ host_memory_backend_can_be_deleted(UserCreatable *uc)
    }
 }

-static char *get_id(Object *o, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    return g_strdup(backend->id);
-}
-
-static void set_id(Object *o, const char *str, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    if (backend->id) {
-        error_setg(errp, "cannot change property value");
-        return;
-    }
-    backend->id = g_strdup(str);
-}
-
 static void
 host_memory_backend_class_init(ObjectClass *oc, void *data)
 {
@@ -395,16 +374,9 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
        host_memory_backend_set_host_nodes,
        NULL, NULL, &error_abort);
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
-        &HostMemPolicy_lookup,
+        HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
-    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
-}
-
-static void host_memory_backend_finalize(Object *o)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-    g_free(backend->id);
 }

 static const TypeInfo host_memory_backend_info = {
@@ -415,7 +387,6 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
-    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -23,7 +23,7 @@
 */
 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"
 #include "ui/console.h"
 #include "ui/input.h"

@@ -31,23 +31,18 @@
 #define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)

 typedef struct {
-    Chardev parent;
-
+    CharDriverState *chr;
    QemuInputHandlerState *hs;
    int axis[INPUT_AXIS__MAX];
    bool btns[INPUT_BUTTON__MAX];
    bool btnc[INPUT_BUTTON__MAX];
    uint8_t outbuf[32];
    int outlen;
-} MouseChardev;
+} MouseState;

-#define TYPE_CHARDEV_MSMOUSE "chardev-msmouse"
-#define MOUSE_CHARDEV(obj)                                      \
-    OBJECT_CHECK(MouseChardev, (obj), TYPE_CHARDEV_MSMOUSE)
-
-static void msmouse_chr_accept_input(Chardev *chr)
+static void msmouse_chr_accept_input(CharDriverState *chr)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(chr);
+    MouseState *mouse = chr->opaque;
    int len;

    len = qemu_chr_be_can_write(chr);
@@ -65,7 +60,7 @@ static void msmouse_chr_accept_input(Chardev *chr)
    }
 }

-static void msmouse_queue_event(MouseChardev *mouse)
+static void msmouse_queue_event(MouseState *mouse)
 {
    unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
    int dx, dy, count = 3;
@@ -102,7 +97,7 @@ static void msmouse_queue_event(MouseChardev *mouse)
 static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
                                InputEvent *evt)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(dev);
+    MouseState *mouse = (MouseState *)dev;
    InputMoveEvent *move;
    InputBtnEvent *btn;

@@ -126,24 +121,24 @@ static void msmouse_input_event(DeviceState *dev, QemuConsole *src,

 static void msmouse_input_sync(DeviceState *dev)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(dev);
-    Chardev *chr = CHARDEV(dev);
+    MouseState *mouse = (MouseState *)dev;

    msmouse_queue_event(mouse);
-    msmouse_chr_accept_input(chr);
+    msmouse_chr_accept_input(mouse->chr);
 }

-static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
+static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
 {
    /* Ignore writes to mouse port */
    return len;
 }

-static void char_msmouse_finalize(Object *obj)
+static void msmouse_chr_free(struct CharDriverState *chr)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(obj);
+    MouseState *mouse = chr->opaque;

    qemu_input_handler_unregister(mouse->hs);
+    g_free(mouse);
 }

 static QemuInputHandler msmouse_handler = {
@@ -153,38 +148,39 @@ static QemuInputHandler msmouse_handler = {
    .sync  = msmouse_input_sync,
 };

-static void msmouse_chr_open(Chardev *chr,
-                             ChardevBackend *backend,
-                             bool *be_opened,
-                             Error **errp)
+static CharDriverState *qemu_chr_open_msmouse(const char *id,
+                                              ChardevBackend *backend,
+                                              ChardevReturn *ret,
+                                              bool *be_opened,
+                                              Error **errp)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(chr);
+    ChardevCommon *common = backend->u.msmouse.data;
+    MouseState *mouse;
+    CharDriverState *chr;

+    chr = qemu_chr_alloc(common, errp);
+    if (!chr) {
+        return NULL;
+    }
+    chr->chr_write = msmouse_chr_write;
+    chr->chr_free = msmouse_chr_free;
+    chr->chr_accept_input = msmouse_chr_accept_input;
    *be_opened = false;
+
+    mouse = g_new0(MouseState, 1);
    mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
                                            &msmouse_handler);
+
+    mouse->chr = chr;
+    chr->opaque = mouse;
+
+    return chr;
 }

-static void char_msmouse_class_init(ObjectClass *oc, void *data)
-{
-    ChardevClass *cc = CHARDEV_CLASS(oc);
-
-    cc->open = msmouse_chr_open;
-    cc->chr_write = msmouse_chr_write;
-    cc->chr_accept_input = msmouse_chr_accept_input;
-}
-
-static const TypeInfo char_msmouse_type_info = {
-    .name = TYPE_CHARDEV_MSMOUSE,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(MouseChardev),
-    .instance_finalize = char_msmouse_finalize,
-    .class_init = char_msmouse_class_init,
-};
-
 static void register_types(void)
 {
-    type_register_static(&char_msmouse_type_info);
+    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
+                         qemu_chr_open_msmouse);
 }

 type_init(register_types);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -12,7 +12,7 @@

 #include "qemu/osdep.h"
 #include "sysemu/rng.h"
-#include "chardev/char-fe.h"
+#include "sysemu/char.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"

@@ -86,7 +86,7 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);
-    Chardev *chr;
+    CharDriverState *chr;

    if (s->chr_name == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
@@ -106,7 +106,7 @@ static void rng_egd_opened(RngBackend *b, Error **errp)

    /* FIXME we should resubmit pending requests when the CDS reconnects. */
    qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
-                             rng_egd_chr_read, NULL, NULL, s, NULL, true);
+                             rng_egd_chr_read, NULL, s, NULL, true);
 }

 static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
@@ -125,7 +125,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
 static char *rng_egd_get_chardev(Object *obj, Error **errp)
 {
    RngEgd *s = RNG_EGD(obj);
-    Chardev *chr = qemu_chr_fe_get_driver(&s->chr);
+    CharDriverState *chr = qemu_chr_fe_get_driver(&s->chr);

    if (chr && chr->label) {
        return g_strdup(chr->label);
@@ -145,7 +145,7 @@ static void rng_egd_finalize(Object *obj)
 {
    RngEgd *s = RNG_EGD(obj);

-    qemu_chr_fe_deinit(&s->chr, false);
+    qemu_chr_fe_deinit(&s->chr);
    g_free(s->chr_name);
 }

--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -25,23 +25,18 @@
 */
 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "chardev/char.h"
+#include "sysemu/char.h"

 #define BUF_SIZE 32

 typedef struct {
-    Chardev parent;
-
+    CharDriverState *chr;
    uint8_t in_buf[32];
    int in_buf_used;
-} TestdevChardev;
-
-#define TYPE_CHARDEV_TESTDEV "chardev-testdev"
-#define TESTDEV_CHARDEV(obj)                                    \
-    OBJECT_CHECK(TestdevChardev, (obj), TYPE_CHARDEV_TESTDEV)
+} TestdevCharState;

 /* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevChardev *testdev)
+static int testdev_eat_packet(TestdevCharState *testdev)
 {
    const uint8_t *cur = testdev->in_buf;
    int len = testdev->in_buf_used;
@@ -82,9 +77,9 @@ static int testdev_eat_packet(TestdevChardev *testdev)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
+static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    TestdevChardev *testdev = TESTDEV_CHARDEV(chr);
+    TestdevCharState *testdev = chr->opaque;
    int tocopy, eaten, orig_len = len;

    while (len) {
@@ -107,23 +102,36 @@ static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
    return orig_len;
 }

-static void char_testdev_class_init(ObjectClass *oc, void *data)
+static void testdev_free(struct CharDriverState *chr)
 {
-    ChardevClass *cc = CHARDEV_CLASS(oc);
+    TestdevCharState *testdev = chr->opaque;

-    cc->chr_write = testdev_chr_write;
+    g_free(testdev);
 }

-static const TypeInfo char_testdev_type_info = {
-    .name = TYPE_CHARDEV_TESTDEV,
-    .parent = TYPE_CHARDEV,
-    .instance_size = sizeof(TestdevChardev),
-    .class_init = char_testdev_class_init,
-};
+static CharDriverState *chr_testdev_init(const char *id,
+                                         ChardevBackend *backend,
+                                         ChardevReturn *ret,
+                                         bool *be_opened,
+                                         Error **errp)
+{
+    TestdevCharState *testdev;
+    CharDriverState *chr;
+
+    testdev = g_new0(TestdevCharState, 1);
+    testdev->chr = chr = g_new0(CharDriverState, 1);
+
+    chr->opaque = testdev;
+    chr->chr_write = testdev_write;
+    chr->chr_free = testdev_free;
+
+    return chr;
+}

 static void register_types(void)
 {
-    type_register_static(&char_testdev_type_info);
+    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
+                         chr_testdev_init);
 }

 type_init(register_types);
--- a/balloon.c
+++ b/balloon.c
@@ -29,7 +29,7 @@
 #include "exec/cpu-common.h"
 #include "sysemu/kvm.h"
 #include "sysemu/balloon.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,31 +1,31 @@
-block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
-block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
+block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += file-posix.o
+block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o

 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
-block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_VXHS) += vxhs.o
+block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
+block-obj-y += dictzip.o
+block-obj-y += tar.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-y += throttle.o

 block-obj-y += crypto.o

@@ -40,9 +40,9 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
-vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
+archipelago.o-libs := $(ARCHIPELAGO_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
 dmg-bz2.o-libs     := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -32,19 +32,15 @@
 static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
 static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;

-void block_acct_init(BlockAcctStats *stats)
-{
-    qemu_mutex_init(&stats->lock);
-    if (qtest_enabled()) {
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-}
-
-void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
-                      bool account_failed)
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+                     bool account_failed)
 {
    stats->account_invalid = account_invalid;
    stats->account_failed = account_failed;
+
+    if (qtest_enabled()) {
+        clock_type = QEMU_CLOCK_VIRTUAL;
+    }
 }

 void block_acct_cleanup(BlockAcctStats *stats)
@@ -53,7 +49,6 @@ void block_acct_cleanup(BlockAcctStats *stats)
    QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
        g_free(s);
    }
-    qemu_mutex_destroy(&stats->lock);
 }

 void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
@@ -63,15 +58,12 @@ void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)

    s = g_new0(BlockAcctTimedStats, 1);
    s->interval_length = interval_length;
-    s->stats = stats;
-    qemu_mutex_lock(&stats->lock);
    QSLIST_INSERT_HEAD(&stats->intervals, s, entries);

    for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
        timed_average_init(&s->latency[i], clock_type,
                           (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
    }
-    qemu_mutex_unlock(&stats->lock);
 }

 BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
@@ -94,8 +86,7 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
    cookie->type = type;
 }

-static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
-                                 bool failed)
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
 {
    BlockAcctTimedStats *s;
    int64_t time_ns = qemu_clock_get_ns(clock_type);
@@ -107,16 +98,31 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,

    assert(cookie->type < BLOCK_MAX_IOTYPE);

-    qemu_mutex_lock(&stats->lock);
+    stats->nr_bytes[cookie->type] += cookie->bytes;
+    stats->nr_ops[cookie->type]++;
+    stats->total_time_ns[cookie->type] += latency_ns;
+    stats->last_access_time_ns = time_ns;

-    if (failed) {
-        stats->failed_ops[cookie->type]++;
-    } else {
-        stats->nr_bytes[cookie->type] += cookie->bytes;
-        stats->nr_ops[cookie->type]++;
+    QSLIST_FOREACH(s, &stats->intervals, entries) {
+        timed_average_account(&s->latency[cookie->type], latency_ns);
    }
+}
+
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+    assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+    stats->failed_ops[cookie->type]++;
+
+    if (stats->account_failed) {
+        BlockAcctTimedStats *s;
+        int64_t time_ns = qemu_clock_get_ns(clock_type);
+        int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+        if (qtest_enabled()) {
+            latency_ns = qtest_latency_ns;
+        }

-    if (!failed || stats->account_failed) {
        stats->total_time_ns[cookie->type] += latency_ns;
        stats->last_access_time_ns = time_ns;

@@ -124,45 +130,29 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
            timed_average_account(&s->latency[cookie->type], latency_ns);
        }
    }
-
-    qemu_mutex_unlock(&stats->lock);
-}
-
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    block_account_one_io(stats, cookie, false);
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    block_account_one_io(stats, cookie, true);
 }

 void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
 {
    assert(type < BLOCK_MAX_IOTYPE);

-    /* block_account_one_io() updates total_time_ns[], but this one does
-     * not.  The reason is that invalid requests are accounted during their
-     * submission, therefore there's no actual I/O involved.
-     */
-    qemu_mutex_lock(&stats->lock);
+    /* block_acct_done() and block_acct_failed() update
+     * total_time_ns[], but this one does not. The reason is that
+     * invalid requests are accounted during their submission,
+     * therefore there's no actual I/O involved. */
+
    stats->invalid_ops[type]++;

    if (stats->account_invalid) {
        stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
    }
-    qemu_mutex_unlock(&stats->lock);
 }

 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
                      int num_requests)
 {
    assert(type < BLOCK_MAX_IOTYPE);
-
-    qemu_mutex_lock(&stats->lock);
    stats->merged[type] += num_requests;
-    qemu_mutex_unlock(&stats->lock);
 }

 int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
@@ -177,9 +167,7 @@ double block_acct_queue_depth(BlockAcctTimedStats *stats,

    assert(type < BLOCK_MAX_IOTYPE);

-    qemu_mutex_lock(&stats->stats->lock);
    sum = timed_average_sum(&stats->latency[type], &elapsed);
-    qemu_mutex_unlock(&stats->stats->lock);

    return (double) sum / elapsed;
 }
--- a/block/archipelago.c
+++ b/block/archipelago.c
--- a/block/backup.c
+++ b/block/backup.c
@@ -24,7 +24,6 @@
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"

 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */
@@ -39,7 +38,7 @@ typedef struct BackupBlockJob {
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    CoRwlock flush_rwlock;
-    uint64_t bytes_read;
+    uint64_t sectors_read;
    unsigned long *done_bitmap;
    int64_t cluster_size;
    bool compress;
@@ -47,6 +46,12 @@ typedef struct BackupBlockJob {
    QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;

+/* Size of a cluster in sectors, instead of bytes. */
+static inline int64_t cluster_size_sectors(BackupBlockJob *job)
+{
+  return job->cluster_size / BDRV_SECTOR_SIZE;
+}
+
 /* See if in-flight requests overlap and wait for them to complete */
 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
                                                       int64_t start,
@@ -58,8 +63,8 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
    do {
        retry = false;
        QLIST_FOREACH(req, &job->inflight_reqs, list) {
-            if (end > req->start_byte && start < req->end_byte) {
-                qemu_co_queue_wait(&req->wait_queue, NULL);
+            if (end > req->start && start < req->end) {
+                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
@@ -69,10 +74,10 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,

 /* Keep track of an in-flight request */
 static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
-                              int64_t start, int64_t end)
+                                     int64_t start, int64_t end)
 {
-    req->start_byte = start;
-    req->end_byte = end;
+    req->start = start;
+    req->end = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
 }
@@ -85,7 +90,7 @@ static void cow_request_end(CowRequest *req)
 }

 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
-                                      int64_t offset, uint64_t bytes,
+                                      int64_t sector_num, int nb_sectors,
                                      bool *error_is_read,
                                      bool is_write_notifier)
 {
@@ -95,37 +100,41 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
    QEMUIOVector bounce_qiov;
    void *bounce_buffer = NULL;
    int ret = 0;
-    int64_t start, end; /* bytes */
-    int n; /* bytes */
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
+    int64_t start, end;
+    int n;

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

-    start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
-    end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
+    start = sector_num / sectors_per_cluster;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);

-    trace_backup_do_cow_enter(job, start, offset, bytes);
+    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);

    wait_for_overlapping_requests(job, start, end);
    cow_request_begin(&cow_request, job, start, end);

-    for (; start < end; start += job->cluster_size) {
-        if (test_bit(start / job->cluster_size, job->done_bitmap)) {
+    for (; start < end; start++) {
+        if (test_bit(start, job->done_bitmap)) {
            trace_backup_do_cow_skip(job, start);
            continue; /* already copied */
        }

        trace_backup_do_cow_process(job, start);

-        n = MIN(job->cluster_size, job->common.len - start);
+        n = MIN(sectors_per_cluster,
+                job->common.len / BDRV_SECTOR_SIZE -
+                start * sectors_per_cluster);

        if (!bounce_buffer) {
            bounce_buffer = blk_blockalign(blk, job->cluster_size);
        }
        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
+        iov.iov_len = n * BDRV_SECTOR_SIZE;
        qemu_iovec_init_external(&bounce_qiov, &iov, 1);

-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
+        ret = blk_co_preadv(blk, start * job->cluster_size,
+                            bounce_qiov.size, &bounce_qiov,
                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, start, ret);
@@ -136,10 +145,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
        }

        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
+            ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
        } else {
-            ret = blk_co_pwritev(job->target, start,
+            ret = blk_co_pwritev(job->target, start * job->cluster_size,
                                 bounce_qiov.size, &bounce_qiov,
                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
        }
@@ -151,13 +160,13 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
            goto out;
        }

-        set_bit(start / job->cluster_size, job->done_bitmap);
+        set_bit(start, job->done_bitmap);

        /* Publish progress, guest I/O counts as progress too.  Note that the
         * offset field is an opaque progress value, it is not a disk offset.
         */
-        job->bytes_read += n;
-        job->common.offset += n;
+        job->sectors_read += n;
+        job->common.offset += n * BDRV_SECTOR_SIZE;
    }

 out:
@@ -167,7 +176,7 @@ out:

    cow_request_end(&cow_request);

-    trace_backup_do_cow_return(job, offset, bytes, ret);
+    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);

    qemu_co_rwlock_unlock(&job->flush_rwlock);

@@ -180,12 +189,14 @@ static int coroutine_fn backup_before_write_notify(
 {
    BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
    BdrvTrackedRequest *req = opaque;
+    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

    assert(req->bs == blk_bs(job->common.blk));
-    assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
+    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

-    return backup_do_cow(job, req->offset, req->bytes, NULL, true);
+    return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -196,7 +207,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }

 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -263,29 +274,32 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
    bitmap_zero(backup_job->done_bitmap, len);
 }

-void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
-                                          uint64_t bytes)
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+                                          int nb_sectors)
 {
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+    int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
    int64_t start, end;

    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);

-    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
-    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
+    start = sector_num / sectors_per_cluster;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
    wait_for_overlapping_requests(backup_job, start, end);
 }

 void backup_cow_request_begin(CowRequest *req, BlockJob *job,
-                              int64_t offset, uint64_t bytes)
+                              int64_t sector_num,
+                              int nb_sectors)
 {
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+    int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
    int64_t start, end;

    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);

-    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
-    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
+    start = sector_num / sectors_per_cluster;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
    cow_request_begin(req, backup_job, start, end);
 }

@@ -344,8 +358,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
     */
    if (job->common.speed) {
        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->bytes_read);
-        job->bytes_read = 0;
+                                                      job->sectors_read);
+        job->sectors_read = 0;
        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
    } else {
        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
@@ -364,10 +378,11 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    int ret = 0;
    int clusters_per_iter;
    uint32_t granularity;
-    int64_t offset;
+    int64_t sector;
    int64_t cluster;
    int64_t end;
    int64_t last_cluster = -1;
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
    BdrvDirtyBitmapIter *dbi;

    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -375,8 +390,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);

    /* Find the next dirty sector(s) */
-    while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
-        cluster = offset / job->cluster_size;
+    while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
+        cluster = sector / sectors_per_cluster;

        /* Fake progress updates for any clusters we skipped */
        if (cluster != last_cluster + 1) {
@@ -389,8 +404,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                if (yield_and_check(job)) {
                    goto out;
                }
-                ret = backup_do_cow(job, cluster * job->cluster_size,
-                                    job->cluster_size, &error_is_read,
+                ret = backup_do_cow(job, cluster * sectors_per_cluster,
+                                    sectors_per_cluster, &error_is_read,
                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
@@ -403,8 +418,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
        /* If the bitmap granularity is smaller than the backup granularity,
         * we need to advance the iterator pointer to the next cluster. */
        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(dbi,
-                                cluster * job->cluster_size / BDRV_SECTOR_SIZE);
+            bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster);
        }

        last_cluster = cluster - 1;
@@ -426,14 +440,17 @@ static void coroutine_fn backup_run(void *opaque)
    BackupBlockJob *job = opaque;
    BackupCompleteData *data;
    BlockDriverState *bs = blk_bs(job->common.blk);
-    int64_t offset;
+    int64_t start, end;
+    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;

    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

-    job->done_bitmap = bitmap_new(DIV_ROUND_UP(job->common.len,
-                                               job->cluster_size));
+    start = 0;
+    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
+
+    job->done_bitmap = bitmap_new(end);

    job->before_write.notify = backup_before_write_notify;
    bdrv_add_before_write_notifier(bs, &job->before_write);
@@ -448,23 +465,20 @@ static void coroutine_fn backup_run(void *opaque)
        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (offset = 0; offset < job->common.len;
-             offset += job->cluster_size) {
+        for (; start < end; start++) {
            bool error_is_read;
-            int alloced = 0;
-
            if (yield_and_check(job)) {
                break;
            }

            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i;
-                int64_t n;
+                int i, n;
+                int alloced = 0;

                /* Check to see if these blocks are already in the
                 * backing file. */

-                for (i = 0; i < job->cluster_size;) {
+                for (i = 0; i < sectors_per_cluster;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are are all in the same state.
@@ -472,11 +486,12 @@ static void coroutine_fn backup_run(void *opaque)
                     * backup cluster length.  We end up copying more than
                     * needed but at some point that is always the case. */
                    alloced =
-                        bdrv_is_allocated(bs, offset + i,
-                                          job->cluster_size - i, &n);
+                        bdrv_is_allocated(bs,
+                                start * sectors_per_cluster + i,
+                                sectors_per_cluster - i, &n);
                    i += n;

-                    if (alloced || n == 0) {
+                    if (alloced == 1 || n == 0) {
                        break;
                    }
                }
@@ -488,12 +503,8 @@ static void coroutine_fn backup_run(void *opaque)
                }
            }
            /* FULL sync mode we copy the whole drive. */
-            if (alloced < 0) {
-                ret = alloced;
-            } else {
-                ret = backup_do_cow(job, offset, job->cluster_size,
-                                    &error_is_read, false);
-            }
+            ret = backup_do_cow(job, start * sectors_per_cluster,
+                                sectors_per_cluster, &error_is_read, false);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
@@ -501,7 +512,7 @@ static void coroutine_fn backup_run(void *opaque)
                if (action == BLOCK_ERROR_ACTION_REPORT) {
                    break;
                } else {
-                    offset -= job->cluster_size;
+                    start--;
                    continue;
                }
            }
@@ -596,7 +607,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        error_setg(errp,
                   "a sync_bitmap was provided to backup_run, "
                   "but received an incompatible sync_mode (%s)",
-                   MirrorSyncMode_str(sync_mode));
+                   MirrorSyncMode_lookup[sync_mode]);
        return NULL;
    }

@@ -607,24 +618,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

-    /* job->common.len is fixed, so we can't allow resize */
-    job = block_job_create(job_id, &backup_job_driver, bs,
-                           BLK_PERM_CONSISTENT_READ,
-                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
-                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
-                           speed, creation_flags, cb, opaque, errp);
+    job = block_job_create(job_id, &backup_job_driver, bs, speed,
+                           creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

-    /* The target must match the source in size, so no resize here either */
-    job->target = blk_new(BLK_PERM_WRITE,
-                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
-                          BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
-    ret = blk_insert_bs(job->target, target, errp);
-    if (ret < 0) {
-        goto error;
-    }
+    job->target = blk_new();
+    blk_insert_bs(job->target, target);

    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
@@ -637,16 +638,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible. */
    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target->backing) {
-        /* Cluster size is not defined */
-        warn_report("The target block device doesn't provide "
-                    "information about the block size and it doesn't have a "
-                    "backing file. The default block size of %u bytes is "
-                    "used. If the actual block size of the target exceeds "
-                    "this default, the backup may be unusable",
-                    BACKUP_CLUSTER_SIZE_DEFAULT);
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target->backing) {
+    if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
@@ -660,9 +652,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }

-    /* Required permissions are already taken with target's blk_new() */
-    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
-                       &error_abort);
+    block_job_add_bdrv(&job->common, target);
    job->common.len = len;
    block_job_txn_add_job(txn, &job->common);

@@ -674,7 +664,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    }
    if (job) {
        backup_clean(&job->common);
-        block_job_early_fail(&job->common);
+        block_job_unref(&job->common);
    }

    return NULL;
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -1,7 +1,6 @@
 /*
 * Block protocol for I/O error injection
 *
- * Copyright (C) 2016-2017 Red Hat, Inc.
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,18 +30,14 @@
 #include "qemu/module.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "sysemu/qtest.h"

 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;
-    uint64_t align;
-    uint64_t max_transfer;
-    uint64_t opt_write_zero;
-    uint64_t max_write_zero;
-    uint64_t opt_discard;
-    uint64_t max_discard;
+    int align;

    /* For blkdebug_refresh_filename() */
    char *config_file;
@@ -63,6 +58,10 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;

+static const AIOCBInfo blkdebug_aiocb_info = {
+    .aiocb_size    = sizeof(BlkdebugAIOCB),
+};
+
 enum {
    ACTION_INJECT_ERROR,
    ACTION_SET_STATE,
@@ -78,7 +77,7 @@ typedef struct BlkdebugRule {
            int error;
            int immediately;
            int once;
-            int64_t offset;
+            int64_t sector;
        } inject;
        struct {
            int new_state;
@@ -149,6 +148,20 @@ static QemuOptsList *config_groups[] = {
    NULL
 };

+static int get_event_by_name(const char *name, BlkdebugEvent *event)
+{
+    int i;
+
+    for (i = 0; i < BLKDBG__MAX; i++) {
+        if (!strcmp(BlkdebugEvent_lookup[i], name)) {
+            *event = i;
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
 struct add_rule_data {
    BDRVBlkdebugState *s;
    int action;
@@ -159,18 +172,16 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
    struct add_rule_data *d = opaque;
    BDRVBlkdebugState *s = d->s;
    const char* event_name;
-    int event;
+    BlkdebugEvent event;
    struct BlkdebugRule *rule;
-    int64_t sector;

    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
    if (!event_name) {
        error_setg(errp, "Missing event name for rule");
        return -1;
-    }
-    event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
-    if (event < 0) {
+    } else if (get_event_by_name(event_name, &event) < 0) {
+        error_setg(errp, "Invalid event name \"%s\"", event_name);
        return -1;
    }

@@ -189,9 +200,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
        rule->options.inject.immediately =
            qemu_opt_get_bool(opts, "immediately", 0);
-        sector = qemu_opt_get_number(opts, "sector", -1);
-        rule->options.inject.offset =
-            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
+        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
        break;

    case ACTION_SET_STATE:
@@ -293,7 +302,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkdebug:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put_str(options, "x-image", filename);
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -312,7 +321,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put_str(options, "x-image", filename);
+    qdict_put(options, "x-image", qstring_from_str(filename));
 }

 static QemuOptsList runtime_opts = {
@@ -334,31 +343,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_SIZE,
            .help = "Required alignment in bytes",
        },
-        {
-            .name = "max-transfer",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum transfer size in bytes",
-        },
-        {
-            .name = "opt-write-zero",
-            .type = QEMU_OPT_SIZE,
-            .help = "Optimum write zero alignment in bytes",
-        },
-        {
-            .name = "max-write-zero",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum write zero size in bytes",
-        },
-        {
-            .name = "opt-discard",
-            .type = QEMU_OPT_SIZE,
-            .help = "Optimum discard alignment in bytes",
-        },
-        {
-            .name = "max-discard",
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum discard size in bytes",
-        },
        { /* end of list */ }
    },
 };
@@ -369,8 +353,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    int ret;
    uint64_t align;
+    int ret;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -399,69 +383,21 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    bs->supported_write_flags = BDRV_REQ_FUA &
-        bs->file->bs->supported_write_flags;
-    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-        bs->file->bs->supported_zero_flags;
-    ret = -EINVAL;
-
-    /* Set alignment overrides */
-    s->align = qemu_opt_get_size(opts, "align", 0);
-    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
-        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
-                   s->align);
-        goto out;
-    }
-    align = MAX(s->align, bs->file->bs->bl.request_alignment);
-
-    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
-    if (s->max_transfer &&
-        (s->max_transfer >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
-        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
-                   s->max_transfer);
-        goto out;
-    }
-
-    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
-    if (s->opt_write_zero &&
-        (s->opt_write_zero >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
-        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
-                   s->opt_write_zero);
-        goto out;
-    }
-
-    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
-    if (s->max_write_zero &&
-        (s->max_write_zero >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_write_zero,
-                          MAX(s->opt_write_zero, align)))) {
-        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
-                   s->max_write_zero);
-        goto out;
-    }
-
-    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
-    if (s->opt_discard &&
-        (s->opt_discard >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
-        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
-                   s->opt_discard);
-        goto out;
-    }
-
-    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
-    if (s->max_discard &&
-        (s->max_discard >= INT_MAX ||
-         !QEMU_IS_ALIGNED(s->max_discard,
-                          MAX(s->opt_discard, align)))) {
-        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
-                   s->max_discard);
-        goto out;
+    /* Set request alignment */
+    align = qemu_opt_get_size(opts, "align", 0);
+    if (align < INT_MAX && is_power_of_2(align)) {
+        s->align = align;
+    } else if (align) {
+        error_setg(errp, "Invalid alignment");
+        ret = -EINVAL;
+        goto fail_unref;
    }

    ret = 0;
+    goto out;
+
+fail_unref:
+    bdrv_unref_child(bs, bs->file);
 out:
    if (ret < 0) {
        g_free(s->config_file);
@@ -470,163 +406,103 @@ out:
    return ret;
 }

-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
+static void error_callback_bh(void *opaque)
+{
+    struct BlkdebugAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_aio_unref(acb);
+}
+
+static BlockAIOCB *inject_error(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-    int error;
-    bool immediately;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (bytes && inject_offset >= offset &&
-             inject_offset < offset + bytes))
-        {
-            break;
-        }
-    }
-
-    if (!rule || !rule->options.inject.error) {
-        return 0;
-    }
-
-    immediately = rule->options.inject.immediately;
-    error = rule->options.inject.error;
+    int error = rule->options.inject.error;
+    struct BlkdebugAIOCB *acb;
+    bool immediately = rule->options.inject.immediately;

    if (rule->options.inject.once) {
        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
        remove_rule(rule);
    }

-    if (!immediately) {
-        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
-        qemu_coroutine_yield();
+    if (immediately) {
+        return NULL;
    }

-    return -error;
+    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
+    acb->ret = -error;
+
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
+
+    return &acb->common;
 }

-static int coroutine_fn
-blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                   QEMUIOVector *qiov, int flags)
+static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err;
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    /* Sanity check block layer guarantees */
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-    if (bs->bl.max_transfer) {
-        assert(bytes <= bs->bl.max_transfer);
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
    }

-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+                          cb, opaque);
 }

-static int coroutine_fn
-blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err;
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    /* Sanity check block layer guarantees */
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-    if (bs->bl.max_transfer) {
-        assert(bytes <= bs->bl.max_transfer);
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1 ||
+            (rule->options.inject.sector >= sector_num &&
+             rule->options.inject.sector < sector_num + nb_sectors)) {
+            break;
+        }
    }

-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+                           cb, opaque);
 }

-static int blkdebug_co_flush(BlockDriverState *bs)
+static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+    BlockCompletionFunc *cb, void *opaque)
 {
-    int err = rule_check(bs, 0, 0);
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;

-    if (err) {
-        return err;
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1) {
+            break;
+        }
    }

-    return bdrv_co_flush(bs->file->bs);
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
+    }
+
+    return bdrv_aio_flush(bs->file->bs, cb, opaque);
 }

-static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int bytes,
-                                                  BdrvRequestFlags flags)
-{
-    uint32_t align = MAX(bs->bl.request_alignment,
-                         bs->bl.pwrite_zeroes_alignment);
-    int err;
-
-    /* Only pass through requests that are larger than requested
-     * preferred alignment (so that we test the fallback to writes on
-     * unaligned portions), and check that the block layer never hands
-     * us anything unaligned that crosses an alignment boundary.  */
-    if (bytes < align) {
-        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + bytes, align) ||
-               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + bytes, align));
-        return -ENOTSUP;
-    }
-    assert(QEMU_IS_ALIGNED(offset, align));
-    assert(QEMU_IS_ALIGNED(bytes, align));
-    if (bs->bl.max_pwrite_zeroes) {
-        assert(bytes <= bs->bl.max_pwrite_zeroes);
-    }
-
-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
-    }
-
-    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
-}
-
-static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
-{
-    uint32_t align = bs->bl.pdiscard_alignment;
-    int err;
-
-    /* Only pass through requests that are larger than requested
-     * minimum alignment, and ensure that unaligned requests do not
-     * cross optimum discard boundaries. */
-    if (bytes < bs->bl.request_alignment) {
-        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + bytes, align) ||
-               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + bytes, align));
-        return -ENOTSUP;
-    }
-    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-    if (align && bytes >= align) {
-        assert(QEMU_IS_ALIGNED(offset, align));
-        assert(QEMU_IS_ALIGNED(bytes, align));
-    }
-    if (bs->bl.max_pdiscard) {
-        assert(bytes <= bs->bl.max_pdiscard);
-    }
-
-    err = rule_check(bs, offset, bytes);
-    if (err) {
-        return err;
-    }
-
-    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
-}

 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -720,13 +596,13 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 {
    BDRVBlkdebugState *s = bs->opaque;
    struct BlkdebugRule *rule;
-    int blkdebug_event;
+    BlkdebugEvent blkdebug_event;

-    blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
-    if (blkdebug_event < 0) {
+    if (get_event_by_name(event, &blkdebug_event) < 0) {
        return -ENOENT;
    }

+
    rule = g_malloc(sizeof(*rule));
    *rule = (struct BlkdebugRule) {
        .event  = blkdebug_event,
@@ -798,6 +674,11 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file->bs);
 }

+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
+{
+    return bdrv_truncate(bs->file->bs, offset);
+}
+
 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
 {
    BDRVBlkdebugState *s = bs->opaque;
@@ -821,20 +702,16 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    if (!force_json && bs->file->bs->exact_filename[0]) {
-        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                           "blkdebug:%s:%s", s->config_file ?: "",
-                           bs->file->bs->exact_filename);
-        if (ret >= sizeof(bs->exact_filename)) {
-            /* An overflow makes the filename unusable, so do not report any */
-            bs->exact_filename[0] = 0;
-        }
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "blkdebug:%s:%s", s->config_file ?: "",
+                 bs->file->bs->exact_filename);
    }

    opts = qdict_new();
-    qdict_put_str(opts, "driver", "blkdebug");
+    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));

    QINCREF(bs->file->bs->full_open_options);
-    qdict_put(opts, "image", bs->file->bs->full_open_options);
+    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));

    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image")) {
@@ -853,21 +730,6 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
    if (s->align) {
        bs->bl.request_alignment = s->align;
    }
-    if (s->max_transfer) {
-        bs->bl.max_transfer = s->max_transfer;
-    }
-    if (s->opt_write_zero) {
-        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
-    }
-    if (s->max_write_zero) {
-        bs->bl.max_pwrite_zeroes = s->max_write_zero;
-    }
-    if (s->opt_discard) {
-        bs->bl.pdiscard_alignment = s->opt_discard;
-    }
-    if (s->max_discard) {
-        bs->bl.max_pdiscard = s->max_discard;
-    }
 }

 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -880,24 +742,19 @@ static BlockDriver bdrv_blkdebug = {
    .format_name            = "blkdebug",
    .protocol_name          = "blkdebug",
    .instance_size          = sizeof(BDRVBlkdebugState),
-    .is_filter              = true,

    .bdrv_parse_filename    = blkdebug_parse_filename,
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
-    .bdrv_child_perm        = bdrv_filter_default_perms,
-
    .bdrv_getlength         = blkdebug_getlength,
+    .bdrv_truncate          = blkdebug_truncate,
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
    .bdrv_refresh_limits    = blkdebug_refresh_limits,

-    .bdrv_co_preadv         = blkdebug_co_preadv,
-    .bdrv_co_pwritev        = blkdebug_co_pwritev,
-    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
-    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
-    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
-    .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,
+    .bdrv_aio_readv         = blkdebug_aio_readv,
+    .bdrv_aio_writev        = blkdebug_aio_writev,
+    .bdrv_aio_flush         = blkdebug_aio_flush,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -37,6 +37,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
    return ret;
 }

@@ -57,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
    Request *req = opaque;
-    aio_co_wake(req->co);
+    qemu_coroutine_enter(req->co);
    qemu_bh_delete(req->bh);
    g_free(req);
 }
@@ -96,10 +99,10 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+    int64_t offset, int count, BdrvRequestFlags flags)
 {
    uint64_t reqid = blkreplay_next_id();
-    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

@@ -107,10 +110,10 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int bytes)
+                                              int64_t offset, int count)
 {
    uint64_t reqid = blkreplay_next_id();
-    int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+    int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

@@ -134,7 +137,6 @@ static BlockDriver bdrv_blkreplay = {

    .bdrv_file_open         = blkreplay_open,
    .bdrv_close             = blkreplay_close,
-    .bdrv_child_perm        = bdrv_filter_default_perms,
    .bdrv_getlength         = blkreplay_getlength,

    .bdrv_co_preadv         = blkreplay_co_preadv,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,36 +19,38 @@ typedef struct {
    BdrvChild *test_file;
 } BDRVBlkverifyState;

-typedef struct BlkverifyRequest {
-    Coroutine *co;
-    BlockDriverState *bs;
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+    BlockAIOCB common;

    /* Request metadata */
    bool is_write;
-    uint64_t offset;
-    uint64_t bytes;
-    int flags;
-
-    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
-                      BdrvRequestFlags);
-
-    int ret;                    /* test image result */
-    int raw_ret;                /* raw image result */
+    int64_t sector_num;
+    int nb_sectors;

+    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */

    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
-} BlkverifyRequest;
+    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
+    void *buf;                  /* buffer for raw file I/O */

-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
+    void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static const AIOCBInfo blkverify_aiocb_info = {
+    .aiocb_size         = sizeof(BlkverifyAIOCB),
+};
+
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
                                             const char *fmt, ...)
 {
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
-            r->is_write ? "write" : "read", r->offset, r->bytes);
+    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->is_write ? "write" : "read", acb->sector_num,
+            acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -67,7 +69,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
    if (!strstart(filename, "blkverify:", &filename)) {
        /* There was no prefix; therefore, all options have to be already
           present in the QDict (except for the filename) */
-        qdict_put_str(options, "x-image", filename);
+        qdict_put(options, "x-image", qstring_from_str(filename));
        return;
    }

@@ -84,7 +86,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* TODO Allow multi-level nesting and set file.filename here */
    filename = c + 1;
-    qdict_put_str(options, "x-image", filename);
+    qdict_put(options, "x-image", qstring_from_str(filename));
 }

 static QemuOptsList runtime_opts = {
@@ -142,6 +144,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,

    ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -161,106 +166,113 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file->bs);
 }

-static void coroutine_fn blkverify_do_test_req(void *opaque)
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+                                         int64_t sector_num, QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque)
 {
-    BlkverifyRequest *r = opaque;
-    BDRVBlkverifyState *s = r->bs->opaque;
+    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);

-    r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
-                           r->flags);
-    r->done++;
-    qemu_coroutine_enter_if_inactive(r->co);
+    acb->is_write = is_write;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->ret = -EINPROGRESS;
+    acb->done = 0;
+    acb->qiov = qiov;
+    acb->buf = NULL;
+    acb->verify = NULL;
+    return acb;
 }

-static void coroutine_fn blkverify_do_raw_req(void *opaque)
+static void blkverify_aio_bh(void *opaque)
 {
-    BlkverifyRequest *r = opaque;
+    BlkverifyAIOCB *acb = opaque;

-    r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
-                               r->flags);
-    r->done++;
-    qemu_coroutine_enter_if_inactive(r->co);
-}
-
-static int coroutine_fn
-blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
-                  uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
-                  int flags, bool is_write)
-{
-    Coroutine *co_a, *co_b;
-
-    *r = (BlkverifyRequest) {
-        .co         = qemu_coroutine_self(),
-        .bs         = bs,
-        .offset     = offset,
-        .bytes      = bytes,
-        .qiov       = qiov,
-        .raw_qiov   = raw_qiov,
-        .flags      = flags,
-        .is_write   = is_write,
-        .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
-    };
-
-    co_a = qemu_coroutine_create(blkverify_do_test_req, r);
-    co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
-
-    qemu_coroutine_enter(co_a);
-    qemu_coroutine_enter(co_b);
-
-    while (r->done < 2) {
-        qemu_coroutine_yield();
+    if (acb->buf) {
+        qemu_iovec_destroy(&acb->raw_qiov);
+        qemu_vfree(acb->buf);
    }
-
-    if (r->ret != r->raw_ret) {
-        blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
-    }
-
-    return r->ret;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_aio_unref(acb);
 }

-static int coroutine_fn
-blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static void blkverify_aio_cb(void *opaque, int ret)
 {
-    BlkverifyRequest r;
-    QEMUIOVector raw_qiov;
-    void *buf;
-    ssize_t cmp_offset;
-    int ret;
+    BlkverifyAIOCB *acb = opaque;

-    buf = qemu_blockalign(bs->file->bs, qiov->size);
-    qemu_iovec_init(&raw_qiov, qiov->niov);
-    qemu_iovec_clone(&raw_qiov, qiov, buf);
+    switch (++acb->done) {
+    case 1:
+        acb->ret = ret;
+        break;

-    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
-                            false);
+    case 2:
+        if (acb->ret != ret) {
+            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
+        }

-    cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
-    if (cmp_offset != -1) {
-        blkverify_err(&r, "contents mismatch at offset %" PRId64,
-                      offset + cmp_offset);
+        if (acb->verify) {
+            acb->verify(acb);
+        }
+
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                blkverify_aio_bh, acb);
+        break;
    }
-
-    qemu_iovec_destroy(&raw_qiov);
-    qemu_vfree(buf);
-
-    return ret;
 }

-static int coroutine_fn
-blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                     QEMUIOVector *qiov, int flags)
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
 {
-    BlkverifyRequest r;
-    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
+    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
+    if (offset != -1) {
+        blkverify_err(acb, "contents mismatch in sector %" PRId64,
+                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+    }
 }

-static int blkverify_co_flush(BlockDriverState *bs)
+static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+                                            nb_sectors, cb, opaque);
+
+    acb->verify = blkverify_verify_readv;
+    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
+    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+                   blkverify_aio_cb, acb);
+    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+                   blkverify_aio_cb, acb);
+    return &acb->common;
+}
+
+static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVBlkverifyState *s = bs->opaque;
+    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+                                            nb_sectors, cb, opaque);
+
+    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+                    blkverify_aio_cb, acb);
+    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+                    blkverify_aio_cb, acb);
+    return &acb->common;
+}
+
+static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+                                       BlockCompletionFunc *cb,
+                                       void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;

    /* Only flush test file, the raw file is not important */
-    return bdrv_co_flush(s->test_file->bs);
+    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
 }

 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -288,12 +300,13 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
        && s->test_file->bs->full_open_options)
    {
        QDict *opts = qdict_new();
-        qdict_put_str(opts, "driver", "blkverify");
+        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));

        QINCREF(bs->file->bs->full_open_options);
-        qdict_put(opts, "raw", bs->file->bs->full_open_options);
+        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put(opts, "test", s->test_file->bs->full_open_options);
+        qdict_put_obj(opts, "test",
+                      QOBJECT(s->test_file->bs->full_open_options));

        bs->full_open_options = opts;
    }
@@ -301,14 +314,10 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
    if (bs->file->bs->exact_filename[0]
        && s->test_file->bs->exact_filename[0])
    {
-        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                           "blkverify:%s:%s",
-                           bs->file->bs->exact_filename,
-                           s->test_file->bs->exact_filename);
-        if (ret >= sizeof(bs->exact_filename)) {
-            /* An overflow makes the filename unusable, so do not report any */
-            bs->exact_filename[0] = 0;
-        }
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "blkverify:%s:%s",
+                 bs->file->bs->exact_filename,
+                 s->test_file->bs->exact_filename);
    }
 }

@@ -320,13 +329,12 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_parse_filename              = blkverify_parse_filename,
    .bdrv_file_open                   = blkverify_open,
    .bdrv_close                       = blkverify_close,
-    .bdrv_child_perm                  = bdrv_filter_default_perms,
    .bdrv_getlength                   = blkverify_getlength,
    .bdrv_refresh_filename            = blkverify_refresh_filename,

-    .bdrv_co_preadv                   = blkverify_co_preadv,
-    .bdrv_co_pwritev                  = blkverify_co_pwritev,
-    .bdrv_co_flush                    = blkverify_co_flush,
+    .bdrv_aio_readv                   = blkverify_aio_readv,
+    .bdrv_aio_writev                  = blkverify_aio_writev,
+    .bdrv_aio_flush                   = blkverify_aio_flush,

    .is_filter                        = true,
    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -20,7 +20,6 @@
 #include "qapi-event.h"
 #include "qemu/id.h"
 #include "trace.h"
-#include "migration/misc.h"

 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
@@ -60,17 +59,9 @@ struct BlockBackend {
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

-    uint64_t perm;
-    uint64_t shared_perm;
-    bool disable_perm;
-
    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
-
-    int quiesce_counter;
-    VMChangeStateEntry *vmsh;
-    bool force_allow_inactivate;
 };

 typedef struct BlockBackendAIOCB {
@@ -108,172 +99,37 @@ static void blk_root_drained_end(BdrvChild *child);
 static void blk_root_change_media(BdrvChild *child, bool load);
 static void blk_root_resize(BdrvChild *child);

-static char *blk_root_get_parent_desc(BdrvChild *child)
-{
-    BlockBackend *blk = child->opaque;
-    char *dev_id;
-
-    if (blk->name) {
-        return g_strdup(blk->name);
-    }
-
-    dev_id = blk_get_attached_dev_id(blk);
-    if (*dev_id) {
-        return dev_id;
-    } else {
-        /* TODO Callback into the BB owner for something more detailed */
-        g_free(dev_id);
-        return g_strdup("a block device");
-    }
-}
-
 static const char *blk_root_get_name(BdrvChild *child)
 {
    return blk_name(child->opaque);
 }

-static void blk_vm_state_changed(void *opaque, int running, RunState state)
-{
-    Error *local_err = NULL;
-    BlockBackend *blk = opaque;
-
-    if (state == RUN_STATE_INMIGRATE) {
-        return;
-    }
-
-    qemu_del_vm_change_state_handler(blk->vmsh);
-    blk->vmsh = NULL;
-    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-    }
-}
-
-/*
- * Notifies the user of the BlockBackend that migration has completed. qdev
- * devices can tighten their permissions in response (specifically revoke
- * shared write permissions that we needed for storage migration).
- *
- * If an error is returned, the VM cannot be allowed to be resumed.
- */
-static void blk_root_activate(BdrvChild *child, Error **errp)
-{
-    BlockBackend *blk = child->opaque;
-    Error *local_err = NULL;
-
-    if (!blk->disable_perm) {
-        return;
-    }
-
-    blk->disable_perm = false;
-
-    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk->disable_perm = true;
-        return;
-    }
-
-    if (runstate_check(RUN_STATE_INMIGRATE)) {
-        /* Activation can happen when migration process is still active, for
-         * example when nbd_server_add is called during non-shared storage
-         * migration. Defer the shared_perm update to migration completion. */
-        if (!blk->vmsh) {
-            blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
-                                                         blk);
-        }
-        return;
-    }
-
-    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk->disable_perm = true;
-        return;
-    }
-}
-
-void blk_set_force_allow_inactivate(BlockBackend *blk)
-{
-    blk->force_allow_inactivate = true;
-}
-
-static bool blk_can_inactivate(BlockBackend *blk)
-{
-    /* If it is a guest device, inactivate is ok. */
-    if (blk->dev || blk_name(blk)[0]) {
-        return true;
-    }
-
-    /* Inactivating means no more writes to the image can be done,
-     * even if those writes would be changes invisible to the
-     * guest.  For block job BBs that satisfy this, we can just allow
-     * it.  This is the case for mirror job source, which is required
-     * by libvirt non-shared block migration. */
-    if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
-        return true;
-    }
-
-    return blk->force_allow_inactivate;
-}
-
-static int blk_root_inactivate(BdrvChild *child)
-{
-    BlockBackend *blk = child->opaque;
-
-    if (blk->disable_perm) {
-        return 0;
-    }
-
-    if (!blk_can_inactivate(blk)) {
-        return -EPERM;
-    }
-
-    blk->disable_perm = true;
-    if (blk->root) {
-        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
-    }
-
-    return 0;
-}
-
 static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,
-    .get_parent_desc    = blk_root_get_parent_desc,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
-
-    .activate           = blk_root_activate,
-    .inactivate         = blk_root_inactivate,
 };

 /*
 * Create a new BlockBackend with a reference count of one.
- *
- * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
- * to request for a block driver node that is attached to this BlockBackend.
- * @shared_perm is a bitmask which describes which permissions may be granted
- * to other users of the attached node.
- * Both sets of permissions can be changed later using blk_set_perm().
- *
+ * Store an error through @errp on failure, unless it's null.
 * Return the new BlockBackend on success, null on failure.
 */
-BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
+BlockBackend *blk_new(void)
 {
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
-    blk->perm = perm;
-    blk->shared_perm = shared_perm;
    blk_set_enable_write_cache(blk, true);

-    block_acct_init(&blk->stats);
+    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+    qemu_co_queue_init(&blk->public.throttled_reqs[1]);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);
@@ -299,38 +155,15 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
    BlockBackend *blk;
    BlockDriverState *bs;
-    uint64_t perm;

-    /* blk_new_open() is mainly used in .bdrv_create implementations and the
-     * tools where sharing isn't a concern because the BDS stays private, so we
-     * just request permission according to the flags.
-     *
-     * The exceptions are xen_disk and blockdev_init(); in these cases, the
-     * caller of blk_new_open() doesn't make use of the permissions, but they
-     * shouldn't hurt either. We can still share everything here because the
-     * guest devices will add their own blockers if they can't share. */
-    perm = BLK_PERM_CONSISTENT_READ;
-    if (flags & BDRV_O_RDWR) {
-        perm |= BLK_PERM_WRITE;
-    }
-    if (flags & BDRV_O_RESIZE) {
-        perm |= BLK_PERM_RESIZE;
-    }
-
-    blk = blk_new(perm, BLK_PERM_ALL);
+    blk = blk_new();
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       perm, BLK_PERM_ALL, blk, errp);
-    if (!blk->root) {
-        bdrv_unref(bs);
-        blk_unref(blk);
-        return NULL;
-    }
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    return blk;
 }
@@ -340,16 +173,9 @@ static void blk_delete(BlockBackend *blk)
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
-    if (blk->public.throttle_group_member.throttle_state) {
-        blk_io_limits_disable(blk);
-    }
    if (blk->root) {
        blk_remove_bs(blk);
    }
-    if (blk->vmsh) {
-        qemu_del_vm_change_state_handler(blk->vmsh);
-        blk->vmsh = NULL;
-    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    QTAILQ_REMOVE(&block_backends, blk, link);
@@ -401,7 +227,7 @@ void blk_unref(BlockBackend *blk)
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
-BlockBackend *blk_all_next(BlockBackend *blk)
+static BlockBackend *blk_all_next(BlockBackend *blk)
 {
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
@@ -533,7 +359,7 @@ void monitor_remove_blk(BlockBackend *blk)
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
-const char *blk_name(const BlockBackend *blk)
+const char *blk_name(BlockBackend *blk)
 {
    return blk->name ?: "";
 }
@@ -655,12 +481,9 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
 */
 void blk_remove_bs(BlockBackend *blk)
 {
-    ThrottleTimers *tt;
-
    notifier_list_notify(&blk->remove_bs_notifiers, blk);
-    if (blk->public.throttle_group_member.throttle_state) {
-        tt = &blk->public.throttle_group_member.throttle_timers;
-        throttle_timers_detach_aio_context(tt);
+    if (blk->public.throttle_state) {
+        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
    }

    blk_update_root_state(blk);
@@ -672,50 +495,16 @@ void blk_remove_bs(BlockBackend *blk)
 /*
 * Associates a new BlockDriverState with @blk.
 */
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 {
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       blk->perm, blk->shared_perm, blk, errp);
-    if (blk->root == NULL) {
-        return -EPERM;
-    }
    bdrv_ref(bs);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
-    if (blk->public.throttle_group_member.throttle_state) {
+    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
-            &blk->public.throttle_group_member.throttle_timers,
-            bdrv_get_aio_context(bs));
+            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
-
-    return 0;
-}
-
-/*
- * Sets the permission bitmasks that the user of the BlockBackend needs.
- */
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp)
-{
-    int ret;
-
-    if (blk->root && !blk->disable_perm) {
-        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    blk->perm = perm;
-    blk->shared_perm = shared_perm;
-
-    return 0;
-}
-
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
-{
-    *perm = blk->perm;
-    *shared_perm = blk->shared_perm;
 }

 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -723,19 +512,10 @@ static int blk_do_attach_dev(BlockBackend *blk, void *dev)
    if (blk->dev) {
        return -EBUSY;
    }
-
-    /* While migration is still incoming, we don't need to apply the
-     * permissions of guest device BlockBackends. We might still have a block
-     * job or NBD server writing to the image for storage migration. */
-    if (runstate_check(RUN_STATE_INMIGRATE)) {
-        blk->disable_perm = true;
-    }
-
    blk_ref(blk);
    blk->dev = dev;
    blk->legacy_dev = false;
    blk_iostatus_reset(blk);
-
    return 0;
 }

@@ -773,7 +553,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
-    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(blk);
 }

@@ -788,7 +567,7 @@ void *blk_get_attached_dev(BlockBackend *blk)

 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
 * device attached to the BlockBackend. */
-char *blk_get_attached_dev_id(BlockBackend *blk)
+static char *blk_get_attached_dev_id(BlockBackend *blk)
 {
    DeviceState *dev;

@@ -831,44 +610,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
 {
    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
-     * it that way, so we can assume blk->dev, if present, is a DeviceState if
-     * blk->dev_ops is set. Non-device users may use dev_ops without device. */
+     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
+     * is set. */
    assert(!blk->legacy_dev);

    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
-
-    /* Are we currently quiesced? Should we enforce this right now? */
-    if (blk->quiesce_counter && ops->drained_begin) {
-        ops->drained_begin(opaque);
-    }
 }

 /*
 * Notify @blk's attached device model of media change.
- *
- * If @load is true, notify of media load. This action can fail, meaning that
- * the medium cannot be loaded. @errp is set then.
- *
- * If @load is false, notify of media eject. This can never fail.
- *
+ * If @load is true, notify of media load.
+ * Else, notify of media eject.
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load)
 {
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;
-        Error *local_err = NULL;

        assert(!blk->legacy_dev);

        tray_was_open = blk_dev_is_tray_open(blk);
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
-        if (local_err) {
-            assert(load == true);
-            error_propagate(errp, local_err);
-            return;
-        }
+        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
@@ -882,7 +646,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)

 static void blk_root_change_media(BdrvChild *child, bool load)
 {
-    blk_dev_change_media_cb(child->opaque, load, NULL);
+    blk_dev_change_media_cb(child->opaque, load);
 }

 /*
@@ -1047,9 +811,8 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
-    if (blk->public.throttle_group_member.throttle_state) {
-        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, false);
+    if (blk->public.throttle_state) {
+        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
@@ -1072,10 +835,10 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
    }

    bdrv_inc_in_flight(bs);
+
    /* throttling disk I/O */
-    if (blk->public.throttle_group_member.throttle_state) {
-        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, true);
+    if (blk->public.throttle_state) {
+        throttle_group_co_io_limits_intercept(blk, bytes, true);
    }

    if (!blk->enable_write_cache) {
@@ -1117,6 +880,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
 {
    QEMUIOVector qiov;
    struct iovec iov;
+    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
@@ -1133,14 +897,9 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
        .ret    = NOT_DONE,
    };

-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        co_entry(&rwco);
-    } else {
-        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
-        bdrv_coroutine_enter(blk_bs(blk), co);
-        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
-    }
+    co = qemu_coroutine_create(co_entry, &rwco);
+    qemu_coroutine_enter(co);
+    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);

    return rwco.ret;
 }
@@ -1162,9 +921,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
 }

 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                      int bytes, BdrvRequestFlags flags)
+                      int count, BdrvRequestFlags flags)
 {
-    return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
+    return blk_prw(blk, offset, NULL, count, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
 }

@@ -1220,6 +979,7 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 static void blk_aio_complete_bh(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
+
    assert(acb->has_returned);
    blk_aio_complete(acb);
 }
@@ -1245,7 +1005,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
-    bdrv_coroutine_enter(blk_bs(blk), co);
+    qemu_coroutine_enter(co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
@@ -1374,10 +1134,10 @@ static void blk_aio_pdiscard_entry(void *opaque)
 }

 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
-                             int64_t offset, int bytes,
+                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
 {
-    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
+    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
 }

@@ -1437,14 +1197,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
 }

-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

-    return bdrv_co_pdiscard(blk_bs(blk), offset, bytes);
+    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
 }

 int blk_co_flush(BlockBackend *blk)
@@ -1744,14 +1504,16 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
 {
    BlockDriverState *bs = blk_bs(blk);
-    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;

    if (bs) {
-        if (tgm->throttle_state) {
-            throttle_group_detach_aio_context(tgm);
-            throttle_group_attach_aio_context(tgm, new_context);
+        if (blk->public.throttle_state) {
+            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
        }
        bdrv_set_aio_context(bs, new_context);
+        if (blk->public.throttle_state) {
+            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+                                               new_context);
+        }
    }
 }

@@ -1821,9 +1583,9 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
 }

 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                      int bytes, BdrvRequestFlags flags)
+                                      int count, BdrvRequestFlags flags)
 {
-    return blk_co_pwritev(blk, offset, bytes, NULL,
+    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
 }

@@ -1834,15 +1596,13 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
-                 Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset)
 {
    if (!blk_is_available(blk)) {
-        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset, prealloc, errp);
+    return bdrv_truncate(blk_bs(blk), offset);
 }

 static void blk_pdiscard_entry(void *opaque)
@@ -1851,9 +1611,9 @@ static void blk_pdiscard_entry(void *opaque)
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
 }

-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
 }

 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
@@ -1969,35 +1729,33 @@ int blk_commit_all(void)
 /* throttling disk I/O limits */
 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
 {
-    throttle_group_config(&blk->public.throttle_group_member, cfg);
+    throttle_group_config(blk, cfg);
 }

 void blk_io_limits_disable(BlockBackend *blk)
 {
-    assert(blk->public.throttle_group_member.throttle_state);
+    assert(blk->public.throttle_state);
    bdrv_drained_begin(blk_bs(blk));
-    throttle_group_unregister_tgm(&blk->public.throttle_group_member);
+    throttle_group_unregister_blk(blk);
    bdrv_drained_end(blk_bs(blk));
 }

 /* should be called before blk_set_io_limits if a limit is set */
 void blk_io_limits_enable(BlockBackend *blk, const char *group)
 {
-    assert(!blk->public.throttle_group_member.throttle_state);
-    throttle_group_register_tgm(&blk->public.throttle_group_member,
-                                group, blk_get_aio_context(blk));
+    assert(!blk->public.throttle_state);
+    throttle_group_register_blk(blk, group);
 }

 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
 {
    /* this BB is not part of any group */
-    if (!blk->public.throttle_group_member.throttle_state) {
+    if (!blk->public.throttle_state) {
        return;
    }

    /* this BB is a part of the same group than the one we want */
-    if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
-                group)) {
+    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
        return;
    }

@@ -2010,31 +1768,18 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;

-    if (++blk->quiesce_counter == 1) {
-        if (blk->dev_ops && blk->dev_ops->drained_begin) {
-            blk->dev_ops->drained_begin(blk->dev_opaque);
-        }
-    }
-
    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

-    if (atomic_fetch_inc(&blk->public.throttle_group_member.io_limits_disabled) == 0) {
-        throttle_group_restart_tgm(&blk->public.throttle_group_member);
+    if (blk->public.io_limits_disabled++ == 0) {
+        throttle_group_restart_blk(blk);
    }
 }

 static void blk_root_drained_end(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;
-    assert(blk->quiesce_counter);

-    assert(blk->public.throttle_group_member.io_limits_disabled);
-    atomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
-
-    if (--blk->quiesce_counter == 0) {
-        if (blk->dev_ops && blk->dev_ops->drained_end) {
-            blk->dev_ops->drained_end(blk->dev_opaque);
-        }
-    }
+    assert(blk->public.io_limits_disabled);
+    --blk->public.io_limits_disabled;
 }
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,16 +104,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
    struct bochs_header bochs;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
-    if (ret < 0) {
-        return ret;
-    }
+    bs->read_only = true; /* no write support yet */

    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
@@ -296,7 +287,6 @@ static BlockDriver bdrv_bochs = {
    .instance_size	= sizeof(BDRVBochsState),
    .bdrv_probe		= bochs_probe,
    .bdrv_open		= bochs_open,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = bochs_refresh_limits,
    .bdrv_co_preadv = bochs_co_preadv,
    .bdrv_close		= bochs_close,
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,16 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    uint32_t offsets_size, max_compressed_block_size = 1, i;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
-    }
+    bs->read_only = true;

    /* read header */
    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
@@ -293,7 +284,6 @@ static BlockDriver bdrv_cloop = {
    .instance_size  = sizeof(BDRVCloopState),
    .bdrv_probe     = cloop_probe,
    .bdrv_open      = cloop_open,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = cloop_refresh_limits,
    .bdrv_co_preadv = cloop_co_preadv,
    .bdrv_close     = cloop_close,
--- a/block/commit.c
+++ b/block/commit.c
@@ -13,7 +13,6 @@
 */

 #include "qemu/osdep.h"
-#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
@@ -37,7 +36,6 @@ typedef struct CommitBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *active;
-    BlockDriverState *commit_top_bs;
    BlockBackend *top;
    BlockBackend *base;
    BlockdevOnError on_error;
@@ -47,25 +45,26 @@ typedef struct CommitBlockJob {
 } CommitBlockJob;

 static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
-                                        int64_t offset, uint64_t bytes,
+                                        int64_t sector_num, int nb_sectors,
                                        void *buf)
 {
    int ret = 0;
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = buf,
-        .iov_len = bytes,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

-    assert(bytes < SIZE_MAX);
    qemu_iovec_init_external(&qiov, &iov, 1);

-    ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0);
+    ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
+                        qiov.size, &qiov, 0);
    if (ret < 0) {
        return ret;
    }

-    ret = blk_co_pwritev(base, offset, qiov.size, &qiov, 0);
+    ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
+                         qiov.size, &qiov, 0);
    if (ret < 0) {
        return ret;
    }
@@ -84,29 +83,12 @@ static void commit_complete(BlockJob *job, void *opaque)
    BlockDriverState *active = s->active;
    BlockDriverState *top = blk_bs(s->top);
    BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
+    BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
    int ret = data->ret;
-    bool remove_commit_top_bs = false;
-
-    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
-    bdrv_ref(top);
-    if (overlay_bs) {
-        bdrv_ref(overlay_bs);
-    }
-
-    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
-     * the normal backing chain can be restored. */
-    blk_unref(s->base);

    if (!block_job_is_cancelled(&s->common) && ret == 0) {
        /* success */
-        ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
-                                     s->backing_file_str);
-    } else if (overlay_bs) {
-        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
-         * after the failed/cancelled commit job is gone? If we already wrote
-         * something to base, the intermediate images aren't valid any more. */
-        remove_commit_top_bs = true;
+        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
    }

    /* restore base open flags here if appropriate (e.g., change the base back
@@ -120,41 +102,26 @@ static void commit_complete(BlockJob *job, void *opaque)
    }
    g_free(s->backing_file_str);
    blk_unref(s->top);
-
-    /* If there is more than one reference to the job (e.g. if called from
-     * block_job_finish_sync()), block_job_completed() won't free it and
-     * therefore the blockers on the intermediate nodes remain. This would
-     * cause bdrv_set_backing_hd() to fail. */
-    block_job_remove_all_bdrv(job);
-
+    blk_unref(s->base);
    block_job_completed(&s->common, ret);
    g_free(data);
-
-    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
-     * filter driver from the backing chain. Do this as the final step so that
-     * the 'consistent read' permission can be granted.  */
-    if (remove_commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
-    }
-
-    bdrv_unref(overlay_bs);
-    bdrv_unref(top);
 }

 static void coroutine_fn commit_run(void *opaque)
 {
    CommitBlockJob *s = opaque;
    CommitCompleteData *data;
-    int64_t offset;
+    int64_t sector_num, end;
    uint64_t delay_ns = 0;
    int ret = 0;
-    int64_t n = 0; /* bytes */
+    int n = 0;
    void *buf = NULL;
    int bytes_written = 0;
    int64_t base_len;

    ret = s->common.len = blk_getlength(s->top);

+
    if (s->common.len < 0) {
        goto out;
    }
@@ -165,15 +132,16 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
+        ret = blk_truncate(s->base, s->common.len);
        if (ret) {
            goto out;
        }
    }

+    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);

-    for (offset = 0; offset < s->common.len; offset += n) {
+    for (sector_num = 0; sector_num < end; sector_num += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
@@ -185,12 +153,14 @@ static void coroutine_fn commit_run(void *opaque)
        }
        /* Copy if allocated above the base */
        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
-                                      offset, COMMIT_BUFFER_SIZE, &n);
+                                      sector_num,
+                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                      &n);
        copy = (ret == 1);
-        trace_commit_one_iteration(s, offset, n, ret);
+        trace_commit_one_iteration(s, sector_num, n, ret);
        if (copy) {
-            ret = commit_populate(s->top, s->base, offset, n, buf);
-            bytes_written += n;
+            ret = commit_populate(s->top, s->base, sector_num, n, buf);
+            bytes_written += n * BDRV_SECTOR_SIZE;
        }
        if (ret < 0) {
            BlockErrorAction action =
@@ -203,7 +173,7 @@ static void coroutine_fn commit_run(void *opaque)
            }
        }
        /* Publish progress */
-        s->common.offset += n;
+        s->common.offset += n * BDRV_SECTOR_SIZE;

        if (copy && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, n);
@@ -228,7 +198,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }

 static const BlockJobDriver commit_job_driver = {
@@ -238,47 +208,10 @@ static const BlockJobDriver commit_job_driver = {
    .start         = commit_run,
 };

-static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
-{
-    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
-}
-
-static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
-{
-    bdrv_refresh_filename(bs->backing->bs);
-    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-            bs->backing->bs->filename);
-}
-
-static void bdrv_commit_top_close(BlockDriverState *bs)
-{
-}
-
-static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                       const BdrvChildRole *role,
-                                       uint64_t perm, uint64_t shared,
-                                       uint64_t *nperm, uint64_t *nshared)
-{
-    *nperm = 0;
-    *nshared = BLK_PERM_ALL;
-}
-
-/* Dummy node that provides consistent read to its users without requiring it
- * from its backing file and that allows writes on the backing file chain. */
-static BlockDriver bdrv_commit_top = {
-    .format_name                = "commit_top",
-    .bdrv_co_preadv             = bdrv_commit_top_preadv,
-    .bdrv_co_get_block_status   = bdrv_co_get_block_status_from_backing,
-    .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
-    .bdrv_close                 = bdrv_commit_top_close,
-    .bdrv_child_perm            = bdrv_commit_top_child_perm,
-};
-
 void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
-                  const char *filter_node_name, Error **errp)
+                  Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
@@ -286,9 +219,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    int orig_base_flags;
    BlockDriverState *iter;
    BlockDriverState *overlay_bs;
-    BlockDriverState *commit_top_bs = NULL;
    Error *local_err = NULL;
-    int ret;

    assert(top != bs);
    if (top == base) {
@@ -303,8 +234,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        return;
    }

-    s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
-                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    s = block_job_create(job_id, &commit_job_driver, bs, speed,
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -325,87 +256,30 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
        if (local_err != NULL) {
            error_propagate(errp, local_err);
-            goto fail;
+            block_job_unref(&s->common);
+            return;
        }
    }

-    /* Insert commit_top block node above top, so we can block consistent read
-     * on the backing chain below it */
-    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
-                                         errp);
-    if (commit_top_bs == NULL) {
-        goto fail;
-    }
-    if (!filter_node_name) {
-        commit_top_bs->implicit = true;
-    }
-    commit_top_bs->total_sectors = top->total_sectors;
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
-
-    bdrv_set_backing_hd(commit_top_bs, top, &local_err);
-    if (local_err) {
-        bdrv_unref(commit_top_bs);
-        commit_top_bs = NULL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-    bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
-    if (local_err) {
-        bdrv_unref(commit_top_bs);
-        commit_top_bs = NULL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    s->commit_top_bs = commit_top_bs;
-    bdrv_unref(commit_top_bs);

    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
-    for (iter = top; iter != base; iter = backing_bs(iter)) {
-        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
-         * at s->base (if writes are blocked for a node, they are also blocked
-         * for its backing file). The other options would be a second filter
-         * driver above s->base. */
-        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
-                                 errp);
-        if (ret < 0) {
-            goto fail;
-        }
+    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
+        block_job_add_bdrv(&s->common, iter);
    }
-
-    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
    /* overlay_bs must be blocked because it needs to be modified to
-     * update the backing image string. */
-    ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
-                             BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
-    if (ret < 0) {
-        goto fail;
+     * update the backing image string, but if it's the root node then
+     * don't block it again */
+    if (bs != overlay_bs) {
+        block_job_add_bdrv(&s->common, overlay_bs);
    }

-    s->base = blk_new(BLK_PERM_CONSISTENT_READ
-                      | BLK_PERM_WRITE
-                      | BLK_PERM_RESIZE,
-                      BLK_PERM_CONSISTENT_READ
-                      | BLK_PERM_GRAPH_MOD
-                      | BLK_PERM_WRITE_UNCHANGED);
-    ret = blk_insert_bs(s->base, base, errp);
-    if (ret < 0) {
-        goto fail;
-    }
+    s->base = blk_new();
+    blk_insert_bs(s->base, base);

-    /* Required permissions are already taken with block_job_add_bdrv() */
-    s->top = blk_new(0, BLK_PERM_ALL);
-    ret = blk_insert_bs(s->top, top, errp);
-    if (ret < 0) {
-        goto fail;
-    }
+    s->top = blk_new();
+    blk_insert_bs(s->top, top);

    s->active = bs;

@@ -418,37 +292,20 @@ void commit_start(const char *job_id, BlockDriverState *bs,

    trace_commit_start(bs, base, top, s);
    block_job_start(&s->common);
-    return;
-
-fail:
-    if (s->base) {
-        blk_unref(s->base);
-    }
-    if (s->top) {
-        blk_unref(s->top);
-    }
-    if (commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
-    }
-    block_job_early_fail(&s->common);
 }


-#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
+#define COMMIT_BUF_SECTORS 2048

 /* commit COW file into the raw image */
 int bdrv_commit(BlockDriverState *bs)
 {
    BlockBackend *src, *backing;
-    BlockDriverState *backing_file_bs = NULL;
-    BlockDriverState *commit_top_bs = NULL;
    BlockDriver *drv = bs->drv;
-    int64_t offset, length, backing_length;
-    int ro, open_flags;
-    int64_t n;
+    int64_t sector, total_sectors, length, backing_length;
+    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;
-    Error *local_err = NULL;

    if (!drv)
        return -ENOMEDIUM;
@@ -471,34 +328,11 @@ int bdrv_commit(BlockDriverState *bs)
        }
    }

-    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
-    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
+    src = blk_new();
+    blk_insert_bs(src, bs);

-    ret = blk_insert_bs(src, bs, &local_err);
-    if (ret < 0) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
-
-    /* Insert commit_top block node above backing, so we can write to it */
-    backing_file_bs = backing_bs(bs);
-
-    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
-                                         &local_err);
-    if (commit_top_bs == NULL) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
-    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
-
-    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
-    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
-
-    ret = blk_insert_bs(backing, backing_file_bs, &local_err);
-    if (ret < 0) {
-        error_report_err(local_err);
-        goto ro_cleanup;
-    }
+    backing = blk_new();
+    blk_insert_bs(backing, bs->backing->bs);

    length = blk_getlength(src);
    if (length < 0) {
@@ -516,33 +350,36 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
+        ret = blk_truncate(backing, length);
        if (ret < 0) {
-            error_report_err(local_err);
            goto ro_cleanup;
        }
    }

+    total_sectors = length >> BDRV_SECTOR_BITS;
+
    /* blk_try_blockalign() for src will choose an alignment that works for
     * backing as well, so no need to compare the alignment manually. */
-    buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
+    buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

-    for (offset = 0; offset < length; offset += n) {
-        ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
+    for (sector = 0; sector < total_sectors; sector += n) {
+        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
-            ret = blk_pread(src, offset, buf, n);
+            ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
+                            n * BDRV_SECTOR_SIZE);
            if (ret < 0) {
                goto ro_cleanup;
            }

-            ret = blk_pwrite(backing, offset, buf, n, 0);
+            ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
+                             n * BDRV_SECTOR_SIZE, 0);
            if (ret < 0) {
                goto ro_cleanup;
            }
@@ -567,12 +404,8 @@ int bdrv_commit(BlockDriverState *bs)
 ro_cleanup:
    qemu_vfree(buf);

-    blk_unref(backing);
-    if (backing_file_bs) {
-        bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
-    }
-    bdrv_unref(commit_top_bs);
    blk_unref(src);
+    blk_unref(backing);

    if (ro) {
        /* ignoring error return here */
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -24,10 +24,16 @@
 #include "sysemu/block-backend.h"
 #include "crypto/block.h"
 #include "qapi/opts-visitor.h"
-#include "qapi/qobject-input-visitor.h"
 #include "qapi-visit.h"
 #include "qapi/error.h"
-#include "block/crypto.h"
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"

 typedef struct BlockCrypto BlockCrypto;

@@ -53,8 +59,8 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
                                      size_t offset,
                                      uint8_t *buf,
                                      size_t buflen,
-                                      void *opaque,
-                                      Error **errp)
+                                      Error **errp,
+                                      void *opaque)
 {
    BlockDriverState *bs = opaque;
    ssize_t ret;
@@ -80,8 +86,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
                                       size_t offset,
                                       const uint8_t *buf,
                                       size_t buflen,
-                                       void *opaque,
-                                       Error **errp)
+                                       Error **errp,
+                                       void *opaque)
 {
    struct BlockCryptoCreateData *data = opaque;
    ssize_t ret;
@@ -97,8 +103,8 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,

 static ssize_t block_crypto_init_func(QCryptoBlock *block,
                                      size_t headerlen,
-                                      void *opaque,
-                                      Error **errp)
+                                      Error **errp,
+                                      void *opaque)
 {
    struct BlockCryptoCreateData *data = opaque;
    int ret;
@@ -129,7 +135,11 @@ static QemuOptsList block_crypto_runtime_opts_luks = {
    .name = "crypto",
    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
    .desc = {
-        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret that provides the encryption key",
+        },
        { /* end of list */ }
    },
 };
@@ -144,21 +154,49 @@ static QemuOptsList block_crypto_create_opts_luks = {
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
-        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret that provides the encryption key",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption cipher algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption cipher mode",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of IV generator algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of IV generator hash algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption hash algorithm",
+        },
+        {
+            .name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Time to spend in PBKDF in milliseconds",
+        },
        { /* end of list */ }
    },
 };


-QCryptoBlockOpenOptions *
+static QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QCryptoBlockFormat format,
-                            QDict *opts,
+                            QemuOpts *opts,
                            Error **errp)
 {
    Visitor *v;
@@ -168,7 +206,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockOpenOptions, 1);
    ret->format = format;

-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = opts_visitor_new(opts);

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -181,11 +219,6 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

-    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
-        visit_type_QCryptoBlockOptionsQCow_members(
-            v, &ret->u.qcow, &local_err);
-        break;
-
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -207,9 +240,9 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
 }


-QCryptoBlockCreateOptions *
+static QCryptoBlockCreateOptions *
 block_crypto_create_opts_init(QCryptoBlockFormat format,
-                              QDict *opts,
+                              QemuOpts *opts,
                              Error **errp)
 {
    Visitor *v;
@@ -219,7 +252,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockCreateOptions, 1);
    ret->format = format;

-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = opts_visitor_new(opts);

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -232,11 +265,6 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

-    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
-        visit_type_QCryptoBlockOptionsQCow_members(
-            v, &ret->u.qcow, &local_err);
-        break;
-
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -271,13 +299,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    int ret = -EINVAL;
    QCryptoBlockOpenOptions *open_opts = NULL;
    unsigned int cflags = 0;
-    QDict *cryptoopts = NULL;
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }

    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -286,9 +307,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
        goto cleanup;
    }

-    cryptoopts = qemu_opts_to_qdict(opts, NULL);
-
-    open_opts = block_crypto_open_opts_init(format, cryptoopts, errp);
+    open_opts = block_crypto_open_opts_init(format, opts, errp);
    if (!open_opts) {
        goto cleanup;
    }
@@ -296,7 +315,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    if (flags & BDRV_O_NO_IO) {
        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
    }
-    crypto->block = qcrypto_block_open(open_opts, NULL,
+    crypto->block = qcrypto_block_open(open_opts,
                                       block_crypto_read_func,
                                       bs,
                                       cflags,
@@ -308,10 +327,10 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    }

    bs->encrypted = true;
+    bs->valid_key = true;

    ret = 0;
 cleanup:
-    QDECREF(cryptoopts);
    qapi_free_QCryptoBlockOpenOptions(open_opts);
    return ret;
 }
@@ -331,16 +350,13 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
        .opts = opts,
        .filename = filename,
    };
-    QDict *cryptoopts;

-    cryptoopts = qemu_opts_to_qdict(opts, NULL);
-
-    create_opts = block_crypto_create_opts_init(format, cryptoopts, errp);
+    create_opts = block_crypto_create_opts_init(format, opts, errp);
    if (!create_opts) {
        return -1;
    }

-    crypto = qcrypto_block_create(create_opts, NULL,
+    crypto = qcrypto_block_create(create_opts,
                                  block_crypto_init_func,
                                  block_crypto_write_func,
                                  &data,
@@ -353,15 +369,13 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,

    ret = 0;
 cleanup:
-    QDECREF(cryptoopts);
    qcrypto_block_free(crypto);
    blk_unref(data.blk);
    qapi_free_QCryptoBlockCreateOptions(create_opts);
    return ret;
 }

-static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
-                                 PreallocMode prealloc, Error **errp)
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
 {
    BlockCrypto *crypto = bs->opaque;
    size_t payload_offset =
@@ -369,7 +383,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset, prealloc, errp);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static void block_crypto_close(BlockDriverState *bs)
@@ -608,7 +622,6 @@ BlockDriver bdrv_crypto_luks = {
    .bdrv_probe         = block_crypto_probe_luks,
    .bdrv_open          = block_crypto_open_luks,
    .bdrv_close         = block_crypto_close,
-    .bdrv_child_perm    = bdrv_format_default_perms,
    .bdrv_create        = block_crypto_create_luks,
    .bdrv_truncate      = block_crypto_truncate,
    .create_opts        = &block_crypto_create_opts_luks,
--- a/block/crypto.h
+++ b/block/crypto.h
@@ -1,101 +0,0 @@
-/*
- * QEMU block full disk encryption
- *
- * Copyright (c) 2015-2017 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#ifndef BLOCK_CRYPTO_H__
-#define BLOCK_CRYPTO_H__
-
-#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr)                \
-    {                                                                   \
-        .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,                \
-        .type = QEMU_OPT_STRING,                                        \
-        .help = helpstr,                                                \
-    }
-
-#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret"
-
-#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix)                    \
-    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
-        "ID of the secret that provides the AES encryption key")
-
-#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix)                    \
-    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
-        "ID of the secret that provides the keyslot passphrase")
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix)       \
-    {                                                      \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,   \
-        .type = QEMU_OPT_STRING,                           \
-        .help = "Name of encryption cipher algorithm",     \
-    }
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix)      \
-    {                                                      \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,  \
-        .type = QEMU_OPT_STRING,                           \
-        .help = "Name of encryption cipher mode",          \
-    }
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix)     \
-    {                                                   \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \
-        .type = QEMU_OPT_STRING,                        \
-        .help = "Name of IV generator algorithm",       \
-    }
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix)        \
-    {                                                           \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,    \
-        .type = QEMU_OPT_STRING,                                \
-        .help = "Name of IV generator hash algorithm",          \
-    }
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix)       \
-    {                                                    \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,   \
-        .type = QEMU_OPT_STRING,                         \
-        .help = "Name of encryption hash algorithm",     \
-    }
-
-#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix)           \
-    {                                                         \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,       \
-        .type = QEMU_OPT_NUMBER,                              \
-        .help = "Time to spend in PBKDF in milliseconds",     \
-    }
-
-QCryptoBlockCreateOptions *
-block_crypto_create_opts_init(QCryptoBlockFormat format,
-                              QDict *opts,
-                              Error **errp);
-
-QCryptoBlockOpenOptions *
-block_crypto_open_opts_init(QCryptoBlockFormat format,
-                            QDict *opts,
-                            Error **errp);
-
-#endif /* BLOCK_CRYPTO_H__ */
--- a/block/curl.c
+++ b/block/curl.c
@@ -76,12 +76,15 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000

+#define FIND_RET_NONE   0
+#define FIND_RET_OK     1
+#define FIND_RET_WAIT   2
+
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
 #define CURL_BLOCK_OPT_TIMEOUT "timeout"
 #define CURL_BLOCK_OPT_COOKIE    "cookie"
-#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret"
 #define CURL_BLOCK_OPT_USERNAME "username"
 #define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
 #define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
@@ -90,17 +93,14 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    Coroutine *co;
+    BlockAIOCB common;
    QEMUIOVector *qiov;

-    uint64_t offset;
-    uint64_t bytes;
-    int ret;
+    int64_t sector_num;
+    int nb_sectors;

    size_t start;
    size_t end;
-
-    QSIMPLEQ_ENTRY(CURLAIOCB) next;
 } CURLAIOCB;

 typedef struct CURLSocket {
@@ -115,7 +115,7 @@ typedef struct CURLState
    CURL *curl;
    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
-    uint64_t buf_start;
+    size_t buf_start;
    size_t buf_off;
    size_t buf_len;
    char range[128];
@@ -126,7 +126,7 @@ typedef struct CURLState
 typedef struct BDRVCURLState {
    CURLM *multi;
    QEMUTimer timer;
-    uint64_t len;
+    size_t len;
    CURLState states[CURL_NUM_STATES];
    char *url;
    size_t readahead_size;
@@ -135,8 +135,6 @@ typedef struct BDRVCURLState {
    char *cookie;
    bool accept_range;
    AioContext *aio_context;
-    QemuMutex mutex;
-    QSIMPLEQ_HEAD(, CURLAIOCB) free_state_waitq;
    char *username;
    char *password;
    char *proxyusername;
@@ -148,7 +146,6 @@ static void curl_multi_do(void *arg);
 static void curl_multi_read(void *arg);

 #ifdef NEED_CURL_TIMER_CALLBACK
-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -165,7 +162,6 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 }
 #endif

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *userp, void *sp)
 {
@@ -196,26 +192,25 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, NULL, state);
+                               curl_multi_read, NULL, state);
            break;
        case CURL_POLL_OUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, NULL, state);
+                               NULL, curl_multi_do, state);
            break;
        case CURL_POLL_INOUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, NULL, state);
+                               curl_multi_read, curl_multi_do, state);
            break;
        case CURL_POLL_REMOVE:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL, NULL);
+                               NULL, NULL, NULL);
            break;
    }

    return 0;
 }

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    BDRVCURLState *s = opaque;
@@ -230,7 +225,6 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    return realsize;
 }

-/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
    CURLState *s = ((CURLState*)opaque);
@@ -258,7 +252,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;

        if ((s->buf_off >= acb->end)) {
-            size_t request_length = acb->bytes;
+            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;

            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
@@ -269,11 +263,9 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
                                  request_length - offset);
            }

-            acb->ret = 0;
+            acb->common.cb(acb->common.opaque, 0);
+            qemu_aio_unref(acb);
            s->acb[i] = NULL;
-            qemu_mutex_unlock(&s->s->mutex);
-            aio_co_wake(acb->co);
-            qemu_mutex_lock(&s->s->mutex);
        }
    }

@@ -282,19 +274,18 @@ read_end:
    return size * nmemb;
 }

-/* Called with s->mutex held.  */
-static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
-                          CURLAIOCB *acb)
+static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
+                         CURLAIOCB *acb)
 {
    int i;
-    uint64_t end = start + len;
-    uint64_t clamped_end = MIN(end, s->len);
-    uint64_t clamped_len = clamped_end - start;
+    size_t end = start + len;
+    size_t clamped_end = MIN(end, s->len);
+    size_t clamped_len = clamped_end - start;

    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
-        uint64_t buf_end = (state->buf_start + state->buf_off);
-        uint64_t buf_fend = (state->buf_start + state->buf_len);
+        size_t buf_end = (state->buf_start + state->buf_off);
+        size_t buf_fend = (state->buf_start + state->buf_len);

        if (!state->orig_buf)
            continue;
@@ -313,8 +304,9 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
            if (clamped_len < len) {
                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
            }
-            acb->ret = 0;
-            return true;
+            acb->common.cb(acb->common.opaque, 0);
+
+            return FIND_RET_OK;
        }

        // Wait for unfinished chunks
@@ -332,16 +324,15 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
                    state->acb[j] = acb;
-                    return true;
+                    return FIND_RET_WAIT;
                }
            }
        }
    }

-    return false;
+    return FIND_RET_NONE;
 }

-/* Called with s->mutex held.  */
 static void curl_multi_check_completion(BDRVCURLState *s)
 {
    int msgs_in_queue;
@@ -383,11 +374,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                        continue;
                    }

-                    acb->ret = -EIO;
+                    acb->common.cb(acb->common.opaque, -EPROTO);
+                    qemu_aio_unref(acb);
                    state->acb[i] = NULL;
-                    qemu_mutex_unlock(&s->mutex);
-                    aio_co_wake(acb->co);
-                    qemu_mutex_lock(&s->mutex);
                }
            }

@@ -397,9 +386,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
    }
 }

-/* Called with s->mutex held.  */
-static void curl_multi_do_locked(CURLState *s)
+static void curl_multi_do(void *arg)
 {
+    CURLState *s = (CURLState *)arg;
    CURLSocket *socket, *next_socket;
    int running;
    int r;
@@ -417,23 +406,12 @@ static void curl_multi_do_locked(CURLState *s)
    }
 }

-static void curl_multi_do(void *arg)
-{
-    CURLState *s = (CURLState *)arg;
-
-    qemu_mutex_lock(&s->s->mutex);
-    curl_multi_do_locked(s);
-    qemu_mutex_unlock(&s->s->mutex);
-}
-
 static void curl_multi_read(void *arg)
 {
    CURLState *s = (CURLState *)arg;

-    qemu_mutex_lock(&s->s->mutex);
-    curl_multi_do_locked(s);
+    curl_multi_do(arg);
    curl_multi_check_completion(s->s);
-    qemu_mutex_unlock(&s->s->mutex);
 }

 static void curl_multi_timeout_do(void *arg)
@@ -446,38 +424,40 @@ static void curl_multi_timeout_do(void *arg)
        return;
    }

-    qemu_mutex_lock(&s->mutex);
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);

    curl_multi_check_completion(s);
-    qemu_mutex_unlock(&s->mutex);
 #else
    abort();
 #endif
 }

-/* Called with s->mutex held.  */
-static CURLState *curl_find_state(BDRVCURLState *s)
+static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 {
    CURLState *state = NULL;
-    int i;
+    int i, j;
+
+    do {
+        for (i=0; i<CURL_NUM_STATES; i++) {
+            for (j=0; j<CURL_NUM_ACB; j++)
+                if (s->states[i].acb[j])
+                    continue;
+            if (s->states[i].in_use)
+                continue;

-    for (i = 0; i < CURL_NUM_STATES; i++) {
-        if (!s->states[i].in_use) {
            state = &s->states[i];
            state->in_use = 1;
            break;
        }
-    }
-    return state;
-}
+        if (!state) {
+            aio_poll(bdrv_get_aio_context(bs), true);
+        }
+    } while(!state);

-static int curl_init_state(BDRVCURLState *s, CURLState *state)
-{
    if (!state->curl) {
        state->curl = curl_easy_init();
        if (!state->curl) {
-            return -EIO;
+            return NULL;
        }
        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
@@ -530,18 +510,11 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
    QLIST_INIT(&state->sockets);
    state->s = s;

-    return 0;
+    return state;
 }

-/* Called with s->mutex held.  */
 static void curl_clean_state(CURLState *s)
 {
-    CURLAIOCB *next;
-    int j;
-    for (j = 0; j < CURL_NUM_ACB; j++) {
-        assert(!s->acb[j]);
-    }
-
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);

@@ -553,20 +526,12 @@ static void curl_clean_state(CURLState *s)
    }

    s->in_use = 0;
-
-    next = QSIMPLEQ_FIRST(&s->s->free_state_waitq);
-    if (next) {
-        QSIMPLEQ_REMOVE_HEAD(&s->s->free_state_waitq, next);
-        qemu_mutex_unlock(&s->s->mutex);
-        aio_co_wake(next->co);
-        qemu_mutex_lock(&s->s->mutex);
-    }
 }

 static void curl_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    qdict_put_str(options, CURL_BLOCK_OPT_URL, filename);
+    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
 }

 static void curl_detach_aio_context(BlockDriverState *bs)
@@ -574,7 +539,6 @@ static void curl_detach_aio_context(BlockDriverState *bs)
    BDRVCURLState *s = bs->opaque;
    int i;

-    qemu_mutex_lock(&s->mutex);
    for (i = 0; i < CURL_NUM_STATES; i++) {
        if (s->states[i].in_use) {
            curl_clean_state(&s->states[i]);
@@ -590,7 +554,6 @@ static void curl_detach_aio_context(BlockDriverState *bs)
        curl_multi_cleanup(s->multi);
        s->multi = NULL;
    }
-    qemu_mutex_unlock(&s->mutex);

    timer_del(&s->timer);
 }
@@ -643,11 +606,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Pass the cookie or list of cookies with each request"
        },
-        {
-            .name = CURL_BLOCK_OPT_COOKIE_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as cookie passed with each request"
-        },
        {
            .name = CURL_BLOCK_OPT_USERNAME,
            .type = QEMU_OPT_STRING,
@@ -682,10 +640,8 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    const char *file;
    const char *cookie;
-    const char *cookie_secret;
    double d;
    const char *secretid;
-    const char *protocol_delimiter;

    static int inited = 0;

@@ -694,7 +650,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        return -EROFS;
    }

-    qemu_mutex_init(&s->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -720,22 +675,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);

    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET);
-
-    if (cookie && cookie_secret) {
-        error_setg(errp,
-                   "curl driver cannot handle both cookie and cookie secret");
-        goto out_noclean;
-    }
-
-    if (cookie_secret) {
-        s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp);
-        if (!s->cookie) {
-            goto out_noclean;
-        }
-    } else {
-        s->cookie = g_strdup(cookie);
-    }
+    s->cookie = g_strdup(cookie);

    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
    if (file == NULL) {
@@ -743,15 +683,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

-    if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
-        !strstart(protocol_delimiter, "://", NULL))
-    {
-        error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
-                   "start with '%s://')", bs->drv->protocol_name, file,
-                   bs->drv->protocol_name);
-        goto out_noclean;
-    }
-
    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);

@@ -778,22 +709,14 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }

    DPRINTF("CURL: Opening %s\n", file);
-    QSIMPLEQ_INIT(&s->free_state_waitq);
    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
-    qemu_mutex_lock(&s->mutex);
-    state = curl_find_state(s);
-    qemu_mutex_unlock(&s->mutex);
-    if (!state) {
+    state = curl_init_state(bs, s);
+    if (!state)
        goto out_noclean;
-    }

    // Get file size

-    if (curl_init_state(s, state) < 0) {
-        goto out;
-    }
-
    s->accept_range = false;
    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
@@ -821,7 +744,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    }
 #endif

-    s->len = d;
+    s->len = (size_t)d;

    if ((!strncasecmp(s->url, "http://", strlen("http://"))
        || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -830,11 +753,9 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
                "Server does not support 'range' (byte ranges).");
        goto out;
    }
-    DPRINTF("CURL: Size = %" PRIu64 "\n", s->len);
+    DPRINTF("CURL: Size = %zd\n", s->len);

-    qemu_mutex_lock(&s->mutex);
    curl_clean_state(state);
-    qemu_mutex_unlock(&s->mutex);
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

@@ -848,51 +769,50 @@ out:
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
-    qemu_mutex_destroy(&s->mutex);
    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
    return -EINVAL;
 }

-static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
+static const AIOCBInfo curl_aiocb_info = {
+    .aiocb_size         = sizeof(CURLAIOCB),
+};
+
+
+static void curl_readv_bh_cb(void *p)
 {
    CURLState *state;
    int running;

-    BDRVCURLState *s = bs->opaque;
+    CURLAIOCB *acb = p;
+    BDRVCURLState *s = acb->common.bs->opaque;

-    uint64_t start = acb->offset;
-    uint64_t end;
-
-    qemu_mutex_lock(&s->mutex);
+    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
+    size_t end;

    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    if (curl_find_buf(s, start, acb->bytes, acb)) {
-        goto out;
+    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
+        case FIND_RET_OK:
+            qemu_aio_unref(acb);
+            // fall through
+        case FIND_RET_WAIT:
+            return;
+        default:
+            break;
    }

    // No cache found, so let's start a new request
-    for (;;) {
-        state = curl_find_state(s);
-        if (state) {
-            break;
-        }
-        QSIMPLEQ_INSERT_TAIL(&s->free_state_waitq, acb, next);
-        qemu_mutex_unlock(&s->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&s->mutex);
-    }
-
-    if (curl_init_state(s, state) < 0) {
-        curl_clean_state(state);
-        acb->ret = -EIO;
-        goto out;
+    state = curl_init_state(acb->common.bs, s);
+    if (!state) {
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_unref(acb);
+        return;
    }

    acb->start = 0;
-    acb->end = MIN(acb->bytes, s->len - start);
+    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);

    state->buf_off = 0;
    g_free(state->orig_buf);
@@ -902,41 +822,37 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
-        acb->ret = -ENOMEM;
-        goto out;
+        acb->common.cb(acb->common.opaque, -ENOMEM);
+        qemu_aio_unref(acb);
+        return;
    }
    state->acb[0] = acb;

-    snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
-    DPRINTF("CURL (AIO): Reading %" PRIu64 " at %" PRIu64 " (%s)\n",
-            acb->bytes, start, state->range);
+    snprintf(state->range, 127, "%zd-%zd", start, end);
+    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
+            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);

    /* Tell curl it needs to kick things off */
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
-
-out:
-    qemu_mutex_unlock(&s->mutex);
 }

-static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
 {
-    CURLAIOCB acb = {
-        .co = qemu_coroutine_self(),
-        .ret = -EINPROGRESS,
-        .qiov = qiov,
-        .offset = offset,
-        .bytes = bytes
-    };
+    CURLAIOCB *acb;

-    curl_setup_preadv(bs, &acb);
-    while (acb.ret == -EINPROGRESS) {
-        qemu_coroutine_yield();
-    }
-    return acb.ret;
+    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);
+
+    acb->qiov = qiov;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
+    return &acb->common;
 }

 static void curl_close(BlockDriverState *bs)
@@ -945,7 +861,6 @@ static void curl_close(BlockDriverState *bs)

    DPRINTF("CURL: Close\n");
    curl_detach_aio_context(bs);
-    qemu_mutex_destroy(&s->mutex);

    g_free(s->cookie);
    g_free(s->url);
@@ -967,7 +882,7 @@ static BlockDriver bdrv_http = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -983,7 +898,7 @@ static BlockDriver bdrv_https = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -999,7 +914,7 @@ static BlockDriver bdrv_ftp = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -1015,7 +930,7 @@ static BlockDriver bdrv_ftps = {
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,

-    .bdrv_co_preadv             = curl_co_preadv,
+    .bdrv_aio_readv             = curl_aio_readv,

    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
--- a/block/dictzip.c
+++ b/block/dictzip.c
@@ -0,0 +1,586 @@
+/*
+ * DictZip Block driver for dictzip enabled gzip files
+ *
+ * Use the "dictzip" tool from the "dictd" package to create gzip files that
+ * contain the extra DictZip headers.
+ *
+ * dictzip(1) is a compression program which creates compressed files in the
+ * gzip format (see RFC 1952). However, unlike gzip(1), dictzip(1) compresses
+ * the file in pieces and stores an index to the pieces in the gzip header.
+ * This allows random access to the file at the granularity of the compressed
+ * pieces (currently about 64kB) while maintaining good compression ratios
+ * (within 5% of the expected ratio for dictionary data).
+ * dictd(8) uses files stored in this format.
+ *
+ * For details on DictZip see http://dict.org/.
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include <zlib.h>
+
+// #define DEBUG
+
+#ifdef DEBUG
+#define dprintf(fmt, ...) do { printf("dzip: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) do { } while (0)
+#endif
+
+#define SECTOR_SIZE 512
+#define Z_STREAM_COUNT 4
+#define CACHE_COUNT 20
+
+/* magic values */
+
+#define GZ_MAGIC1     0x1f
+#define GZ_MAGIC2     0x8b
+#define DZ_MAGIC1      'R'
+#define DZ_MAGIC2      'A'
+
+#define GZ_FEXTRA     0x04      /* Optional field (random access index)    */
+#define GZ_FNAME      0x08      /* Original name                           */
+#define GZ_COMMENT    0x10      /* Zero-terminated, human-readable comment */
+#define GZ_FHCRC      0x02      /* Header CRC16                            */
+
+/* offsets */
+
+#define GZ_ID            0      /* GZ_MAGIC (16bit)                        */
+#define GZ_FLG           3      /* FLaGs (see above)                       */
+#define GZ_XLEN         10      /* eXtra LENgth (16bit)                    */
+#define GZ_SI           12      /* Subfield ID (16bit)                     */
+#define GZ_VERSION      16      /* Version for subfield format             */
+#define GZ_CHUNKSIZE    18      /* Chunk size (16bit)                      */
+#define GZ_CHUNKCNT     20      /* Number of chunks (16bit)                */
+#define GZ_RNDDATA      22      /* Random access data (16bit)              */
+
+#define GZ_99_CHUNKSIZE 18      /* Chunk size (32bit)                      */
+#define GZ_99_CHUNKCNT  22      /* Number of chunks (32bit)                */
+#define GZ_99_FILESIZE  26      /* Size of unpacked file (64bit)           */
+#define GZ_99_RNDDATA   34      /* Random access data (32bit)              */
+
+struct BDRVDictZipState;
+
+/*
+ * Per-request state for one asynchronous DictZip read. It is allocated in
+ * dictzip_aio_readv() and released by the completion callback that ends up
+ * running: dictzip_cache_cb() on a cache hit, dictzip_read_cb() otherwise.
+ */
+typedef struct DictZipAIOCB {
+    BlockAIOCB common;
+    struct BDRVDictZipState *s;
+    QEMUIOVector *qiov;          /* QIOV of the original request */
+    QEMUIOVector *qiov_gz;       /* QIOV of the gz subrequest */
+    QEMUBH *bh;                  /* BH for cache */
+    z_stream *zStream;           /* stream to use for decoding */
+    int zStream_id;              /* stream id of the above pointer */
+    size_t start;                /* offset into the uncompressed file */
+    size_t len;                  /* uncompressed bytes to read */
+    uint8_t *gzipped;            /* the gzipped data */
+    uint8_t *buf;                /* cached result */
+    size_t gz_len;               /* amount of gzip data */
+    size_t gz_start;             /* uncompressed starting point of gzip data */
+    uint64_t offset;             /* offset for "start" into the uncompressed chunk */
+    int chunks_len;              /* amount of uncompressed data in all gzip data */
+} DictZipAIOCB;
+
+/*
+ * One cache slot holding already inflated data. The lookup in
+ * dictzip_aio_readv() skips slots whose len is 0.
+ */
+typedef struct dict_cache {
+    size_t start;                /* uncompressed offset of the first byte in buf */
+    size_t len;                  /* size of buf in bytes; 0 marks an unused slot */
+    uint8_t *buf;                /* inflated data, released with g_free() */
+} DictCache;
+
+/*
+ * Per-image driver state, filled in by dictzip_open() and torn down by
+ * dictzip_close().
+ */
+typedef struct BDRVDictZipState {
+    BlockDriverState *hd;             /* underlying image, from bdrv_open() */
+    z_stream zStream[Z_STREAM_COUNT]; /* inflate contexts */
+    DictCache cache[CACHE_COUNT];     /* recently inflated chunk runs */
+    int cache_index;                  /* next cache slot to overwrite */
+    uint8_t  stream_in_use;           /* bit i set: zStream[i] is busy */
+    uint64_t chunk_len;               /* uncompressed chunk size (from header) */
+    uint32_t chunk_cnt;               /* number of chunks (from header) */
+    uint16_t *chunks;                 /* compressed chunk sizes as stored in the
+                                         file (little endian), 16-bit entries */
+    uint32_t *chunks32;               /* same buffer viewed as 32-bit entries;
+                                         only set for header version 99 */
+    uint64_t *offsets;                /* file offset of each compressed chunk */
+    int64_t file_len;                 /* uncompressed file length */
+} BDRVDictZipState;
+
+/*
+ * Clear the caller-visible fields of @zStream and initialise it for
+ * inflating with window bits -15 (raw deflate data, no zlib/gzip wrapper).
+ * Returns the result of inflateInit2().
+ */
+static int start_zStream(z_stream *zStream)
+{
+    /* no input queued yet */
+    zStream->next_in   = Z_NULL;
+    zStream->avail_in  = 0;
+
+    /* no output buffer yet */
+    zStream->next_out  = Z_NULL;
+    zStream->avail_out = 0;
+
+    /* let zlib use its default allocator */
+    zStream->zalloc    = Z_NULL;
+    zStream->zfree     = Z_NULL;
+    zStream->opaque    = Z_NULL;
+
+    return inflateInit2(zStream, -15);
+}
+
+/*
+ * Options understood by dictzip_open(). Only "filename" is defined; it names
+ * the image to open and may carry a "dzip://" or "dzip:" prefix.
+ */
+static QemuOptsList runtime_opts = {
+    .name = "dzip",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the dictzip file",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Open a DictZip image: open the underlying file named by the "filename"
+ * option (an optional "dzip://" or "dzip:" prefix is stripped), validate the
+ * gzip and DictZip headers, load the per-chunk compressed sizes and compute
+ * the file offset of every compressed chunk.
+ *
+ * Two header layouts are understood: version 1 (16-bit chunk size, chunk
+ * count and index entries) and version 99 (32-bit chunk size, chunk count
+ * and index entries plus a 64-bit uncompressed file size).
+ *
+ * Returns 0 on success and -EINVAL on any failure. On failure everything
+ * acquired here (zlib streams, underlying image, chunk index) is released.
+ */
+static int dictzip_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
+{
+    BDRVDictZipState *s = bs->opaque;
+    const char *err = "Unknown (read error?)";
+    uint8_t magic[2];
+    char buf[100];
+    uint8_t header_flags;
+    uint16_t chunk_len16;
+    uint16_t chunk_cnt16;
+    uint32_t chunk_len32;
+    uint16_t header_ver;
+    uint16_t tmp_short;
+    uint64_t offset;
+    int chunks_len;
+    int headerLength = GZ_XLEN - 1;
+    int rnd_offs;
+    int nr_streams = 0;          /* zlib streams that need inflateEnd() */
+    int i;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    const char *filename;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
+    if (!filename) {
+        /* strncmp() below must not be handed a NULL pointer */
+        err = "No filename given";
+        goto fail;
+    }
+
+    if (!strncmp(filename, "dzip://", 7))
+        filename += 7;
+    else if (!strncmp(filename, "dzip:", 5))
+        filename += 5;
+
+    s->hd = bdrv_open(filename, NULL, NULL, flags | BDRV_O_PROTOCOL, errp);
+    if (!s->hd) {
+        qemu_opts_del(opts);
+        return -EINVAL;
+    }
+
+    /* initialize zlib streams */
+    for (i = 0; i < Z_STREAM_COUNT; i++) {
+        if (start_zStream( &s->zStream[i] ) != Z_OK) {
+            /* msg can be NULL; never pass that to "%s" at the fail label */
+            if (s->zStream[i].msg) {
+                err = s->zStream[i].msg;
+            } else {
+                err = "zlib initialization failed";
+            }
+            goto fail;
+        }
+        nr_streams++;
+    }
+
+    /* gzip header */
+    if (bdrv_pread(s->hd->file, GZ_ID, &magic, sizeof(magic)) != sizeof(magic))
+        goto fail;
+
+    if (!((magic[0] == GZ_MAGIC1) && (magic[1] == GZ_MAGIC2))) {
+        err = "No gzip file";
+        goto fail;
+    }
+
+    /* dzip header */
+    if (bdrv_pread(s->hd->file, GZ_FLG, &header_flags, 1) != 1)
+        goto fail;
+
+    if (!(header_flags & GZ_FEXTRA)) {
+        err = "Not a dictzip file (wrong flags)";
+        goto fail;
+    }
+
+    /* extra length */
+    if (bdrv_pread(s->hd->file, GZ_XLEN, &tmp_short, 2) != 2)
+        goto fail;
+
+    /* headerLength is the offset of the last header byte consumed so far */
+    headerLength += le16_to_cpu(tmp_short) + 2;
+
+    /* DictZip magic */
+    if (bdrv_pread(s->hd->file, GZ_SI, &magic, 2) != 2)
+        goto fail;
+
+    if (magic[0] != DZ_MAGIC1 || magic[1] != DZ_MAGIC2) {
+        err = "Not a dictzip file (missing extra magic)";
+        goto fail;
+    }
+
+    /* DictZip version */
+    if (bdrv_pread(s->hd->file, GZ_VERSION, &header_ver, 2) != 2)
+        goto fail;
+
+    header_ver = le16_to_cpu(header_ver);
+
+    switch (header_ver) {
+        case 1: /* Normal DictZip */
+            /* chunk size */
+            if (bdrv_pread(s->hd->file, GZ_CHUNKSIZE, &chunk_len16, 2) != 2)
+                goto fail;
+
+            s->chunk_len = le16_to_cpu(chunk_len16);
+
+            /* chunk count */
+            if (bdrv_pread(s->hd->file, GZ_CHUNKCNT, &chunk_cnt16, 2) != 2)
+                goto fail;
+
+            s->chunk_cnt = le16_to_cpu(chunk_cnt16);
+            chunks_len = sizeof(uint16_t) * s->chunk_cnt;
+            rnd_offs = GZ_RNDDATA;
+            break;
+        case 99: /* Special Alex pigz version */
+            /* chunk size */
+            if (bdrv_pread(s->hd->file, GZ_99_CHUNKSIZE, &chunk_len32, 4) != 4)
+                goto fail;
+
+            /* convert first so the debug output shows the real value */
+            chunk_len32 = le32_to_cpu(chunk_len32);
+            dprintf("chunk len [%#x] = %u\n", GZ_99_CHUNKSIZE, chunk_len32);
+            s->chunk_len = chunk_len32;
+
+            /* chunk count */
+            if (bdrv_pread(s->hd->file, GZ_99_CHUNKCNT, &s->chunk_cnt, 4) != 4)
+                goto fail;
+
+            s->chunk_cnt = le32_to_cpu(s->chunk_cnt);
+
+            dprintf("chunk len | count = %"PRIu64" | %u\n", s->chunk_len, s->chunk_cnt);
+
+            /* the index size below is kept in an int; refuse counts that
+               would overflow it */
+            if (s->chunk_cnt > INT_MAX / sizeof(uint32_t)) {
+                err = "Invalid DictZip header (too many chunks)";
+                goto fail;
+            }
+
+            /* file size */
+            if (bdrv_pread(s->hd->file, GZ_99_FILESIZE, &s->file_len, 8) != 8)
+                goto fail;
+
+            s->file_len = le64_to_cpu(s->file_len);
+            chunks_len = sizeof(uint32_t) * s->chunk_cnt;
+            rnd_offs = GZ_99_RNDDATA;
+            break;
+        default:
+            err = "Invalid DictZip version";
+            goto fail;
+    }
+
+    /* the read path divides by chunk_len */
+    if (s->chunk_len == 0) {
+        err = "Invalid DictZip header (chunk size is zero)";
+        goto fail;
+    }
+
+    /* random access data */
+    s->chunks = g_malloc(chunks_len);
+    if (header_ver == 99)
+        s->chunks32 = (uint32_t *)s->chunks;
+
+    if (bdrv_pread(s->hd->file, rnd_offs, s->chunks, chunks_len) != chunks_len)
+        goto fail;
+
+    /* orig filename */
+    if (header_flags & GZ_FNAME) {
+        if (bdrv_pread(s->hd->file, headerLength + 1, buf, sizeof(buf)) != sizeof(buf))
+            goto fail;
+
+        /* without a terminator inside buf the field length is unknown */
+        if (!memchr(buf, '\0', sizeof(buf))) {
+            err = "Original file name in header is too long";
+            goto fail;
+        }
+
+        headerLength += strlen(buf) + 1;
+
+        dprintf("filename: %s\n", buf);
+    }
+
+    /* comment field */
+    if (header_flags & GZ_COMMENT) {
+        /* like the name field, the comment starts one byte past headerLength */
+        if (bdrv_pread(s->hd->file, headerLength + 1, buf, sizeof(buf)) != sizeof(buf))
+            goto fail;
+
+        if (!memchr(buf, '\0', sizeof(buf))) {
+            err = "Comment in header is too long";
+            goto fail;
+        }
+
+        headerLength += strlen(buf) + 1;
+
+        dprintf("comment: %s\n", buf);
+    }
+
+    if (header_flags & GZ_FHCRC)
+        headerLength += 2;
+
+    /* uncompressed file length*/
+    if (!s->file_len) {
+        uint32_t file_len;
+
+        /* not provided by the header: take it from the last 4 bytes */
+        if (bdrv_pread(s->hd->file, bdrv_getlength(s->hd) - 4, &file_len, 4) != 4)
+            goto fail;
+
+        s->file_len = le32_to_cpu(file_len);
+    }
+
+    /* compute offsets */
+    s->offsets = g_malloc(sizeof( *s->offsets ) * s->chunk_cnt);
+
+    for (offset = headerLength + 1, i = 0; i < s->chunk_cnt; i++) {
+        s->offsets[i] = offset;
+        switch (header_ver) {
+        case 1:
+            offset += le16_to_cpu(s->chunks[i]);
+            break;
+        case 99:
+            offset += le32_to_cpu(s->chunks32[i]);
+            break;
+        }
+
+        dprintf("chunk %#"PRIx64" - %#"PRIx64" = offset %#"PRIx64" -> %#"PRIx64"\n", i * s->chunk_len, (i+1) * s->chunk_len, s->offsets[i], offset);
+    }
+    qemu_opts_del(opts);
+
+    return 0;
+
+fail:
+    fprintf(stderr, "DictZip: Error opening file: %s\n", err);
+
+    /* release only the streams that were successfully initialised */
+    for (i = 0; i < nr_streams; i++) {
+        inflateEnd(&s->zStream[i]);
+    }
+
+    if (s->hd) {
+        bdrv_unref(s->hd);
+        s->hd = NULL;
+    }
+
+    g_free(s->chunks);
+    s->chunks = NULL;
+    s->chunks32 = NULL;
+
+    qemu_opts_del(opts);
+    return -EINVAL;
+}
+
+/*
+ * Bottom half scheduled by dictzip_aio_readv() on a cache hit: copy the
+ * cached bytes into the request's iovec, complete the request with status 0,
+ * then dispose of the bottom half and the AIOCB.
+ */
+static void dictzip_cache_cb(void *opaque)
+{
+    DictZipAIOCB *req = opaque;
+
+    qemu_iovec_from_buf(req->qiov, 0, req->buf, req->len);
+    req->common.cb(req->common.opaque, 0);
+
+    qemu_bh_delete(req->bh);
+    qemu_aio_unref(req);
+}
+
+/*
+ * Completion callback for the compressed read issued by dictzip_aio_readv().
+ *
+ * @opaque is the DictZipAIOCB of the request, @ret the status of the
+ * underlying read. On success the compressed data is inflated, the requested
+ * part is copied into the caller's iovec, the caller is notified with 0 and
+ * the inflated buffer is stored in the next cache slot. If the read or the
+ * inflate step fails, the caller is notified with a negative errno value and
+ * nothing is cached.
+ */
+static void dictzip_read_cb(void *opaque, int ret)
+{
+    DictZipAIOCB *acb = (DictZipAIOCB *)opaque;
+    struct BDRVDictZipState *s = acb->s;
+    uint8_t *buf;
+    DictCache *cache;
+    int r, i;
+
+    /* the underlying read failed, so there is nothing to inflate */
+    if (ret < 0) {
+        acb->common.cb(acb->common.opaque, ret);
+        g_free(acb->qiov_gz);
+        qemu_aio_unref(acb);
+        return;
+    }
+
+    /* zeroed, so a short inflate never exposes stale heap contents */
+    buf = g_malloc0(acb->chunks_len);
+
+    /* try to find zlib stream for decoding; start from a known value
+       instead of relying on how the AIOCB was allocated */
+    acb->zStream = NULL;
+    do {
+        for (i = 0; i < Z_STREAM_COUNT; i++) {
+            if (!(s->stream_in_use & (1 << i))) {
+                s->stream_in_use |= (1 << i);
+                acb->zStream_id = i;
+                acb->zStream = &s->zStream[i];
+                break;
+            }
+        }
+    } while(!acb->zStream);
+
+    /* sure, we could handle more streams, but this callback should be single
+       threaded and when it's not, we really want to know! */
+    assert(i == 0);
+
+    /* uncompress the chunk */
+    acb->zStream->next_in   = acb->gzipped;
+    acb->zStream->avail_in  = acb->gz_len;
+    acb->zStream->next_out  = buf;
+    acb->zStream->avail_out = acb->chunks_len;
+
+    r = inflate( acb->zStream,  Z_PARTIAL_FLUSH );
+    if ( (r != Z_OK) && (r != Z_STREAM_END) )
+        fprintf(stderr, "Error inflating: [%d] %s\n", r,
+                acb->zStream->msg ? acb->zStream->msg : "(no message)");
+
+    dprintf("inflating [%d] left: %d | %d bytes\n", r, acb->zStream->avail_in, acb->zStream->avail_out);
+
+    /* reset at end of stream, and also after an error so that the failed
+       state is not carried into the next request */
+    if ( r != Z_OK )
+        inflateReset(acb->zStream);
+
+    s->stream_in_use &= ~(1 << acb->zStream_id);
+
+    if ( (r != Z_OK) && (r != Z_STREAM_END) ) {
+        /* report the failure and do not cache the broken buffer */
+        acb->common.cb(acb->common.opaque, -EIO);
+        g_free(buf);
+        g_free(acb->qiov_gz);
+        qemu_aio_unref(acb);
+        return;
+    }
+
+    /* notify the caller */
+    qemu_iovec_from_buf(acb->qiov, 0, buf + acb->offset, acb->len);
+    acb->common.cb(acb->common.opaque, 0);
+
+    /* fill the cache, replacing the oldest slot */
+    cache = &s->cache[s->cache_index];
+    s->cache_index++;
+    if (s->cache_index == CACHE_COUNT)
+        s->cache_index = 0;
+
+    cache->len = 0;
+    g_free(cache->buf);
+    cache->start = acb->gz_start;
+    cache->buf = buf;
+    cache->len = acb->chunks_len;
+
+    /* free occupied resources; qiov_gz also holds the iovec and the
+       compressed data */
+    g_free(acb->qiov_gz);
+    qemu_aio_unref(acb);
+}
+
+/* Allocation descriptor handed to qemu_aio_get() for DictZipAIOCB requests */
+static const AIOCBInfo dictzip_aiocb_info = {
+    .aiocb_size         = sizeof(DictZipAIOCB),
+};
+
+/*
+ * Asynchronous read entry point.
+ *
+ * Reads @nb_sectors sectors starting at @sector_num of the uncompressed
+ * image into @qiov and reports completion through @cb(@opaque, status).
+ *
+ * If the requested range lies completely inside one cache slot, a bottom
+ * half is scheduled that serves the request from the cache
+ * (dictzip_cache_cb()). Otherwise the compressed chunks covering the range
+ * are read from the underlying image and inflated in dictzip_read_cb().
+ *
+ * Returns the AIOCB of this request on a cache hit, or whatever
+ * bdrv_aio_readv() returns for the compressed read.
+ */
+static BlockAIOCB *dictzip_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BDRVDictZipState *s = bs->opaque;
+    DictZipAIOCB *acb;
+    QEMUIOVector *qiov_gz;
+    struct iovec *iov;
+    uint8_t *buf;
+    size_t  start = sector_num * SECTOR_SIZE;
+    size_t  len = nb_sectors * SECTOR_SIZE;
+    size_t  end = start + len;
+    size_t  gz_start;
+    size_t  gz_len;
+    int64_t gz_sector_num;
+    int     gz_nb_sectors;
+    int     first_chunk, last_chunk;
+    int     first_offset;
+    int     i;
+
+    acb = qemu_aio_get(&dictzip_aiocb_info, bs, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    /* Search Cache */
+    for (i = 0; i < CACHE_COUNT; i++) {
+        if (!s->cache[i].len)
+            continue;
+
+        if ((start >= s->cache[i].start) &&
+            (end <= (s->cache[i].start + s->cache[i].len))) {
+            acb->buf = s->cache[i].buf + (start - s->cache[i].start);
+            acb->len = len;
+            acb->qiov = qiov;
+            acb->bh = qemu_bh_new(dictzip_cache_cb, acb);
+            qemu_bh_schedule(acb->bh);
+
+            return &acb->common;
+        }
+    }
+
+    /* No cache, so let's decode */
+    /* We need to read these chunks. "end" is exclusive, so the last chunk is
+       the one holding byte end - 1; using end itself would pull in one chunk
+       too many whenever the request stops on a chunk boundary. */
+    first_chunk  = start / s->chunk_len;
+    first_offset = start - first_chunk * s->chunk_len;
+    last_chunk   = len ? (end - 1) / s->chunk_len : first_chunk;
+
+    gz_start = s->offsets[first_chunk];
+    gz_len = 0;
+    for (i = first_chunk; i <= last_chunk; i++) {
+        if (s->chunks32)
+            gz_len += le32_to_cpu(s->chunks32[i]);
+        else
+            gz_len += le16_to_cpu(s->chunks[i]);
+    }
+
+    gz_sector_num = gz_start / SECTOR_SIZE;
+    gz_nb_sectors = (gz_len / SECTOR_SIZE);
+
+    /* account for tail and heads */
+    while ((gz_start + gz_len) > ((gz_sector_num + gz_nb_sectors) * SECTOR_SIZE))
+        gz_nb_sectors++;
+
+    /* Allocate qiov, iov and buf in one chunk so we only need to free qiov */
+    qiov_gz = g_malloc0(sizeof(QEMUIOVector) + sizeof(struct iovec) +
+                           (gz_nb_sectors * SECTOR_SIZE));
+    iov = (struct iovec *)(((char *)qiov_gz) + sizeof(QEMUIOVector));
+    /* the data area starts right behind the whole iovec, not a pointer's
+       width into it */
+    buf = (uint8_t *)(iov + 1);
+
+    /* Kick off the read by the backing file, so we can start decompressing */
+    iov->iov_base = (void *)buf;
+    iov->iov_len = gz_nb_sectors * SECTOR_SIZE;
+    qemu_iovec_init_external(qiov_gz, iov, 1);
+
+    dprintf("read %zd - %zd => %zd - %zd\n", start, end, gz_start, gz_start + gz_len);
+
+    acb->s = s;
+    acb->qiov = qiov;
+    acb->qiov_gz = qiov_gz;
+    acb->start = start;
+    acb->len = len;
+    /* the read is sector aligned; skip to where the first chunk begins */
+    acb->gzipped = buf + (gz_start % SECTOR_SIZE);
+    acb->gz_len = gz_len;
+    acb->gz_start = first_chunk * s->chunk_len;
+    acb->offset = first_offset;
+    acb->chunks_len = (last_chunk - first_chunk + 1) * s->chunk_len;
+
+    return bdrv_aio_readv(s->hd->file, gz_sector_num, qiov_gz, gz_nb_sectors,
+                          dictzip_read_cb, acb);
+}
+
+/*
+ * Release everything dictzip_open() and the read path attached to the
+ * driver state: cached buffers, zlib streams, the chunk index, the chunk
+ * offsets and the reference on the underlying image.
+ */
+static void dictzip_close(BlockDriverState *bs)
+{
+    BDRVDictZipState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < CACHE_COUNT; i++) {
+        /* g_free() accepts NULL, so unused slots need no special case */
+        g_free(s->cache[i].buf);
+        s->cache[i].buf = NULL;
+        s->cache[i].len = 0;
+    }
+
+    for (i = 0; i < Z_STREAM_COUNT; i++) {
+        inflateEnd(&s->zStream[i]);
+    }
+
+    /* chunks32, when set, aliases chunks and must not be freed separately */
+    g_free(s->chunks);
+    s->chunks = NULL;
+    s->chunks32 = NULL;
+
+    g_free(s->offsets);
+    s->offsets = NULL;
+
+    /* drop the reference taken by bdrv_open() in dictzip_open() */
+    if (s->hd) {
+        bdrv_unref(s->hd);
+        s->hd = NULL;
+    }
+
+    dprintf("Close\n");
+}
+
+/* Return the length of the uncompressed image as determined at open time. */
+static int64_t dictzip_getlength(BlockDriverState *bs)
+{
+    BDRVDictZipState *s = bs->opaque;
+    /* file_len is an int64_t, which is not a long on every host */
+    dprintf("getlength -> %" PRId64 "\n", s->file_len);
+    return s->file_len;
+}
+
+/*
+ * Driver table for the "dzip" format/protocol. Only open, close, getlength
+ * and an asynchronous read callback are provided; no write callback is set.
+ */
+static BlockDriver bdrv_dictzip = {
+    .format_name     = "dzip",
+    .protocol_name   = "dzip",
+
+    .instance_size   = sizeof(BDRVDictZipState),
+    .bdrv_file_open  = dictzip_open,
+    .bdrv_close      = dictzip_close,
+    .bdrv_getlength  = dictzip_getlength,
+
+    .bdrv_aio_readv  = dictzip_aio_readv,
+};
+
+/* Register the DictZip driver with the block layer. */
+static void dictzip_block_init(void)
+{
+    bdrv_register(&bdrv_dictzip);
+}
+
+block_init(dictzip_block_init);
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -37,24 +37,13 @@
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
 struct BdrvDirtyBitmap {
-    QemuMutex *mutex;
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    HBitmap *meta;              /* Meta dirty bitmap */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is disabled. It ignores all writes to
-                                   the device */
+    bool disabled;              /* Bitmap is read-only */
    int active_iterators;       /* How many iterators are active */
-    bool readonly;              /* Bitmap is read-only. This field also
-                                   prevents the respective image from being
-                                   modified (i.e. blocks writes and discards).
-                                   Such operations must fail and both the image
-                                   and this bitmap must remain unchanged while
-                                   this flag is set. */
-    bool autoload;              /* For persistent bitmaps: bitmap must be
-                                   autoloaded on image opening */
-    bool persistent;            /* bitmap must be saved to owner disk image */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
 };

@@ -63,27 +52,6 @@ struct BdrvDirtyBitmapIter {
    BdrvDirtyBitmap *bitmap;
 };

-static inline void bdrv_dirty_bitmaps_lock(BlockDriverState *bs)
-{
-    qemu_mutex_lock(&bs->dirty_bitmap_mutex);
-}
-
-static inline void bdrv_dirty_bitmaps_unlock(BlockDriverState *bs)
-{
-    qemu_mutex_unlock(&bs->dirty_bitmap_mutex);
-}
-
-void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap)
-{
-    qemu_mutex_lock(bitmap->mutex);
-}
-
-void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap)
-{
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-/* Called with BQL or dirty_bitmap lock taken.  */
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
    BdrvDirtyBitmap *bm;
@@ -97,17 +65,13 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
    return NULL;
 }

-/* Called with BQL taken.  */
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
-    bitmap->persistent = false;
-    bitmap->autoload = false;
 }

-/* Called with BQL taken.  */
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
                                          const char *name,
@@ -132,14 +96,11 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->mutex = &bs->dirty_bitmap_mutex;
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
    bitmap->size = bitmap_size;
    bitmap->name = g_strdup(name);
    bitmap->disabled = false;
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    bdrv_dirty_bitmaps_unlock(bs);
    return bitmap;
 }

@@ -158,24 +119,20 @@ void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                                   int chunk_size)
 {
    assert(!bitmap->meta);
-    qemu_mutex_lock(bitmap->mutex);
    bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
                                       chunk_size * BITS_PER_BYTE);
-    qemu_mutex_unlock(bitmap->mutex);
 }

 void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(bitmap->meta);
-    qemu_mutex_lock(bitmap->mutex);
    hbitmap_free_meta(bitmap->bitmap);
    bitmap->meta = NULL;
-    qemu_mutex_unlock(bitmap->mutex);
 }

-int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
-                                      BdrvDirtyBitmap *bitmap, int64_t sector,
-                                      int nb_sectors)
+int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
+                               BdrvDirtyBitmap *bitmap, int64_t sector,
+                               int nb_sectors)
 {
    uint64_t i;
    int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
@@ -190,26 +147,11 @@ int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
    return false;
 }

-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
-                               BdrvDirtyBitmap *bitmap, int64_t sector,
-                               int nb_sectors)
-{
-    bool dirty;
-
-    qemu_mutex_lock(bitmap->mutex);
-    dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
-    qemu_mutex_unlock(bitmap->mutex);
-
-    return dirty;
-}
-
 void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                  BdrvDirtyBitmap *bitmap, int64_t sector,
                                  int nb_sectors)
 {
-    qemu_mutex_lock(bitmap->mutex);
    hbitmap_reset(bitmap->meta, sector, nb_sectors);
-    qemu_mutex_unlock(bitmap->mutex);
 }

 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
@@ -222,19 +164,16 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
    return bitmap->name;
 }

-/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
    return bitmap->successor;
 }

-/* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
    return !(bitmap->disabled || bitmap->successor);
 }

-/* Called with BQL taken.  */
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 {
    if (bdrv_dirty_bitmap_frozen(bitmap)) {
@@ -249,7 +188,6 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
 /**
 * Create a successor bitmap destined to replace this bitmap after an operation.
 * Requires that the bitmap is not frozen and has no successor.
- * Called with BQL taken.
 */
 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                       BdrvDirtyBitmap *bitmap, Error **errp)
@@ -282,7 +220,6 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
 /**
 * For a bitmap with a successor, yield our name to the successor,
 * delete the old bitmap, and return a handle to the new bitmap.
- * Called with BQL taken.
 */
 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                            BdrvDirtyBitmap *bitmap,
@@ -301,10 +238,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
    bitmap->name = NULL;
    successor->name = name;
    bitmap->successor = NULL;
-    successor->persistent = bitmap->persistent;
-    bitmap->persistent = false;
-    successor->autoload = bitmap->autoload;
-    bitmap->autoload = false;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return successor;
@@ -314,7 +247,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 * In cases of failure where we can no longer safely delete the parent,
 * we may wish to re-join the parent and child/successor.
 * The merged parent will be un-frozen, but not explicitly re-enabled.
- * Called with BQL taken.
 */
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *parent,
@@ -339,37 +271,27 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,

 /**
 * Truncates _all_ bitmaps attached to a BDS.
- * Called with BQL taken.
 */
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 {
    BdrvDirtyBitmap *bitmap;
    uint64_t size = bdrv_nb_sectors(bs);

-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        assert(!bdrv_dirty_bitmap_frozen(bitmap));
        assert(!bitmap->active_iterators);
        hbitmap_truncate(bitmap->bitmap, size);
        bitmap->size = size;
    }
-    bdrv_dirty_bitmaps_unlock(bs);
 }

-static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
-{
-    return !!bdrv_dirty_bitmap_name(bitmap);
-}
-
-/* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(
-    BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-    bool (*cond)(BdrvDirtyBitmap *bitmap))
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+                                                  BdrvDirtyBitmap *bitmap,
+                                                  bool only_named)
 {
    BdrvDirtyBitmap *bm, *next;
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
+        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
            assert(!bm->active_iterators);
            assert(!bdrv_dirty_bitmap_frozen(bm));
            assert(!bm->meta);
@@ -379,72 +301,35 @@ static void bdrv_do_release_matching_dirty_bitmap(
            g_free(bm);

            if (bitmap) {
-                goto out;
+                return;
            }
        }
    }
    if (bitmap) {
        abort();
    }
-
-out:
-    bdrv_dirty_bitmaps_unlock(bs);
 }

-/* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
 }

 /**
 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
 * There must not be any frozen bitmaps attached.
- * This function does not remove persistent bitmaps from the storage.
- * Called with BQL taken.
 */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name);
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
 }

-/**
- * Release all persistent dirty bitmaps attached to a BDS (for use in
- * bdrv_inactivate_recurse()).
- * There must not be any frozen bitmaps attached.
- * This function does not remove persistent bitmaps from the storage.
- */
-void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL,
-                                          bdrv_dirty_bitmap_get_persistance);
-}
-
-/**
- * Remove persistent dirty bitmap from the storage if it exists.
- * Absence of bitmap is not an error, because we have the following scenario:
- * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no
- * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should
- * not fail.
- * This function doesn't release corresponding BdrvDirtyBitmap.
- */
-void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-                                         const char *name,
-                                         Error **errp)
-{
-    if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) {
-        bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
-    }
-}
-
-/* Called with BQL taken.  */
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
 }

-/* Called with BQL taken.  */
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
@@ -457,11 +342,10 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
    BlockDirtyInfoList *list = NULL;
    BlockDirtyInfoList **plist = &list;

-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS;
+        info->count = bdrv_get_dirty_count(bm);
        info->granularity = bdrv_dirty_bitmap_granularity(bm);
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
@@ -470,14 +354,12 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
        *plist = entry;
        plist = &entry->next;
    }
-    bdrv_dirty_bitmaps_unlock(bs);

    return list;
 }

-/* Called within bdrv_dirty_bitmap_lock..unlock */
-int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                          int64_t sector)
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector)
 {
    if (bitmap) {
        return hbitmap_get(bitmap->bitmap, sector);
@@ -506,7 +388,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
    return granularity;
 }

-uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
 {
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
@@ -550,45 +432,23 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
    return hbitmap_iter_next(&iter->hbi);
 }

-/* Called within bdrv_dirty_bitmap_lock..unlock */
-void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-                                  int64_t cur_sector, int64_t nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    assert(!bdrv_dirty_bitmap_readonly(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int64_t nr_sectors)
-{
-    bdrv_dirty_bitmap_lock(bitmap);
-    bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
-    bdrv_dirty_bitmap_unlock(bitmap);
-}
-
-/* Called within bdrv_dirty_bitmap_lock..unlock */
-void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-                                    int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    assert(!bdrv_dirty_bitmap_readonly(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }

 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int64_t nr_sectors)
 {
-    bdrv_dirty_bitmap_lock(bitmap);
-    bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
-    bdrv_dirty_bitmap_unlock(bitmap);
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }

 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    assert(!bdrv_dirty_bitmap_readonly(bitmap));
-    bdrv_dirty_bitmap_lock(bitmap);
    if (!out) {
        hbitmap_reset_all(bitmap->bitmap);
    } else {
@@ -597,14 +457,12 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
                                       hbitmap_granularity(backup));
        *out = backup;
    }
-    bdrv_dirty_bitmap_unlock(bitmap);
 }

 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 {
    HBitmap *tmp = bitmap->bitmap;
    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    bitmap->bitmap = in;
    hbitmap_free(tmp);
 }
@@ -641,13 +499,6 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
    hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }

-void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
-                                        uint64_t start, uint64_t count,
-                                        bool finish)
-{
-    hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
-}
-
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
    hbitmap_deserialize_finish(bitmap->bitmap);
@@ -657,20 +508,12 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int64_t nr_sectors)
 {
    BdrvDirtyBitmap *bitmap;
-
-    if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
-        return;
-    }
-
-    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
            continue;
        }
-        assert(!bdrv_dirty_bitmap_readonly(bitmap));
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
-    bdrv_dirty_bitmaps_unlock(bs);
 }

 /**
@@ -690,78 +533,3 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_count(bitmap->meta);
 }
-
-bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->readonly;
-}
-
-/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value)
-{
-    qemu_mutex_lock(bitmap->mutex);
-    bitmap->readonly = value;
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->readonly) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
-{
-    qemu_mutex_lock(bitmap->mutex);
-    bitmap->autoload = autoload;
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->autoload;
-}
-
-/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
-{
-    qemu_mutex_lock(bitmap->mutex);
-    bitmap->persistent = persistent;
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->persistent;
-}
-
-bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->persistent && !bm->readonly) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
-                                        BdrvDirtyBitmap *bitmap)
-{
-    return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
-                            QLIST_NEXT(bitmap, list);
-}
-
-char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
-{
-    return hbitmap_sha256(bitmap->bitmap, errp);
-}
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -111,7 +111,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
        uncompressed_sectors = s->sectorcounts[chunk];
        break;
    case 1: /* copy */
-        uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512);
+        uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
        break;
    case 2: /* zero */
        /* as the all-zeroes block may be large, it is treated specially: the
@@ -413,18 +413,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    int64_t offset;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_set_read_only(bs, true, errp);
-    if (ret < 0) {
-        return ret;
-    }
-
    block_module_load_one("dmg-bz2");
+    bs->read_only = true;

    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -701,7 +691,6 @@ static BlockDriver bdrv_dmg = {
    .bdrv_probe     = dmg_probe,
    .bdrv_open      = dmg_open,
    .bdrv_refresh_limits = dmg_refresh_limits,
-    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_co_preadv = dmg_co_preadv,
    .bdrv_close     = dmg_close,
 };
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -151,7 +151,7 @@ static QemuOptsList runtime_type_opts = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "inet|unix",
+            .help = "tcp|unix",
        },
        { /* end of list */ }
    },
@@ -170,14 +170,14 @@ static QemuOptsList runtime_unix_opts = {
    },
 };

-static QemuOptsList runtime_inet_opts = {
-    .name = "gluster_inet",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
+static QemuOptsList runtime_tcp_opts = {
+    .name = "gluster_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
    .desc = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "inet|unix",
+            .help = "tcp|unix",
        },
        {
            .name = GLUSTER_OPT_HOST,
@@ -320,7 +320,7 @@ static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
                                  const char *filename)
 {
-    SocketAddress *gsconf;
+    GlusterServer *gsconf;
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
@@ -331,20 +331,21 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        return -EINVAL;
    }

-    gconf->server = g_new0(SocketAddressList, 1);
-    gconf->server->value = gsconf = g_new0(SocketAddress, 1);
+    gconf->server = g_new0(GlusterServerList, 1);
+    gconf->server->value = gsconf = g_new0(GlusterServer, 1);

    /* transport */
    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_UNIX;
+        gsconf->type = GLUSTER_TRANSPORT_UNIX;
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
-        warn_report("rdma feature is not supported, falling back to tcp");
+        gsconf->type = GLUSTER_TRANSPORT_TCP;
+        error_report("Warning: rdma feature is not supported, falling "
+                     "back to tcp");
    } else {
        ret = -EINVAL;
        goto out;
@@ -372,11 +373,11 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        }
        gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
    } else {
-        gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
+        gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
        if (uri->port) {
-            gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
+            gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
        } else {
-            gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+            gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
        }
    }

@@ -394,7 +395,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    struct glfs *glfs;
    int ret;
    int old_errno;
-    SocketAddressList *server;
+    GlusterServerList *server;
    unsigned long long port;

    glfs = glfs_find_preopened(gconf->volume);
@@ -410,27 +411,22 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    glfs_set_preopened(gconf->volume, glfs);

    for (server = gconf->server; server; server = server->next) {
-        switch (server->value->type) {
-        case SOCKET_ADDRESS_TYPE_UNIX:
-            ret = glfs_set_volfile_server(glfs, "unix",
+        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.q_unix.path, 0);
-            break;
-        case SOCKET_ADDRESS_TYPE_INET:
-            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
+        } else {
+            if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
-                           server->value->u.inet.port);
+                           server->value->u.tcp.port);
                errno = EINVAL;
                goto out;
            }
-            ret = glfs_set_volfile_server(glfs, "tcp",
-                                   server->value->u.inet.host,
+            ret = glfs_set_volfile_server(glfs,
+                                   GlusterTransport_lookup[server->value->type],
+                                   server->value->u.tcp.host,
                                   (int)port);
-            break;
-        case SOCKET_ADDRESS_TYPE_VSOCK:
-        case SOCKET_ADDRESS_TYPE_FD:
-        default:
-            abort();
        }

        if (ret < 0) {
@@ -448,13 +444,13 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        error_setg(errp, "Gluster connection for volume %s, path %s failed"
                         " to connect", gconf->volume, gconf->path);
        for (server = gconf->server; server; server = server->next) {
-            if (server->value->type  == SOCKET_ADDRESS_TYPE_UNIX) {
+            if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
                error_append_hint(errp, "hint: failed on socket %s ",
                                  server->value->u.q_unix.path);
            } else {
                error_append_hint(errp, "hint: failed on host %s and port %s ",
-                                  server->value->u.inet.host,
-                                  server->value->u.inet.port);
+                                  server->value->u.tcp.host,
+                                  server->value->u.tcp.port);
            }
        }

@@ -478,6 +474,23 @@ out:
    return NULL;
 }

+static int qapi_enum_parse(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        return GLUSTER_TRANSPORT__MAX;
+    }
+
+    for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
+        if (!strcmp(opt, GlusterTransport_lookup[i])) {
+            return i;
+        }
+    }
+
+    return i;
+}
+
 /*
 * Convert the json formatted command line into qapi.
 */
@@ -485,13 +498,14 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                                  QDict *options, Error **errp)
 {
    QemuOpts *opts;
-    SocketAddress *gsconf = NULL;
-    SocketAddressList *curr = NULL;
+    GlusterServer *gsconf;
+    GlusterServerList *curr = NULL;
    QDict *backing_options = NULL;
    Error *local_err = NULL;
    char *str = NULL;
    const char *ptr;
-    int i, type, num_servers;
+    size_t num_servers;
+    int i;

    /* create opts info from runtime_json_opts list */
    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -533,31 +547,25 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+        gsconf = g_new0(GlusterServer, 1);
+        gsconf->type = qapi_enum_parse(ptr);
        if (!ptr) {
            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;

        }
-        gsconf = g_new0(SocketAddress, 1);
-        if (!strcmp(ptr, "tcp")) {
-            ptr = "inet";       /* accept legacy "tcp" */
-        }
-        type = qapi_enum_parse(&SocketAddressType_lookup, ptr, -1, NULL);
-        if (type != SOCKET_ADDRESS_TYPE_INET
-            && type != SOCKET_ADDRESS_TYPE_UNIX) {
-            error_setg(&local_err,
-                       "Parameter '%s' may be 'inet' or 'unix'",
-                       GLUSTER_OPT_TYPE);
+        if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
+            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+                       GLUSTER_OPT_TYPE, "tcp or unix");
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;
        }
-        gsconf->type = type;
        qemu_opts_del(opts);

-        if (gsconf->type == SOCKET_ADDRESS_TYPE_INET) {
-            /* create opts info from runtime_inet_opts list */
-            opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
+        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+            /* create opts info from runtime_tcp_opts list */
+            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
            if (local_err) {
                goto out;
@@ -570,7 +578,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.inet.host = g_strdup(ptr);
+            gsconf->u.tcp.host = g_strdup(ptr);
            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
            if (!ptr) {
                error_setg(&local_err, QERR_MISSING_PARAMETER,
@@ -578,28 +586,28 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.inet.port = g_strdup(ptr);
+            gsconf->u.tcp.port = g_strdup(ptr);

            /* defend for unsupported fields in InetSocketAddress,
             * i.e. @ipv4, @ipv6  and @to
             */
            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
            if (ptr) {
-                gsconf->u.inet.has_to = true;
+                gsconf->u.tcp.has_to = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
            if (ptr) {
-                gsconf->u.inet.has_ipv4 = true;
+                gsconf->u.tcp.has_ipv4 = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
            if (ptr) {
-                gsconf->u.inet.has_ipv6 = true;
+                gsconf->u.tcp.has_ipv6 = true;
            }
-            if (gsconf->u.inet.has_to) {
+            if (gsconf->u.tcp.has_to) {
                error_setg(&local_err, "Parameter 'to' not supported");
                goto out;
            }
-            if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
+            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
                goto out;
            }
@@ -624,18 +632,16 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        if (gconf->server == NULL) {
-            gconf->server = g_new0(SocketAddressList, 1);
+            gconf->server = g_new0(GlusterServerList, 1);
            gconf->server->value = gsconf;
            curr = gconf->server;
        } else {
-            curr->next = g_new0(SocketAddressList, 1);
+            curr->next = g_new0(GlusterServerList, 1);
            curr->next->value = gsconf;
            curr = curr->next;
        }
-        gsconf = NULL;

-        QDECREF(backing_options);
-        backing_options = NULL;
+        qdict_del(backing_options, str);
        g_free(str);
        str = NULL;
    }
@@ -644,10 +650,11 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,

 out:
    error_propagate(errp, local_err);
-    qapi_free_SocketAddress(gsconf);
    qemu_opts_del(opts);
-    g_free(str);
-    QDECREF(backing_options);
+    if (str) {
+        qdict_del(backing_options, str);
+        g_free(str);
+    }
    errno = EINVAL;
    return -errno;
 }
@@ -676,7 +683,7 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
                             "file.volume=testvol,file.path=/path/a.qcow2"
                             "[,file.debug=9]"
                             "[,file.logfile=/path/filename.log],"
-                             "file.server.0.type=inet,"
+                             "file.server.0.type=tcp,"
                             "file.server.0.host=1.2.3.4,"
                             "file.server.0.port=24007,"
                             "file.server.1.transport=unix,"
@@ -691,6 +698,13 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
    return qemu_gluster_glfs_init(gconf, errp);
 }

+static void qemu_gluster_complete_aio(void *opaque)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
+
+    qemu_coroutine_enter(acb->coroutine);
+}
+
 /*
 * AIO callback routine called from GlusterFS thread.
 */
@@ -706,7 +720,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
        acb->ret = -EIO; /* Partial read/write - fail it */
    }

-    aio_co_schedule(acb->aio_context, acb->coroutine);
+    aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
 }

 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
@@ -960,6 +974,29 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
    qemu_coroutine_yield();
    return acb.ret;
 }
+
+static inline bool gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+                                        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline bool gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+                                        int64_t size)
+{
+    return 0;
+}
 #endif

 static int qemu_gluster_create(const char *filename,
@@ -969,10 +1006,9 @@ static int qemu_gluster_create(const char *filename,
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
-    PreallocMode prealloc;
+    int prealloc = 0;
    int64_t total_size = 0;
    char *tmp = NULL;
-    Error *local_err = NULL;

    gconf = g_new0(BlockdevOptionsGluster, 1);
    gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
@@ -1000,11 +1036,13 @@ static int qemu_gluster_create(const char *filename,
                          BDRV_SECTOR_SIZE);

    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    prealloc = qapi_enum_parse(&PreallocMode_lookup, tmp, PREALLOC_MODE_OFF,
-                               &local_err);
-    g_free(tmp);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!tmp || !strcmp(tmp, "off")) {
+        prealloc = 0;
+    } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
+        prealloc = 1;
+    } else {
+        error_setg(errp, "Invalid preallocation mode: '%s'"
+                         " or GlusterFS doesn't support zerofill API", tmp);
        ret = -EINVAL;
        goto out;
    }
@@ -1013,48 +1051,21 @@ static int qemu_gluster_create(const char *filename,
                    O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
    if (!fd) {
        ret = -errno;
-        goto out;
-    }
-
-    switch (prealloc) {
-#ifdef CONFIG_GLUSTERFS_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        if (glfs_fallocate(fd, 0, 0, total_size)) {
-            error_setg(errp, "Could not preallocate data for the new file");
-            ret = -errno;
-        }
-        break;
-#endif /* CONFIG_GLUSTERFS_FALLOCATE */
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    case PREALLOC_MODE_FULL:
+    } else {
        if (!glfs_ftruncate(fd, total_size)) {
-            if (glfs_zerofill(fd, 0, total_size)) {
-                error_setg(errp, "Could not zerofill the new file");
+            if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
                ret = -errno;
            }
        } else {
-            error_setg(errp, "Could not resize file");
            ret = -errno;
        }
-        break;
-#endif /* CONFIG_GLUSTERFS_ZEROFILL */
-    case PREALLOC_MODE_OFF:
-        if (glfs_ftruncate(fd, total_size) != 0) {
-            ret = -errno;
-            error_setg(errp, "Could not resize file");
-        }
-        break;
-    default:
-        ret = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_str(prealloc));
-        break;
-    }

-    if (glfs_close(fd) != 0) {
-        ret = -errno;
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
    }
 out:
+    g_free(tmp);
    qapi_free_BlockdevOptionsGluster(gconf);
    glfs_clear_preopened(glfs);
    return ret;
@@ -1091,23 +1102,14 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
    return acb.ret;
 }

-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
-                                 PreallocMode prealloc, Error **errp)
+static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
 {
    int ret;
    BDRVGlusterState *s = bs->opaque;

-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
    ret = glfs_ftruncate(s->fd, offset);
    if (ret < 0) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Failed to truncate file");
-        return ret;
+        return -errno;
    }

    return 0;
@@ -1251,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
 * If we can't find out, return a negative errno other than -ENXIO.
 *
- * (Shamefully copied from file-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
 */
 static int find_allocation(BlockDriverState *bs, off_t start,
                           off_t *data, off_t *hole)
@@ -1280,14 +1282,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
    if (offs < 0) {
        return -errno;          /* D3 or D4 */
    }
-
-    if (offs < start) {
-        /* This is not a valid return by lseek().  We are safe to just return
-         * -EIO in this case, and we'll treat it like D4. Unfortunately some
-         *  versions of gluster server will return offs < start, so an assert
-         *  here will unnecessarily abort QEMU. */
-        return -EIO;
-    }
+    assert(offs >= start);

    if (offs > start) {
        /* D2: in hole, next data at offs */
@@ -1319,14 +1314,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
    if (offs < 0) {
        return -errno;          /* D1 and (H3 or H4) */
    }
-
-    if (offs < start) {
-        /* This is not a valid return by lseek().  We are safe to just return
-         * -EIO in this case, and we'll treat it like H4. Unfortunately some
-         *  versions of gluster server will return offs < start, so an assert
-         *  here will unnecessarily abort QEMU. */
-        return -EIO;
-    }
+    assert(offs >= start);

    if (offs > start) {
        /*
@@ -1361,7 +1349,7 @@ exit:
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
- * (Based on raw_co_get_block_status() from file-posix.c.)
+ * (Based on raw_co_get_block_status() from raw-posix.c.)
 */
 static int64_t coroutine_fn qemu_gluster_co_get_block_status(
        BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
--- a/block/io.c
+++ b/block/io.c
--- a/block/iscsi-opts.c
+++ b/block/iscsi-opts.c
@@ -1,69 +0,0 @@
-/*
- * QEMU Block driver for iSCSI images (static options)
- *
- * Copyright (c) 2017 Peter Lieven <pl@kamp.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/config-file.h"
-
-static QemuOptsList qemu_iscsi_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
-    .desc = {
-        {
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-            .help = "username for CHAP authentication to target",
-        },{
-            .name = "password",
-            .type = QEMU_OPT_STRING,
-            .help = "password for CHAP authentication to target",
-        },{
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret providing password for CHAP "
-                    "authentication to target",
-        },{
-            .name = "header-digest",
-            .type = QEMU_OPT_STRING,
-            .help = "HeaderDigest setting. "
-                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
-        },{
-            .name = "initiator-name",
-            .type = QEMU_OPT_STRING,
-            .help = "Initiator iqn name to use when connecting",
-        },{
-            .name = "timeout",
-            .type = QEMU_OPT_NUMBER,
-            .help = "Request timeout in seconds (default 0 = no timeout)",
-        },
-        { /* end of list */ }
-    },
-};
-
-static void iscsi_block_opts_init(void)
-{
-    qemu_add_opts(&qemu_iscsi_opts);
-}
-
-block_init(iscsi_block_opts_init);
--- a/block/iscsi.c
+++ b/block/iscsi.c
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -54,10 +54,10 @@ struct LinuxAioState {
    io_context_t ctx;
    EventNotifier e;

-    /* io queue for submit at batch.  Protected by AioContext lock. */
+    /* io queue for submit at batch */
    LaioQueue io_q;

-    /* I/O completion processing.  Only runs in I/O thread.  */
+    /* I/O completion processing */
    QEMUBH *completion_bh;
    int event_idx;
    int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
         * that!
         */
        if (!qemu_coroutine_entered(laiocb->co)) {
-            aio_co_wake(laiocb->co);
+            qemu_coroutine_enter(laiocb->co);
        }
    } else {
        laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,12 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
    qemu_laio_process_completions(s);
-
-    aio_context_acquire(s->aio_context);
    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
-    aio_context_release(s->aio_context);
 }

 static void qemu_laio_completion_bh(void *opaque)
@@ -258,20 +255,6 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }

-static bool qemu_laio_poll_cb(void *opaque)
-{
-    EventNotifier *e = opaque;
-    LinuxAioState *s = container_of(e, LinuxAioState, e);
-    struct io_event *events;
-
-    if (!io_getevents_peek(s->ctx, &events)) {
-        return false;
-    }
-
-    qemu_laio_process_completions_and_submit(s);
-    return true;
-}
-
 static void laio_cancel(BlockAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -456,9 +439,8 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,

 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL);
    qemu_bh_delete(s->completion_bh);
-    s->aio_context = NULL;
 }

 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
@@ -466,8 +448,7 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb,
-                           qemu_laio_poll_cb);
+                           qemu_laio_completion_cb);
 }

 LinuxAioState *laio_init(void)
--- a/block/mirror.c
+++ b/block/mirror.c
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -28,21 +28,18 @@
 */

 #include "qemu/osdep.h"
-#include "qapi/error.h"
 #include "nbd-client.h"

 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))

-static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
    int i;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        NBDClientRequest *req = &s->requests[i];
-
-        if (req->coroutine && req->receiving) {
-            aio_co_wake(req->coroutine);
+        if (s->recv_coroutine[i]) {
+            qemu_coroutine_enter(s->recv_coroutine[i]);
        }
    }
 }
@@ -59,7 +56,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    qio_channel_shutdown(client->ioc,
                         QIO_CHANNEL_SHUTDOWN_BOTH,
                         NULL);
-    BDRV_POLL_WHILE(bs, client->read_reply_co);
+    nbd_recv_coroutines_enter_all(client);

    nbd_client_detach_aio_context(bs);
    object_unref(OBJECT(client->sioc));
@@ -68,52 +65,54 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    client->ioc = NULL;
 }

-static coroutine_fn void nbd_read_reply_entry(void *opaque)
+static void nbd_reply_ready(void *opaque)
 {
-    NBDClientSession *s = opaque;
+    BlockDriverState *bs = opaque;
+    NBDClientSession *s = nbd_get_client_session(bs);
    uint64_t i;
-    int ret = 0;
-    Error *local_err = NULL;
+    int ret;

-    while (!s->quit) {
-        assert(s->reply.handle == 0);
-        ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
-        if (ret < 0) {
-            error_report_err(local_err);
-        }
-        if (ret <= 0) {
-            break;
-        }
-
-        /* There's no need for a mutex on the receive side, because the
-         * handler acts as a synchronization point and ensures that only
-         * one coroutine is called until the reply finishes.
-         */
-        i = HANDLE_TO_INDEX(s, s->reply.handle);
-        if (i >= MAX_NBD_REQUESTS ||
-            !s->requests[i].coroutine ||
-            !s->requests[i].receiving) {
-            break;
-        }
-
-        /* We're woken up again by the request itself.  Note that there
-         * is no race between yielding and reentering read_reply_co.  This
-         * is because:
-         *
-         * - if the request runs on the same AioContext, it is only
-         *   entered after we yield
-         *
-         * - if the request runs on a different AioContext, reentering
-         *   read_reply_co happens through a bottom half, which can only
-         *   run after we yield.
-         */
-        aio_co_wake(s->requests[i].coroutine);
-        qemu_coroutine_yield();
+    if (!s->ioc) { /* Already closed */
+        return;
    }

-    s->quit = true;
-    nbd_recv_coroutines_wake_all(s);
-    s->read_reply_co = NULL;
+    if (s->reply.handle == 0) {
+        /* No reply already in flight.  Fetch a header.  It is possible
+         * that another thread has done the same thing in parallel, so
+         * the socket is not readable anymore.
+         */
+        ret = nbd_receive_reply(s->ioc, &s->reply);
+        if (ret == -EAGAIN) {
+            return;
+        }
+        if (ret < 0) {
+            s->reply.handle = 0;
+            goto fail;
+        }
+    }
+
+    /* There's no need for a mutex on the receive side, because the
+     * handler acts as a synchronization point and ensures that only
+     * one coroutine is called until the reply finishes.  */
+    i = HANDLE_TO_INDEX(s, s->reply.handle);
+    if (i >= MAX_NBD_REQUESTS) {
+        goto fail;
+    }
+
+    if (s->recv_coroutine[i]) {
+        qemu_coroutine_enter(s->recv_coroutine[i]);
+        return;
+    }
+
+fail:
+    nbd_teardown_connection(bs);
+}
+
+static void nbd_restart_write(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
 }

 static int nbd_co_send_request(BlockDriverState *bs,
@@ -121,44 +120,39 @@ static int nbd_co_send_request(BlockDriverState *bs,
                               QEMUIOVector *qiov)
 {
    NBDClientSession *s = nbd_get_client_session(bs);
-    int rc, i;
+    AioContext *aio_context;
+    int rc, ret, i;

    qemu_co_mutex_lock(&s->send_mutex);
-    while (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
-    }
-    s->in_flight++;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->requests[i].coroutine == NULL) {
+        if (s->recv_coroutine[i] == NULL) {
+            s->recv_coroutine[i] = qemu_coroutine_self();
            break;
        }
    }

    g_assert(qemu_in_coroutine());
    assert(i < MAX_NBD_REQUESTS);
-
-    s->requests[i].coroutine = qemu_coroutine_self();
-    s->requests[i].receiving = false;
-
    request->handle = INDEX_TO_HANDLE(s, i);

-    if (s->quit) {
-        rc = -EIO;
-        goto err;
-    }
    if (!s->ioc) {
-        rc = -EPIPE;
-        goto err;
+        qemu_co_mutex_unlock(&s->send_mutex);
+        return -EPIPE;
    }

+    s->send_coroutine = qemu_coroutine_self();
+    aio_context = bdrv_get_aio_context(bs);
+
+    aio_set_fd_handler(aio_context, s->sioc->fd, false,
+                       nbd_reply_ready, nbd_restart_write, bs);
    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0 && !s->quit) {
-            assert(request->len == iov_size(qiov->iov, qiov->niov));
-            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-                                       NULL) < 0) {
+        if (rc >= 0) {
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               false);
+            if (ret != request->len) {
                rc = -EIO;
            }
        }
@@ -166,14 +160,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
    } else {
        rc = nbd_send_request(s->ioc, request);
    }
-
-err:
-    if (rc < 0) {
-        s->quit = true;
-        s->requests[i].coroutine = NULL;
-        s->in_flight--;
-        qemu_co_queue_next(&s->free_sema);
-    }
+    aio_set_fd_handler(aio_context, s->sioc->fd, false,
+                       nbd_reply_ready, NULL, bs);
+    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
 }
@@ -183,76 +172,77 @@ static void nbd_co_receive_reply(NBDClientSession *s,
                                 NBDReply *reply,
                                 QEMUIOVector *qiov)
 {
-    int i = HANDLE_TO_INDEX(s, request->handle);
+    int ret;

-    /* Wait until we're woken up by nbd_read_reply_entry.  */
-    s->requests[i].receiving = true;
+    /* Wait until we're woken up by the read handler.  TODO: perhaps
+     * peek at the next reply and avoid yielding if it's ours?  */
    qemu_coroutine_yield();
-    s->requests[i].receiving = false;
    *reply = s->reply;
-    if (reply->handle != request->handle || !s->ioc || s->quit) {
+    if (reply->handle != request->handle ||
+        !s->ioc) {
        reply->error = EIO;
    } else {
        if (qiov && reply->error == 0) {
-            assert(request->len == iov_size(qiov->iov, qiov->niov));
-            if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
-                                      NULL) < 0) {
+            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+                               true);
+            if (ret != request->len) {
                reply->error = EIO;
-                s->quit = true;
            }
        }

        /* Tell the read handler to read another header.  */
        s->reply.handle = 0;
    }
-
-    s->requests[i].coroutine = NULL;
-
-    /* Kick the read_reply_co to get the next reply.  */
-    if (s->read_reply_co) {
-        aio_co_wake(s->read_reply_co);
-    }
-
-    qemu_co_mutex_lock(&s->send_mutex);
-    s->in_flight--;
-    qemu_co_queue_next(&s->free_sema);
-    qemu_co_mutex_unlock(&s->send_mutex);
 }

-static int nbd_co_request(BlockDriverState *bs,
-                          NBDRequest *request,
-                          QEMUIOVector *qiov)
+static void nbd_coroutine_start(NBDClientSession *s,
+                                NBDRequest *request)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    NBDReply reply;
-    int ret;
-
-    assert(!qiov || request->type == NBD_CMD_WRITE ||
-           request->type == NBD_CMD_READ);
-    ret = nbd_co_send_request(bs, request,
-                              request->type == NBD_CMD_WRITE ? qiov : NULL);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, request, &reply,
-                             request->type == NBD_CMD_READ ? qiov : NULL);
+    /* Poor man semaphore.  The free_sema is locked when no other request
+     * can be accepted, and unlocked after receiving one reply.  */
+    if (s->in_flight == MAX_NBD_REQUESTS) {
+        qemu_co_queue_wait(&s->free_sema);
+        assert(s->in_flight < MAX_NBD_REQUESTS);
+    }
+    s->in_flight++;
+
+    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
+}
+
+static void nbd_coroutine_end(NBDClientSession *s,
+                              NBDRequest *request)
+{
+    int i = HANDLE_TO_INDEX(s, request->handle);
+    s->recv_coroutine[i] = NULL;
+    if (s->in_flight-- == MAX_NBD_REQUESTS) {
+        qemu_co_queue_next(&s->free_sema);
    }
-    return -reply.error;
 }

 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
+    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
+    NBDReply reply;
+    ssize_t ret;

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    assert(!flags);

-    return nbd_co_request(bs, &request, qiov);
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, qiov);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
 }

 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -264,85 +254,126 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
        .from = offset,
        .len = bytes,
    };
+    NBDReply reply;
+    ssize_t ret;

    if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);

-    return nbd_co_request(bs, &request, qiov);
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, qiov);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
 }

 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int bytes, BdrvRequestFlags flags)
+                                int count, BdrvRequestFlags flags)
 {
+    ssize_t ret;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
-        .len = bytes,
+        .len = count,
    };
+    NBDReply reply;

-    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
-        assert(client->info.flags & NBD_FLAG_SEND_FUA);
+        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
        request.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

-    return nbd_co_request(bs, &request, NULL);
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
 }

 int nbd_client_co_flush(BlockDriverState *bs)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = { .type = NBD_CMD_FLUSH };
+    NBDReply reply;
+    ssize_t ret;

-    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
+    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
        return 0;
    }

    request.from = 0;
    request.len = 0;

-    return nbd_co_request(bs, &request, NULL);
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
 }

-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
-        .len = bytes,
+        .len = count,
    };
+    NBDReply reply;
+    ssize_t ret;

-    if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
+    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
        return 0;
    }

-    return nbd_co_request(bs, &request, NULL);
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+
 }

 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
+    aio_set_fd_handler(bdrv_get_aio_context(bs),
+                       nbd_get_client_session(bs)->sioc->fd,
+                       false, NULL, NULL, NULL);
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
-    aio_co_schedule(new_context, client->read_reply_co);
+    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
+                       false, nbd_reply_ready, NULL, bs);
 }

 void nbd_client_close(BlockDriverState *bs)
@@ -373,24 +404,22 @@ int nbd_client_init(BlockDriverState *bs,
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

-    client->info.request_sizes = true;
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
+                                &client->nbdflags,
                                tlscreds, hostname,
-                                &client->ioc, &client->info, errp);
+                                &client->ioc,
+                                &client->size, errp);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        return ret;
    }
-    if (client->info.flags & NBD_FLAG_SEND_FUA) {
+    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
-    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
+    if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }
-    if (client->info.min_block > bs->bl.request_alignment) {
-        bs->bl.request_alignment = client->info.min_block;
-    }

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_queue_init(&client->free_sema);
@@ -405,7 +434,7 @@ int nbd_client_init(BlockDriverState *bs,
    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
+
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -17,24 +17,21 @@

 #define MAX_NBD_REQUESTS    16

-typedef struct {
-    Coroutine *coroutine;
-    bool receiving;         /* waiting for read_reply_co? */
-} NBDClientRequest;
-
 typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    NBDExportInfo info;
+    uint16_t nbdflags;
+    off_t size;

    CoMutex send_mutex;
    CoQueue free_sema;
-    Coroutine *read_reply_co;
+    Coroutine *send_coroutine;
    int in_flight;

-    NBDClientRequest requests[MAX_NBD_REQUESTS];
+    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    NBDReply reply;
-    bool quit;
+
+    bool is_unix;
 } NBDClientSession;

 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
@@ -47,12 +44,12 @@ int nbd_client_init(BlockDriverState *bs,
                    Error **errp);
 void nbd_client_close(BlockDriverState *bs);

-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int bytes, BdrvRequestFlags flags);
+                                int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags);

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -37,6 +37,7 @@
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"

@@ -64,11 +65,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    }

    /* transport */
-    if (!g_strcmp0(uri->scheme, "nbd")) {
+    if (!strcmp(uri->scheme, "nbd")) {
        is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) {
+    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
        is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "nbd+unix")) {
+    } else if (!strcmp(uri->scheme, "nbd+unix")) {
        is_unix = true;
    } else {
        ret = -EINVAL;
@@ -78,7 +79,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    p = uri->path ? uri->path : "/";
    p += strspn(p, "/");
    if (p[0]) {
-        qdict_put_str(options, "export", p);
+        qdict_put(options, "export", qstring_from_str(p));
    }

    qp = query_params_parse(uri->query);
@@ -93,8 +94,9 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put_str(options, "server.type", "unix");
-        qdict_put_str(options, "server.path", qp->p[0].value);
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path",
+                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
        char *port_str;
@@ -113,11 +115,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }

-        qdict_put_str(options, "server.type", "inet");
-        qdict_put(options, "server.host", host);
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put_str(options, "server.port", port_str);
+        qdict_put(options, "server.data.port", qstring_from_str(port_str));
        g_free(port_str);
    }

@@ -179,7 +181,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        export_name[0] = 0; /* truncate 'file' */
        export_name += strlen(EN_OPTSTR);

-        qdict_put_str(options, "export", export_name);
+        qdict_put(options, "export", qstring_from_str(export_name));
    }

    /* extract the host_spec - fail if it's not nbd:... */
@@ -194,19 +196,19 @@ static void nbd_parse_filename(const char *filename, QDict *options,

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put_str(options, "server.type", "unix");
-        qdict_put_str(options, "server.path", unixpath);
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
    } else {
-        InetSocketAddress *addr = g_new(InetSocketAddress, 1);
+        InetSocketAddress *addr = NULL;

-        if (inet_parse(addr, host_spec, errp)) {
-            goto out_inet;
+        addr = inet_parse(host_spec, errp);
+        if (!addr) {
+            goto out;
        }

-        qdict_put_str(options, "server.type", "inet");
-        qdict_put_str(options, "server.host", addr->host);
-        qdict_put_str(options, "server.port", addr->port);
-    out_inet:
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }

@@ -245,20 +247,19 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
            return false;
        }

-        qdict_put_str(output_options, "server.type", "unix");
-        qdict_put_str(output_options, "server.path", path);
+        qdict_put(output_options, "server.type", qstring_from_str("unix"));
+        qdict_put(output_options, "server.data.path", qstring_from_str(path));
    } else if (host) {
-        qdict_put_str(output_options, "server.type", "inet");
-        qdict_put_str(output_options, "server.host", host);
-        qdict_put_str(output_options, "server.port",
-                      port ?: stringify(NBD_DEFAULT_PORT));
+        qdict_put(output_options, "server.type", qstring_from_str("inet"));
+        qdict_put(output_options, "server.data.host", qstring_from_str(host));
+        qdict_put(output_options, "server.data.port",
+                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
    }

    return true;
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
-                                 Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
 {
    SocketAddress *saddr = NULL;
    QDict *addr = NULL;
@@ -277,21 +278,15 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
        goto done;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
+    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
    }

+    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
+
 done:
    QDECREF(addr);
    qobject_decref(crumpled_addr);
@@ -318,7 +313,6 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
                                    saddr,
                                    &local_err);
    if (local_err) {
-        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -429,12 +423,11 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }

-        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
-        if (s->saddr->type != SOCKET_ADDRESS_TYPE_INET) {
+        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = s->saddr->u.inet.host;
+        hostname = s->saddr->u.inet.data->host;
    }

    /* establish TCP connection, return error if it fails
@@ -472,17 +465,9 @@ static int nbd_co_flush(BlockDriverState *bs)

 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    NBDClientSession *s = nbd_get_client_session(bs);
-    uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
-
-    bs->bl.max_pdiscard = max;
-    bs->bl.max_pwrite_zeroes = max;
-    bs->bl.max_transfer = max;
-
-    if (s->info.opt_block &&
-        s->info.opt_block > bs->bl.opt_transfer) {
-        bs->bl.opt_transfer = s->info.opt_block;
-    }
+    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
+    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }

 static void nbd_close(BlockDriverState *bs)
@@ -500,7 +485,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;

-    return s->client.info.size;
+    return s->client.size;
 }

 static void nbd_detach_aio_context(BlockDriverState *bs)
@@ -522,17 +507,17 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;

-    if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) {
-        const InetSocketAddress *inet = &s->saddr->u.inet;
+    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
+        const InetSocketAddress *inet = s->saddr->u.inet.data;
        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
            host = inet->host;
            port = inet->port;
        }
-    } else if (s->saddr->type == SOCKET_ADDRESS_TYPE_UNIX) {
-        path = s->saddr->u.q_unix.path;
-    } /* else can't represent as pseudo-filename */
+    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
+        path = s->saddr->u.q_unix.data->path;
+    }

-    qdict_put_str(opts, "driver", "nbd");
+    qdict_put(opts, "driver", qstring_from_str("nbd"));

    if (path && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -552,13 +537,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
+    assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
+
    qdict_put_obj(opts, "server", saddr_qdict);

    if (s->export) {
-        qdict_put_str(opts, "export", s->export);
+        qdict_put(opts, "export", qstring_from_str(s->export));
    }
    if (s->tlscredsid) {
-        qdict_put_str(opts, "tls-creds", s->tlscredsid);
+        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
    }

    qdict_flatten(opts);
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -36,6 +36,7 @@
 #include "qemu/cutils.h"
 #include "sysemu/sysemu.h"
 #include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "qapi-visit.h"
 #include "qapi/qobject-input-visitor.h"
@@ -53,7 +54,6 @@ typedef struct NFSClient {
    int events;
    bool has_zero_init;
    AioContext *aio_context;
-    QemuMutex mutex;
    blkcnt_t st_blocks;
    bool cache_used;
    NFSServer *server;
@@ -82,7 +82,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        error_setg(errp, "Invalid URI specified");
        goto out;
    }
-    if (g_strcmp0(uri->scheme, "nfs") != 0) {
+    if (strcmp(uri->scheme, "nfs") != 0) {
        error_setg(errp, "URI scheme must be 'nfs'");
        goto out;
    }
@@ -103,34 +103,39 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        goto out;
    }

-    qdict_put_str(options, "server.host", uri->server);
-    qdict_put_str(options, "server.type", "inet");
-    qdict_put_str(options, "path", uri->path);
+    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put(options, "server.type", qstring_from_str("inet"));
+    qdict_put(options, "path", qstring_from_str(uri->path));

    for (i = 0; i < qp->n; i++) {
-        unsigned long long val;
        if (!qp->p[i].value) {
            error_setg(errp, "Value for NFS parameter expected: %s",
                       qp->p[i].name);
            goto out;
        }
-        if (parse_uint_full(qp->p[i].value, &val, 0)) {
+        if (parse_uint_full(qp->p[i].value, NULL, 0)) {
            error_setg(errp, "Illegal value for NFS parameter: %s",
                       qp->p[i].name);
            goto out;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
-            qdict_put_str(options, "user", qp->p[i].value);
+            qdict_put(options, "user",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "gid")) {
-            qdict_put_str(options, "group", qp->p[i].value);
+            qdict_put(options, "group",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            qdict_put_str(options, "tcp-syn-count", qp->p[i].value);
+            qdict_put(options, "tcp-syn-count",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            qdict_put_str(options, "readahead-size", qp->p[i].value);
+            qdict_put(options, "readahead-size",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
-            qdict_put_str(options, "page-cache-size", qp->p[i].value);
+            qdict_put(options, "page-cache-size",
+                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "debug")) {
-            qdict_put_str(options, "debug", qp->p[i].value);
+            qdict_put(options, "debug",
+                      qstring_from_str(qp->p[i].value));
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
@@ -185,7 +190,6 @@ static void nfs_parse_filename(const char *filename, QDict *options,
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);

-/* Called with QemuMutex held.  */
 static void nfs_set_events(NFSClient *client)
 {
    int ev = nfs_which_events(client->context);
@@ -193,8 +197,7 @@ static void nfs_set_events(NFSClient *client)
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
                           false,
                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           NULL, client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL, client);

    }
    client->events = ev;
@@ -203,21 +206,15 @@ static void nfs_set_events(NFSClient *client)
 static void nfs_process_read(void *arg)
 {
    NFSClient *client = arg;
-
-    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLIN);
    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_process_write(void *arg)
 {
    NFSClient *client = arg;
-
-    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLOUT);
    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -232,12 +229,10 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 static void nfs_co_generic_bh_cb(void *opaque)
 {
    NFSRPC *task = opaque;
-
    task->complete = 1;
-    aio_co_wake(task->co);
+    qemu_coroutine_enter(task->co);
 }

-/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                  void *private_data)
@@ -259,9 +254,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                            nfs_co_generic_bh_cb, task);
 }

-static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                      uint64_t bytes, QEMUIOVector *iov,
-                                      int flags)
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
@@ -269,15 +264,14 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
    nfs_co_init_task(bs, &task);
    task.iov = iov;

-    qemu_mutex_lock(&client->mutex);
    if (nfs_pread_async(client->context, client->fh,
-                        offset, bytes, nfs_co_generic_cb, &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
+                        sector_num * BDRV_SECTOR_SIZE,
+                        nb_sectors * BDRV_SECTOR_SIZE,
+                        nfs_co_generic_cb, &task) != 0) {
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -294,50 +288,39 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
    return 0;
 }

-static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *iov,
-                                       int flags)
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
    char *buf = NULL;
-    bool my_buffer = false;

    nfs_co_init_task(bs, &task);

-    if (iov->niov != 1) {
-        buf = g_try_malloc(bytes);
-        if (bytes && buf == NULL) {
-            return -ENOMEM;
-        }
-        qemu_iovec_to_buf(iov, 0, buf, bytes);
-        my_buffer = true;
-    } else {
-        buf = iov->iov[0].iov_base;
+    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    if (nb_sectors && buf == NULL) {
+        return -ENOMEM;
    }

-    qemu_mutex_lock(&client->mutex);
+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+
    if (nfs_pwrite_async(client->context, client->fh,
-                         offset, bytes, buf,
-                         nfs_co_generic_cb, &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
-        if (my_buffer) {
-            g_free(buf);
-        }
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         buf, nfs_co_generic_cb, &task) != 0) {
+        g_free(buf);
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }

-    if (my_buffer) {
-        g_free(buf);
-    }
+    g_free(buf);

-    if (task.ret != bytes) {
+    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
        return task.ret < 0 ? task.ret : -EIO;
    }

@@ -351,15 +334,12 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)

    nfs_co_init_task(bs, &task);

-    qemu_mutex_lock(&client->mutex);
    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
-        qemu_mutex_unlock(&client->mutex);
        return -ENOMEM;
    }

    nfs_set_events(client);
-    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -377,27 +357,27 @@ static QemuOptsList runtime_opts = {
            .help = "Path of the image on the host",
        },
        {
-            .name = "user",
+            .name = "uid",
            .type = QEMU_OPT_NUMBER,
            .help = "UID value to use when talking to the server",
        },
        {
-            .name = "group",
+            .name = "gid",
            .type = QEMU_OPT_NUMBER,
            .help = "GID value to use when talking to the server",
        },
        {
-            .name = "tcp-syn-count",
+            .name = "tcp-syncnt",
            .type = QEMU_OPT_NUMBER,
            .help = "Number of SYNs to send during the session establish",
        },
        {
-            .name = "readahead-size",
+            .name = "readahead",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the readahead size in bytes",
        },
        {
-            .name = "page-cache-size",
+            .name = "pagecache",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the pagecache size in bytes",
        },
@@ -415,7 +395,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
    NFSClient *client = bs->opaque;

    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL);
    client->events = 0;
 }

@@ -433,17 +413,12 @@ static void nfs_client_close(NFSClient *client)
    if (client->context) {
        if (client->fh) {
            nfs_close(client->context, client->fh);
-            client->fh = NULL;
        }
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL, NULL);
+                           false, NULL, NULL, NULL);
        nfs_destroy_context(client->context);
-        client->context = NULL;
    }
-    g_free(client->path);
-    qemu_mutex_destroy(&client->mutex);
-    qapi_free_NFSServer(client->server);
-    client->server = NULL;
+    memset(client, 0, sizeof(NFSClient));
 }

 static void nfs_file_close(BlockDriverState *bs)
@@ -471,14 +446,7 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
        goto out;
    }

-    /*
-     * Caution: this works only because all scalar members of
-     * NFSServer are QString in @crumpled_addr.  The visitor expects
-     * @crumpled_addr to be typed according to the QAPI schema.  It
-     * is when @options come from -blockdev or blockdev_add.  But when
-     * they come from -drive, they're all QString.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
+    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_NFSServer(iv, NULL, &server, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
@@ -494,7 +462,7 @@ out:


 static int64_t nfs_client_open(NFSClient *client, QDict *options,
-                               int flags, int open_flags, Error **errp)
+                               int flags, Error **errp, int open_flags)
 {
    int ret = -EINVAL;
    QemuOpts *opts = NULL;
@@ -502,7 +470,6 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
    struct stat st;
    char *file = NULL, *strp = NULL;

-    qemu_mutex_init(&client->mutex);
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -539,32 +506,32 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        goto fail;
    }

-    if (qemu_opt_get(opts, "user")) {
-        client->uid = qemu_opt_get_number(opts, "user", 0);
+    if (qemu_opt_get(opts, "uid")) {
+        client->uid = qemu_opt_get_number(opts, "uid", 0);
        nfs_set_uid(client->context, client->uid);
    }

-    if (qemu_opt_get(opts, "group")) {
-        client->gid = qemu_opt_get_number(opts, "group", 0);
+    if (qemu_opt_get(opts, "gid")) {
+        client->gid = qemu_opt_get_number(opts, "gid", 0);
        nfs_set_gid(client->context, client->gid);
    }

-    if (qemu_opt_get(opts, "tcp-syn-count")) {
-        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syn-count", 0);
+    if (qemu_opt_get(opts, "tcp-syncnt")) {
+        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
        nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
    }

 #ifdef LIBNFS_FEATURE_READAHEAD
-    if (qemu_opt_get(opts, "readahead-size")) {
+    if (qemu_opt_get(opts, "readahead")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS readahead "
                             "if cache.direct = on");
            goto fail;
        }
-        client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
+        client->readahead = qemu_opt_get_number(opts, "readahead", 0);
        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
-            warn_report("Truncating NFS readahead size to %d",
-                        QEMU_NFS_MAX_READAHEAD_SIZE);
+            error_report("NFS Warning: Truncating NFS readahead "
+                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
            client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
        }
        nfs_set_readahead(client->context, client->readahead);
@@ -576,16 +543,16 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
 #endif

 #ifdef LIBNFS_FEATURE_PAGECACHE
-    if (qemu_opt_get(opts, "page-cache-size")) {
+    if (qemu_opt_get(opts, "pagecache")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS pagecache "
                             "if cache.direct = on");
            goto fail;
        }
-        client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
+        client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
-            warn_report("Truncating NFS pagecache size to %d pages",
-                        QEMU_NFS_MAX_PAGECACHE_SIZE);
+            error_report("NFS Warning: Truncating NFS pagecache "
+                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
            client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
        }
        nfs_set_pagecache(client->context, client->pagecache);
@@ -600,8 +567,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        /* limit the maximum debug level to avoid potential flooding
         * of our log files. */
        if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
-            warn_report("Limiting NFS debug level to %d",
-                        QEMU_NFS_MAX_DEBUG_LEVEL);
+            error_report("NFS Warning: Limiting NFS debug level "
+                         "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
            client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
        }
        nfs_set_debug(client->context, client->debug);
@@ -661,11 +628,10 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,

    ret = nfs_client_open(client, options,
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          bs->open_flags, errp);
+                          errp, bs->open_flags);
    if (ret < 0) {
        return ret;
    }
-
    bs->total_sectors = ret;
    ret = 0;
    return ret;
@@ -703,7 +669,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
        goto out;
    }

-    ret = nfs_client_open(client, options, O_CREAT, 0, errp);
+    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
    if (ret < 0) {
        goto out;
    }
@@ -721,7 +687,6 @@ static int nfs_has_zero_init(BlockDriverState *bs)
    return client->has_zero_init;
 }

-/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
                               void *private_data)
@@ -734,9 +699,7 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
    if (task->ret < 0) {
        error_report("NFS Error: %s", nfs_get_error(nfs));
    }
-
-    /* Set task->complete before reading bs->wakeup.  */
-    atomic_mb_set(&task->complete, 1);
+    task->complete = 1;
    bdrv_wakeup(task->bs);
 }

@@ -764,25 +727,10 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
-                             PreallocMode prealloc, Error **errp)
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
 {
    NFSClient *client = bs->opaque;
-    int ret;
-
-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
-    ret = nfs_ftruncate(client->context, client->fh, offset);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to truncate file");
-        return ret;
-    }
-
-    return 0;
+    return nfs_ftruncate(client->context, client->fh, offset);
 }

 /* Note that this will not re-establish a connection with the NFS server
@@ -826,7 +774,7 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    QObject *server_qdict;
    Visitor *ov;

-    qdict_put_str(opts, "driver", "nfs");
+    qdict_put(opts, "driver", qstring_from_str("nfs"));

    if (client->uid && !client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
@@ -848,26 +796,31 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    ov = qobject_output_visitor_new(&server_qdict);
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
+    assert(qobject_type(server_qdict) == QTYPE_QDICT);
+
    qdict_put_obj(opts, "server", server_qdict);
-    qdict_put_str(opts, "path", client->path);
+    qdict_put(opts, "path", qstring_from_str(client->path));

    if (client->uid) {
-        qdict_put_int(opts, "user", client->uid);
+        qdict_put(opts, "uid", qint_from_int(client->uid));
    }
    if (client->gid) {
-        qdict_put_int(opts, "group", client->gid);
+        qdict_put(opts, "gid", qint_from_int(client->gid));
    }
    if (client->tcp_syncnt) {
-        qdict_put_int(opts, "tcp-syn-cnt", client->tcp_syncnt);
+        qdict_put(opts, "tcp-syncnt",
+                      qint_from_int(client->tcp_syncnt));
    }
    if (client->readahead) {
-        qdict_put_int(opts, "readahead-size", client->readahead);
+        qdict_put(opts, "readahead",
+                      qint_from_int(client->readahead));
    }
    if (client->pagecache) {
-        qdict_put_int(opts, "page-cache-size", client->pagecache);
+        qdict_put(opts, "pagecache",
+                      qint_from_int(client->pagecache));
    }
    if (client->debug) {
-        qdict_put_int(opts, "debug", client->debug);
+        qdict_put(opts, "debug", qint_from_int(client->debug));
    }

    visit_free(ov);
@@ -901,8 +854,8 @@ static BlockDriver bdrv_nfs = {
    .bdrv_create                    = nfs_file_create,
    .bdrv_reopen_prepare            = nfs_reopen_prepare,

-    .bdrv_co_preadv                 = nfs_co_preadv,
-    .bdrv_co_pwritev                = nfs_co_pwritev,
+    .bdrv_co_readv                  = nfs_co_readv,
+    .bdrv_co_writev                 = nfs_co_writev,
    .bdrv_co_flush_to_disk          = nfs_co_flush,

    .bdrv_detach_aio_context        = nfs_detach_aio_context,
--- a/block/null.c
+++ b/block/null.c
@@ -29,6 +29,11 @@ static QemuOptsList runtime_opts = {
    .name = "null",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "",
+        },
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
@@ -49,30 +54,6 @@ static QemuOptsList runtime_opts = {
    },
 };

-static void null_co_parse_filename(const char *filename, QDict *options,
-                                   Error **errp)
-{
-    /* This functions only exists so that a null-co:// filename is accepted
-     * with the null-co driver. */
-    if (strcmp(filename, "null-co://")) {
-        error_setg(errp, "The only allowed filename for this driver is "
-                         "'null-co://'");
-        return;
-    }
-}
-
-static void null_aio_parse_filename(const char *filename, QDict *options,
-                                    Error **errp)
-{
-    /* This functions only exists so that a null-aio:// filename is accepted
-     * with the null-aio driver. */
-    if (strcmp(filename, "null-aio://")) {
-        error_setg(errp, "The only allowed filename for this driver is "
-                         "'null-aio://'");
-        return;
-    }
-}
-
 static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
@@ -251,7 +232,7 @@ static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
                 bs->drv->format_name);
    }

-    qdict_put_str(opts, "driver", bs->drv->format_name);
+    qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
    bs->full_open_options = opts;
 }

@@ -261,7 +242,6 @@ static BlockDriver bdrv_null_co = {
    .instance_size          = sizeof(BDRVNullState),

    .bdrv_file_open         = null_file_open,
-    .bdrv_parse_filename    = null_co_parse_filename,
    .bdrv_close             = null_close,
    .bdrv_getlength         = null_getlength,

@@ -281,7 +261,6 @@ static BlockDriver bdrv_null_aio = {
    .instance_size          = sizeof(BDRVNullState),

    .bdrv_file_open         = null_file_open,
-    .bdrv_parse_filename    = null_aio_parse_filename,
    .bdrv_close             = null_close,
    .bdrv_getlength         = null_getlength,

--- a/block/parallels.c
+++ b/block/parallels.c
@@ -35,6 +35,7 @@
 #include "qemu/module.h"
 #include "qemu/bswap.h"
 #include "qemu/bitmap.h"
+#include "qapi/util.h"

 /**************************************************************/

@@ -68,14 +69,13 @@ typedef enum ParallelsPreallocMode {
    PRL_PREALLOC_MODE__MAX = 2,
 } ParallelsPreallocMode;

-static QEnumLookup prealloc_mode_lookup = {
-    .array = (const char *const[]) {
-        "falloc",
-        "truncate",
-    },
-    .size = PRL_PREALLOC_MODE__MAX
+static const char *prealloc_mode_lookup[] = {
+    "falloc",
+    "truncate",
+    NULL,
 };

+
 typedef struct BDRVParallelsState {
    /** Locking is conservative, the lock protects
     *   - image file extending (truncate, fallocate)
@@ -114,7 +114,7 @@ static QemuOptsList parallels_runtime_opts = {
            .name = PARALLELS_OPT_PREALLOC_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Preallocation size on image expansion",
-            .def_value_str = "128M",
+            .def_value_str = "128MiB",
        },
        {
            .name = PARALLELS_OPT_PREALLOC_MODE,
@@ -192,7 +192,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors, int *pnum)
 {
    BDRVParallelsState *s = bs->opaque;
-    int64_t pos, space, idx, to_allocate, i, len;
+    uint32_t idx, to_allocate, i;
+    int64_t pos, space;

    pos = block_status(s, sector_num, nb_sectors, pnum);
    if (pos > 0) {
@@ -200,25 +201,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
    }

    idx = sector_num / s->tracks;
-    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
-
-    /* This function is called only by parallels_co_writev(), which will never
-     * pass a sector_num at or beyond the end of the image (because the block
-     * layer never passes such a sector_num to that function). Therefore, idx
-     * is always below s->bat_size.
-     * block_status() will limit *pnum so that sector_num + *pnum will not
-     * exceed the image end. Therefore, idx + to_allocate cannot exceed
-     * s->bat_size.
-     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
-     * will always fit into a uint32_t. */
-    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
-
-    space = to_allocate * s->tracks;
-    len = bdrv_getlength(bs->file->bs);
-    if (len < 0) {
-        return len;
+    if (idx >= s->bat_size) {
+        return -EINVAL;
    }
-    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
+
+    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
+    space = to_allocate * s->tracks;
+    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
        int ret;
        space += s->prealloc_size;
        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
@@ -226,9 +215,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                     s->data_end << BDRV_SECTOR_BITS,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
-            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                PREALLOC_MODE_OFF, NULL);
+            ret = bdrv_truncate(bs->file->bs,
+                                (s->data_end + space) << BDRV_SECTOR_BITS);
        }
        if (ret < 0) {
            return ret;
@@ -461,11 +449,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            Error *local_err = NULL;
-            ret = bdrv_truncate(bs->file, res->image_end_offset,
-                                PREALLOC_MODE_OFF, &local_err);
+            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
            if (ret < 0) {
-                error_report_err(local_err);
                res->check_errors++;
                return ret;
            }
@@ -503,8 +488,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    file = blk_new_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                        &local_err);
+                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (file == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -512,7 +496,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0, PREALLOC_MODE_OFF, errp);
+    ret = blk_truncate(file, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -597,12 +581,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    char *buf;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
@@ -696,15 +674,14 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
-                                       PRL_PREALLOC_MODE_FALLOCATE,
-                                       &local_err);
+    s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
+            PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
    g_free(buf);
    if (local_err != NULL) {
        goto fail_options;
    }
-
-    if (!bdrv_has_zero_init(bs->file->bs)) {
+    if (!bdrv_has_zero_init(bs->file->bs) ||
+            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -747,8 +724,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
-                      PREALLOC_MODE_OFF, NULL);
+        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
    }

    g_free(s->bat_dirty_bmap);
@@ -780,7 +756,6 @@ static BlockDriver bdrv_parallels = {
    .bdrv_probe		= parallels_probe,
    .bdrv_open		= parallels_open,
    .bdrv_close		= parallels_close,
-    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_co_get_block_status = parallels_co_get_block_status,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -45,7 +45,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
    info->ro                     = bs->read_only;
    info->drv                    = g_strdup(bs->drv->format_name);
    info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = false;
+    info->encryption_key_missing = bdrv_key_required(bs);

    info->cache = g_new(BlockdevCacheInfo, 1);
    *info->cache = (BlockdevCacheInfo) {
@@ -64,13 +64,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
        info->backing_file = g_strdup(bs->backing_file);
    }

+    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
    info->detect_zeroes = bs->detect_zeroes;

-    if (blk && blk_get_public(blk)->throttle_group_member.throttle_state) {
+    if (blk && blk_get_public(blk)->throttle_state) {
        ThrottleConfig cfg;
-        BlockBackendPublic *blkp = blk_get_public(blk);

-        throttle_group_get_config(&blkp->throttle_group_member, &cfg);
+        throttle_group_get_config(blk, &cfg);

        info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
        info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
@@ -118,15 +118,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
        info->iops_size = cfg.op_size;

        info->has_group = true;
-        info->group =
-            g_strdup(throttle_group_get_name(&blkp->throttle_group_member));
+        info->group = g_strdup(throttle_group_get_name(blk));
    }

    info->write_threshold = bdrv_write_threshold_get(bs);

    bs0 = bs;
    p_image_info = &info->image;
-    info->backing_file_depth = 0;
    while (1) {
        Error *local_err = NULL;
        bdrv_query_image_info(bs0, p_image_info, &local_err);
@@ -135,22 +133,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
            qapi_free_BlockDeviceInfo(info);
            return NULL;
        }
-
        if (bs0->drv && bs0->backing) {
-            info->backing_file_depth++;
            bs0 = bs0->backing->bs;
            (*p_image_info)->has_backing_image = true;
            p_image_info = &((*p_image_info)->backing_image);
        } else {
            break;
        }
-
-        /* Skip automatically inserted nodes that the user isn't aware of for
-         * query-block (blk != NULL), but not for query-named-block-nodes */
-        while (blk && bs0->drv && bs0->implicit) {
-            bs0 = backing_bs(bs0);
-            assert(bs0);
-        }
    }

    return info;
@@ -248,8 +237,8 @@ void bdrv_query_image_info(BlockDriverState *bs,

    size = bdrv_getlength(bs);
    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get image size '%s'",
-                         bs->exact_filename);
+        error_setg_errno(errp, -size, "Can't get size of device '%s'",
+                         bdrv_get_device_name(bs));
        goto out;
    }

@@ -333,26 +322,11 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
    BlockDriverState *bs = blk_bs(blk);
-    char *qdev;
-
-    /* Skip automatically inserted nodes that the user isn't aware of */
-    while (bs && bs->drv && bs->implicit) {
-        bs = backing_bs(bs);
-    }
-
    info->device = g_strdup(blk_name(blk));
    info->type = g_strdup("unknown");
    info->locked = blk_dev_is_medium_locked(blk);
    info->removable = blk_dev_has_removable_media(blk);

-    qdev = blk_get_attached_dev_id(blk);
-    if (qdev && *qdev) {
-        info->has_qdev = true;
-        info->qdev = qdev;
-    } else {
-        g_free(qdev);
-    }
-
    if (blk_dev_has_tray(blk)) {
        info->has_tray_open = true;
        info->tray_open = blk_dev_is_tray_open(blk);
@@ -383,6 +357,10 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing);
+
 static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
 {
    BlockAcctStats *stats = blk_get_stats(blk);
@@ -450,41 +428,44 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
    }
 }

-static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
-                                        bool blk_level)
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+                                 bool query_backing)
 {
-    BlockStats *s = NULL;
-
-    s = g_malloc0(sizeof(*s));
-    s->stats = g_malloc0(sizeof(*s->stats));
-
-    if (!bs) {
-        return s;
-    }
-
-    /* Skip automatically inserted nodes that the user isn't aware of in
-     * a BlockBackend-level command. Stay at the exact node for a node-level
-     * command. */
-    while (blk_level && bs->drv && bs->implicit) {
-        bs = backing_bs(bs);
-        assert(bs);
-    }
-
    if (bdrv_get_node_name(bs)[0]) {
        s->has_node_name = true;
        s->node_name = g_strdup(bdrv_get_node_name(bs));
    }

-    s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
+    s->stats->wr_highest_offset = bs->wr_highest_offset;

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_bds_stats(bs->file->bs, blk_level);
+        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
    }

-    if (blk_level && bs->backing) {
+    if (query_backing && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_bds_stats(bs->backing->bs, blk_level);
+        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+    }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing)
+{
+    BlockStats *s;
+
+    s = g_malloc0(sizeof(*s));
+    s->stats = g_malloc0(sizeof(*s->stats));
+
+    if (blk) {
+        s->has_device = true;
+        s->device = g_strdup(blk_name(blk));
+        bdrv_query_blk_stats(s->stats, blk);
+    }
+    if (bs) {
+        bdrv_query_bds_stats(s, bs, query_backing);
    }

    return s;
@@ -496,14 +477,8 @@ BlockInfoList *qmp_query_block(Error **errp)
    BlockBackend *blk;
    Error *local_err = NULL;

-    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
-        BlockInfoList *info;
-
-        if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
-            continue;
-        }
-
-        info = g_malloc0(sizeof(*info));
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        BlockInfoList *info = g_malloc0(sizeof(*info));
        bdrv_query_info(blk, &info->value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
@@ -519,44 +494,42 @@ BlockInfoList *qmp_query_block(Error **errp)
    return head;
 }

+/* Advance the node walk (*bs) or the backend walk (*blk, *bs follows). */
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+                           bool query_nodes)
+{
+    if (!query_nodes) {
+        *blk = blk_next(*blk);
+        *bs = *blk ? blk_bs(*blk) : NULL;
+        return *blk != NULL;
+    }
+
+    *bs = bdrv_next_node(*bs);
+    return *bs != NULL;
+}
+
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                     bool query_nodes,
                                     Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
-    BlockBackend *blk;
-    BlockDriverState *bs;
+    BlockBackend *blk = NULL;
+    BlockDriverState *bs = NULL;

    /* Just to be safe if query_nodes is not always initialized */
-    if (has_query_nodes && query_nodes) {
-        for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
-            BlockStatsList *info = g_malloc0(sizeof(*info));
-            AioContext *ctx = bdrv_get_aio_context(bs);
+    query_nodes = has_query_nodes && query_nodes;

-            aio_context_acquire(ctx);
-            info->value = bdrv_query_bds_stats(bs, false);
-            aio_context_release(ctx);
+    while (next_query_bds(&blk, &bs, query_nodes)) {
+        BlockStatsList *info = g_malloc0(sizeof(*info));
+        AioContext *ctx = blk ? blk_get_aio_context(blk)
+                              : bdrv_get_aio_context(bs);

-            *p_next = info;
-            p_next = &info->next;
-        }
-    } else {
-        for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-            BlockStatsList *info = g_malloc0(sizeof(*info));
-            AioContext *ctx = blk_get_aio_context(blk);
-            BlockStats *s;
+        aio_context_acquire(ctx);
+        info->value = bdrv_query_stats(blk, bs, !query_nodes);
+        aio_context_release(ctx);

-            aio_context_acquire(ctx);
-            s = bdrv_query_bds_stats(blk_bs(blk), true);
-            s->has_device = true;
-            s->device = g_strdup(blk_name(blk));
-            bdrv_query_blk_stats(s->stats, blk);
-            aio_context_release(ctx);
-
-            info->value = s;
-            *p_next = info;
-            p_next = &info->next;
-        }
+        *p_next = info;
+        p_next = &info->next;
    }

    return head;
@@ -635,11 +608,9 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
                         int comp_indent, QObject *obj)
 {
    switch (qobject_type(obj)) {
-        case QTYPE_QNUM: {
-            QNum *value = qobject_to_qnum(obj);
-            char *tmp = qnum_to_string(value);
-            func_fprintf(f, "%s", tmp);
-            g_free(tmp);
+        case QTYPE_QINT: {
+            QInt *value = qobject_to_qint(obj);
+            func_fprintf(f, "%" PRId64, qint_get_int(value));
            break;
        }
        case QTYPE_QSTRING: {
@@ -657,6 +628,11 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
            dump_qlist(func_fprintf, f, comp_indent, value);
            break;
        }
+        case QTYPE_QFLOAT: {
+            QFloat *value = qobject_to_qfloat(obj);
+            func_fprintf(f, "%g", qfloat_get_double(value));
+            break;
+        }
        case QTYPE_QBOOL: {
            QBool *value = qobject_to_qbool(obj);
            func_fprintf(f, "%s", qbool_get_bool(value) ? "true" : "false");
@@ -719,6 +695,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,

    visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
    visit_complete(v, &obj);
+    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
    qobject_decref(obj);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -31,10 +31,8 @@
 #include "qemu/bswap.h"
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qstring.h"
-#include "crypto/block.h"
-#include "migration/blocker.h"
-#include "block/crypto.h"
+#include "crypto/cipher.h"
+#include "migration/migration.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -79,7 +77,7 @@ typedef struct BDRVQcowState {
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
-    QCryptoBlock *crypto; /* Disk encryption format driver */
+    QCryptoCipher *cipher; /* NULL if no key yet */
    uint32_t crypt_method_header;
    CoMutex lock;
    Error *migration_blocker;
@@ -99,15 +97,6 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
        return 0;
 }

-static QemuOptsList qcow_runtime_opts = {
-    .name = "qcow",
-    .head = QTAILQ_HEAD_INITIALIZER(qcow_runtime_opts.head),
-    .desc = {
-        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
-        { /* end of list */ }
-    },
-};
-
 static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -115,21 +104,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    unsigned int len, i, shift;
    int ret;
    QCowHeader header;
-    Error *local_err = NULL;
-    QCryptoBlockOpenOptions *crypto_opts = NULL;
-    unsigned int cflags = 0;
-    QDict *encryptopts = NULL;
-    const char *encryptfmt;
-
-    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
-    encryptfmt = qdict_get_try_str(encryptopts, "format");
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        ret = -EINVAL;
-        goto fail;
-    }

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
@@ -174,6 +148,17 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    if (header.crypt_method > QCOW_CRYPT_AES) {
+        error_setg(errp, "invalid encryption method in qcow header");
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
+                                 QCRYPTO_CIPHER_MODE_CBC)) {
+        error_setg(errp, "AES cipher not available");
+        ret = -EINVAL;
+        goto fail;
+    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
@@ -189,44 +174,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -ENOSYS;
            goto fail;
        }
-        if (s->crypt_method_header == QCOW_CRYPT_AES) {
-            if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
-                error_setg(errp,
-                           "Header reported 'aes' encryption format but "
-                           "options specify '%s'", encryptfmt);
-                ret = -EINVAL;
-                goto fail;
-            }
-            qdict_del(encryptopts, "format");
-            crypto_opts = block_crypto_open_opts_init(
-                Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
-            if (!crypto_opts) {
-                ret = -EINVAL;
-                goto fail;
-            }

-            if (flags & BDRV_O_NO_IO) {
-                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
-            }
-            s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
-                                           NULL, NULL, cflags, errp);
-            if (!s->crypto) {
-                ret = -EINVAL;
-                goto fail;
-            }
-        } else {
-            error_setg(errp, "invalid encryption method in qcow header");
-            ret = -EINVAL;
-            goto fail;
-        }
        bs->encrypted = true;
-    } else {
-        if (encryptfmt) {
-            error_setg(errp, "No encryption in image header, but options "
-                       "specified format '%s'", encryptfmt);
-            ret = -EINVAL;
-            goto fail;
-        }
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
@@ -303,15 +252,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        goto fail;
-    }
+    migrate_add_blocker(s->migration_blocker);

-    QDECREF(encryptopts);
-    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    qemu_co_mutex_init(&s->lock);
    return 0;

@@ -320,9 +262,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
-    qcrypto_block_free(s->crypto);
-    QDECREF(encryptopts);
-    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    return ret;
 }

@@ -335,6 +274,85 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
    return 0;
 }

+/* Install an AES-128-CBC cipher keyed from @key; 0 on success, -1 on error. */
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint8_t keybuf[16];
+    int len, i;
+    Error *err = NULL; /* the Error API expects *errp == NULL on entry */
+
+    memset(keybuf, 0, 16);
+    len = strlen(key);
+    if (len > 16)
+        len = 16;
+    /* XXX: we could compress the chars to 7 bits to increase entropy */
+    for(i = 0;i < len;i++) {
+        keybuf[i] = key[i];
+    }
+    assert(bs->encrypted);
+
+    qcrypto_cipher_free(s->cipher);
+    s->cipher = qcrypto_cipher_new(
+        QCRYPTO_CIPHER_ALG_AES_128,
+        QCRYPTO_CIPHER_MODE_CBC,
+        keybuf, G_N_ELEMENTS(keybuf),
+        &err);
+
+    if (!s->cipher) {
+        /* XXX would be nice if errors in this method could
+         * be properly propagated to the caller. Would need
+         * the bdrv_set_key() API signature to be fixed. */
+        error_free(err);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Encrypt (enc == true) or decrypt (enc == false) nb_sectors 512-byte
+ * sectors from in_buf into out_buf, one sector per cipher call.
+ *
+ * Before each sector the IV is reset to the little-endian 64-bit sector
+ * number followed by eight zero bytes.  The original author notes this is
+ * compatible with the linux cryptoloop algorithm for < 4 GB images, and
+ * that out_buf == in_buf is supported.
+ *
+ * Returns 0 on success, -1 as soon as any cipher call fails (errp is
+ * handed to the failing call).
+ */
+static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                           uint8_t *out_buf, const uint8_t *in_buf,
+                           int nb_sectors, bool enc, Error **errp)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } iv;
+    int remaining;
+
+    for (remaining = nb_sectors; remaining > 0; remaining--) {
+        int rc;
+
+        iv.ll[0] = cpu_to_le64(sector_num);
+        iv.ll[1] = 0;
+        rc = qcrypto_cipher_setiv(s->cipher, iv.b, G_N_ELEMENTS(iv.b), errp);
+        if (rc < 0) {
+            return -1;
+        }
+
+        rc = enc
+            ? qcrypto_cipher_encrypt(s->cipher, in_buf, out_buf, 512, errp)
+            : qcrypto_cipher_decrypt(s->cipher, in_buf, out_buf, 512, errp);
+        if (rc < 0) {
+            return -1;
+        }
+        sector_num++;
+        in_buf += 512;
+        out_buf += 512;
+    }
+    return 0;
+}

 /* 'allocate' is:
 *
@@ -347,22 +365,19 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
 * 'compressed_size'. 'compressed_size' must be > 0 and <
 * cluster_size
 *
- * return 0 if not allocated, 1 if *result is assigned, and negative
- * errno on failure.
+ * return 0 if not allocated.
 */
-static int get_cluster_offset(BlockDriverState *bs,
-                              uint64_t offset, int allocate,
-                              int compressed_size,
-                              int n_start, int n_end, uint64_t *result)
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                   uint64_t offset, int allocate,
+                                   int compressed_size,
+                                   int n_start, int n_end)
 {
    BDRVQcowState *s = bs->opaque;
-    int min_index, i, j, l1_index, l2_index, ret;
-    int64_t l2_offset;
-    uint64_t *l2_table, cluster_offset, tmp;
+    int min_index, i, j, l1_index, l2_index;
+    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
    uint32_t min_count;
    int new_l2_table;

-    *result = 0;
    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    l2_offset = s->l1_table[l1_index];
    new_l2_table = 0;
@@ -371,20 +386,15 @@ static int get_cluster_offset(BlockDriverState *bs,
            return 0;
        /* allocate a new l2 entry */
        l2_offset = bdrv_getlength(bs->file->bs);
-        if (l2_offset < 0) {
-            return l2_offset;
-        }
        /* round to cluster size */
-        l2_offset = QEMU_ALIGN_UP(l2_offset, s->cluster_size);
+        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
-        ret = bdrv_pwrite_sync(bs->file,
-                               s->l1_table_offset + l1_index * sizeof(tmp),
-                               &tmp, sizeof(tmp));
-        if (ret < 0) {
-            return ret;
-        }
+        if (bdrv_pwrite_sync(bs->file,
+                s->l1_table_offset + l1_index * sizeof(tmp),
+                &tmp, sizeof(tmp)) < 0)
+            return 0;
        new_l2_table = 1;
    }
    for(i = 0; i < L2_CACHE_SIZE; i++) {
@@ -411,17 +421,14 @@ static int get_cluster_offset(BlockDriverState *bs,
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
-                               s->l2_size * sizeof(uint64_t));
-        if (ret < 0) {
-            return ret;
-        }
+        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+                s->l2_size * sizeof(uint64_t)) < 0)
+            return 0;
    } else {
-        ret = bdrv_pread(bs->file, l2_offset, l2_table,
-                         s->l2_size * sizeof(uint64_t));
-        if (ret < 0) {
-            return ret;
-        }
+        if (bdrv_pread(bs->file, l2_offset, l2_table,
+                       s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return 0;
    }
    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
@@ -438,58 +445,46 @@ static int get_cluster_offset(BlockDriverState *bs,
            /* if the cluster is already compressed, we must
               decompress it in the case it is not completely
               overwritten */
-            if (decompress_cluster(bs, cluster_offset) < 0) {
-                return -EIO;
-            }
+            if (decompress_cluster(bs, cluster_offset) < 0)
+                return 0;
            cluster_offset = bdrv_getlength(bs->file->bs);
-            if ((int64_t) cluster_offset < 0) {
-                return cluster_offset;
-            }
-            cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
+            cluster_offset = (cluster_offset + s->cluster_size - 1) &
+                ~(s->cluster_size - 1);
            /* write the cluster content */
-            ret = bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
-                              s->cluster_size);
-            if (ret < 0) {
-                return ret;
-            }
+            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
+                            s->cluster_size) !=
+                s->cluster_size)
+                return -1;
        } else {
            cluster_offset = bdrv_getlength(bs->file->bs);
-            if ((int64_t) cluster_offset < 0) {
-                return cluster_offset;
-            }
            if (allocate == 1) {
                /* round to cluster size */
-                cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
-                if (cluster_offset + s->cluster_size > INT64_MAX) {
-                    return -E2BIG;
-                }
-                ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
-                                    PREALLOC_MODE_OFF, NULL);
-                if (ret < 0) {
-                    return ret;
-                }
+                cluster_offset = (cluster_offset + s->cluster_size - 1) &
+                    ~(s->cluster_size - 1);
+                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
-                    assert(s->crypto);
+                    assert(s->cipher);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
-                            memset(s->cluster_data, 0x00, 512);
-                            if (qcrypto_block_encrypt(s->crypto, start_sect + i,
-                                                      s->cluster_data,
-                                                      BDRV_SECTOR_SIZE,
-                                                      NULL) < 0) {
-                                return -EIO;
-                            }
-                            ret = bdrv_pwrite(bs->file,
-                                              cluster_offset + i * 512,
-                                              s->cluster_data, 512);
-                            if (ret < 0) {
-                                return ret;
+                            Error *err = NULL;
+                            if (encrypt_sectors(s, start_sect + i,
+                                                s->cluster_data,
+                                                s->cluster_data + 512, 1,
+                                                true, &err) < 0) {
+                                error_free(err);
+                                errno = EIO;
+                                return -1;
                            }
+                            if (bdrv_pwrite(bs->file,
+                                            cluster_offset + i * 512,
+                                            s->cluster_data, 512) != 512)
+                                return -1;
                        }
                    }
                }
@@ -501,29 +496,23 @@ static int get_cluster_offset(BlockDriverState *bs,
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
-        ret = bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
-                               &tmp, sizeof(tmp));
-        if (ret < 0) {
-            return ret;
-        }
+        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+                &tmp, sizeof(tmp)) < 0)
+            return 0;
    }
-    *result = cluster_offset;
-    return 1;
+    return cluster_offset;
 }

 static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
 {
    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster, n, ret;
+    int index_in_cluster, n;
    uint64_t cluster_offset;

    qemu_co_mutex_lock(&s->lock);
-    ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset);
+    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
    qemu_co_mutex_unlock(&s->lock);
-    if (ret < 0) {
-        return ret;
-    }
    index_in_cluster = sector_num & (s->cluster_sectors - 1);
    n = s->cluster_sectors - index_in_cluster;
    if (n > nb_sectors)
@@ -532,7 +521,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
    if (!cluster_offset) {
        return 0;
    }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
+    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
        return BDRV_BLOCK_DATA;
    }
    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
@@ -600,6 +589,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;
+    Error *err = NULL;

    if (qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
@@ -615,11 +605,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,

    while (nb_sectors != 0) {
        /* prepare next request */
-        ret = get_cluster_offset(bs, sector_num << 9,
-                                 0, 0, 0, 0, &cluster_offset);
-        if (ret < 0) {
-            break;
-        }
+        cluster_offset = get_cluster_offset(bs, sector_num << 9,
+                                                 0, 0, 0, 0);
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
@@ -636,7 +623,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
-                    break;
+                    goto fail;
                }
            } else {
                /* Note: in this case, no need to wait */
@@ -645,15 +632,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            /* add AIO support for compressed blocks ? */
            if (decompress_cluster(bs, cluster_offset) < 0) {
-                ret = -EIO;
-                break;
+                goto fail;
            }
            memcpy(buf,
                   s->cluster_cache + index_in_cluster * 512, 512 * n);
        } else {
            if ((cluster_offset & 511) != 0) {
-                ret = -EIO;
-                break;
+                goto fail;
            }
            hd_iov.iov_base = (void *)buf;
            hd_iov.iov_len = n * 512;
@@ -667,11 +652,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                break;
            }
            if (bs->encrypted) {
-                assert(s->crypto);
-                if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
-                                          n * BDRV_SECTOR_SIZE, NULL) < 0) {
-                    ret = -EIO;
-                    break;
+                assert(s->cipher);
+                if (encrypt_sectors(s, sector_num, buf, buf,
+                                    n, false, &err) < 0) {
+                    goto fail;
                }
            }
        }
@@ -682,6 +666,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
        buf += n * 512;
    }

+done:
    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
@@ -690,6 +675,11 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
    }

    return ret;
+
+fail:
+    error_free(err);
+    ret = -EIO;
+    goto done;
 }

 static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -698,7 +688,9 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    uint64_t cluster_offset;
+    const uint8_t *src_buf;
    int ret = 0, n;
+    uint8_t *cluster_data = NULL;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
@@ -706,9 +698,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,

    s->cluster_cache_offset = -1; /* disable compressed cache */

-    /* We must always copy the iov when encrypting, so we
-     * don't modify the original data buffer during encryption */
-    if (bs->encrypted || qiov->niov > 1) {
+    if (qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
        if (buf == NULL) {
            return -ENOMEM;
@@ -728,26 +718,31 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        if (n > nb_sectors) {
            n = nb_sectors;
        }
-        ret = get_cluster_offset(bs, sector_num << 9, 1, 0,
-                                 index_in_cluster,
-                                 index_in_cluster + n, &cluster_offset);
-        if (ret < 0) {
-            break;
-        }
+        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+                                            index_in_cluster,
+                                            index_in_cluster + n);
        if (!cluster_offset || (cluster_offset & 511) != 0) {
            ret = -EIO;
            break;
        }
        if (bs->encrypted) {
-            assert(s->crypto);
-            if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
-                                      n * BDRV_SECTOR_SIZE, NULL) < 0) {
+            Error *err = NULL;
+            assert(s->cipher);
+            if (!cluster_data) {
+                cluster_data = g_malloc0(s->cluster_size);
+            }
+            if (encrypt_sectors(s, sector_num, cluster_data, buf,
+                                n, true, &err) < 0) {
+                error_free(err);
                ret = -EIO;
                break;
            }
+            src_buf = cluster_data;
+        } else {
+            src_buf = buf;
        }

-        hd_iov.iov_base = (void *)buf;
+        hd_iov.iov_base = (void *)src_buf;
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
@@ -766,7 +761,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    }
    qemu_co_mutex_unlock(&s->lock);

-    qemu_vfree(orig_buf);
+    if (qiov->niov > 1) {
+        qemu_vfree(orig_buf);
+    }
+    g_free(cluster_data);

    return ret;
 }
@@ -775,8 +773,8 @@ static void qcow_close(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;

-    qcrypto_block_free(s->crypto);
-    s->crypto = NULL;
+    qcrypto_cipher_free(s->cipher);
+    s->cipher = NULL;
    g_free(s->l1_table);
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
@@ -793,35 +791,17 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    uint8_t *tmp;
    int64_t total_size = 0;
    char *backing_file = NULL;
+    int flags = 0;
    Error *local_err = NULL;
    int ret;
    BlockBackend *qcow_blk;
-    char *encryptfmt = NULL;
-    QDict *options;
-    QDict *encryptopts = NULL;
-    QCryptoBlockCreateOptions *crypto_opts = NULL;
-    QCryptoBlock *crypto = NULL;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);
-    if (total_size == 0) {
-        error_setg(errp, "Image size is too small, cannot be zero length");
-        ret = -EINVAL;
-        goto cleanup;
-    }
-
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
-    if (encryptfmt) {
-        if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
-            error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
-                       BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
-            ret = -EINVAL;
-            goto cleanup;
-        }
-    } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        encryptfmt = g_strdup("aes");
+    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
+        flags |= BLOCK_FLAG_ENCRYPT;
    }

    ret = bdrv_create_file(filename, opts, &local_err);
@@ -831,8 +811,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    qcow_blk = blk_new_open(filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                            &local_err);
+                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (qcow_blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -841,7 +820,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp);
+    ret = blk_truncate(qcow_blk, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -860,7 +839,6 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
            header_size += backing_filename_len;
        } else {
            /* special backing file for vvfat */
-            g_free(backing_file);
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
@@ -875,32 +853,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    l1_size = (total_size + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
-
-    options = qemu_opts_to_qdict(opts, NULL);
-    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
-    QDECREF(options);
-    if (encryptfmt) {
-        if (!g_str_equal(encryptfmt, "aes")) {
-            error_setg(errp, "Unknown encryption format '%s', expected 'aes'",
-                       encryptfmt);
-            ret = -EINVAL;
-            goto exit;
-        }
+    if (flags & BLOCK_FLAG_ENCRYPT) {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-
-        crypto_opts = block_crypto_create_opts_init(
-            Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
-        if (!crypto_opts) {
-            ret = -EINVAL;
-            goto exit;
-        }
-
-        crypto = qcrypto_block_create(crypto_opts, "encrypt.",
-                                      NULL, NULL, NULL, errp);
-        if (!crypto) {
-            ret = -EINVAL;
-            goto exit;
-        }
    } else {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }
@@ -935,10 +889,6 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
 exit:
    blk_unref(qcow_blk);
 cleanup:
-    QDECREF(encryptopts);
-    g_free(encryptfmt);
-    qcrypto_block_free(crypto);
-    qapi_free_QCryptoBlockCreateOptions(crypto_opts);
    g_free(backing_file);
    return ret;
 }
@@ -953,8 +903,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length,
-                        PREALLOC_MODE_OFF, NULL);
+    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

@@ -1029,11 +978,8 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        goto success;
    }
    qemu_co_mutex_lock(&s->lock);
-    ret = get_cluster_offset(bs, offset, 2, out_len, 0, 0, &cluster_offset);
+    cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0);
    qemu_co_mutex_unlock(&s->lock);
-    if (ret < 0) {
-        goto fail;
-    }
    if (cluster_offset == 0) {
        ret = -EIO;
        goto fail;
@@ -1081,15 +1027,9 @@ static QemuOptsList qcow_create_opts = {
        {
            .name = BLOCK_OPT_ENCRYPT,
            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image with format 'aes'. (Deprecated "
-                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
+            .help = "Encrypt the image",
+            .def_value_str = "off"
        },
-        {
-            .name = BLOCK_OPT_ENCRYPT_FORMAT,
-            .type = QEMU_OPT_STRING,
-            .help = "Encrypt the image, format choices: 'aes'",
-        },
-        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
        { /* end of list */ }
    }
 };
@@ -1100,7 +1040,6 @@ static BlockDriver bdrv_qcow = {
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
-    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_reopen_prepare    = qcow_reopen_prepare,
    .bdrv_create            = qcow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
@@ -1110,6 +1049,7 @@ static BlockDriver bdrv_qcow = {
    .bdrv_co_writev         = qcow_co_writev,
    .bdrv_co_get_block_status   = qcow_co_get_block_status,

+    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
    .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
    .bdrv_get_info          = qcow_get_info,
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -61,7 +61,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
            new_l1_size = 1;
        }
        while (min_size > new_l1_size) {
-            new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2);
+            new_l1_size = (new_l1_size * 3 + 1) / 2;
        }
    }

@@ -309,19 +309,14 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
 {
    int i;
-    QCow2ClusterType first_cluster_type;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

-    if (!offset) {
+    if (!offset)
        return 0;
-    }

-    /* must be allocated */
-    first_cluster_type = qcow2_get_cluster_type(first_entry);
-    assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
-           first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
+    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -333,21 +328,14 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
 	return i;
 }

-/*
- * Checks how many consecutive unallocated clusters in a given L2
- * table have the same cluster type.
- */
-static int count_contiguous_clusters_unallocated(int nb_clusters,
-                                                 uint64_t *l2_table,
-                                                 QCow2ClusterType wanted_type)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+                                             uint64_t *l2_table,
+                                             int wanted_type)
 {
    int i;

-    assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
-           wanted_type == QCOW2_CLUSTER_UNALLOCATED);
    for (i = 0; i < nb_clusters; i++) {
-        uint64_t entry = be64_to_cpu(l2_table[i]);
-        QCow2ClusterType type = qcow2_get_cluster_type(entry);
+        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));

        if (type != wanted_type) {
            break;
@@ -357,21 +345,76 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
    return i;
 }

-static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
-                                            uint64_t src_cluster_offset,
-                                            unsigned offset_in_cluster,
-                                            QEMUIOVector *qiov)
+/* The crypt function is compatible with the linux cryptoloop
+   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+   supported */
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
+                          uint8_t *out_buf, const uint8_t *in_buf,
+                          int nb_sectors, bool enc,
+                          Error **errp)
 {
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } ivec;
+    int i;
    int ret;

-    if (qiov->size == 0) {
-        return 0;
+    for(i = 0; i < nb_sectors; i++) {
+        ivec.ll[0] = cpu_to_le64(sector_num);
+        ivec.ll[1] = 0;
+        if (qcrypto_cipher_setiv(s->cipher,
+                                 ivec.b, G_N_ELEMENTS(ivec.b),
+                                 errp) < 0) {
+            return -1;
+        }
+        if (enc) {
+            ret = qcrypto_cipher_encrypt(s->cipher,
+                                         in_buf,
+                                         out_buf,
+                                         512,
+                                         errp);
+        } else {
+            ret = qcrypto_cipher_decrypt(s->cipher,
+                                         in_buf,
+                                         out_buf,
+                                         512,
+                                         errp);
+        }
+        if (ret < 0) {
+            return -1;
+        }
+        sector_num++;
+        in_buf += 512;
+        out_buf += 512;
    }
+    return 0;
+}
+
+static int coroutine_fn do_perform_cow(BlockDriverState *bs,
+                                       uint64_t src_cluster_offset,
+                                       uint64_t cluster_offset,
+                                       int offset_in_cluster,
+                                       int bytes)
+{
+    BDRVQcow2State *s = bs->opaque;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    int ret;
+
+    iov.iov_len = bytes;
+    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
+    if (iov.iov_base == NULL) {
+        return -ENOMEM;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
-        return -ENOMEDIUM;
+        ret = -ENOMEDIUM;
+        goto out;
    }

    /* Call .bdrv_co_readv() directly instead of using the public block-layer
@@ -379,63 +422,43 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
-                                  qiov->size, qiov, 0);
+                                  bytes, &qiov, 0);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

-    return 0;
-}
-
-static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
-                                                uint64_t src_cluster_offset,
-                                                uint64_t cluster_offset,
-                                                unsigned offset_in_cluster,
-                                                uint8_t *buffer,
-                                                unsigned bytes)
-{
-    if (bytes && bs->encrypted) {
-        BDRVQcow2State *s = bs->opaque;
-        int64_t sector = (s->crypt_physical_offset ?
-                          (cluster_offset + offset_in_cluster) :
-                          (src_cluster_offset + offset_in_cluster))
+    if (bs->encrypted) {
+        Error *err = NULL;
+        int64_t sector = (src_cluster_offset + offset_in_cluster)
                         >> BDRV_SECTOR_BITS;
+        assert(s->cipher);
        assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
        assert((bytes & ~BDRV_SECTOR_MASK) == 0);
-        assert(s->crypto);
-        if (qcrypto_block_encrypt(s->crypto, sector, buffer,
-                                  bytes, NULL) < 0) {
-            return false;
+        if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+                                  bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
+            ret = -EIO;
+            error_free(err);
+            goto out;
        }
    }
-    return true;
-}
-
-static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
-                                             uint64_t cluster_offset,
-                                             unsigned offset_in_cluster,
-                                             QEMUIOVector *qiov)
-{
-    int ret;
-
-    if (qiov->size == 0) {
-        return 0;
-    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + offset_in_cluster, qiov->size);
+            cluster_offset + offset_in_cluster, bytes);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
-                          qiov->size, qiov, 0);
+                          bytes, &qiov, 0);
    if (ret < 0) {
-        return ret;
+        goto out;
    }

-    return 0;
+    ret = 0;
+out:
+    qemu_vfree(iov.iov_base);
+    return ret;
 }


@@ -464,7 +487,6 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int l1_bits, c;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
-    QCow2ClusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
@@ -487,13 +509,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
-        type = QCOW2_CLUSTER_UNALLOCATED;
+        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
-        type = QCOW2_CLUSTER_UNALLOCATED;
+        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

@@ -513,7 +535,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    /* find the cluster offset for the given disk offset */

-    l2_index = offset_to_l2_index(s, offset);
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);

    nb_clusters = size_to_clusters(s, bytes_needed);
@@ -522,37 +544,38 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     * true */
    assert(nb_clusters <= INT_MAX);

-    type = qcow2_get_cluster_type(*cluster_offset);
-    if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
-                                type == QCOW2_CLUSTER_ZERO_ALLOC)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                " in pre-v3 image (L2 offset: %#" PRIx64
-                                ", L2 index: %#x)", l2_offset, l2_index);
-        ret = -EIO;
-        goto fail;
-    }
-    switch (type) {
+    ret = qcow2_get_cluster_type(*cluster_offset);
+    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
-    case QCOW2_CLUSTER_ZERO_PLAIN:
-    case QCOW2_CLUSTER_UNALLOCATED:
-        /* how many empty clusters ? */
-        c = count_contiguous_clusters_unallocated(nb_clusters,
-                                                  &l2_table[l2_index], type);
+    case QCOW2_CLUSTER_ZERO:
+        if (s->qcow_version < 3) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+                                    " in pre-v3 image (L2 offset: %#" PRIx64
+                                    ", L2 index: %#x)", l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_ZERO);
+        *cluster_offset = 0;
+        break;
+    case QCOW2_CLUSTER_UNALLOCATED:
+        /* how many empty clusters ? */
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_UNALLOCATED);
        *cluster_offset = 0;
        break;
-    case QCOW2_CLUSTER_ZERO_ALLOC:
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
+                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1,
-                                    "Cluster allocation offset %#"
+            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", *cluster_offset,
                                    l2_offset, l2_index);
@@ -579,7 +602,7 @@ out:
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

-    return type;
+    return ret;

 fail:
    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
@@ -650,7 +673,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,

    /* find the cluster offset for the given disk offset */

-    l2_index = offset_to_l2_index(s, offset);
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    *new_l2_table = l2_table;
    *new_l2_index = l2_index;
@@ -718,134 +741,31 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
    return cluster_offset;
 }

-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
 {
    BDRVQcow2State *s = bs->opaque;
-    Qcow2COWRegion *start = &m->cow_start;
-    Qcow2COWRegion *end = &m->cow_end;
-    unsigned buffer_size;
-    unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
-    bool merge_reads;
-    uint8_t *start_buffer, *end_buffer;
-    QEMUIOVector qiov;
    int ret;

-    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
-    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
-    assert(start->offset + start->nb_bytes <= end->offset);
-    assert(!m->data_qiov || m->data_qiov->size == data_bytes);
-
-    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
+    if (r->nb_bytes == 0) {
        return 0;
    }

-    /* If we have to read both the start and end COW regions and the
-     * middle region is not too large then perform just one read
-     * operation */
-    merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
-    if (merge_reads) {
-        buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
-    } else {
-        /* If we have to do two reads, add some padding in the middle
-         * if necessary to make sure that the end region is optimally
-         * aligned. */
-        size_t align = bdrv_opt_mem_align(bs);
-        assert(align > 0 && align <= UINT_MAX);
-        assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
-               UINT_MAX - end->nb_bytes);
-        buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
-    }
-
-    /* Reserve a buffer large enough to store all the data that we're
-     * going to read */
-    start_buffer = qemu_try_blockalign(bs, buffer_size);
-    if (start_buffer == NULL) {
-        return -ENOMEM;
-    }
-    /* The part of the buffer where the end region is located */
-    end_buffer = start_buffer + buffer_size - end->nb_bytes;
-
-    qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));
-
    qemu_co_mutex_unlock(&s->lock);
-    /* First we read the existing data from both COW regions. We
-     * either read the whole region in one go, or the start and end
-     * regions separately. */
-    if (merge_reads) {
-        qemu_iovec_add(&qiov, start_buffer, buffer_size);
-        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
-    } else {
-        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
-        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        qemu_iovec_reset(&qiov);
-        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
-        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
-    }
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Encrypt the data if necessary before writing it */
-    if (bs->encrypted) {
-        if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
-                                    start->offset, start_buffer,
-                                    start->nb_bytes) ||
-            !do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
-                                    end->offset, end_buffer, end->nb_bytes)) {
-            ret = -EIO;
-            goto fail;
-        }
-    }
-
-    /* And now we can write everything. If we have the guest data we
-     * can write everything in one single operation */
-    if (m->data_qiov) {
-        qemu_iovec_reset(&qiov);
-        if (start->nb_bytes) {
-            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
-        }
-        qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
-        if (end->nb_bytes) {
-            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
-        }
-        /* NOTE: we have a write_aio blkdebug event here followed by
-         * a cow_write one in do_perform_cow_write(), but there's only
-         * one single I/O operation */
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
-    } else {
-        /* If there's no guest data then write both COW regions separately */
-        qemu_iovec_reset(&qiov);
-        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
-        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        qemu_iovec_reset(&qiov);
-        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
-        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
-    }
-
-fail:
+    ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
    qemu_co_mutex_lock(&s->lock);

+    if (ret < 0) {
+        return ret;
+    }
+
    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
-    if (ret == 0) {
-        qcow2_cache_depends_on_flush(s->l2_table_cache);
-    }
+    qcow2_cache_depends_on_flush(s->l2_table_cache);

-    qemu_vfree(start_buffer);
-    qemu_iovec_destroy(&qiov);
-    return ret;
+    return 0;
 }

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
@@ -865,7 +785,12 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    }

    /* copy content of unmodified sectors */
-    ret = perform_cow(bs, m);
+    ret = perform_cow(bs, m, &m->cow_start);
+    if (ret < 0) {
+        goto err;
+    }
+
+    ret = perform_cow(bs, m, &m->cow_end);
    if (ret < 0) {
        goto err;
    }
@@ -910,7 +835,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
-    if (!m->keep_old_clusters && j != 0) {
+    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
@@ -935,7 +860,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
-        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+        int cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
@@ -945,8 +870,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
-        case QCOW2_CLUSTER_ZERO_PLAIN:
-        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_ZERO:
            break;
        default:
            abort();
@@ -1008,7 +932,9 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
-                qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
                return -EAGAIN;
            }
        }
@@ -1208,9 +1134,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t entry;
    uint64_t nb_clusters;
    int ret;
-    bool keep_old_clusters = false;

-    uint64_t alloc_cluster_offset = 0;
+    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
@@ -1247,54 +1172,31 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     * wrong with our code. */
    assert(nb_clusters > 0);

-    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
-        (entry & QCOW_OFLAG_COPIED) &&
-        (!*host_offset ||
-         start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
-    {
-        /* Try to reuse preallocated zero clusters; contiguous normal clusters
-         * would be fine, too, but count_cow_clusters() above has limited
-         * nb_clusters already to a range of COW clusters */
-        int preallocated_nb_clusters =
-            count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_COPIED);
-        assert(preallocated_nb_clusters > 0);
-
-        nb_clusters = preallocated_nb_clusters;
-        alloc_cluster_offset = entry & L2E_OFFSET_MASK;
-
-        /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2()
-         * should not free them. */
-        keep_old_clusters = true;
-    }
-
    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);

+    /* Allocate, if necessary at a given offset in the image file */
+    alloc_cluster_offset = start_of_cluster(s, *host_offset);
+    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+                                  &nb_clusters);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Can't extend contiguous allocation */
+    if (nb_clusters == 0) {
+        *bytes = 0;
+        return 0;
+    }
+
+    /* !*host_offset would overwrite the image header and is reserved for "no
+     * host offset preferred". If 0 was a valid host offset, it'd trigger the
+     * following overlap check; do that now to avoid having an invalid value in
+     * *host_offset. */
    if (!alloc_cluster_offset) {
-        /* Allocate, if necessary at a given offset in the image file */
-        alloc_cluster_offset = start_of_cluster(s, *host_offset);
-        ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
-                                      &nb_clusters);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        /* Can't extend contiguous allocation */
-        if (nb_clusters == 0) {
-            *bytes = 0;
-            return 0;
-        }
-
-        /* !*host_offset would overwrite the image header and is reserved for
-         * "no host offset preferred". If 0 was a valid host offset, it'd
-         * trigger the following overlap check; do that now to avoid having an
-         * invalid value in *host_offset. */
-        if (!alloc_cluster_offset) {
-            ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                                nb_clusters * s->cluster_size);
-            assert(ret < 0);
-            goto fail;
-        }
+        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+                                            nb_clusters * s->cluster_size);
+        assert(ret < 0);
+        goto fail;
    }

    /*
@@ -1325,8 +1227,6 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
        .offset         = start_of_cluster(s, guest_offset),
        .nb_clusters    = nb_clusters,

-        .keep_old_clusters  = keep_old_clusters,
-
        .cow_start = {
            .offset     = 0,
            .nb_bytes   = offset_into_cluster(s, guest_offset),
@@ -1516,23 +1416,6 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
        sector_offset = coffset & 511;
        csize = nb_csectors * 512 - sector_offset;
-
-        /* Allocate buffers on first decompress operation, most images are
-         * uncompressed and the memory overhead can be avoided.  The buffers
-         * are freed in .bdrv_close().
-         */
-        if (!s->cluster_data) {
-            /* one more sector for decompressed data alignment */
-            s->cluster_data = qemu_try_blockalign(bs->file->bs,
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
-            if (!s->cluster_data) {
-                return -ENOMEM;
-            }
-        }
-        if (!s->cluster_cache) {
-            s->cluster_cache = g_malloc(s->cluster_size);
-        }
-
        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
                        nb_csectors);
@@ -1591,25 +1474,24 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         * but rather fall through to the backing file.
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
-        case QCOW2_CLUSTER_UNALLOCATED:
-            if (full_discard || !bs->backing) {
-                continue;
-            }
-            break;
+            case QCOW2_CLUSTER_UNALLOCATED:
+                if (full_discard || !bs->backing) {
+                    continue;
+                }
+                break;

-        case QCOW2_CLUSTER_ZERO_PLAIN:
-            if (!full_discard) {
-                continue;
-            }
-            break;
+            case QCOW2_CLUSTER_ZERO:
+                if (!full_discard) {
+                    continue;
+                }
+                break;

-        case QCOW2_CLUSTER_ZERO_ALLOC:
-        case QCOW2_CLUSTER_NORMAL:
-        case QCOW2_CLUSTER_COMPRESSED:
-            break;
+            case QCOW2_CLUSTER_NORMAL:
+            case QCOW2_CLUSTER_COMPRESSED:
+                break;

-        default:
-            abort();
+            default:
+                abort();
        }

        /* First remove L2 entries */
@@ -1629,36 +1511,37 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, enum qcow2_discard_type type,
-                          bool full_discard)
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset = offset + bytes;
+    uint64_t end_offset;
    uint64_t nb_clusters;
-    int64_t cleared;
    int ret;

-    /* Caller must pass aligned values, except at image end */
-    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
-           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

-    nb_clusters = size_to_clusters(s, bytes);
+    /* Round start up and end down */
+    offset = align_offset(offset, s->cluster_size);
+    end_offset = start_of_cluster(s, end_offset);
+
+    if (offset > end_offset) {
+        return 0;
+    }
+
+    nb_clusters = size_to_clusters(s, end_offset - offset);

    s->cache_discards = true;

    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
-        cleared = discard_single_l2(bs, offset, nb_clusters, type,
-                                    full_discard);
-        if (cleared < 0) {
-            ret = cleared;
+        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
+        if (ret < 0) {
            goto fail;
        }

-        nb_clusters -= cleared;
-        offset += (cleared * s->cluster_size);
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
    }

    ret = 0;
@@ -1682,7 +1565,6 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    int l2_index;
    int ret;
    int i;
-    bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
@@ -1695,22 +1577,12 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_offset;
-        QCow2ClusterType cluster_type;

        old_offset = be64_to_cpu(l2_table[l2_index + i]);

-        /*
-         * Minimize L2 changes if the cluster already reads back as
-         * zeroes with correct allocation.
-         */
-        cluster_type = qcow2_get_cluster_type(old_offset);
-        if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
-            (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
-            continue;
-        }
-
+        /* Update L2 entries */
        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
+        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
        } else {
@@ -1723,39 +1595,31 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }

-int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, int flags)
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
+                        int flags)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
-    int64_t cleared;
    int ret;

-    /* Caller must pass aligned values, except at image end */
-    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
-           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
-
    /* The zero flag is only supported by version 3 and newer */
    if (s->qcow_version < 3) {
        return -ENOTSUP;
    }

    /* Each L2 table is handled by its own loop iteration */
-    nb_clusters = size_to_clusters(s, bytes);
+    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);

    s->cache_discards = true;

    while (nb_clusters > 0) {
-        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
-        if (cleared < 0) {
-            ret = cleared;
+        ret = zero_single_l2(bs, offset, nb_clusters, flags);
+        if (ret < 0) {
            goto fail;
        }

-        nb_clusters -= cleared;
-        offset += (cleared * s->cluster_size);
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
    }

    ret = 0;
@@ -1839,14 +1703,14 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+            int cluster_type = qcow2_get_cluster_type(l2_entry);
+            bool preallocated = offset != 0;

-            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
-                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO) {
                continue;
            }

-            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+            if (!preallocated) {
                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
@@ -1877,12 +1741,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
            }

            if (offset_into_cluster(s, offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1,
-                                        "Cluster allocation offset "
+                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
                                        "%#" PRIx64 " unaligned (L2 offset: %#"
                                        PRIx64 ", L2 index: %#x)", offset,
                                        l2_offset, j);
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1892,7 +1755,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
@@ -1901,7 +1764,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,

            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                if (!preallocated) {
                    qcow2_free_clusters(bs, offset, s->cluster_size,
                                        QCOW2_DISCARD_ALWAYS);
                }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -83,16 +83,6 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
 /*********************************************************/
 /* refcount handling */

-static void update_max_refcount_table_index(BDRVQcow2State *s)
-{
-    unsigned i = s->refcount_table_size - 1;
-    while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
-        i--;
-    }
-    /* Set s->max_refcount_table_index to the index of the last used entry */
-    s->max_refcount_table_index = i;
-}
-
 int qcow2_refcount_init(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
@@ -121,7 +111,6 @@ int qcow2_refcount_init(BlockDriverState *bs)
        }
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
-        update_max_refcount_table_index(s);
    }
    return 0;
 fail:
@@ -281,6 +270,25 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
    return 0;
 }

+/*
+ * Rounds the refcount table size up to avoid growing the table for each single
+ * refcount block: returns an entry count > min_size, grown ~1.5x per step.
+ */
+static unsigned int next_refcount_table_size(BDRVQcow2State *s,
+    unsigned int min_size)
+{
+    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
+    unsigned int refcount_table_clusters =
+        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
+
+    while (min_clusters > refcount_table_clusters) {
+        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
+    }
+
+    return refcount_table_clusters << (s->cluster_bits - 3);
+}
+
+
 /* Checks if two offsets are described by the same refcount block */
 static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
    uint64_t offset_b)
@@ -302,7 +310,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 {
    BDRVQcow2State *s = bs->opaque;
    unsigned int refcount_table_index;
-    int64_t ret;
+    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

@@ -377,7 +385,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail;
+            goto fail_block;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -392,12 +400,12 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
                              QCOW2_DISCARD_NEVER);
        if (ret < 0) {
-            goto fail;
+            goto fail_block;
        }

        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret < 0) {
-            goto fail;
+            goto fail_block;
        }

        /* Initialize the new refcount block only after updating its refcount,
@@ -405,7 +413,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail;
+            goto fail_block;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -416,7 +424,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
-        goto fail;
+        goto fail_block;
    }

    /* If the refcount table is big enough, just hook the block up there */
@@ -427,14 +435,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
            &data64, sizeof(data64));
        if (ret < 0) {
-            goto fail;
+            goto fail_block;
        }

        s->refcount_table[refcount_table_index] = new_block;
-        /* If there's a hole in s->refcount_table then it can happen
-         * that refcount_table_index < s->max_refcount_table_index */
-        s->max_refcount_table_index =
-            MAX(s->max_refcount_table_index, refcount_table_index);

        /* The new refcount block may be where the caller intended to put its
         * data, so let it restart the search. */
@@ -471,201 +475,74 @@ static int alloc_refcount_block(BlockDriverState *bs,
                                            (new_block >> s->cluster_bits) + 1),
                                        s->refcount_block_size);

+    if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
+        return -EFBIG;
+    }
+
+    /* And now we need at least one block more for the new metadata */
+    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
+    uint64_t last_table_size;
+    uint64_t blocks_clusters;
+    do {
+        uint64_t table_clusters =
+            size_to_clusters(s, table_size * sizeof(uint64_t));
+        blocks_clusters = 1 +
+            DIV_ROUND_UP(table_clusters, s->refcount_block_size);
+        uint64_t meta_clusters = table_clusters + blocks_clusters;
+
+        last_table_size = table_size;
+        table_size = next_refcount_table_size(s, blocks_used +
+            DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
+
+    } while (last_table_size != table_size);
+
+#ifdef DEBUG_ALLOC2
+    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
+        s->refcount_table_size, table_size);
+#endif
+
    /* Create the new refcount table and blocks */
    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
        s->cluster_size;
+    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
+    uint64_t *new_table = g_try_new0(uint64_t, table_size);
+    void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);

-    ret = qcow2_refcount_area(bs, meta_offset, 0, false,
-                              refcount_table_index, new_block);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = load_refcount_block(bs, new_block, refcount_block);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* If we were trying to do the initial refcount update for some cluster
-     * allocation, we might have used the same clusters to store newly
-     * allocated metadata. Make the caller search some new space. */
-    return -EAGAIN;
-
-fail:
-    if (*refcount_block != NULL) {
-        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
-    }
-    return ret;
-}
-
-/*
- * Starting at @start_offset, this function creates new self-covering refcount
- * structures: A new refcount table and refcount blocks which cover all of
- * themselves, and a number of @additional_clusters beyond their end.
- * @start_offset must be at the end of the image file, that is, there must be
- * only empty space beyond it.
- * If @exact_size is false, the refcount table will have 50 % more entries than
- * necessary so it will not need to grow again soon.
- * If @new_refblock_offset is not zero, it contains the offset of a refcount
- * block that should be entered into the new refcount table at index
- * @new_refblock_index.
- *
- * Returns: The offset after the new refcount structures (i.e. where the
- *          @additional_clusters may be placed) on success, -errno on error.
- */
-int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
-                            uint64_t additional_clusters, bool exact_size,
-                            int new_refblock_index,
-                            uint64_t new_refblock_offset)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t total_refblock_count_u64, additional_refblock_count;
-    int total_refblock_count, table_size, area_reftable_index, table_clusters;
-    int i;
-    uint64_t table_offset, block_offset, end_offset;
-    int ret;
-    uint64_t *new_table;
-
-    assert(!(start_offset % s->cluster_size));
-
-    qcow2_refcount_metadata_size(start_offset / s->cluster_size +
-                                 additional_clusters,
-                                 s->cluster_size, s->refcount_order,
-                                 !exact_size, &total_refblock_count_u64);
-    if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
-        return -EFBIG;
-    }
-    total_refblock_count = total_refblock_count_u64;
-
-    /* Index in the refcount table of the first refcount block to cover the area
-     * of refcount structures we are about to create; we know that
-     * @total_refblock_count can cover @start_offset, so this will definitely
-     * fit into an int. */
-    area_reftable_index = (start_offset / s->cluster_size) /
-                          s->refcount_block_size;
-
-    if (exact_size) {
-        table_size = total_refblock_count;
-    } else {
-        table_size = total_refblock_count +
-                     DIV_ROUND_UP(total_refblock_count, 2);
-    }
-    /* The qcow2 file can only store the reftable size in number of clusters */
-    table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t));
-    table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size;
-
-    if (table_size > QCOW_MAX_REFTABLE_SIZE) {
-        return -EFBIG;
-    }
-
-    new_table = g_try_new0(uint64_t, table_size);
-
-    assert(table_size > 0);
-    if (new_table == NULL) {
+    assert(table_size > 0 && blocks_clusters > 0);
+    if (new_table == NULL || new_blocks == NULL) {
        ret = -ENOMEM;
-        goto fail;
+        goto fail_table;
    }

    /* Fill the new refcount table */
-    if (table_size > s->max_refcount_table_index) {
-        /* We're actually growing the reftable */
-        memcpy(new_table, s->refcount_table,
-               (s->max_refcount_table_index + 1) * sizeof(uint64_t));
-    } else {
-        /* Improbable case: We're shrinking the reftable. However, the caller
-         * has assured us that there is only empty space beyond @start_offset,
-         * so we can simply drop all of the refblocks that won't fit into the
-         * new reftable. */
-        memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t));
+    memcpy(new_table, s->refcount_table,
+        s->refcount_table_size * sizeof(uint64_t));
+    new_table[refcount_table_index] = new_block;
+
+    int i;
+    for (i = 0; i < blocks_clusters; i++) {
+        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
    }

-    if (new_refblock_offset) {
-        assert(new_refblock_index < total_refblock_count);
-        new_table[new_refblock_index] = new_refblock_offset;
+    /* Fill the refcount blocks */
+    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
+    int block = 0;
+    for (i = 0; i < table_clusters + blocks_clusters; i++) {
+        s->set_refcount(new_blocks, block++, 1);
    }

-    /* Count how many new refblocks we have to create */
-    additional_refblock_count = 0;
-    for (i = area_reftable_index; i < total_refblock_count; i++) {
-        if (!new_table[i]) {
-            additional_refblock_count++;
-        }
-    }
-
-    table_offset = start_offset + additional_refblock_count * s->cluster_size;
-    end_offset = table_offset + table_clusters * s->cluster_size;
-
-    /* Fill the refcount blocks, and create new ones, if necessary */
-    block_offset = start_offset;
-    for (i = area_reftable_index; i < total_refblock_count; i++) {
-        void *refblock_data;
-        uint64_t first_offset_covered;
-
-        /* Reuse an existing refblock if possible, create a new one otherwise */
-        if (new_table[i]) {
-            ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
-                                  &refblock_data);
-            if (ret < 0) {
-                goto fail;
-            }
-        } else {
-            ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
-                                        block_offset, &refblock_data);
-            if (ret < 0) {
-                goto fail;
-            }
-            memset(refblock_data, 0, s->cluster_size);
-            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
-                                         refblock_data);
-
-            new_table[i] = block_offset;
-            block_offset += s->cluster_size;
-        }
-
-        /* First host offset covered by this refblock */
-        first_offset_covered = (uint64_t)i * s->refcount_block_size *
-                               s->cluster_size;
-        if (first_offset_covered < end_offset) {
-            int j, end_index;
-
-            /* Set the refcount of all of the new refcount structures to 1 */
-
-            if (first_offset_covered < start_offset) {
-                assert(i == area_reftable_index);
-                j = (start_offset - first_offset_covered) / s->cluster_size;
-                assert(j < s->refcount_block_size);
-            } else {
-                j = 0;
-            }
-
-            end_index = MIN((end_offset - first_offset_covered) /
-                            s->cluster_size,
-                            s->refcount_block_size);
-
-            for (; j < end_index; j++) {
-                /* The caller guaranteed us this space would be empty */
-                assert(s->get_refcount(refblock_data, j) == 0);
-                s->set_refcount(refblock_data, j, 1);
-            }
-
-            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
-                                         refblock_data);
-        }
-
-        qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
-    }
-
-    assert(block_offset == table_offset);
-
    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+        blocks_clusters * s->cluster_size);
+    g_free(new_blocks);
+    new_blocks = NULL;
    if (ret < 0) {
-        goto fail;
+        goto fail_table;
    }

    /* Write refcount table to disk */
-    for (i = 0; i < total_refblock_count; i++) {
+    for(i = 0; i < table_size; i++) {
        cpu_to_be64s(&new_table[i]);
    }

@@ -673,10 +550,10 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
        table_size * sizeof(uint64_t));
    if (ret < 0) {
-        goto fail;
+        goto fail_table;
    }

-    for (i = 0; i < total_refblock_count; i++) {
+    for(i = 0; i < table_size; i++) {
        be64_to_cpus(&new_table[i]);
    }

@@ -692,7 +569,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                           offsetof(QCowHeader, refcount_table_offset),
                           &data, sizeof(data));
    if (ret < 0) {
-        goto fail;
+        goto fail_table;
    }

    /* And switch it in memory */
@@ -703,16 +580,28 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
    s->refcount_table = new_table;
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;
-    update_max_refcount_table_index(s);

    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);

-    return end_offset;
+    ret = load_refcount_block(bs, new_block, refcount_block);
+    if (ret < 0) {
+        return ret;
+    }

-fail:
+    /* If we were trying to do the initial refcount update for some cluster
+     * allocation, we might have used the same clusters to store newly
+     * allocated metadata. Make the caller search some new space. */
+    return -EAGAIN;
+
+fail_table:
+    g_free(new_blocks);
    g_free(new_table);
+fail_block:
+    if (*refcount_block != NULL) {
+        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+    }
    return ret;
 }

@@ -1123,17 +1012,18 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
-    case QCOW2_CLUSTER_ZERO_ALLOC:
-        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
-            qcow2_signal_corruption(bs, false, -1, -1,
-                                    "Cannot free unaligned cluster %#llx",
-                                    l2_entry & L2E_OFFSET_MASK);
-        } else {
-            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                nb_clusters << s->cluster_bits, type);
+    case QCOW2_CLUSTER_ZERO:
+        if (l2_entry & L2E_OFFSET_MASK) {
+            if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+                qcow2_signal_corruption(bs, false, -1, -1,
+                                        "Cannot free unaligned cluster %#llx",
+                                        l2_entry & L2E_OFFSET_MASK);
+            } else {
+                qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                                    nb_clusters << s->cluster_bits, type);
+            }
        }
        break;
-    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        break;
    default:
@@ -1153,9 +1043,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
 {
    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
    bool l1_allocated = false;
-    int64_t old_entry, old_l2_offset;
+    int64_t old_offset, old_l2_offset;
    int i, j, l1_modified = 0, nb_csectors;
    int ret;

@@ -1183,16 +1073,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
            goto fail;
        }

-        for (i = 0; i < l1_size; i++) {
+        for(i = 0;i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
-        }
    } else {
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = false;
    }

-    for (i = 0; i < l1_size; i++) {
+    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
@@ -1212,79 +1101,81 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                goto fail;
            }

-            for (j = 0; j < s->l2_size; j++) {
+            for(j = 0; j < s->l2_size; j++) {
                uint64_t cluster_index;
-                uint64_t offset;

-                entry = be64_to_cpu(l2_table[j]);
-                old_entry = entry;
-                entry &= ~QCOW_OFLAG_COPIED;
-                offset = entry & L2E_OFFSET_MASK;
+                offset = be64_to_cpu(l2_table[j]);
+                old_offset = offset;
+                offset &= ~QCOW_OFLAG_COPIED;

-                switch (qcow2_get_cluster_type(entry)) {
-                case QCOW2_CLUSTER_COMPRESSED:
-                    nb_csectors = ((entry >> s->csize_shift) &
-                                   s->csize_mask) + 1;
-                    if (addend != 0) {
-                        ret = update_refcount(bs,
-                                (entry & s->cluster_offset_mask) & ~511,
+                switch (qcow2_get_cluster_type(offset)) {
+                    case QCOW2_CLUSTER_COMPRESSED:
+                        nb_csectors = ((offset >> s->csize_shift) &
+                                       s->csize_mask) + 1;
+                        if (addend != 0) {
+                            ret = update_refcount(bs,
+                                (offset & s->cluster_offset_mask) & ~511,
                                nb_csectors * 512, abs(addend), addend < 0,
                                QCOW2_DISCARD_SNAPSHOT);
-                        if (ret < 0) {
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+                        /* compressed clusters are never modified */
+                        refcount = 2;
+                        break;
+
+                    case QCOW2_CLUSTER_NORMAL:
+                    case QCOW2_CLUSTER_ZERO:
+                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
+                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
+                                                    "cluster offset %#llx "
+                                                    "unaligned (L2 offset: %#"
+                                                    PRIx64 ", L2 index: %#x)",
+                                                    offset & L2E_OFFSET_MASK,
+                                                    l2_offset, j);
+                            ret = -EIO;
                            goto fail;
                        }
-                    }
-                    /* compressed clusters are never modified */
-                    refcount = 2;
-                    break;

-                case QCOW2_CLUSTER_NORMAL:
-                case QCOW2_CLUSTER_ZERO_ALLOC:
-                    if (offset_into_cluster(s, offset)) {
-                        qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
-                                                "allocation offset %#" PRIx64
-                                                " unaligned (L2 offset: %#"
-                                                PRIx64 ", L2 index: %#x)",
-                                                offset, l2_offset, j);
-                        ret = -EIO;
-                        goto fail;
-                    }
-
-                    cluster_index = offset >> s->cluster_bits;
-                    assert(cluster_index);
-                    if (addend != 0) {
-                        ret = qcow2_update_cluster_refcount(bs,
+                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
+                        if (!cluster_index) {
+                            /* unallocated */
+                            refcount = 0;
+                            break;
+                        }
+                        if (addend != 0) {
+                            ret = qcow2_update_cluster_refcount(bs,
                                    cluster_index, abs(addend), addend < 0,
                                    QCOW2_DISCARD_SNAPSHOT);
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+
+                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                        if (ret < 0) {
                            goto fail;
                        }
-                    }
+                        break;

-                    ret = qcow2_get_refcount(bs, cluster_index, &refcount);
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                    break;
+                    case QCOW2_CLUSTER_UNALLOCATED:
+                        refcount = 0;
+                        break;

-                case QCOW2_CLUSTER_ZERO_PLAIN:
-                case QCOW2_CLUSTER_UNALLOCATED:
-                    refcount = 0;
-                    break;
-
-                default:
-                    abort();
+                    default:
+                        abort();
                }

                if (refcount == 1) {
-                    entry |= QCOW_OFLAG_COPIED;
+                    offset |= QCOW_OFLAG_COPIED;
                }
-                if (entry != old_entry) {
+                if (offset != old_offset) {
                    if (addend > 0) {
                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                            s->refcount_block_cache);
                    }
-                    l2_table[j] = cpu_to_be64(entry);
+                    l2_table[j] = cpu_to_be64(offset);
                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
                                                 l2_table);
                }
@@ -1418,10 +1309,11 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
 *
 * Modifies the number of errors in res.
 */
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
-                             void **refcount_table,
-                             int64_t *refcount_table_size,
-                             int64_t offset, int64_t size)
+static int inc_refcounts(BlockDriverState *bs,
+                         BdrvCheckResult *res,
+                         void **refcount_table,
+                         int64_t *refcount_table_size,
+                         int64_t offset, int64_t size)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, last, cluster_offset, k, refcount;
@@ -1514,9 +1406,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            nb_csectors = ((l2_entry >> s->csize_shift) &
                           s->csize_mask) + 1;
            l2_entry &= s->cluster_offset_mask;
-            ret = qcow2_inc_refcounts_imrt(bs, res,
-                                           refcount_table, refcount_table_size,
-                                           l2_entry & ~511, nb_csectors * 512);
+            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
+                                l2_entry & ~511, nb_csectors * 512);
            if (ret < 0) {
                goto fail;
            }
@@ -1534,7 +1425,12 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }
            break;

-        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_ZERO:
+            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+                break;
+            }
+            /* fall through */
+
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;
@@ -1549,9 +1445,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }

            /* Mark cluster as used */
-            ret = qcow2_inc_refcounts_imrt(bs, res,
-                                           refcount_table, refcount_table_size,
-                                           offset, s->cluster_size);
+            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
+                                offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1565,7 +1460,6 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            break;
        }

-        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_UNALLOCATED:
            break;

@@ -1604,8 +1498,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
-    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
-                                   l1_table_offset, l1_size2);
+    ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
+                        l1_table_offset, l1_size2);
    if (ret < 0) {
        goto fail;
    }
@@ -1634,9 +1528,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
        if (l2_offset) {
            /* Mark L2 table as used */
            l2_offset &= L1E_OFFSET_MASK;
-            ret = qcow2_inc_refcounts_imrt(bs, res,
-                                           refcount_table, refcount_table_size,
-                                           l2_offset, s->cluster_size);
+            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
+                                l2_offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1729,10 +1622,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
-            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+            int cluster_type = qcow2_get_cluster_type(l2_entry);

-            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
-                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
+            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
+                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
                ret = qcow2_get_refcount(bs,
                                         data_offset >> s->cluster_bits,
                                         &refcount);
@@ -1819,17 +1712,14 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
-                Error *local_err = NULL;

                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
-                                    PREALLOC_MODE_OFF, &local_err);
+                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
                if (ret < 0) {
-                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
@@ -1854,15 +1744,14 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                }

                res->corruptions_fixed++;
-                ret = qcow2_inc_refcounts_imrt(bs, res,
-                                               refcount_table, nb_clusters,
-                                               offset, s->cluster_size);
+                ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
+                                    offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
                /* No need to check whether the refcount is now greater than 1:
                 * This area was just allocated and zeroed, so it can only be
-                 * exactly 1 after qcow2_inc_refcounts_imrt() */
+                 * exactly 1 after inc_refcounts() */
                continue;

 resize_fail:
@@ -1877,8 +1766,8 @@ resize_fail:
        }

        if (offset != 0) {
-            ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
-                                           offset, s->cluster_size);
+            ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
+                                offset, s->cluster_size);
            if (ret < 0) {
                return ret;
            }
@@ -1918,8 +1807,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
    }

    /* header */
-    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
-                                   0, s->cluster_size);
+    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
+                        0, s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@@ -1940,32 +1829,16 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
            return ret;
        }
    }
-    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
-                                   s->snapshots_offset, s->snapshots_size);
+    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
+                        s->snapshots_offset, s->snapshots_size);
    if (ret < 0) {
        return ret;
    }

    /* refcount data */
-    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
-                                   s->refcount_table_offset,
-                                   s->refcount_table_size * sizeof(uint64_t));
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* encryption */
-    if (s->crypto_header.length) {
-        ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
-                                       s->crypto_header.offset,
-                                       s->crypto_header.length);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* bitmaps */
-    ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
+    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
+                        s->refcount_table_offset,
+                        s->refcount_table_size * sizeof(uint64_t));
    if (ret < 0) {
        return ret;
    }
@@ -2189,8 +2062,6 @@ write_refblocks:
             * this will leak that range, but we can easily fix that by running
             * a leak-fixing check after this rebuild operation */
            reftable_offset = -1;
-        } else {
-            assert(on_disk_reftable);
        }
        on_disk_reftable[refblock_index] = refblock_offset;

@@ -2260,6 +2131,8 @@ write_refblocks:
        goto write_refblocks;
    }

+    assert(on_disk_reftable);
+
    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
        cpu_to_be64s(&on_disk_reftable[refblock_index]);
    }
@@ -2298,7 +2171,6 @@ write_refblocks:
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
    s->refcount_table_size = reftable_size;
-    update_max_refcount_table_index(s);

    return 0;

@@ -2511,11 +2383,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
    }

    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
-        unsigned last_entry = s->max_refcount_table_index;
-        assert(last_entry < s->refcount_table_size);
-        assert(last_entry + 1 == s->refcount_table_size ||
-               (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
-        for (i = 0; i <= last_entry; i++) {
+        for (i = 0; i < s->refcount_table_size; i++) {
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
                s->cluster_size)) {
@@ -3003,7 +2871,6 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
    /* Now update the rest of the in-memory information */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
-    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -440,9 +440,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
-    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
-                          align_offset(sn->vm_state_size, s->cluster_size),
-                          QCOW2_DISCARD_NEVER, false);
+    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
+                           align_offset(sn->vm_state_size, s->cluster_size)
+                                >> BDRV_SECTOR_BITS,
+                           QCOW2_DISCARD_NEVER, false);

 #ifdef DEBUG_ALLOC
    {
--- a/block/qcow2.c
+++ b/block/qcow2.c
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,7 +25,7 @@
 #ifndef BLOCK_QCOW2_H
 #define BLOCK_QCOW2_H

-#include "crypto/block.h"
+#include "crypto/cipher.h"
 #include "qemu/coroutine.h"

 //#define DEBUG_ALLOC
@@ -36,7 +36,6 @@

 #define QCOW_CRYPT_NONE 0
 #define QCOW_CRYPT_AES  1
-#define QCOW_CRYPT_LUKS 2

 #define QCOW_MAX_CRYPT_CLUSTERS 32
 #define QCOW_MAX_SNAPSHOTS 65536
@@ -53,10 +52,6 @@
 * space for snapshot names and IDs */
 #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)

-/* Bitmap header extension constraints */
-#define QCOW2_MAX_BITMAPS 65535
-#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
-
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -168,11 +163,6 @@ typedef struct QCowSnapshot {
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;

-typedef struct Qcow2CryptoHeaderExtension {
-    uint64_t offset;
-    uint64_t length;
-} QEMU_PACKED Qcow2CryptoHeaderExtension;
-
 typedef struct Qcow2UnknownHeaderExtension {
    uint32_t magic;
    uint32_t len;
@@ -205,14 +195,6 @@ enum {
    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
 };

-/* Autoclear feature bits */
-enum {
-    QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0,
-    QCOW2_AUTOCLEAR_BITMAPS       = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR,
-
-    QCOW2_AUTOCLEAR_MASK          = QCOW2_AUTOCLEAR_BITMAPS,
-};
-
 enum qcow2_discard_type {
    QCOW2_DISCARD_NEVER = 0,
    QCOW2_DISCARD_ALWAYS,
@@ -240,13 +222,6 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
 typedef void Qcow2SetRefcountFunc(void *refcount_array,
                                  uint64_t index, uint64_t value);

-typedef struct Qcow2BitmapHeaderExt {
-    uint32_t nb_bitmaps;
-    uint32_t reserved32;
-    uint64_t bitmap_directory_size;
-    uint64_t bitmap_directory_offset;
-} QEMU_PACKED Qcow2BitmapHeaderExt;
-
 typedef struct BDRVQcow2State {
    int cluster_bits;
    int cluster_size;
@@ -276,27 +251,18 @@ typedef struct BDRVQcow2State {
    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;
-    uint32_t max_refcount_table_index; /* Last used entry in refcount_table */
    uint64_t free_cluster_index;
    uint64_t free_byte_offset;

    CoMutex lock;

-    Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */
-    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
-    QCryptoBlock *crypto; /* Disk encryption format driver */
-    bool crypt_physical_offset; /* Whether to use virtual or physical offset
-                                   for encryption initialization vector tweak */
+    QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
    uint32_t crypt_method_header;
    uint64_t snapshots_offset;
    int snapshots_size;
    unsigned int nb_snapshots;
    QCowSnapshot *snapshots;

-    uint32_t nb_bitmaps;
-    uint64_t bitmap_directory_size;
-    uint64_t bitmap_directory_offset;
-
    int flags;
    int qcow_version;
    bool use_lazy_refcounts;
@@ -334,10 +300,10 @@ typedef struct Qcow2COWRegion {
     * Offset of the COW region in bytes from the start of the first cluster
     * touched by the request.
     */
-    unsigned    offset;
+    uint64_t    offset;

    /** Number of bytes to copy */
-    unsigned    nb_bytes;
+    int         nb_bytes;
 } Qcow2COWRegion;

 /**
@@ -355,9 +321,6 @@ typedef struct QCowL2Meta
    /** Number of newly allocated clusters */
    int nb_clusters;

-    /** Do not free the old clusters */
-    bool keep_old_clusters;
-
    /**
     * Requests that overlap with this allocation and wait to be restarted
     * when the allocating request has completed.
@@ -376,26 +339,18 @@ typedef struct QCowL2Meta
     */
    Qcow2COWRegion cow_end;

-    /**
-     * The I/O vector with the data from the actual guest write request.
-     * If non-NULL, this is meant to be merged together with the data
-     * from @cow_start and @cow_end into one single write operation.
-     */
-    QEMUIOVector *data_qiov;
-
    /** Pointer to next L2Meta of the same write request */
    struct QCowL2Meta *next;

    QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;

-typedef enum QCow2ClusterType {
+enum {
    QCOW2_CLUSTER_UNALLOCATED,
-    QCOW2_CLUSTER_ZERO_PLAIN,
-    QCOW2_CLUSTER_ZERO_ALLOC,
    QCOW2_CLUSTER_NORMAL,
    QCOW2_CLUSTER_COMPRESSED,
-} QCow2ClusterType;
+    QCOW2_CLUSTER_ZERO
+};

 typedef enum QCow2MetadataOverlap {
    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
@@ -484,15 +439,12 @@ static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
    return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
 }

-static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry)
+static inline int qcow2_get_cluster_type(uint64_t l2_entry)
 {
    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
        return QCOW2_CLUSTER_COMPRESSED;
    } else if (l2_entry & QCOW_OFLAG_ZERO) {
-        if (l2_entry & L2E_OFFSET_MASK) {
-            return QCOW2_CLUSTER_ZERO_ALLOC;
-        }
-        return QCOW2_CLUSTER_ZERO_PLAIN;
+        return QCOW2_CLUSTER_ZERO;
    } else if (!(l2_entry & L2E_OFFSET_MASK)) {
        return QCOW2_CLUSTER_UNALLOCATED;
    } else {
@@ -525,10 +477,6 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors);

-int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
-                                     int refcount_order, bool generous_increase,
-                                     uint64_t *refblock_count);
-
 int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
@@ -549,11 +497,6 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
                                  uint64_t addend, bool decrease,
                                  enum qcow2_discard_type type);

-int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
-                            uint64_t additional_clusters, bool exact_size,
-                            int new_refblock_index,
-                            uint64_t new_refblock_offset);
-
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
 int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
                                int64_t nb_clusters);
@@ -576,10 +519,6 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                 int64_t size);
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
                                  int64_t size);
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
-                             void **refcount_table,
-                             int64_t *refcount_table_size,
-                             int64_t offset, int64_t size);

 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
@@ -591,7 +530,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *buf, int nb_sectors, bool enc, Error **errp);
+                          uint8_t *out_buf, const uint8_t *in_buf,
+                          int nb_sectors, bool enc, Error **errp);

 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             unsigned int *bytes, uint64_t *cluster_offset);
@@ -603,11 +543,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         int compressed_size);

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, enum qcow2_discard_type type,
-                          bool full_discard);
-int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, int flags);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
+                        int flags);

 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
@@ -650,20 +589,4 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);

-/* qcow2-bitmap.c functions */
-int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                                  void **refcount_table,
-                                  int64_t *refcount_table_size);
-bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
-int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
-void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
-int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
-bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
-                                      const char *name,
-                                      uint32_t granularity,
-                                      Error **errp);
-void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-                                          const char *name,
-                                          Error **errp);
-
 #endif
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -61,65 +61,36 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
    return i - index;
 }

-/**
- * Find the offset of a data cluster
- *
- * @s:          QED state
- * @request:    L2 cache entry
- * @pos:        Byte position in device
- * @len:        Number of bytes (may be shortened on return)
- * @img_offset: Contains offset in the image file on success
- *
- * This function translates a position in the block device to an offset in the
- * image file. The translated offset or unallocated range in the image file is
- * reported back in *img_offset and *len.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished.  The cache
- * entry is exposed in this way to avoid callers having to read the L2 table
- * again later during request processing.  If request->l2_table is non-NULL it
- * will be unreferenced before taking on the new cache entry.
- *
- * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous
- * range in the image file.
- *
- * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
- * table offset, respectively. len is number of contiguous unallocated bytes.
- *
- * Called with table_lock held.
- */
-int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
-                                  uint64_t pos, size_t *len,
-                                  uint64_t *img_offset)
+typedef struct {
+    BDRVQEDState *s;
+    uint64_t pos;
+    size_t len;
+
+    QEDRequest *request;
+
+    /* User callback */
+    QEDFindClusterFunc *cb;
+    void *opaque;
+} QEDFindClusterCB;
+
+static void qed_find_cluster_cb(void *opaque, int ret)
 {
-    uint64_t l2_offset;
+    QEDFindClusterCB *find_cluster_cb = opaque;
+    BDRVQEDState *s = find_cluster_cb->s;
+    QEDRequest *request = find_cluster_cb->request;
    uint64_t offset = 0;
+    size_t len = 0;
    unsigned int index;
    unsigned int n;
-    int ret;

-    /* Limit length to L2 boundary.  Requests are broken up at the L2 boundary
-     * so that a request acts on one L2 table at a time.
-     */
-    *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
-
-    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
-    if (qed_offset_is_unalloc_cluster(l2_offset)) {
-        *img_offset = 0;
-        return QED_CLUSTER_L1;
-    }
-    if (!qed_check_table_offset(s, l2_offset)) {
-        *img_offset = *len = 0;
-        return -EINVAL;
-    }
-
-    ret = qed_read_l2_table(s, request, l2_offset);
    if (ret) {
        goto out;
    }

-    index = qed_l2_index(s, pos);
-    n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len);
+    index = qed_l2_index(s, find_cluster_cb->pos);
+    n = qed_bytes_to_clusters(s,
+                              qed_offset_into_cluster(s, find_cluster_cb->pos) +
+                              find_cluster_cb->len);
    n = qed_count_contiguous_clusters(s, request->l2_table->table,
                                      index, n, &offset);

@@ -133,10 +104,63 @@ int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
        ret = -EINVAL;
    }

-    *len = MIN(*len,
-               n * s->header.cluster_size - qed_offset_into_cluster(s, pos));
+    len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
+              qed_offset_into_cluster(s, find_cluster_cb->pos));

 out:
-    *img_offset = offset;
-    return ret;
+    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
+    g_free(find_cluster_cb);
+}
+
+/**
+ * Find the offset of a data cluster
+ *
+ * @s:          QED state
+ * @request:    L2 cache entry
+ * @pos:        Byte position in device
+ * @len:        Number of bytes
+ * @cb:         Completion function
+ * @opaque:     User data for completion function
+ *
+ * This function translates a position in the block device to an offset in the
+ * image file.  It invokes the cb completion callback to report back the
+ * translated offset or unallocated range in the image file.
+ *
+ * If the L2 table exists, request->l2_table points to the L2 table cache entry
+ * and the caller must free the reference when they are finished.  The cache
+ * entry is exposed in this way to avoid callers having to read the L2 table
+ * again later during request processing.  If request->l2_table is non-NULL it
+ * will be unreferenced before taking on the new cache entry.
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                      size_t len, QEDFindClusterFunc *cb, void *opaque)
+{
+    QEDFindClusterCB *find_cluster_cb;
+    uint64_t l2_offset;
+
+    /* Limit length to L2 boundary.  Requests are broken up at the L2 boundary
+     * so that a request acts on one L2 table at a time.
+     */
+    len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
+
+    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
+    if (qed_offset_is_unalloc_cluster(l2_offset)) {
+        cb(opaque, QED_CLUSTER_L1, 0, len);
+        return;
+    }
+    if (!qed_check_table_offset(s, l2_offset)) {
+        cb(opaque, -EINVAL, 0, 0);
+        return;
+    }
+
+    find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
+    find_cluster_cb->s = s;
+    find_cluster_cb->pos = pos;
+    find_cluster_cb->len = len;
+    find_cluster_cb->cb = cb;
+    find_cluster_cb->opaque = opaque;
+    find_cluster_cb->request = request;
+
+    qed_read_l2_table(s, request, l2_offset,
+                      qed_find_cluster_cb, find_cluster_cb);
 }
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -0,0 +1,33 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qed.h"
+
+void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
+{
+    GenericCB *gencb = g_malloc(len);
+    gencb->cb = cb;
+    gencb->opaque = opaque;
+    return gencb;
+}
+
+void gencb_complete(void *opaque, int ret)
+{
+    GenericCB *gencb = opaque;
+    BlockCompletionFunc *cb = gencb->cb;
+    void *user_opaque = gencb->opaque;
+    /* Copy cb/opaque out first: gencb is freed before the callback runs */
+    g_free(gencb);
+    cb(user_opaque, ret);
+}
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,8 +101,6 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
 * Decrease an entry's reference count and free if necessary when the reference
 * count drops to zero.
- *
- * Called with table_lock held.
 */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -124,8 +122,6 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
 *
 * For a cached entry, this function increases the reference count and returns
 * the entry.
- *
- * Called with table_lock held.
 */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -154,8 +150,6 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 * N.B. This function steals a reference to the l2_table from the caller so the
 * caller must obtain a new reference by issuing a call to
 * qed_find_l2_cache_entry().
- *
- * Called with table_lock held.
 */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,43 +18,93 @@
 #include "qed.h"
 #include "qemu/bswap.h"

-/* Called either from qed_check or with table_lock held.  */
-static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
-{
+typedef struct {
+    GenericCB gencb;
+    BDRVQEDState *s;
+    QEDTable *table;
+
+    struct iovec iov;
    QEMUIOVector qiov;
-    int noffsets;
-    int i, ret;
+} QEDReadTableCB;

-    struct iovec iov = {
-        .iov_base = table->offsets,
-        .iov_len = s->header.cluster_size * s->header.table_size,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
+static void qed_read_table_cb(void *opaque, int ret)
+{
+    QEDReadTableCB *read_table_cb = opaque;
+    QEDTable *table = read_table_cb->table;
+    int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
+    int i;

-    trace_qed_read_table(s, offset, table);
-
-    if (qemu_in_coroutine()) {
-        qemu_co_mutex_unlock(&s->table_lock);
-    }
-    ret = bdrv_preadv(s->bs->file, offset, &qiov);
-    if (qemu_in_coroutine()) {
-        qemu_co_mutex_lock(&s->table_lock);
-    }
-    if (ret < 0) {
+    /* Handle I/O error */
+    if (ret) {
        goto out;
    }

    /* Byteswap offsets */
-    noffsets = qiov.size / sizeof(uint64_t);
    for (i = 0; i < noffsets; i++) {
        table->offsets[i] = le64_to_cpu(table->offsets[i]);
    }

-    ret = 0;
 out:
    /* Completion */
-    trace_qed_read_table_cb(s, table, ret);
-    return ret;
+    trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
+    gencb_complete(&read_table_cb->gencb, ret);
+}
+
+static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
+                                                cb, opaque);
+    QEMUIOVector *qiov = &read_table_cb->qiov;
+
+    trace_qed_read_table(s, offset, table);
+    /* s and table are read back by qed_read_table_cb() on completion */
+    read_table_cb->s = s;
+    read_table_cb->table = table;
+    read_table_cb->iov.iov_base = table->offsets;
+    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size;
+    /* Wrap the table's offsets array in a one-element I/O vector */
+    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
+    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+                   qiov->size / BDRV_SECTOR_SIZE,
+                   qed_read_table_cb, read_table_cb);
+}
+
+typedef struct {
+    GenericCB gencb;
+    BDRVQEDState *s;
+    QEDTable *orig_table;
+    QEDTable *table;
+    bool flush;             /* flush after write? */
+
+    struct iovec iov;
+    QEMUIOVector qiov;
+} QEDWriteTableCB;
+
+static void qed_write_table_cb(void *opaque, int ret)
+{
+    QEDWriteTableCB *write_table_cb = opaque;
+
+    trace_qed_write_table_cb(write_table_cb->s,
+                             write_table_cb->orig_table,
+                             write_table_cb->flush,
+                             ret);
+
+    if (ret) {
+        goto out;
+    }
+
+    if (write_table_cb->flush) {
+        /* We still need to flush first */
+        write_table_cb->flush = false;
+        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
+                       write_table_cb);
+        return;
+    }
+
+out:
+    qemu_vfree(write_table_cb->table);
+    gencb_complete(&write_table_cb->gencb, ret);
 }

 /**
@@ -66,19 +116,17 @@ out:
 * @index:      Index of first element
 * @n:          Number of elements
 * @flush:      Whether or not to sync to disk
- *
- * Called either from qed_check or with table_lock held.
+ * @cb:         Completion function
+ * @opaque:     Argument for completion function
 */
-static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           unsigned int index, unsigned int n, bool flush)
+static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                            unsigned int index, unsigned int n, bool flush,
+                            BlockCompletionFunc *cb, void *opaque)
 {
+    QEDWriteTableCB *write_table_cb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
    unsigned int start, end, i;
-    QEDTable *new_table;
-    struct iovec iov;
-    QEMUIOVector qiov;
    size_t len_bytes;
-    int ret;

    trace_qed_write_table(s, offset, table, index, n);

@@ -88,120 +136,155 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,

    len_bytes = (end - start) * sizeof(uint64_t);

-    new_table = qemu_blockalign(s->bs, len_bytes);
-    iov = (struct iovec) {
-        .iov_base = new_table->offsets,
-        .iov_len = len_bytes,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
+    write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
+    write_table_cb->s = s;
+    write_table_cb->orig_table = table;
+    write_table_cb->flush = flush;
+    write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
+    write_table_cb->iov.iov_base = write_table_cb->table->offsets;
+    write_table_cb->iov.iov_len = len_bytes;
+    qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);

    /* Byteswap table */
    for (i = start; i < end; i++) {
        uint64_t le_offset = cpu_to_le64(table->offsets[i]);
-        new_table->offsets[i - start] = le_offset;
+        write_table_cb->table->offsets[i - start] = le_offset;
    }

    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

-    if (qemu_in_coroutine()) {
-        qemu_co_mutex_unlock(&s->table_lock);
-    }
-    ret = bdrv_pwritev(s->bs->file, offset, &qiov);
-    if (qemu_in_coroutine()) {
-        qemu_co_mutex_lock(&s->table_lock);
-    }
-    trace_qed_write_table_cb(s, table, flush, ret);
-    if (ret < 0) {
-        goto out;
-    }
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+                    &write_table_cb->qiov,
+                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
+                    qed_write_table_cb, write_table_cb);
+}

-    if (flush) {
-        ret = bdrv_flush(s->bs);
-        if (ret < 0) {
-            goto out;
-        }
-    }
-
-    ret = 0;
-out:
-    qemu_vfree(new_table);
-    return ret;
+/**
+ * Propagate return value from async callback
+ */
+static void qed_sync_cb(void *opaque, int ret)
+{
+    *(int *)opaque = ret;
 }

 int qed_read_l1_table_sync(BDRVQEDState *s)
 {
-    return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
+    int ret = -EINPROGRESS;
+
+    qed_read_table(s, s->header.l1_table_offset,
+                   s->l1_table, qed_sync_cb, &ret);
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
+
+    return ret;
 }

-/* Called either from qed_check or with table_lock held.  */
-int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+                        BlockCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
-    return qed_write_table(s, s->header.l1_table_offset,
-                           s->l1_table, index, n, false);
+    qed_write_table(s, s->header.l1_table_offset,
+                    s->l1_table, index, n, false, cb, opaque);
 }

 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n)
 {
-    return qed_write_l1_table(s, index, n);
+    int ret = -EINPROGRESS;
+
+    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
+
+    return ret;
 }

-/* Called either from qed_check or with table_lock held.  */
-int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
+typedef struct {
+    GenericCB gencb;
+    BDRVQEDState *s;
+    uint64_t l2_offset;
+    QEDRequest *request;
+} QEDReadL2TableCB;
+
+static void qed_read_l2_table_cb(void *opaque, int ret)
 {
-    int ret;
+    QEDReadL2TableCB *read_l2_table_cb = opaque;
+    QEDRequest *request = read_l2_table_cb->request;
+    BDRVQEDState *s = read_l2_table_cb->s;
+    CachedL2Table *l2_table = request->l2_table;
+    uint64_t l2_offset = read_l2_table_cb->l2_offset;
+
+    if (ret) {
+        /* can't trust loaded L2 table anymore */
+        qed_unref_l2_cache_entry(l2_table);
+        request->l2_table = NULL;
+    } else {
+        l2_table->offset = l2_offset;
+
+        qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+        /* This is guaranteed to succeed because we just committed the entry
+         * to the cache.
+         */
+        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+        assert(request->l2_table != NULL);
+    }
+
+    gencb_complete(&read_l2_table_cb->gencb, ret);
+}
+
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+                       BlockCompletionFunc *cb, void *opaque)
+{
+    QEDReadL2TableCB *read_l2_table_cb;

    qed_unref_l2_cache_entry(request->l2_table);

    /* Check for cached L2 entry */
    request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
    if (request->l2_table) {
-        return 0;
+        cb(opaque, 0);
+        return;
    }

    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
    request->l2_table->table = qed_alloc_table(s);

+    read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
+    read_l2_table_cb->s = s;
+    read_l2_table_cb->l2_offset = offset;
+    read_l2_table_cb->request = request;
+
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
-    ret = qed_read_table(s, offset, request->l2_table->table);
-
-    if (ret) {
-        /* can't trust loaded L2 table anymore */
-        qed_unref_l2_cache_entry(request->l2_table);
-        request->l2_table = NULL;
-    } else {
-        request->l2_table->offset = offset;
-
-        qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table);
-
-        /* This is guaranteed to succeed because we just committed the entry
-         * to the cache.
-         */
-        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
-        assert(request->l2_table != NULL);
-    }
-
-    return ret;
+    qed_read_table(s, offset, request->l2_table->table,
+                   qed_read_l2_table_cb, read_l2_table_cb);
 }

 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
-    return qed_read_l2_table(s, request, offset);
+    int ret = -EINPROGRESS;
+
+    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
+
+    return ret;
 }

-/* Called either from qed_check or with table_lock held.  */
-int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                       unsigned int index, unsigned int n, bool flush)
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+                        unsigned int index, unsigned int n, bool flush,
+                        BlockCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
-    return qed_write_table(s, request->l2_table->offset,
-                           request->l2_table->table, index, n, flush);
+    qed_write_table(s, request->l2_table->offset,
+                    request->l2_table->table, index, n, flush, cb, opaque);
 }

 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush)
 {
-    return qed_write_l2_table(s, request, index, n, flush);
+    int ret = -EINPROGRESS;
+
+    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
+
+    return ret;
 }
--- a/block/qed.c
+++ b/block/qed.c
--- a/block/qed.h
+++ b/block/qed.h
@@ -129,7 +129,8 @@ enum {
 };

 typedef struct QEDAIOCB {
-    BlockDriverState *bs;
+    BlockAIOCB common;
+    int bh_ret;                     /* final return status for completion bh */
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    int flags;                      /* QED_AIOCB_* bits ORed together */
    uint64_t end_pos;               /* request end on block device, in bytes */
@@ -151,25 +152,18 @@ typedef struct QEDAIOCB {

 typedef struct {
    BlockDriverState *bs;           /* device */
+    uint64_t file_size;             /* length of image file, in bytes */

-    /* Written only by an allocating write or the timer handler (the latter
-     * while allocating reqs are plugged).
-     */
    QEDHeader header;               /* always cpu-endian */
-
-    /* Protected by table_lock.  */
-    CoMutex table_lock;
    QEDTable *l1_table;
    L2TableCache l2_cache;          /* l2 table cache */
    uint32_t table_nelems;
    uint32_t l1_shift;
    uint32_t l2_shift;
    uint32_t l2_mask;
-    uint64_t file_size;             /* length of image file, in bytes */

    /* Allocating write request queue */
-    QEDAIOCB *allocating_acb;
-    CoQueue allocating_write_reqs;
+    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
    bool allocating_write_reqs_plugged;

    /* Periodic flush and clear need check flag */
@@ -183,6 +177,38 @@ enum {
    QED_CLUSTER_L1,            /* cluster missing in L1 */
 };

+/**
+ * qed_find_cluster() completion callback
+ *
+ * @opaque:     User data for completion callback
+ * @ret:        QED_CLUSTER_FOUND   Success
+ *              QED_CLUSTER_L2      Data cluster unallocated in L2
+ *              QED_CLUSTER_L1      L2 unallocated in L1
+ *              -errno              POSIX error occurred
+ * @offset:     Data cluster offset
+ * @len:        Contiguous bytes starting from cluster offset
+ *
+ * This function is invoked when qed_find_cluster() completes.
+ *
+ * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
+ * in the image file.
+ *
+ * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
+ * table offset, respectively.  len is number of contiguous unallocated bytes.
+ */
+typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
+
+/**
+ * Generic callback for chaining async callbacks
+ */
+typedef struct {
+    BlockCompletionFunc *cb;
+    void *opaque;
+} GenericCB;
+
+void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
+void gencb_complete(void *opaque, int ret);
+
 /**
 * Header functions
 */
@@ -202,23 +228,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 * Table I/O functions
 */
 int qed_read_l1_table_sync(BDRVQEDState *s);
-int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+                        BlockCompletionFunc *cb, void *opaque);
 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
-int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
-int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                       unsigned int index, unsigned int n, bool flush);
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+                       BlockCompletionFunc *cb, void *opaque);
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+                        unsigned int index, unsigned int n, bool flush,
+                        BlockCompletionFunc *cb, void *opaque);
 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);

 /**
 * Cluster functions
 */
-int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
-                                  uint64_t pos, size_t *len,
-                                  uint64_t *img_offset);
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                      size_t len, QEDFindClusterFunc *cb, void *opaque);

 /**
 * Consistency check
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -19,6 +19,7 @@
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qint.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qstring.h"
@@ -96,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockDriverState *bs;
+    BlockAIOCB *aiocb;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -109,12 +110,11 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockDriverState *bs;
-    Coroutine *co;
+    BlockAIOCB common;

    /* Request metadata */
-    uint64_t offset;
-    uint64_t bytes;
+    uint64_t sector_num;
+    int nb_sectors;

    QEMUIOVector *qiov;         /* calling IOV */

@@ -133,15 +133,32 @@ struct QuorumAIOCB {
    int children_read;          /* how many children have been read from */
 };

-typedef struct QuorumCo {
-    QuorumAIOCB *acb;
-    int idx;
-} QuorumCo;
+static bool quorum_vote(QuorumAIOCB *acb);
+
+static void quorum_aio_cancel(BlockAIOCB *blockacb)
+{
+    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;
+
+    /* cancel all callbacks */
+    for (i = 0; i < s->num_children; i++) {
+        if (acb->qcrs[i].aiocb) {
+            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
+        }
+    }
+}
+
+static AIOCBInfo quorum_aiocb_info = {
+    .aiocb_size         = sizeof(QuorumAIOCB),
+    .cancel_async       = quorum_aio_cancel,
+};

 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
+    acb->common.cb(acb->common.opaque, acb->vote_ret);
    g_free(acb->qcrs);
-    g_free(acb);
+    qemu_aio_unref(acb);
 }

 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -154,26 +171,30 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
    return a->l == b->l;
 }

-static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+                                   BlockDriverState *bs,
                                   QEMUIOVector *qiov,
-                                   uint64_t offset,
-                                   uint64_t bytes)
+                                   uint64_t sector_num,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb,
+                                   void *opaque)
 {
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
+    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
    int i;

-    *acb = (QuorumAIOCB) {
-        .co                 = qemu_coroutine_self(),
-        .bs                 = bs,
-        .offset             = offset,
-        .bytes              = bytes,
-        .qiov               = qiov,
-        .votes.compare      = quorum_sha256_compare,
-        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
-    };
-
+    acb->common.bs->opaque = s;
+    acb->sector_num = sector_num;
+    acb->nb_sectors = nb_sectors;
+    acb->qiov = qiov;
    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
+    acb->count = 0;
+    acb->success_count = 0;
+    acb->rewrite_count = 0;
+    acb->votes.compare = quorum_sha256_compare;
+    QLIST_INIT(&acb->votes.vote_list);
+    acb->is_read = false;
+    acb->vote_ret = 0;
+
    for (i = 0; i < s->num_children; i++) {
        acb->qcrs[i].buf = NULL;
        acb->qcrs[i].ret = 0;
@@ -183,37 +204,30 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
    return acb;
 }

-static void quorum_report_bad(QuorumOpType type, uint64_t offset,
-                              uint64_t bytes, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+                              int nb_sectors, char *node_name, int ret)
 {
    const char *msg = NULL;
-    int64_t start_sector = offset / BDRV_SECTOR_SIZE;
-    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
-
    if (ret < 0) {
        msg = strerror(-ret);
    }

-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
-                                      end_sector - start_sector, &error_abort);
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+                                      sector_num, nb_sectors, &error_abort);
 }

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->bs);
-    int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
-    int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
-                                      BDRV_SECTOR_SIZE);
-
-    qapi_event_send_quorum_failure(reference, start_sector,
-                                   end_sector - start_sector, &error_abort);
+    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
+    qapi_event_send_quorum_failure(reference, acb->sector_num,
+                                   acb->nb_sectors, &error_abort);
 }

 static int quorum_vote_error(QuorumAIOCB *acb);

 static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;

    if (acb->success_count < s->threshold) {
        acb->vote_ret = quorum_vote_error(acb);
@@ -224,7 +238,22 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
    return false;
 }

-static int read_fifo_child(QuorumAIOCB *acb);
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);

 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -243,7 +272,70 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
 {
    QuorumAIOCB *acb = sacb->parent;
    QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-    quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
+    quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+                      sacb->aiocb->bs->node_name, ret);
+}
+
+static void quorum_fifo_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+
+    assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
+
+    if (ret < 0) {
+        quorum_report_bad_acb(sacb, ret);
+
+        /* We try to read next child in FIFO order if we fail to read */
+        if (acb->children_read < s->num_children) {
+            read_fifo_child(acb);
+            return;
+        }
+    }
+
+    acb->vote_ret = ret;
+
+    /* FIXME: rewrite failed children if acb->children_read > 1? */
+    quorum_aio_finalize(acb);
+}
+
+static void quorum_aio_cb(void *opaque, int ret)
+{
+    QuorumChildRequest *sacb = opaque;
+    QuorumAIOCB *acb = sacb->parent;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;
+    int i;
+
+    sacb->ret = ret;
+    if (ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad_acb(sacb, ret);
+    }
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+    if (acb->count < s->num_children) {
+        return;
+    }
+
+    /* Do the vote on read */
+    if (acb->is_read) {
+        rewrite = quorum_vote(acb);
+        for (i = 0; i < s->num_children; i++) {
+            qemu_vfree(acb->qcrs[i].buf);
+            qemu_iovec_destroy(&acb->qcrs[i].qiov);
+        }
+    } else {
+        quorum_has_too_much_io_failed(acb);
+    }
+
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }

 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -258,31 +350,14 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+                              acb->nb_sectors,
                              s->children[item->index]->bs->node_name, 0);
        }
    }
 }

-static void quorum_rewrite_entry(void *opaque)
-{
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-
-    /* Ignore any errors, it's just a correction attempt for already
-     * corrupted data. */
-    bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
-                    acb->qiov, 0);
-
-    /* Wake up the caller after the last rewrite */
-    acb->rewrite_count--;
-    if (!acb->rewrite_count) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
                                        QuorumVoteValue *value)
 {
    QuorumVoteVersion *version;
@@ -301,7 +376,7 @@ static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
        }
    }

-    /* quorum_rewrite_entry will count down this to zero */
+    /* quorum_rewrite_aio_cb will count down this to zero */
    acb->rewrite_count = count;

    /* now fire the correcting rewrites */
@@ -310,14 +385,9 @@ static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            Coroutine *co;
-            QuorumCo data = {
-                .acb = acb,
-                .idx = item->index,
-            };
-
-            co = qemu_coroutine_create(quorum_rewrite_entry, &data);
-            qemu_coroutine_enter(co);
+            bdrv_aio_writev(s->children[item->index], acb->sector_num,
+                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+                            acb);
        }
    }

@@ -437,8 +507,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
-            acb->offset, acb->bytes);
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -449,15 +519,16 @@ static bool quorum_compare(QuorumAIOCB *acb,
                           QEMUIOVector *a,
                           QEMUIOVector *b)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    ssize_t offset;

    /* This driver will replace blkverify in this particular case */
    if (s->is_blkverify) {
        offset = qemu_iovec_compare(a, b);
        if (offset != -1) {
-            quorum_err(acb, "contents mismatch at offset %" PRIu64,
-                       acb->offset + offset);
+            quorum_err(acb, "contents mismatch in sector %" PRId64,
+                       acb->sector_num +
+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
        }
        return true;
    }
@@ -468,7 +539,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
 /* Do a vote to get the error code */
 static int quorum_vote_error(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    QuorumVoteVersion *winner = NULL;
    QuorumVotes error_votes;
    QuorumVoteValue result_value;
@@ -497,16 +568,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
    return ret;
 }

-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
    bool quorum = true;
+    bool rewrite = false;
    int i, j, ret;
    QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->bs->opaque;
+    BDRVQuorumState *s = acb->common.bs->opaque;
    QuorumVoteVersion *winner;

    if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
    }

    /* get the index of the first successful read */
@@ -534,7 +606,7 @@ static void quorum_vote(QuorumAIOCB *acb)
    /* Every successful read agrees */
    if (quorum) {
        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
    }

    /* compute hashes for each successful read, also store indexes */
@@ -569,46 +641,19 @@ static void quorum_vote(QuorumAIOCB *acb)

    /* corruption correction is enabled */
    if (s->rewrite_corrupted) {
-        quorum_rewrite_bad_versions(acb, &winner->value);
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
    }

 free_exit:
    /* free lists */
    quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }

-static void read_quorum_children_entry(void *opaque)
+static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
 {
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i = co->idx;
-    QuorumChildRequest *sacb = &acb->qcrs[i];
-
-    sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
-                               &acb->qcrs[i].qiov, 0);
-
-    if (sacb->ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, sacb->ret);
-    }
-
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-
-    /* Wake up the caller after the last read */
-    if (acb->count == s->num_children) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static int read_quorum_children(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i, ret;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int i;

    acb->children_read = s->num_children;
    for (i = 0; i < s->num_children; i++) {
@@ -618,131 +663,65 @@ static int read_quorum_children(QuorumAIOCB *acb)
    }

    for (i = 0; i < s->num_children; i++) {
-        Coroutine *co;
-        QuorumCo data = {
-            .acb = acb,
-            .idx = i,
-        };
-
-        co = qemu_coroutine_create(read_quorum_children_entry, &data);
-        qemu_coroutine_enter(co);
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
+                                            &acb->qcrs[i].qiov, acb->nb_sectors,
+                                            quorum_aio_cb, &acb->qcrs[i]);
    }

-    while (acb->count < s->num_children) {
-        qemu_coroutine_yield();
-    }
-
-    /* Do the vote on read */
-    quorum_vote(acb);
-    for (i = 0; i < s->num_children; i++) {
-        qemu_vfree(acb->qcrs[i].buf);
-        qemu_iovec_destroy(&acb->qcrs[i].qiov);
-    }
-
-    while (acb->rewrite_count) {
-        qemu_coroutine_yield();
-    }
-
-    ret = acb->vote_ret;
-
-    return ret;
+    return &acb->common;
 }

-static int read_fifo_child(QuorumAIOCB *acb)
+static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->bs->opaque;
-    int n, ret;
+    BDRVQuorumState *s = acb->common.bs->opaque;
+    int n = acb->children_read++;

-    /* We try to read the next child in FIFO order if we failed to read */
-    do {
-        n = acb->children_read++;
-        acb->qcrs[n].bs = s->children[n]->bs;
-        ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
-                             acb->qiov, 0);
-        if (ret < 0) {
-            quorum_report_bad_acb(&acb->qcrs[n], ret);
-        }
-    } while (ret < 0 && acb->children_read < s->num_children);
+    acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
+                                        acb->qiov, acb->nb_sectors,
+                                        quorum_fifo_aio_cb, &acb->qcrs[n]);

-    /* FIXME: rewrite failed children if acb->children_read > 1? */
-
-    return ret;
+    return &acb->common;
 }

-static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
-                            uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                    int64_t sector_num,
+                                    QEMUIOVector *qiov,
+                                    int nb_sectors,
+                                    BlockCompletionFunc *cb,
+                                    void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-    int ret;
-
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
    acb->is_read = true;
    acb->children_read = 0;

    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        ret = read_quorum_children(acb);
-    } else {
-        ret = read_fifo_child(acb);
+        return read_quorum_children(acb);
    }
-    quorum_aio_finalize(acb);

-    return ret;
+    return read_fifo_child(acb);
 }

-static void write_quorum_entry(void *opaque)
-{
-    QuorumCo *co = opaque;
-    QuorumAIOCB *acb = co->acb;
-    BDRVQuorumState *s = acb->bs->opaque;
-    int i = co->idx;
-    QuorumChildRequest *sacb = &acb->qcrs[i];
-
-    sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-                                acb->qiov, 0);
-    if (sacb->ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, sacb->ret);
-    }
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-
-    /* Wake up the caller after the last write */
-    if (acb->count == s->num_children) {
-        qemu_coroutine_enter_if_inactive(acb->co);
-    }
-}
-
-static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                             uint64_t bytes, QEMUIOVector *qiov, int flags)
+static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                     int64_t sector_num,
+                                     QEMUIOVector *qiov,
+                                     int nb_sectors,
+                                     BlockCompletionFunc *cb,
+                                     void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-    int i, ret;
+    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+                                      cb, opaque);
+    int i;

    for (i = 0; i < s->num_children; i++) {
-        Coroutine *co;
-        QuorumCo data = {
-            .acb = acb,
-            .idx = i,
-        };
-
-        co = qemu_coroutine_create(write_quorum_entry, &data);
-        qemu_coroutine_enter(co);
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
+                                             qiov, nb_sectors, &quorum_aio_cb,
+                                             &acb->qcrs[i]);
    }

-    while (acb->count < s->num_children) {
-        qemu_coroutine_yield();
-    }
-
-    quorum_has_too_much_io_failed(acb);
-
-    ret = acb->vote_ret;
-    quorum_aio_finalize(acb);
-
-    return ret;
+    return &acb->common;
 }

 static int64_t quorum_getlength(BlockDriverState *bs)
@@ -785,7 +764,8 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
    for (i = 0; i < s->num_children; i++) {
        result = bdrv_co_flush(s->children[i]->bs);
        if (result) {
-            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, 0,
+            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+                              bdrv_nb_sectors(s->children[i]->bs),
                              s->children[i]->bs->node_name, result);
            result_value.l = result;
            quorum_count_vote(&error_votes, &result_value, i);
@@ -867,13 +847,30 @@ static QemuOptsList quorum_runtime_opts = {
    },
 };

+static int parse_read_pattern(const char *opt)
+{
+    int i;
+
+    if (!opt) {
+        /* Set quorum as default */
+        return QUORUM_READ_PATTERN_QUORUM;
+    }
+
+    for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
+        if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
+            return i;
+        }
+    }
+
+    return -EINVAL;
+}
+
 static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                       Error **errp)
 {
    BDRVQuorumState *s = bs->opaque;
    Error *local_err = NULL;
    QemuOpts *opts = NULL;
-    const char *pattern_str;
    bool *opened;
    int i;
    int ret = 0;
@@ -908,13 +905,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
        goto exit;
    }

-    pattern_str = qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN);
-    if (!pattern_str) {
-        ret = QUORUM_READ_PATTERN_QUORUM;
-    } else {
-        ret = qapi_enum_parse(&QuorumReadPattern_lookup, pattern_str,
-                              -EINVAL, NULL);
-    }
+    ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
    if (ret < 0) {
        error_setg(&local_err, "Please set read-pattern as fifo or quorum");
        goto exit;
@@ -1019,17 +1010,10 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,

    /* We can safely add the child now */
    bdrv_ref(child_bs);
-
-    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
-    if (child == NULL) {
-        s->next_child_index--;
-        bdrv_unref(child_bs);
-        goto out;
-    }
+    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
    s->children[s->num_children++] = child;

-out:
    bdrv_drained_end(bs);
 }

@@ -1083,15 +1067,19 @@ static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append(children, s->children[i]->bs->full_open_options);
+        qlist_append_obj(children,
+                         QOBJECT(s->children[i]->bs->full_open_options));
    }

    opts = qdict_new();
-    qdict_put_str(opts, "driver", "quorum");
-    qdict_put_int(opts, QUORUM_OPT_VOTE_THRESHOLD, s->threshold);
-    qdict_put_bool(opts, QUORUM_OPT_BLKVERIFY, s->is_blkverify);
-    qdict_put_bool(opts, QUORUM_OPT_REWRITE, s->rewrite_corrupted);
-    qdict_put(opts, "children", children);
+    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
+    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
+                  QOBJECT(qint_from_int(s->threshold)));
+    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
+                  QOBJECT(qbool_from_bool(s->is_blkverify)));
+    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
+                  QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
+    qdict_put_obj(opts, "children", QOBJECT(children));

    bs->full_open_options = opts;
 }
@@ -1110,14 +1098,12 @@ static BlockDriver bdrv_quorum = {

    .bdrv_getlength                     = quorum_getlength,

-    .bdrv_co_preadv                     = quorum_co_preadv,
-    .bdrv_co_pwritev                    = quorum_co_pwritev,
+    .bdrv_aio_readv                     = quorum_aio_readv,
+    .bdrv_aio_writev                    = quorum_aio_writev,

    .bdrv_add_child                     = quorum_add_child,
    .bdrv_del_child                     = quorum_del_child,

-    .bdrv_child_perm                    = bdrv_filter_default_perms,
-
    .is_filter                          = true,
    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -25,12 +25,15 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
 #include "block/raw-aio.h"
+#include "qapi/util.h"
 #include "qapi/qmp/qstring.h"

 #if defined(__APPLE__) && (__MACH__)
@@ -128,23 +131,12 @@ do { \

 #define MAX_BLOCKSIZE	4096

-/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
- * leaving a few more bytes for its future use. */
-#define RAW_LOCK_PERM_BASE             100
-#define RAW_LOCK_SHARED_BASE           200
-
 typedef struct BDRVRawState {
    int fd;
-    int lock_fd;
-    bool use_lock;
    int type;
    int open_flags;
    size_t buf_align;

-    /* The current permissions. */
-    uint64_t perm;
-    uint64_t shared_perm;
-
 #ifdef CONFIG_XFS
    bool is_xfs:1;
 #endif
@@ -152,7 +144,6 @@ typedef struct BDRVRawState {
    bool has_write_zeroes:1;
    bool discard_zeroes:1;
    bool use_linux_aio:1;
-    bool page_cache_inconsistent:1;
    bool has_fallocate;
    bool needs_alignment;
 } BDRVRawState;
@@ -228,28 +219,28 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
 {
    unsigned int sector_size;
    bool success = false;
-    int i;

    errno = ENOTSUP;
-    static const unsigned long ioctl_list[] = {
-#ifdef BLKSSZGET
-        BLKSSZGET,
-#endif
-#ifdef DKIOCGETBLOCKSIZE
-        DKIOCGETBLOCKSIZE,
-#endif
-#ifdef DIOCGSECTORSIZE
-        DIOCGSECTORSIZE,
-#endif
-    };

    /* Try a few ioctls to get the right size */
-    for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) {
-        if (ioctl(fd, ioctl_list[i], &sector_size) >= 0) {
-            *sector_size_p = sector_size;
-            success = true;
-        }
+#ifdef BLKSSZGET
+    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
    }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
+    }
+#endif
+#ifdef DIOCGSECTORSIZE
+    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+        *sector_size_p = sector_size;
+        success = true;
+    }
+#endif

    return success ? 0 : -errno;
 }
@@ -380,7 +371,12 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
 static void raw_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "file:", options);
+    /* The filename does not have to be prefixed by the protocol name, since
+     * "file" is the default protocol; therefore, the return value of this
+     * function call can be ignored. */
+    strstart(filename, "file:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static QemuOptsList raw_runtime_opts = {
@@ -397,11 +393,6 @@ static QemuOptsList raw_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "host AIO implementation (threads, native)",
        },
-        {
-            .name = "locking",
-            .type = QEMU_OPT_STRING,
-            .help = "file locking mode (on/off/auto, default: auto)",
-        },
        { /* end of list */ }
    },
 };
@@ -416,7 +407,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    BlockdevAioOptions aio, aio_default;
    int fd, ret;
    struct stat st;
-    OnOffAuto locking;

    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -437,9 +427,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
                  ? BLOCKDEV_AIO_OPTIONS_NATIVE
                  : BLOCKDEV_AIO_OPTIONS_THREADS;
-    aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
-                          qemu_opt_get(opts, "aio"),
-                          aio_default, &local_err);
+    aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
+                          BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
@@ -447,35 +436,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);

-    locking = qapi_enum_parse(&OnOffAuto_lookup,
-                              qemu_opt_get(opts, "locking"),
-                              ON_OFF_AUTO_AUTO, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-    switch (locking) {
-    case ON_OFF_AUTO_ON:
-        s->use_lock = true;
-        if (!qemu_has_ofd_lock()) {
-            fprintf(stderr,
-                    "File lock requested but OFD locking syscall is "
-                    "unavailable, falling back to POSIX file locks.\n"
-                    "Due to the implementation, locks can be lost "
-                    "unexpectedly.\n");
-        }
-        break;
-    case ON_OFF_AUTO_OFF:
-        s->use_lock = false;
-        break;
-    case ON_OFF_AUTO_AUTO:
-        s->use_lock = qemu_has_ofd_lock();
-        break;
-    default:
-        abort();
-    }
-
    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

@@ -491,21 +451,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    }
    s->fd = fd;

-    s->lock_fd = -1;
-    if (s->use_lock) {
-        fd = qemu_open(filename, s->open_flags);
-        if (fd < 0) {
-            ret = -errno;
-            error_setg_errno(errp, errno, "Could not open '%s' for locking",
-                             filename);
-            qemu_close(s->fd);
-            goto fail;
-        }
-        s->lock_fd = fd;
-    }
-    s->perm = 0;
-    s->shared_perm = BLK_PERM_ALL;
-
 #ifdef CONFIG_LINUX_AIO
     /* Currently Linux does AIO only for files opened with O_DIRECT */
    if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -593,161 +538,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    return raw_open_common(bs, options, flags, 0, errp);
 }

-typedef enum {
-    RAW_PL_PREPARE,
-    RAW_PL_COMMIT,
-    RAW_PL_ABORT,
-} RawPermLockOp;
-
-#define PERM_FOREACH(i) \
-    for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++)
-
-/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the
- * file; if @unlock == true, also unlock the unneeded bytes.
- * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
- */
-static int raw_apply_lock_bytes(BDRVRawState *s,
-                                uint64_t perm_lock_bits,
-                                uint64_t shared_perm_lock_bits,
-                                bool unlock, Error **errp)
-{
-    int ret;
-    int i;
-
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_PERM_BASE + i;
-        if (perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
-            if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
-                return ret;
-            }
-        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
-            if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
-                return ret;
-            }
-        }
-    }
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_SHARED_BASE + i;
-        if (shared_perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
-            if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
-                return ret;
-            }
-        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
-            if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
-                return ret;
-            }
-        }
-    }
-    return 0;
-}
-
-/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
-static int raw_check_lock_bytes(BDRVRawState *s,
-                                uint64_t perm, uint64_t shared_perm,
-                                Error **errp)
-{
-    int ret;
-    int i;
-
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_SHARED_BASE + i;
-        uint64_t p = 1ULL << i;
-        if (perm & p) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
-            if (ret) {
-                char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get \"%s\" lock",
-                           perm_name);
-                g_free(perm_name);
-                error_append_hint(errp,
-                                  "Is another process using the image?\n");
-                return ret;
-            }
-        }
-    }
-    PERM_FOREACH(i) {
-        int off = RAW_LOCK_PERM_BASE + i;
-        uint64_t p = 1ULL << i;
-        if (!(shared_perm & p)) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
-            if (ret) {
-                char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get shared \"%s\" lock",
-                           perm_name);
-                g_free(perm_name);
-                error_append_hint(errp,
-                                  "Is another process using the image?\n");
-                return ret;
-            }
-        }
-    }
-    return 0;
-}
-
-static int raw_handle_perm_lock(BlockDriverState *bs,
-                                RawPermLockOp op,
-                                uint64_t new_perm, uint64_t new_shared,
-                                Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret = 0;
-    Error *local_err = NULL;
-
-    if (!s->use_lock) {
-        return 0;
-    }
-
-    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
-        return 0;
-    }
-
-    assert(s->lock_fd > 0);
-
-    switch (op) {
-    case RAW_PL_PREPARE:
-        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
-                                   ~s->shared_perm | ~new_shared,
-                                   false, errp);
-        if (!ret) {
-            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
-            if (!ret) {
-                return 0;
-            }
-        }
-        op = RAW_PL_ABORT;
-        /* fall through to unlock bytes. */
-    case RAW_PL_ABORT:
-        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
-        if (local_err) {
-            /* Theoretically the above call only unlocks bytes and it cannot
-             * fail. Something weird happened, report it.
-             */
-            error_report_err(local_err);
-        }
-        break;
-    case RAW_PL_COMMIT:
-        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
-        if (local_err) {
-            /* Theoretically the above call only unlocks bytes and it cannot
-             * fail. Something weird happened, report it.
-             */
-            error_report_err(local_err);
-        }
-        break;
-    }
-    return ret;
-}
-
 static int raw_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
 {
@@ -861,15 +651,12 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

-static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
+static int hdev_get_max_transfer_length(int fd)
 {
 #ifdef BLKSECTGET
-    int max_bytes = 0;
-    short max_sectors = 0;
-    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-        return max_bytes;
-    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
-        return max_sectors << BDRV_SECTOR_BITS;
+    int max_sectors = 0;
+    if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+        return max_sectors;
    } else {
        return -errno;
    }
@@ -878,66 +665,16 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
 #endif
 }

-static int hdev_get_max_segments(const struct stat *st)
-{
-#ifdef CONFIG_LINUX
-    char buf[32];
-    const char *end;
-    char *sysfspath;
-    int ret;
-    int fd = -1;
-    long max_segments;
-
-    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st->st_rdev), minor(st->st_rdev));
-    fd = open(sysfspath, O_RDONLY);
-    if (fd == -1) {
-        ret = -errno;
-        goto out;
-    }
-    do {
-        ret = read(fd, buf, sizeof(buf) - 1);
-    } while (ret == -1 && errno == EINTR);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    } else if (ret == 0) {
-        ret = -EIO;
-        goto out;
-    }
-    buf[ret] = 0;
-    /* The file is ended with '\n', pass 'end' to accept that. */
-    ret = qemu_strtol(buf, &end, 10, &max_segments);
-    if (ret == 0 && end && *end == '\n') {
-        ret = max_segments;
-    }
-
-out:
-    if (fd != -1) {
-        close(fd);
-    }
-    g_free(sysfspath);
-    return ret;
-#else
-    return -ENOTSUP;
-#endif
-}
-
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;

    if (!fstat(s->fd, &st)) {
-        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
-            int ret = hdev_get_max_transfer_length(bs, s->fd);
-            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-                bs->bl.max_transfer = pow2floor(ret);
-            }
-            ret = hdev_get_max_segments(&st);
-            if (ret > 0) {
-                bs->bl.max_transfer = MIN(bs->bl.max_transfer,
-                                          ret * getpagesize());
+        if (S_ISBLK(st.st_mode)) {
+            int ret = hdev_get_max_transfer_length(s->fd);
+            if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+                bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
            }
        }
    }
@@ -1034,31 +771,10 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)

 static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
 {
-    BDRVRawState *s = aiocb->bs->opaque;
    int ret;

-    if (s->page_cache_inconsistent) {
-        return -EIO;
-    }
-
    ret = qemu_fdatasync(aiocb->aio_fildes);
    if (ret == -1) {
-        /* There is no clear definition of the semantics of a failing fsync(),
-         * so we may have to assume the worst. The sad truth is that this
-         * assumption is correct for Linux. Some pages are now probably marked
-         * clean in the page cache even though they are inconsistent with the
-         * on-disk contents. The next fdatasync() call would succeed, but no
-         * further writeback attempt will be made. We can't get back to a state
-         * in which we know what is on disk (we would have to rewrite
-         * everything that was touched since the last fdatasync() at least), so
-         * make bdrv_flush() fail permanently. Given that the behaviour isn't
-         * really defined, I have little hope that other OSes are doing better.
-         *
-         * Obviously, this doesn't affect O_DIRECT, which bypasses the page
-         * cache. */
-        if ((s->open_flags & O_DIRECT) == 0) {
-            s->page_cache_inconsistent = true;
-        }
        return -errno;
    }
    return 0;
@@ -1337,9 +1053,6 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
 #if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
    BDRVRawState *s = aiocb->bs->opaque;
 #endif
-#ifdef CONFIG_FALLOCATE
-    int64_t len;
-#endif

    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
        return handle_aiocb_write_zeroes_block(aiocb);
@@ -1382,10 +1095,7 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
 #endif

 #ifdef CONFIG_FALLOCATE
-    /* Last resort: we are trying to extend the file with zeroed data. This
-     * can be done via fallocate(fd, 0) */
-    len = bdrv_getlength(aiocb->bs);
-    if (s->has_fallocate && len >= 0 && aiocb->aio_offset >= len) {
+    if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
        int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
        if (ret == 0 || ret != -ENOTSUP) {
            return ret;
@@ -1489,7 +1199,7 @@ static int aio_worker(void *arg)

 static int paio_submit_co(BlockDriverState *bs, int fd,
                          int64_t offset, QEMUIOVector *qiov,
-                          int bytes, int type)
+                          int count, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
    ThreadPool *pool;
@@ -1498,22 +1208,22 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

-    acb->aio_nbytes = bytes;
+    acb->aio_nbytes = count;
    acb->aio_offset = offset;

    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
-        assert(qiov->size == bytes);
+        assert(qiov->size == count);
    }

-    trace_paio_submit_co(offset, bytes, type);
+    trace_paio_submit_co(offset, count, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_co(pool, aio_worker, acb);
 }

 static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t offset, QEMUIOVector *qiov, int bytes,
+        int64_t offset, QEMUIOVector *qiov, int count,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1523,7 +1233,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

-    acb->aio_nbytes = bytes;
+    acb->aio_nbytes = count;
    acb->aio_offset = offset;

    if (qiov) {
@@ -1532,7 +1242,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        assert(qiov->size == acb->aio_nbytes);
    }

-    trace_paio_submit(acb, opaque, offset, bytes, type);
+    trace_paio_submit(acb, opaque, offset, count, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
@@ -1622,156 +1332,26 @@ static void raw_close(BlockDriverState *bs)
        qemu_close(s->fd);
        s->fd = -1;
    }
-    if (s->lock_fd >= 0) {
-        qemu_close(s->lock_fd);
-        s->lock_fd = -1;
-    }
 }

-/**
- * Truncates the given regular file @fd to @offset and, when growing, fills the
- * new space according to @prealloc.
- *
- * Returns: 0 on success, -errno on failure.
- */
-static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
-                                Error **errp)
-{
-    int result = 0;
-    int64_t current_length = 0;
-    char *buf = NULL;
-    struct stat st;
-
-    if (fstat(fd, &st) < 0) {
-        result = -errno;
-        error_setg_errno(errp, -result, "Could not stat file");
-        return result;
-    }
-
-    current_length = st.st_size;
-    if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Cannot use preallocation for shrinking files");
-        return -ENOTSUP;
-    }
-
-    switch (prealloc) {
-#ifdef CONFIG_POSIX_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        /*
-         * Truncating before posix_fallocate() makes it about twice slower on
-         * file systems that do not support fallocate(), trying to check if a
-         * block is allocated before allocating it, so don't do that here.
-         */
-        result = -posix_fallocate(fd, current_length, offset - current_length);
-        if (result != 0) {
-            /* posix_fallocate() doesn't set errno. */
-            error_setg_errno(errp, -result,
-                             "Could not preallocate new data");
-        }
-        goto out;
-#endif
-    case PREALLOC_MODE_FULL:
-    {
-        int64_t num = 0, left = offset - current_length;
-
-        /*
-         * Knowing the final size from the beginning could allow the file
-         * system driver to do less allocations and possibly avoid
-         * fragmentation of the file.
-         */
-        if (ftruncate(fd, offset) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-            goto out;
-        }
-
-        buf = g_malloc0(65536);
-
-        result = lseek(fd, current_length, SEEK_SET);
-        if (result < 0) {
-            result = -errno;
-            error_setg_errno(errp, -result,
-                             "Failed to seek to the old end of file");
-            goto out;
-        }
-
-        while (left > 0) {
-            num = MIN(left, 65536);
-            result = write(fd, buf, num);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not write zeros for preallocation");
-                goto out;
-            }
-            left -= result;
-        }
-        if (result >= 0) {
-            result = fsync(fd);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not flush file to disk");
-                goto out;
-            }
-        }
-        goto out;
-    }
-    case PREALLOC_MODE_OFF:
-        if (ftruncate(fd, offset) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-        }
-        return result;
-    default:
-        result = -ENOTSUP;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_str(prealloc));
-        return result;
-    }
-
-out:
-    if (result < 0) {
-        if (ftruncate(fd, current_length) < 0) {
-            error_report("Failed to restore old file length: %s",
-                         strerror(errno));
-        }
-    }
-
-    g_free(buf);
-    return result;
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset,
-                        PreallocMode prealloc, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;
-    int ret;

    if (fstat(s->fd, &st)) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Failed to fstat() the file");
-        return ret;
+        return -errno;
    }

    if (S_ISREG(st.st_mode)) {
-        return raw_regular_truncate(s->fd, offset, prealloc, errp);
-    }
-
-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Preallocation mode '%s' unsupported for this "
-                   "non-regular file", PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
-    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-        if (offset > raw_getlength(bs)) {
-            error_setg(errp, "Cannot grow device files");
-            return -EINVAL;
+        if (ftruncate(s->fd, offset) < 0) {
+            return -errno;
        }
+    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+       if (offset > raw_getlength(bs)) {
+           return -EINVAL;
+       }
    } else {
-        error_setg(errp, "Resizing this file is not supported");
        return -ENOTSUP;
    }

@@ -1975,8 +1555,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
                          BDRV_SECTOR_SIZE);
    nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
-                               PREALLOC_MODE_OFF, &local_err);
+    prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
+                               PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
+                               &local_err);
    g_free(buf);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -2007,11 +1588,59 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    }

-    result = raw_regular_truncate(fd, total_size, prealloc, errp);
-    if (result < 0) {
+    if (ftruncate(fd, total_size) != 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not resize file");
        goto out_close;
    }

+    switch (prealloc) {
+#ifdef CONFIG_POSIX_FALLOCATE
+    case PREALLOC_MODE_FALLOC:
+        /* posix_fallocate() doesn't set errno. */
+        result = -posix_fallocate(fd, 0, total_size);
+        if (result != 0) {
+            error_setg_errno(errp, -result,
+                             "Could not preallocate data for the new file");
+        }
+        break;
+#endif
+    case PREALLOC_MODE_FULL:
+    {
+        int64_t num = 0, left = total_size;
+        buf = g_malloc0(65536);
+
+        while (left > 0) {
+            num = MIN(left, 65536);
+            result = write(fd, buf, num);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not write to the new file");
+                break;
+            }
+            left -= result;
+        }
+        if (result >= 0) {
+            result = fsync(fd);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not flush new file to disk");
+            }
+        }
+        g_free(buf);
+        break;
+    }
+    case PREALLOC_MODE_OFF:
+        break;
+    default:
+        result = -EINVAL;
+        error_setg(errp, "Unsupported preallocation mode: %s",
+                   PreallocMode_lookup[prealloc]);
+        break;
+    }
+
 out_close:
    if (qemu_close(fd) != 0 && result == 0) {
        result = -errno;
@@ -2169,26 +1798,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
 }

 static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
-    int64_t offset, int bytes,
+    int64_t offset, int count,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

-    return paio_submit(bs, s->fd, offset, NULL, bytes,
+    return paio_submit(bs, s->fd, offset, NULL, count,
                       cb, opaque, QEMU_AIO_DISCARD);
 }

 static int coroutine_fn raw_co_pwrite_zeroes(
    BlockDriverState *bs, int64_t offset,
-    int bytes, BdrvRequestFlags flags)
+    int count, BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;

    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
+        return paio_submit_co(bs, s->fd, offset, NULL, count,
                              QEMU_AIO_WRITE_ZEROES);
    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
+        return paio_submit_co(bs, s->fd, offset, NULL, count,
                              QEMU_AIO_DISCARD);
    }
    return -ENOTSUP;
@@ -2226,25 +1855,6 @@ static QemuOptsList raw_create_opts = {
    }
 };

-static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
-                          Error **errp)
-{
-    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
-}
-
-static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
-{
-    BDRVRawState *s = bs->opaque;
-    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
-    s->perm = perm;
-    s->shared_perm = shared;
-}
-
-static void raw_abort_perm_update(BlockDriverState *bs)
-{
-    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
-}
-
 BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
@@ -2275,9 +1885,7 @@ BlockDriver bdrv_file = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
-    .bdrv_check_perm = raw_check_perm,
-    .bdrv_set_perm   = raw_set_perm,
-    .bdrv_abort_perm_update = raw_abort_perm_update,
+
    .create_opts = &raw_create_opts,
 };

@@ -2450,7 +2058,10 @@ static int check_hdev_writable(BDRVRawState *s)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_device:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static bool hdev_is_sg(BlockDriverState *bs)
@@ -2493,12 +2104,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;

 #if defined(__APPLE__) && defined(__MACH__)
-    /*
-     * Caution: while qdict_get_str() is fine, getting non-string types
-     * would require more care.  When @options come from -blockdev or
-     * blockdev_add, its members are typed according to the QAPI
-     * schema, but when they come from -drive, they're all QString.
-     */
    const char *filename = qdict_get_str(options, "filename");
    char bsd_path[MAXPATHLEN] = "";
    bool error_occurred = false;
@@ -2539,7 +2144,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
            goto hdev_open_Mac_error;
        }

-        qdict_put_str(options, "filename", bsd_path);
+        qdict_put(options, "filename", qstring_from_str(bsd_path));

 hdev_open_Mac_error:
        g_free(mediaType);
@@ -2620,7 +2225,7 @@ static int fd_open(BlockDriverState *bs)
 }

 static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
-    int64_t offset, int bytes,
+    int64_t offset, int count,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
@@ -2628,12 +2233,12 @@ static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
    if (fd_open(bs) < 0) {
        return NULL;
    }
-    return paio_submit(bs, s->fd, offset, NULL, bytes,
+    return paio_submit(bs, s->fd, offset, NULL, count,
                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }

 static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+    int64_t offset, int count, BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;
    int rc;
@@ -2643,10 +2248,10 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
        return rc;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
+        return paio_submit_co(bs, s->fd, offset, NULL, count,
                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
+        return paio_submit_co(bs, s->fd, offset, NULL, count,
                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
    }
    return -ENOTSUP;
@@ -2733,9 +2338,6 @@ static BlockDriver bdrv_host_device = {
    .bdrv_get_info = raw_get_info,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,
-    .bdrv_check_perm = raw_check_perm,
-    .bdrv_set_perm   = raw_set_perm,
-    .bdrv_abort_perm_update = raw_abort_perm_update,
    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
    .bdrv_probe_geometry = hdev_probe_geometry,

@@ -2749,7 +2351,10 @@ static BlockDriver bdrv_host_device = {
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                 Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_cdrom:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }
 #endif

--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "block/raw-aio.h"
@@ -31,6 +32,7 @@
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/util.h"
 #include <windows.h>
 #include <winioctl.h>

@@ -275,7 +277,12 @@ static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
 static void raw_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "file:", options);
+    /* The filename does not have to be prefixed by the protocol name, since
+     * "file" is the default protocol; therefore, the return value of this
+     * function call can be ignored. */
+    strstart(filename, "file:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static QemuOptsList raw_runtime_opts = {
@@ -302,8 +309,8 @@ static bool get_aio_option(QemuOpts *opts, int flags, Error **errp)

    aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE
                                              : BLOCKDEV_AIO_OPTIONS_THREADS;
-    aio = qapi_enum_parse(&BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
-                          aio_default, errp);
+    aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
+                          BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp);

    switch (aio) {
    case BLOCKDEV_AIO_OPTIONS_NATIVE:
@@ -338,12 +345,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    if (qdict_get_try_bool(options, "locking", false)) {
-        error_setg(errp, "locking=on is not supported on Windows");
-        ret = -EINVAL;
-        goto fail;
-    }
-
    filename = qemu_opt_get(opts, "filename");

    use_aio = get_aio_option(opts, flags, &local_err);
@@ -460,19 +461,12 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset,
-                        PreallocMode prealloc, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;
    LONG low, high;
    DWORD dwPtrLow;

-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
    low = offset;
    high = offset >> 32;

@@ -482,11 +476,11 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset,
     */
    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-        error_setg_win32(errp, GetLastError(), "SetFilePointer error");
+        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
        return -EIO;
    }
    if (SetEndOfFile(s->hfile) == 0) {
-        error_setg_win32(errp, GetLastError(), "SetEndOfFile error");
+        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
        return -EIO;
    }
    return 0;
@@ -672,7 +666,10 @@ static int hdev_probe_device(const char *filename)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
+    /* The prefix is optional, just as for "file". */
+    strstart(filename, "host_device:", &filename);
+
+    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
 }

 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw" format driver
+/* BlockDriver implementation for "raw"
 *
 * Copyright (C) 2010-2016 Red Hat, Inc.
 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
@@ -259,12 +259,12 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
    *pnum = nb_sectors;
    *file = bs->file->bs;
    sector_num += s->offset / BDRV_SECTOR_SIZE;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
           (sector_num << BDRV_SECTOR_BITS);
 }

 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int bytes,
+                                             int64_t offset, int count,
                                             BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;
@@ -272,18 +272,18 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
        return -EINVAL;
    }
    offset += s->offset;
-    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
 }

 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+                                        int64_t offset, int count)
 {
    BDRVRawState *s = bs->opaque;
    if (offset > UINT64_MAX - s->offset) {
        return -EINVAL;
    }
    offset += s->offset;
-    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
@@ -312,31 +312,6 @@ static int64_t raw_getlength(BlockDriverState *bs)
    return s->size;
 }

-static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
-                                     Error **errp)
-{
-    BlockMeasureInfo *info;
-    int64_t required;
-
-    if (in_bs) {
-        required = bdrv_getlength(in_bs);
-        if (required < 0) {
-            error_setg_errno(errp, -required, "Unable to get image size");
-            return NULL;
-        }
-    } else {
-        required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                            BDRV_SECTOR_SIZE);
-    }
-
-    info = g_new(BlockMeasureInfo, 1);
-    info->required = required;
-
-    /* Unallocated sectors count towards the file size in raw images */
-    info->fully_allocated = info->required;
-    return info;
-}
-
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    return bdrv_get_info(bs->file->bs, bdi);
@@ -352,24 +327,26 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset,
-                        PreallocMode prealloc, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *s = bs->opaque;

    if (s->has_size) {
-        error_setg(errp, "Cannot resize fixed-size raw disks");
        return -ENOTSUP;
    }

    if (INT64_MAX - offset < s->offset) {
-        error_setg(errp, "Disk size too large for the chosen offset");
        return -EINVAL;
    }

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset, prealloc, errp);
+    return bdrv_truncate(bs->file->bs, offset);
+}
+
+static int raw_media_changed(BlockDriverState *bs)
+{
+    return bdrv_media_changed(bs->file->bs);
 }

 static void raw_eject(BlockDriverState *bs, bool eject_flag)
@@ -407,12 +384,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVRawState *s = bs->opaque;
    int ret;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    bs->sg = bs->file->bs->sg;
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
@@ -490,7 +461,6 @@ BlockDriver bdrv_raw = {
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
-    .bdrv_child_perm      = bdrv_filter_default_perms,
    .bdrv_create          = &raw_create,
    .bdrv_co_preadv       = &raw_co_preadv,
    .bdrv_co_pwritev      = &raw_co_pwritev,
@@ -500,11 +470,11 @@ BlockDriver bdrv_raw = {
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
-    .bdrv_measure         = &raw_measure,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
    .bdrv_probe_geometry  = &raw_probe_geometry,
+    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
    .bdrv_co_ioctl        = &raw_co_ioctl,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,14 +13,13 @@

 #include "qemu/osdep.h"

-#include <rbd/librbd.h>
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi/qmp/qjson.h"
+
+#include <rbd/librbd.h>

 /*
 * When specifying the image filename use:
@@ -56,15 +55,13 @@

 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)

+#define RBD_MAX_CONF_NAME_SIZE 128
+#define RBD_MAX_CONF_VAL_SIZE 512
+#define RBD_MAX_CONF_SIZE 1024
+#define RBD_MAX_POOL_NAME_SIZE 128
+#define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100

-/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
-#ifdef LIBRBD_SUPPORTS_IOVEC
-#define LIBRBD_USE_IOVEC 1
-#else
-#define LIBRBD_USE_IOVEC 0
-#endif
-
 typedef enum {
    RBD_AIO_READ,
    RBD_AIO_WRITE,
@@ -94,29 +91,46 @@ typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
-    char *image_name;
+    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char *snap;
 } BDRVRBDState;

-static char *qemu_rbd_next_tok(char *src, char delim, char **p)
+static int qemu_rbd_next_tok(char *dst, int dst_len,
+                             char *src, char delim,
+                             const char *name,
+                             char **p, Error **errp)
 {
+    int l;
    char *end;

    *p = NULL;

-    for (end = src; *end; ++end) {
+    if (delim != '\0') {
+        for (end = src; *end; ++end) {
+            if (*end == delim) {
+                break;
+            }
+            if (*end == '\\' && end[1] != '\0') {
+                end++;
+            }
+        }
        if (*end == delim) {
-            break;
-        }
-        if (*end == '\\' && end[1] != '\0') {
-            end++;
+            *p = end + 1;
+            *end = '\0';
        }
    }
-    if (*end == delim) {
-        *p = end + 1;
-        *end = '\0';
+    l = strlen(src);
+    if (l >= dst_len) {
+        error_setg(errp, "%s too long", name);
+        return -EINVAL;
+    } else if (l == 0) {
+        error_setg(errp, "%s too short", name);
+        return -EINVAL;
    }
-    return src;
+
+    pstrcpy(dst, dst_len, src);
+
+    return 0;
 }

 static void qemu_rbd_unescape(char *src)
@@ -132,92 +146,87 @@ static void qemu_rbd_unescape(char *src)
    *p = '\0';
 }

-static void qemu_rbd_parse_filename(const char *filename, QDict *options,
-                                    Error **errp)
+static int qemu_rbd_parsename(const char *filename,
+                              char *pool, int pool_len,
+                              char *snap, int snap_len,
+                              char *name, int name_len,
+                              char *conf, int conf_len,
+                              Error **errp)
 {
    const char *start;
    char *p, *buf;
-    QList *keypairs = NULL;
-    char *found_str;
+    int ret;

    if (!strstart(filename, "rbd:", &start)) {
        error_setg(errp, "File name must start with 'rbd:'");
-        return;
+        return -EINVAL;
    }

    buf = g_strdup(start);
    p = buf;
+    *snap = '\0';
+    *conf = '\0';

-    found_str = qemu_rbd_next_tok(p, '/', &p);
-    if (!p) {
-        error_setg(errp, "Pool name is required");
+    ret = qemu_rbd_next_tok(pool, pool_len, p,
+                            '/', "pool name", &p, errp);
+    if (ret < 0 || !p) {
+        ret = -EINVAL;
        goto done;
    }
-    qemu_rbd_unescape(found_str);
-    qdict_put_str(options, "pool", found_str);
+    qemu_rbd_unescape(pool);

    if (strchr(p, '@')) {
-        found_str = qemu_rbd_next_tok(p, '@', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "image", found_str);
-
-        found_str = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "snapshot", found_str);
+        ret = qemu_rbd_next_tok(name, name_len, p,
+                                '@', "object name", &p, errp);
+        if (ret < 0) {
+            goto done;
+        }
+        ret = qemu_rbd_next_tok(snap, snap_len, p,
+                                ':', "snap name", &p, errp);
+        qemu_rbd_unescape(snap);
    } else {
-        found_str = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(found_str);
-        qdict_put_str(options, "image", found_str);
+        ret = qemu_rbd_next_tok(name, name_len, p,
+                                ':', "object name", &p, errp);
    }
-    if (!p) {
+    qemu_rbd_unescape(name);
+    if (ret < 0 || !p) {
        goto done;
    }

-    /* The following are essentially all key/value pairs, and we treat
-     * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
-    while (p) {
-        char *name, *value;
-        name = qemu_rbd_next_tok(p, '=', &p);
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
-            break;
-        }
-
-        qemu_rbd_unescape(name);
-
-        value = qemu_rbd_next_tok(p, ':', &p);
-        qemu_rbd_unescape(value);
-
-        if (!strcmp(name, "conf")) {
-            qdict_put_str(options, "conf", value);
-        } else if (!strcmp(name, "id")) {
-            qdict_put_str(options, "user", value);
-        } else {
-            /*
-             * We pass these internally to qemu_rbd_set_keypairs(), so
-             * we can get away with the simpler list of [ "key1",
-             * "value1", "key2", "value2" ] rather than a raw dict
-             * { "key1": "value1", "key2": "value2" } where we can't
-             * guarantee order, or even a more correct but complex
-             * [ { "key1": "value1" }, { "key2": "value2" } ]
-             */
-            if (!keypairs) {
-                keypairs = qlist_new();
-            }
-            qlist_append_str(keypairs, name);
-            qlist_append_str(keypairs, value);
-        }
-    }
-
-    if (keypairs) {
-        qdict_put(options, "=keyvalue-pairs",
-                  qobject_to_json(QOBJECT(keypairs)));
-    }
+    ret = qemu_rbd_next_tok(conf, conf_len, p,
+                            '\0', "configuration", &p, errp);

 done:
    g_free(buf);
-    QDECREF(keypairs);
-    return;
+    return ret;
+}
+
+static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
+{
+    const char *p = conf;
+
+    while (*p) {
+        int len;
+        const char *end = strchr(p, ':');
+
+        if (end) {
+            len = end - p;
+        } else {
+            len = strlen(p);
+        }
+
+        if (strncmp(p, "id=", 3) == 0) {
+            len -= 3;
+            strncpy(clientname, p + 3, len);
+            clientname[len] = '\0';
+            return clientname;
+        }
+        if (end == NULL) {
+            break;
+        }
+        p = end + 1;
+    }
+    return NULL;
 }


@@ -240,129 +249,94 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
    return 0;
 }

-static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
-                                 Error **errp)
+
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
+                             bool only_read_conf_file,
+                             Error **errp)
 {
-    QList *keypairs;
-    QString *name;
-    QString *value;
-    const char *key;
-    size_t remaining;
+    char *p, *buf;
+    char name[RBD_MAX_CONF_NAME_SIZE];
+    char value[RBD_MAX_CONF_VAL_SIZE];
    int ret = 0;

-    if (!keypairs_json) {
-        return ret;
-    }
-    keypairs = qobject_to_qlist(qobject_from_json(keypairs_json,
-                                                  &error_abort));
-    remaining = qlist_size(keypairs) / 2;
-    assert(remaining);
+    buf = g_strdup(conf);
+    p = buf;

-    while (remaining--) {
-        name = qobject_to_qstring(qlist_pop(keypairs));
-        value = qobject_to_qstring(qlist_pop(keypairs));
-        assert(name && value);
-        key = qstring_get_str(name);
-
-        ret = rados_conf_set(cluster, key, qstring_get_str(value));
-        QDECREF(name);
-        QDECREF(value);
+    while (p) {
+        ret = qemu_rbd_next_tok(name, sizeof(name), p,
+                                '=', "conf option name", &p, errp);
        if (ret < 0) {
-            error_setg_errno(errp, -ret, "invalid conf option %s", key);
+            break;
+        }
+        qemu_rbd_unescape(name);
+
+        if (!p) {
+            error_setg(errp, "conf option %s has no value", name);
            ret = -EINVAL;
            break;
        }
+
+        ret = qemu_rbd_next_tok(value, sizeof(value), p,
+                                ':', "conf option value", &p, errp);
+        if (ret < 0) {
+            break;
+        }
+        qemu_rbd_unescape(value);
+
+        if (strcmp(name, "conf") == 0) {
+            /* read the conf file alone, so it doesn't override more
+               specific settings for a particular device */
+            if (only_read_conf_file) {
+                ret = rados_conf_read_file(cluster, value);
+                if (ret < 0) {
+                    error_setg_errno(errp, -ret, "error reading conf file %s",
+                                     value);
+                    break;
+                }
+            }
+        } else if (strcmp(name, "id") == 0) {
+            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
+        } else if (!only_read_conf_file) {
+            ret = rados_conf_set(cluster, name, value);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "invalid conf option %s", name);
+                ret = -EINVAL;
+                break;
+            }
+        }
    }

-    QDECREF(keypairs);
+    g_free(buf);
    return ret;
 }

-static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
-{
-    if (LIBRBD_USE_IOVEC) {
-        RBDAIOCB *acb = rcb->acb;
-        iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
-                   acb->qiov->size - offs);
-    } else {
-        memset(rcb->buf + offs, 0, rcb->size - offs);
-    }
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "rbd",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "pool",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados pool name",
-        },
-        {
-            .name = "image",
-            .type = QEMU_OPT_STRING,
-            .help = "Image name in the pool",
-        },
-        {
-            .name = "conf",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados config file location",
-        },
-        {
-            .name = "snapshot",
-            .type = QEMU_OPT_STRING,
-            .help = "Ceph snapshot name",
-        },
-        {
-            /* maps to 'id' in rados_create() */
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados id name",
-        },
-        /*
-         * server.* extracted manually, see qemu_rbd_mon_host()
-         */
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-
-        /*
-         * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
-         */
-        {
-            /*
-             * HACK: name starts with '=' so that qemu_opts_parse()
-             * can't set it
-             */
-            .name = "=keyvalue-pairs",
-            .type = QEMU_OPT_STRING,
-            .help = "Legacy rados key/value option parameters",
-        },
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
 static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 {
    Error *local_err = NULL;
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
-    const char *pool, *image_name, *conf, *user, *keypairs;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char name[RBD_MAX_IMAGE_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_SIZE];
+    char *clientname;
    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
-    QDict *options = NULL;
-    int ret = 0;
+    int ret;

    secretid = qemu_opt_get(opts, "password-secret");

+    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
+                           snap_buf, sizeof(snap_buf),
+                           name, sizeof(name),
+                           conf, sizeof(conf), &local_err) < 0) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
+    }
+
    /* Read out options */
    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                     BDRV_SECTOR_SIZE);
@@ -370,53 +344,35 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    if (objsize) {
        if ((objsize - 1) & objsize) {    /* not a power of 2? */
            error_setg(errp, "obj size needs to be power of 2");
-            ret = -EINVAL;
-            goto exit;
+            return -EINVAL;
        }
        if (objsize < 4096) {
            error_setg(errp, "obj size too small");
-            ret = -EINVAL;
-            goto exit;
+            return -EINVAL;
        }
        obj_order = ctz32(objsize);
    }

-    options = qdict_new();
-    qemu_rbd_parse_filename(filename, options, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    /*
-     * Caution: while qdict_get_try_str() is fine, getting non-string
-     * types would require more care.  When @options come from -blockdev
-     * or blockdev_add, its members are typed according to the QAPI
-     * schema, but when they come from -drive, they're all QString.
-     */
-    pool       = qdict_get_try_str(options, "pool");
-    conf       = qdict_get_try_str(options, "conf");
-    user       = qdict_get_try_str(options, "user");
-    image_name = qdict_get_try_str(options, "image");
-    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");
-
-    ret = rados_create(&cluster, user);
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    ret = rados_create(&cluster, clientname);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error initializing");
-        goto exit;
+        return ret;
    }

-    /* try default location when conf=NULL, but ignore failure */
-    ret = rados_conf_read_file(cluster, conf);
-    if (conf && ret < 0) {
-        error_setg_errno(errp, -ret, "error reading conf file %s", conf);
+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(cluster, NULL);
+    } else if (conf[0] != '\0' &&
+               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
+        error_propagate(errp, local_err);
        ret = -EIO;
        goto shutdown;
    }

-    ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
-    if (ret < 0) {
+    if (conf[0] != '\0' &&
+        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
+        error_propagate(errp, local_err);
        ret = -EIO;
        goto shutdown;
    }
@@ -438,7 +394,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        goto shutdown;
    }

-    ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
+    ret = rbd_create(io_ctx, name, bytes, &obj_order);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
    }
@@ -447,9 +403,6 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)

 shutdown:
    rados_shutdown(cluster);
-
-exit:
-    QDECREF(options);
    return ret;
 }

@@ -473,11 +426,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
        }
    } else {
        if (r < 0) {
-            qemu_rbd_memset(rcb, 0);
+            memset(rcb->buf, 0, rcb->size);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
-            qemu_rbd_memset(rcb, r);
+            memset(rcb->buf + r, 0, rcb->size - r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
@@ -488,137 +441,92 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)

    g_free(rcb);

-    if (!LIBRBD_USE_IOVEC) {
-        if (acb->cmd == RBD_AIO_READ) {
-            qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-        }
-        qemu_vfree(acb->bounce);
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
-
+    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));

    qemu_aio_unref(acb);
 }

-static char *qemu_rbd_mon_host(QDict *options, Error **errp)
-{
-    const char **vals = g_new(const char *, qdict_size(options) + 1);
-    char keybuf[32];
-    const char *host, *port;
-    char *rados_str;
-    int i;
-
-    for (i = 0;; i++) {
-        sprintf(keybuf, "server.%d.host", i);
-        host = qdict_get_try_str(options, keybuf);
-        qdict_del(options, keybuf);
-        sprintf(keybuf, "server.%d.port", i);
-        port = qdict_get_try_str(options, keybuf);
-        qdict_del(options, keybuf);
-        if (!host && !port) {
-            break;
-        }
-        if (!host) {
-            error_setg(errp, "Parameter server.%d.host is missing", i);
-            rados_str = NULL;
-            goto out;
-        }
-
-        if (strchr(host, ':')) {
-            vals[i] = port ? g_strdup_printf("[%s]:%s", host, port)
-                : g_strdup_printf("[%s]", host);
-        } else {
-            vals[i] = port ? g_strdup_printf("%s:%s", host, port)
-                : g_strdup(host);
-        }
-    }
-    vals[i] = NULL;
-
-    rados_str = i ? g_strjoinv(";", (char **)vals) : NULL;
-out:
-    g_strfreev((char **)vals);
-    return rados_str;
-}
+/* TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "rbd",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "Specification of the rbd image",
+        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
+        { /* end of list */ }
+    },
+};

 static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
-    const char *pool, *snap, *conf, *user, *image_name, *keypairs;
-    const char *secretid, *filename;
+    char pool[RBD_MAX_POOL_NAME_SIZE];
+    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
+    char conf[RBD_MAX_CONF_SIZE];
+    char clientname_buf[RBD_MAX_CONF_SIZE];
+    char *clientname;
+    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
-    char *mon_host = NULL;
+    const char *filename;
    int r;

-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        warn_report("'filename' option specified. "
-                    "This is an unsupported option, and may be deprecated "
-                    "in the future");
-        qemu_rbd_parse_filename(filename, options, &local_err);
-        if (local_err) {
-            r = -EINVAL;
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
-
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
-        r = -EINVAL;
-        goto failed_opts;
-    }
-
-    mon_host = qemu_rbd_mon_host(options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        r = -EINVAL;
-        goto failed_opts;
+        qemu_opts_del(opts);
+        return -EINVAL;
    }

+    filename = qemu_opt_get(opts, "filename");
    secretid = qemu_opt_get(opts, "password-secret");

-    pool           = qemu_opt_get(opts, "pool");
-    conf           = qemu_opt_get(opts, "conf");
-    snap           = qemu_opt_get(opts, "snapshot");
-    user           = qemu_opt_get(opts, "user");
-    image_name     = qemu_opt_get(opts, "image");
-    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");
-
-    if (!pool || !image_name) {
-        error_setg(errp, "Parameters 'pool' and 'image' are required");
+    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
+                           snap_buf, sizeof(snap_buf),
+                           s->name, sizeof(s->name),
+                           conf, sizeof(conf), errp) < 0) {
        r = -EINVAL;
        goto failed_opts;
    }

-    r = rados_create(&s->cluster, user);
+    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
        error_setg_errno(errp, -r, "error initializing");
        goto failed_opts;
    }

-    s->snap = g_strdup(snap);
-    s->image_name = g_strdup(image_name);
-
-    /* try default location when conf=NULL, but ignore failure */
-    r = rados_conf_read_file(s->cluster, conf);
-    if (conf && r < 0) {
-        error_setg_errno(errp, -r, "error reading conf file %s", conf);
-        goto failed_shutdown;
+    s->snap = NULL;
+    if (snap_buf[0] != '\0') {
+        s->snap = g_strdup(snap_buf);
    }

-    r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
-    if (r < 0) {
-        goto failed_shutdown;
+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(s->cluster, NULL);
+    } else if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+        if (r < 0) {
+            goto failed_shutdown;
+        }
    }

-    if (mon_host) {
-        r = rados_conf_set(s->cluster, "mon_host", mon_host);
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
        if (r < 0) {
            goto failed_shutdown;
        }
@@ -654,23 +562,13 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto failed_shutdown;
    }

-    /* rbd_open is always r/w */
-    r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
+    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
-        error_setg_errno(errp, -r, "error reading header from %s",
-                         s->image_name);
+        error_setg_errno(errp, -r, "error reading header from %s", s->name);
        goto failed_open;
    }

-    /* If we are using an rbd snapshot, we must be r/o, otherwise
-     * leave as-is */
-    if (s->snap != NULL) {
-        r = bdrv_set_read_only(bs, true, &local_err);
-        if (r < 0) {
-            error_propagate(errp, local_err);
-            goto failed_open;
-        }
-    }
+    bs->read_only = (s->snap != NULL);

    qemu_opts_del(opts);
    return 0;
@@ -680,34 +578,11 @@ failed_open:
 failed_shutdown:
    rados_shutdown(s->cluster);
    g_free(s->snap);
-    g_free(s->image_name);
 failed_opts:
    qemu_opts_del(opts);
-    g_free(mon_host);
-exit:
    return r;
 }

-
-/* Since RBD is currently always opened R/W via the API,
- * we just need to check if we are using a snapshot or not, in
- * order to determine if we will allow it to be R/W */
-static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
-                                   BlockReopenQueue *queue, Error **errp)
-{
-    BDRVRBDState *s = state->bs->opaque;
-    int ret = 0;
-
-    if (s->snap && state->flags & BDRV_O_RDWR) {
-        error_setg(errp,
-                   "Cannot change node '%s' to r/w when using RBD snapshot",
-                   bdrv_get_device_or_node_name(state->bs));
-        ret = -EINVAL;
-    }
-
-    return ret;
-}
-
 static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;
@@ -715,7 +590,6 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
-    g_free(s->image_name);
    rados_shutdown(s->cluster);
 }

@@ -781,6 +655,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
+    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;
@@ -789,29 +664,27 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb->cmd = cmd;
    acb->qiov = qiov;
    assert(!qiov || qiov->size == size);
-
-    rcb = g_new(RADOSCB, 1);
-
-    if (!LIBRBD_USE_IOVEC) {
-        if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-            acb->bounce = NULL;
-        } else {
-            acb->bounce = qemu_try_blockalign(bs, qiov->size);
-            if (acb->bounce == NULL) {
-                goto failed;
-            }
+    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
+        acb->bounce = NULL;
+    } else {
+        acb->bounce = qemu_try_blockalign(bs, qiov->size);
+        if (acb->bounce == NULL) {
+            goto failed;
        }
-        if (cmd == RBD_AIO_WRITE) {
-            qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-        }
-        rcb->buf = acb->bounce;
    }
-
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;

+    if (cmd == RBD_AIO_WRITE) {
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+    }
+
+    buf = acb->bounce;
+
+    rcb = g_new(RADOSCB, 1);
    rcb->acb = acb;
+    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -821,18 +694,10 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

    switch (cmd) {
    case RBD_AIO_WRITE:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_write(s->image, off, size, rcb->buf, c);
-#endif
+        r = rbd_aio_write(s->image, off, size, buf, c);
        break;
    case RBD_AIO_READ:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_read(s->image, off, size, rcb->buf, c);
-#endif
+        r = rbd_aio_read(s->image, off, size, buf, c);
        break;
    case RBD_AIO_DISCARD:
        r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -847,16 +712,14 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    if (r < 0) {
        goto failed_completion;
    }
+
    return &acb->common;

 failed_completion:
    rbd_aio_release(c);
 failed:
    g_free(rcb);
-    if (!LIBRBD_USE_IOVEC) {
-        qemu_vfree(acb->bounce);
-    }
-
+    qemu_vfree(acb->bounce);
    qemu_aio_unref(acb);
    return NULL;
 }
@@ -936,21 +799,13 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
    return info.size;
 }

-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
-                             PreallocMode prealloc, Error **errp)
+static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRBDState *s = bs->opaque;
    int r;

-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
    r = rbd_resize(s->image, offset);
    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to resize file");
        return r;
    }

@@ -1072,11 +927,11 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 #ifdef LIBRBD_SUPPORTS_DISCARD
 static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
                                         int64_t offset,
-                                         int bytes,
+                                         int count,
                                         BlockCompletionFunc *cb,
                                         void *opaque)
 {
-    return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
+    return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif
@@ -1117,19 +972,18 @@ static QemuOptsList qemu_rbd_create_opts = {
 };

 static BlockDriver bdrv_rbd = {
-    .format_name            = "rbd",
-    .instance_size          = sizeof(BDRVRBDState),
-    .bdrv_parse_filename    = qemu_rbd_parse_filename,
-    .bdrv_file_open         = qemu_rbd_open,
-    .bdrv_close             = qemu_rbd_close,
-    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
-    .bdrv_create            = qemu_rbd_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .bdrv_get_info          = qemu_rbd_getinfo,
-    .create_opts            = &qemu_rbd_create_opts,
-    .bdrv_getlength         = qemu_rbd_getlength,
-    .bdrv_truncate          = qemu_rbd_truncate,
-    .protocol_name          = "rbd",
+    .format_name        = "rbd",
+    .instance_size      = sizeof(BDRVRBDState),
+    .bdrv_needs_filename = true,
+    .bdrv_file_open     = qemu_rbd_open,
+    .bdrv_close         = qemu_rbd_close,
+    .bdrv_create        = qemu_rbd_create,
+    .bdrv_has_zero_init = bdrv_has_zero_init_1,
+    .bdrv_get_info      = qemu_rbd_getinfo,
+    .create_opts        = &qemu_rbd_create_opts,
+    .bdrv_getlength     = qemu_rbd_getlength,
+    .bdrv_truncate      = qemu_rbd_truncate,
+    .protocol_name      = "rbd",

    .bdrv_aio_readv         = qemu_rbd_aio_readv,
    .bdrv_aio_writev        = qemu_rbd_aio_writev,
--- a/block/replication.c
+++ b/block/replication.c
@@ -22,17 +22,9 @@
 #include "qapi/error.h"
 #include "replication.h"

-typedef enum {
-    BLOCK_REPLICATION_NONE,             /* block replication is not started */
-    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
-    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
-    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
-    BLOCK_REPLICATION_DONE,             /* block replication is done */
-} ReplicationStage;
-
 typedef struct BDRVReplicationState {
    ReplicationMode mode;
-    ReplicationStage stage;
+    int replication_state;
    BdrvChild *active_disk;
    BdrvChild *hidden_disk;
    BdrvChild *secondary_disk;
@@ -44,6 +36,14 @@ typedef struct BDRVReplicationState {
    int error;
 } BDRVReplicationState;

+enum {
+    BLOCK_REPLICATION_NONE,             /* block replication is not started */
+    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
+    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
+    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+    BLOCK_REPLICATION_DONE,             /* block replication is done */
+};
+
 static void replication_start(ReplicationState *rs, ReplicationMode mode,
                              Error **errp);
 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
@@ -86,12 +86,6 @@ static int replication_open(BlockDriverState *bs, QDict *options,
    const char *mode;
    const char *top_id;

-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
    ret = -EINVAL;
    opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -141,10 +135,10 @@ static void replication_close(BlockDriverState *bs)
 {
    BDRVReplicationState *s = bs->opaque;

-    if (s->stage == BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }
-    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
+    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
        block_job_cancel_sync(s->active_disk->bs->job);
    }

@@ -155,18 +149,6 @@ static void replication_close(BlockDriverState *bs)
    replication_remove(s->rs);
 }

-static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                   const BdrvChildRole *role,
-                                   uint64_t perm, uint64_t shared,
-                                   uint64_t *nperm, uint64_t *nshared)
-{
-    *nperm = *nshared = BLK_PERM_CONSISTENT_READ \
-                        | BLK_PERM_WRITE \
-                        | BLK_PERM_WRITE_UNCHANGED;
-
-    return;
-}
-
 static int64_t replication_getlength(BlockDriverState *bs)
 {
    return bdrv_getlength(bs->file->bs);
@@ -174,7 +156,7 @@ static int64_t replication_getlength(BlockDriverState *bs)

 static int replication_get_io_status(BDRVReplicationState *s)
 {
-    switch (s->stage) {
+    switch (s->replication_state) {
    case BLOCK_REPLICATION_NONE:
        return -EIO;
    case BLOCK_REPLICATION_RUNNING:
@@ -234,14 +216,10 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
    }

    if (job) {
-        uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE;
-
-        backup_wait_for_overlapping_requests(child->bs->job,
-                                             sector_num * BDRV_SECTOR_SIZE,
-                                             remaining_bytes);
-        backup_cow_request_begin(&req, child->bs->job,
-                                 sector_num * BDRV_SECTOR_SIZE,
-                                 remaining_bytes);
+        backup_wait_for_overlapping_requests(child->bs->job, sector_num,
+                                             remaining_sectors);
+        backup_cow_request_begin(&req, child->bs->job, sector_num,
+                                 remaining_sectors);
        ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors,
                            qiov);
        backup_cow_request_end(&req);
@@ -264,8 +242,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
    BdrvChild *top = bs->file;
    BdrvChild *base = s->secondary_disk;
    BdrvChild *target;
-    int ret;
-    int64_t n;
+    int ret, n;

    ret = replication_get_io_status(s);
    if (ret < 0) {
@@ -284,20 +261,14 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
     */
    qemu_iovec_init(&hd_qiov, qiov->niov);
    while (remaining_sectors > 0) {
-        int64_t count;
-
-        ret = bdrv_is_allocated_above(top->bs, base->bs,
-                                      sector_num * BDRV_SECTOR_SIZE,
-                                      remaining_sectors * BDRV_SECTOR_SIZE,
-                                      &count);
+        ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num,
+                                      remaining_sectors, &n);
        if (ret < 0) {
            goto out1;
        }

-        assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
-        n = count >> BDRV_SECTOR_BITS;
        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE);

        target = ret ? top : base;
        ret = bdrv_co_writev(target, sector_num, n, &hd_qiov);
@@ -307,7 +278,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,

        remaining_sectors -= n;
        sector_num += n;
-        bytes_done += count;
+        bytes_done += n * BDRV_SECTOR_SIZE;
    }

 out1:
@@ -414,7 +385,7 @@ static void backup_job_completed(void *opaque, int ret)
    BlockDriverState *bs = opaque;
    BDRVReplicationState *s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_FAILOVER) {
+    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
        /* The backup job is cancelled unexpectedly */
        s->error = -EIO;
    }
@@ -456,7 +427,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_NONE) {
+    if (s->replication_state != BLOCK_REPLICATION_NONE) {
        error_setg(errp, "Block replication is running or done");
        aio_context_release(aio_context);
        return;
@@ -556,7 +527,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        abort();
    }

-    s->stage = BLOCK_REPLICATION_RUNNING;
+    s->replication_state = BLOCK_REPLICATION_RUNNING;

    if (s->mode == REPLICATION_MODE_SECONDARY) {
        secondary_do_checkpoint(s, errp);
@@ -592,7 +563,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -612,7 +583,7 @@ static void replication_done(void *opaque, int ret)
    BDRVReplicationState *s = bs->opaque;

    if (ret == 0) {
-        s->stage = BLOCK_REPLICATION_DONE;
+        s->replication_state = BLOCK_REPLICATION_DONE;

        /* refresh top bs's filename */
        bdrv_refresh_filename(bs);
@@ -621,7 +592,7 @@ static void replication_done(void *opaque, int ret)
        s->hidden_disk = NULL;
        s->error = 0;
    } else {
-        s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
+        s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED;
        s->error = -EIO;
    }
 }
@@ -636,7 +607,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
    aio_context_acquire(aio_context);
    s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_RUNNING) {
+    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
        error_setg(errp, "Block replication is not running");
        aio_context_release(aio_context);
        return;
@@ -644,7 +615,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

    switch (s->mode) {
    case REPLICATION_MODE_PRIMARY:
-        s->stage = BLOCK_REPLICATION_DONE;
+        s->replication_state = BLOCK_REPLICATION_DONE;
        s->error = 0;
        break;
    case REPLICATION_MODE_SECONDARY:
@@ -659,15 +630,15 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

        if (!failover) {
            secondary_do_checkpoint(s, errp);
-            s->stage = BLOCK_REPLICATION_DONE;
+            s->replication_state = BLOCK_REPLICATION_DONE;
            aio_context_release(aio_context);
            return;
        }

-        s->stage = BLOCK_REPLICATION_FAILOVER;
+        s->replication_state = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            NULL, replication_done, bs, true, errp);
+                            replication_done, bs, errp, true);
        break;
    default:
        aio_context_release(aio_context);
@@ -683,7 +654,6 @@ BlockDriver bdrv_replication = {

    .bdrv_open                  = replication_open,
    .bdrv_close                 = replication_close,
-    .bdrv_child_perm            = replication_child_perm,

    .bdrv_getlength             = replication_getlength,
    .bdrv_co_readv              = replication_co_readv,
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .10.50
 .8.0