Compare commits

97 Commits

Michael Roth
ba87166e14 Update version for 2.10.2 release
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-18 10:09:38 -06:00
Laurent Vivier
b7d059b91f spapr: don't initialize PATB entry if max-cpu-compat < power9
If KVM is enabled and the KVM MMU radix capability is available,
the partition table entry (patb_entry) for radix mode is
initialized by default in ppc_spapr_reset().

This is a problem if we want to migrate the guest to a POWER8 host
before the kernel has started and set the value to the one
expected for a POWER8 CPU.

The "-machine max-cpu-compat=power8" option should allow migrating
a POWER9 KVM host to a POWER8 KVM host, but because patb_entry
is set, the destination QEMU tries to enable radix mode on the
POWER8 host. This fails and cancels the migration:

    Process table config unsupported by the host
    error while loading state for instance 0x0 of device 'spapr'
    load of migration failed: Invalid argument

This patch doesn't set the PATB entry if the user provides
a CPU compatibility mode that doesn't support radix mode.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit 1481fe5fcf)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-15 09:36:56 -06:00
Suraj Jitindar Singh
2f3e3890c4 target/ppc: Update setting of cpu features to account for compat modes
The device tree nodes ibm,arch-vec-5-platform-support and ibm,pa-features
are used to communicate features of the cpu to the guest operating
system. The properties of each of these are determined based on the
selected cpu model and the availability of hypervisor features.
Currently the compatibility mode of the cpu is not taken into account.

The ibm,arch-vec-5-platform-support node is used to communicate the
level of support for various ISAv3 processor features to the guest
before CAS, to inform the guest's CAS request. The available mmu mode
should only be hash unless the cpu is a POWER9 that is not in a
pre-POWER9 compat mode, in which case the available modes depend on the
accelerator and the hypervisor capabilities.

The ibm,pa-features node is used to communicate the level of cpu support
for various features to the guest OS. This should only contain features
relevant to the operating mode of the processor, that is, the selected
cpu model taking into account any compat mode. This means that the
compat mode should be taken into account when choosing the properties of
ibm,pa-features, and they should match the compat mode selected, or the
cpu model selected if no compat mode is set.

Update the setting of these cpu features in the device tree as described
above to properly take into account any compat mode. We use the
ppc_check_compat function which takes into account the current processor
model and the cpu compat mode.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit 7abd43baec)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-15 09:36:56 -06:00
Alex Williamson
26c1b49d56 vfio: Fix vfio-kvm group registration
Commit 8c37faa475 ("vfio-pci, ppc64/spapr: Reorder group-to-container
attaching") moved registration of groups with the vfio-kvm device from
vfio_get_group() to vfio_connect_container(), but it missed the case
where a group is attached to an existing container and takes an early
exit.  Perhaps this is a less common case on ppc64/spapr, but on x86
(without viommu) all groups are connected to the same container and
thus only the first group gets registered with the vfio-kvm device.
This becomes a problem if we then hot-unplug the devices associated
with that first group and we end up with KVM being misinformed about
any vfio connections that might remain.  Fix by including the call to
vfio_kvm_device_add_group() in this early exit path.

Fixes: 8c37faa475 ("vfio-pci, ppc64/spapr: Reorder group-to-container attaching")
Cc: qemu-stable@nongnu.org # qemu-2.10+
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
(cherry picked from commit 2016986aed)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-14 20:18:13 -06:00
David Gibson
5f214279d4 spapr: Include "pre-plugged" DIMMS in ram size calculation at reset
At guest reset time, we allocate a hash page table (HPT) for the guest
based on the guest's RAM size.  If dynamic HPT resizing is not available,
we use the maximum RAM size; if it is, we use the current RAM size.

But the "current RAM size" calculation is incorrect - we just use the
"base" ram_size from the machine structure.  This doesn't include any
pluggable DIMMs that are already plugged at reset time.

This means that if you try to start a 'pseries' machine with a DIMM
specified on the command line that's much larger than the "base" RAM size,
then the guest will get a woefully inadequate HPT.  This can lead to a
guest freeze during boot as it runs out of HPT space during initial MMU
setup.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
(cherry picked from commit 768a20f3a4)
*drop dep on 9aa3397f
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 18:12:45 -06:00
Gerd Hoffmann
9c7714afd7 vga: handle cirrus vbe mode wraparounds.
Commit "3d90c62548 vga: stop passing pointers to vga_draw_line*
functions" is incomplete.  It doesn't handle the case that the vga
rendering code tries to create a shared surface, i.e. a pixman image
backed by vga video memory.  That can not work in case the guest display
wraps from end of video memory to the start.  So force shadowing in that
case.  Also adjust the snapshot region calculation.

Can trigger with cirrus only, when programming vbe modes using the bochs
api (stdvga, also qxl and virtio-vga in vga compat mode) wrap arounds
can't happen.

Fixes: CVE-2017-13672
Fixes: 3d90c62548
Cc: P J P <ppandit@redhat.com>
Reported-by: David Buchanan <d@vidbuchanan.co.uk>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20171010141323.14049-3-kraxel@redhat.com
(cherry picked from commit 28f77de26a)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 13:01:53 -06:00
Gerd Hoffmann
a0ad811956 vga: drop line_offset variable
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 362f811793)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 13:01:50 -06:00
Eric Blake
b81833fe7d nbd/client: Don't hard-disconnect on ESHUTDOWN from server
The NBD spec says that a server may fail any transmission request
with ESHUTDOWN when it is apparent that no further request from
the client can be successfully honored.  The client is supposed
to then initiate a soft shutdown (wait for all remaining in-flight
requests to be answered, then send NBD_CMD_DISC).  However, since
qemu's server never uses ESHUTDOWN errors, this code was mostly
untested since its introduction in commit b6f5d3b5.

More recently, I learned that nbdkit as the NBD server is able to
send ESHUTDOWN errors, so I finally tested this code, and noticed
that our client was special-casing ESHUTDOWN to cause a hard
shutdown (immediate disconnect, with no NBD_CMD_DISC), but only
if the server sends this error as a simple reply.  Further
investigation found that commit d2febedb introduced a regression
where structured replies behave differently than simple replies -
but that the structured reply behavior is more in line with the
spec (even if we still lack code in nbd-client.c to properly quit
sending further requests).  So this patch reverts the portion of
b6f5d3b5 that introduced an improper hard-disconnect special-case
at the lower level, and leaves the future enhancement of a nicer
soft-disconnect at the higher level for another day.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20171113194857.13933-1-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
(cherry picked from commit 01b05c66a3)
 Conflicts:
	nbd/client.c
*drop dep on d2febedb
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 12:33:46 -06:00
Eric Blake
0fd80ef569 nbd-client: Refuse read-only client with BDRV_O_RDWR
The NBD spec says that clients should not try to write/trim to
an export advertised as read-only by the server.  But we failed
to check that, and would allow the block layer to use NBD with
BDRV_O_RDWR even when the server is read-only, which meant we
were depending on the server sending a proper EPERM failure for
various commands, and also exposes a leaky abstraction: using
qemu-io in read-write mode would succeed on 'w -z 0 0' because
of local short-circuiting logic, but 'w 0 0' would send a
request over the wire (where it then depends on the server, and
fails at least for qemu-nbd but might pass for other NBD
implementations).

With this patch, a client MUST request read-only mode to access
a server that is doing a read-only export, or else it will get
a message like:

can't open device nbd://localhost:10809/foo: request for write access conflicts with read-only export

It is no longer possible to even attempt writes over the wire
(including the corner case of 0-length writes), because the block
layer enforces the explicit read-only request; this matches the
behavior of qcow2 when backed by a read-only POSIX file.

Fix several iotests to comply with the new behavior (since
qemu-nbd of an internal snapshot, as well as nbd-server-add over QMP,
default to a read-only export, we must tell blockdev-add/qemu-io to
set up a read-only client).

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20171108215703.9295-3-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
(cherry picked from commit 1104d83c72)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:54:08 -06:00
Vladimir Sementsov-Ogievskiy
b01b1609e6 nbd/server: fix nbd_negotiate_handle_info
namelen should be used here; length is unrelated, and always 0 at this
point.  Broken since its introduction in commit f37708f6, but mostly
harmless (replying with '' as the name does not violate the protocol,
and does not confuse qemu as the nbd client since our implementation
does not ask for the name; but it might confuse some other client that
does ask for the name, especially if the default export is different
from the export name being queried).

Adding an assert makes it obvious that we are not skipping any bytes
in the client's message, as well as making it obvious that we were
using the wrong variable.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
CC: qemu-stable@nongnu.org
Message-Id: <20171101154204.27146-1-vsementsov@virtuozzo.com>
[eblake: improve commit message, squash in assert addition]
Signed-off-by: Eric Blake <eblake@redhat.com>

(cherry picked from commit 46321d6b5f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:49:26 -06:00
Greg Kurz
82ded5166b vhost: fix error check in vhost_verify_ring_mappings()
Since commit f1f9e6c5 "vhost: adapt vhost_verify_ring_mappings() to
virtio 1 ring layout", we check the mapping of each part (descriptor
table, available ring and used ring) of each virtqueue separately.

The checking of a part is done by the vhost_verify_ring_part_mapping()
function: it returns either 0 on success or a negative errno if the
part cannot be mapped at the same place.

Unfortunately, the vhost_verify_ring_mappings() function checks its
return value the other way round. It means that we either:
- only verify the descriptor table of the first virtqueue, and if it
  is valid we ignore all the other mappings
- or ignore all broken mappings until we reach a valid one

i.e., we only raise an error if all mappings are broken, and we consider
all mappings valid otherwise (false success), which is obviously
wrong.

This patch ensures that vhost_verify_ring_mappings() only returns
success if ALL mappings are okay.

Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 2fe45ec3bf)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:47:33 -06:00
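
To make the inverted check concrete, here is a minimal standalone C
sketch of the loop shape; the names are illustrative stand-ins, not the
actual hw/virtio/vhost.c code:

    #include <stdio.h>

    /* Stand-in for vhost_verify_ring_part_mapping(): 0 on success,
     * negative errno if the part cannot be mapped at the same place. */
    static int verify_part(int part_ok)
    {
        return part_ok ? 0 : -1;
    }

    /* The broken shape effectively stopped at the first SUCCESS and
     * reported overall success.  The correct shape stops at the first
     * FAILURE: */
    static int verify_rings_sketch(const int parts_ok[], int nparts)
    {
        for (int i = 0; i < nparts; i++) {
            int r = verify_part(parts_ok[i]);
            if (r) {
                return r;   /* propagate the first broken mapping */
            }
        }
        return 0;           /* success only if ALL mappings are okay */
    }

    int main(void)
    {
        int parts[] = { 1, 0, 1 };  /* second part broken */
        printf("%d\n", verify_rings_sketch(parts, 3));  /* prints -1 */
        return 0;
    }
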
Eric Blake
227196c1e7 nbd/server: CVE-2017-15118 Stack smash on large export name
Introduced in commit f37708f6b8 (2.10).  The NBD spec says a client
can request export names up to 4096 bytes in length, even though
they should not expect success on names longer than 256.  However,
qemu hard-codes the limit of 256, and fails to filter out a client
that probes for a longer name; the result is a stack smash that can
potentially give an attacker arbitrary control over the qemu
process.

The smash can be easily demonstrated with this client:
$ qemu-io -f raw nbd://localhost:10809/$(printf %3000d 1 | tr ' ' a)

If the qemu NBD server binary (whether the standalone qemu-nbd, or
the builtin server of QMP nbd-server-start) was compiled with
-fstack-protector-strong, the ability to exploit the stack smash
into arbitrary execution is a lot more difficult (but still
theoretically possible to a determined attacker, perhaps in
combination with other CVEs).  Still, crashing a running qemu (and
losing the VM) is bad enough, even if the attacker did not obtain
full execution control.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 51ae4f8455)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:41:27 -06:00
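
The shape of the fix can be sketched as a standalone C program:
validate the client-supplied length against the fixed-size buffer
before copying. This only illustrates the bounds check; it is not the
code from nbd/server.c:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NBD_MAX_NAME_SIZE 256   /* qemu's hard-coded name limit */

    static int handle_export_name_sketch(const char *buf, uint32_t namelen)
    {
        char name[NBD_MAX_NAME_SIZE + 1];

        if (namelen > NBD_MAX_NAME_SIZE) {
            return -1;   /* reject the client; no write past the buffer */
        }
        memcpy(name, buf, namelen);   /* now provably in bounds */
        name[namelen] = '\0';
        return 0;
    }

    int main(void)
    {
        char big[3000];
        memset(big, 'a', sizeof(big));
        /* A 3000-byte name must be rejected, not smashed onto the stack. */
        printf("%d\n", handle_export_name_sketch(big, sizeof(big)));
        return 0;
    }
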
Eric Blake
2ce8993512 nbd/server: CVE-2017-15119 Reject options larger than 32M
The NBD spec gives us permission to abruptly disconnect on clients
that send outrageously large option requests, rather than having
to spend the time reading to the end of the option.  No real
option request requires that much data anyways; and meanwhile, we
already have the practice of abruptly dropping the connection on
any client that sends NBD_CMD_WRITE with a payload larger than 32M.

For comparison, nbdkit drops the connection on any request with
more than 4096 bytes; however, that limit is probably too low
(as the NBD spec states an export name can theoretically be up
to 4096 bytes, which means a valid NBD_OPT_INFO could be even
longer) - even if qemu doesn't permit export names longer than
256 bytes.

It could be argued that a malicious client trying to get us to
read nearly 4G of data on a bad request is a form of denial of
service.  In particular, if the server requires TLS, but a client
that does not know the TLS credentials sends any option (other
than NBD_OPT_STARTTLS or NBD_OPT_EXPORT_NAME) with a stated
payload of nearly 4G, then the server was keeping the connection
alive trying to read all the payload, tying up resources that it
would rather be spending on a client that can get past the TLS
handshake.  Hence, this warranted a CVE.

Present since at least 2.5 when handling known options, and made
worse in 2.6 when fixing support for NBD_FLAG_C_FIXED_NEWSTYLE
to handle unknown options.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit fdad35ef6c)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:41:22 -06:00
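
A standalone sketch of the resulting policy (the constant name and
message are illustrative; the real nbd/server.c logic differs in
detail):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same 32M cap already applied to NBD_CMD_WRITE payloads. */
    #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)

    /* Instead of reading (and discarding) up to ~4G of option payload
     * from an untrusted client, drop the connection outright. */
    static int check_option_length_sketch(uint32_t length)
    {
        if (length > NBD_MAX_BUFFER_SIZE) {
            fprintf(stderr, "option payload %" PRIu32 " exceeds %d, "
                    "dropping connection\n", length, NBD_MAX_BUFFER_SIZE);
            return -1;   /* abrupt disconnect, permitted by the NBD spec */
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_option_length_sketch(4096));        /*  0 */
        printf("%d\n", check_option_length_sketch(0xfffffff0u)); /* -1 */
        return 0;
    }
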
Jason Wang
c2269a0b54 virtio-net: don't touch virtqueue if vm is stopped
Guest state should not be touched if the VM is stopped; unfortunately we
didn't check the running state and tried to drain the tx queue
unconditionally in virtio_net_set_status(). A crash was then noticed on
a migration destination when the user typed quit after the virtqueue
state was loaded but before the region cache was initialized. In this
case, virtio_net_drop_tx_queue_data() tries to access the uninitialized
region cache.

Fix this by only dropping tx queue data when the vm is running.

Fixes: 283e2c2adc ("net: virtio-net discards TX data after link down")
Cc: Yuri Benditovich <yuri.benditovich@daynix.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 70e53e6e4d)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:40:52 -06:00
Peter Lieven
30e499bdc9 block/nfs: fix nfs_client_open for filesize greater than 1TB
DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE) was overflowing ret (int) if
st.st_size is greater than 1TB.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-id: 1511798407-31129-1-git-send-email-pl@kamp.de
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit f1a7ff770f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:40:10 -06:00
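
The overflow is easy to reproduce standalone: DIV_ROUND_UP on a 2TB
st.st_size yields 2^32 sectors, which does not fit in a 32-bit int. A
minimal sketch (not the block/nfs.c code):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BDRV_SECTOR_SIZE 512LL
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int64_t st_size = 2LL * 1024 * 1024 * 1024 * 1024;  /* 2 TB */

        /* 2TB / 512 = 2^32: truncated when stored in an int
         * (implementation-defined; typically 0 on common ABIs). */
        int ret = DIV_ROUND_UP(st_size, BDRV_SECTOR_SIZE);
        int64_t ret64 = DIV_ROUND_UP(st_size, BDRV_SECTOR_SIZE);

        printf("int:     %d\n", ret);
        printf("int64_t: %" PRId64 "\n", ret64);  /* 4294967296 */
        return 0;
    }
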
Michael Roth
e1a2a27327 scripts/make-release: ship u-boot source as a tarball
The u-boot sources we ship currently cause problems with unpacking on
a case-insensitive filesystem due to path conflicts. This has been
fixed in upstream u-boot via commit 610eec7f, but since it is not
yet included in an official release we implement this approach as a
temporary workaround.

Once we move to a u-boot containing commit 610eec7f we should revert
this patch.

Cc: qemu-stable@nongnu.org
Cc: Alexander Graf <agraf@suse.de>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 20171107205201.10207-1-mdroth@linux.vnet.ibm.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit d0dead3b6d)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:34:07 -06:00
Greg Kurz
a77c5873fe spapr: reset DRCs after devices
A DRC with a pending unplug request releases its associated device at
machine reset time.

In the case of LMB, when all DRCs for a DIMM device have been reset,
the DIMM gets unplugged, causing guest memory to disappear. This may
be very confusing for anything still using this memory.

This is exactly what happens with vhost backends, and QEMU aborts
with:

qemu-system-ppc64: used ring relocated for ring 2
qemu-system-ppc64: qemu/hw/virtio/vhost.c:649: vhost_commit: Assertion
 `r >= 0' failed.

The issue is that each DRC registers a QEMU reset handler, and we
don't control the order in which these handlers are called (i.e.,
an LMB DRC will unplug a DIMM before the virtio device using the
memory on this DIMM can stop its vhost backend).

To avoid such situations, let's reset DRCs after all devices
have been reset.

Reported-by: Mallesh N. Koti <mallesh@linux.vnet.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Reviewed-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit 8251248394)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:00:58 -06:00
Daniel Henrique Barboza
0a5a2b938a hw/ppc: clear pending_events on machine reset
The sPAPR machine isn't clearing up the pending events QTAILQ on
machine reboot. This allows unprocessed hotplug/epow events
to persist in the queue after reset and, when the IRQs are reasserted in
check_exception later on, these events end up being processed by the OS.

This patch implements a new function called 'spapr_clear_pending_events'
that clears up the pending_events QTAILQ. This helper is then called
inside ppc_spapr_reset to clear up the events queue, preventing
old/stale events from persisting after a reset.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit 5625817423)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 11:00:55 -06:00
Maxime Coquelin
0bc76c8d08 vhost: restore avail index from vring used index on disconnection
vhost_virtqueue_stop() gets the avail index value from the backend,
except if the backend is not responding.

This happens when the backend crashes; in that case, the internal
state of the virtio queue is inconsistent, and packets
corrupt the vring state.

With a Linux guest, it results in following error message on
backend reconnection:

[   22.444905] virtio_net virtio0: output.0:id 0 is not a head!
[   22.446746] net enp0s3: Unexpected TXQ (0) queue failure: -5
[   22.476360] net enp0s3: Unexpected TXQ (0) queue failure: -5

Fixes: 283e2c2adc ("net: virtio-net discards TX data after link down")
Cc: qemu-stable@nongnu.org
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 2ae39a113a)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:55:22 -06:00
Maxime Coquelin
059422ddbc virtio: Add queue interface to restore avail index from vring used index
In case of a backend crash, it is not possible to restore the internal
avail index from the backend value, as the vhost_get_vring_base
callback fails.

This patch provides a new interface to restore the internal avail index
from the vring used index, as done by some vhost-user backends on
reconnection.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 2d4ba6cc74)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:55:16 -06:00
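
A minimal sketch of the idea behind the new interface; the struct and
function names are stand-ins, not qemu's VirtQueue:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint16_t last_avail_idx;
        uint16_t shadow_avail_idx;
        uint16_t used_idx;
    } VQSketch;

    /* When vhost_get_vring_base() can't be trusted (backend crashed),
     * fall back to the used index: treat everything the device has
     * consumed as the resume point for the avail ring. */
    static void restore_avail_from_used_sketch(VQSketch *vq)
    {
        vq->last_avail_idx = vq->used_idx;
        vq->shadow_avail_idx = vq->used_idx;
    }

    int main(void)
    {
        VQSketch vq = { .last_avail_idx = 3, .shadow_avail_idx = 3,
                        .used_idx = 7 };
        restore_avail_from_used_sketch(&vq);
        printf("%u\n", vq.last_avail_idx);  /* 7 */
        return 0;
    }
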
Max Reitz
d6c99e8ff5 util/stats64: Fix min/max comparisons
stat64_min_slow() and stat64_max_slow() compare the wrong way.  This
makes iotest 136 fail with clang and -m32.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20171114232223.25207-1-mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 26a5db322b)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:53:22 -06:00
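
The bug class is simply a flipped comparison; a standalone sketch of
the intended direction (stand-in code, not util/stats64.c):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Keep the running minimum.  The broken variant compared the other
     * way round ("value > *cur"), so it tracked the maximum instead. */
    static void stat64_min_sketch(uint64_t *cur, uint64_t value)
    {
        if (value < *cur) {
            *cur = value;
        }
    }

    int main(void)
    {
        uint64_t min = 100;
        stat64_min_sketch(&min, 42);
        printf("%" PRIu64 "\n", min);  /* 42 */
        return 0;
    }
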
Eric Blake
56a10ff664 nbd/client: Use error_prepend() correctly
When using error_prepend(), it is necessary to end with a space
in the format string; otherwise, messages come out incorrectly,
such as when connecting to a socket that hangs up immediately:

can't open device nbd://localhost:10809/: Failed to read dataUnexpected end-of-file before all bytes were read

Originally botched in commit e44ed99d, then several more instances
added in the meantime.

Pre-existing and not fixed here: we are inconsistent on capitalization;
some of our messages start with lower case, and others start with upper,
although the use of error_prepend() is much nicer to read when all
fragments consistently start with lower.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20171113152424.25381-1-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
(cherry picked from commit cb6b1a3fc3)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:50:49 -06:00
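
A toy stand-in shows why the trailing space matters: the prefix is
glued directly in front of the existing message with no separator.
This is illustrative only; qemu's real error_prepend() operates on
Error objects, not raw strings:

    #define _GNU_SOURCE   /* for asprintf (GNU extension) */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static void error_prepend_sketch(char **msg, const char *prefix)
    {
        char *out;
        if (asprintf(&out, "%s%s", prefix, *msg) < 0) {
            abort();
        }
        free(*msg);
        *msg = out;
    }

    int main(void)
    {
        char *msg = strdup("Unexpected end-of-file before all bytes were read");
        /* "Failed to read data" (no trailing separator) would render as
         * "...read dataUnexpected end-of-file...". */
        error_prepend_sketch(&msg, "Failed to read data: ");
        puts(msg);
        free(msg);
        return 0;
    }
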
Jens Freimann
69f562ad9e net: fix check for number of parameters to -netdev socket
Since commit 0f8c289ad "net: fix -netdev socket,fd= for UDP sockets"
we allow more than one parameter for -netdev socket. But now
we run into an assertion failure when no parameter at all is specified:

> qemu-system-x86_64 -netdev socket
socket.c:729: net_init_socket: Assertion `sock->has_udp' failed.

Fix this by reverting the change of the if condition done in 0f8c289ad.

Cc: Jason Wang <jasowang@redhat.com>
Cc: qemu-stable@nongnu.org
Fixes: 0f8c289ad5
Reported-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
Signed-off-by: Jens Freimann <jfreimann@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit ff86d57625)
 Conflicts:
	net/socket.c
* drop context dep on 0522a959
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:49:07 -06:00
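
The reverted condition boils down to requiring exactly one of the
mutually exclusive options; a standalone sketch (the option flags are
abbreviated to ints, not qemu's NetdevSocketOptions):

    #include <stdio.h>

    /* "> 1" (the regression) accepted zero options and hit the assert
     * later; "!= 1" rejects both zero and more than one. */
    static int validate_sketch(int has_fd, int has_listen, int has_connect,
                               int has_mcast, int has_udp)
    {
        int n = has_fd + has_listen + has_connect + has_mcast + has_udp;
        return n != 1 ? -1 : 0;
    }

    int main(void)
    {
        printf("%d\n", validate_sketch(0, 0, 0, 0, 0));  /* -1: none given  */
        printf("%d\n", validate_sketch(0, 0, 1, 0, 0));  /*  0: exactly one */
        printf("%d\n", validate_sketch(1, 0, 1, 0, 0));  /* -1: too many    */
        return 0;
    }
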
Jens Freimann
957bd48acf net/socket: fix coverity issue
This fixes coverity issue CID1005339.

Make sure that saddr is not used uninitialized if the
mcast parameter is NULL.

Cc: qemu-stable@nongnu.org
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jens Freimann <jfreimann@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit bb160b571f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:43:29 -06:00
Eric Auger
3a82a03a2e hw/intc/arm_gicv3_its: Don't abort on table save failure
The ITS is not properly reset at the moment: caches are
not emptied.

After a reset, in case we attempt to save the state before
the bound devices have registered their MSIs and after the
1st level table has been allocated by the ITS driver
(device BASER is valid), the first level entries are still
invalid. If the device cache is not empty (devices registered
before the reset), vgic_its_save_device_tables fails with -EINVAL.
This causes a QEMU abort().

Cc: qemu-stable@nongnu.org
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reported-by: wanghaibin <wanghaibin.wang@huawei.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 8a7348b5d6)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:42:12 -06:00
Peter Maydell
b637b865ed translate.c: Fix usermode big-endian AArch32 LDREXD and STREXD
For AArch32 LDREXD and STREXD, architecturally the 32-bit word at the
lowest address is always Rt and the one at addr+4 is Rt2, even if the
CPU is big-endian. Our implementation does these with a single
64-bit store, so if we're big-endian then we need to put the two
32-bit halves together in the opposite order to little-endian,
so that they end up in the right places. We were trying to do
this with the gen_aa32_frob64() function, but that is not correct
for the usermode emulator, because there a distinction exists
between "load a 64 bit value" (which does a BE 64-bit access
and doesn't need swapping) and "load two 32 bit values as one
64 bit access" (where we still need to do the swapping, like
system mode BE32).

Fixes: https://bugs.launchpad.net/qemu/+bug/1725267
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 1509622400-13351-1-git-send-email-peter.maydell@linaro.org
(cherry picked from commit 3448d47b31)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:41:14 -06:00
Greg Kurz
3342fd0286 ppc: fix setting of compat mode
While trying to make KVM PR usable again, commit 5dfaa532ae introduced a
regression: the current compat_pvr value is passed to KVM instead of the
new one. This means that we always pass 0 instead of the max-cpu-compat
PVR during the initial machine reset. And at CAS time, we either pass
the PVR from the command line or even don't call kvmppc_set_compat() at
all, i.e., the PCR will not be set as expected.

For example if we start a big endian fedora26 guest in power7 compat
mode on a POWER8 host, we get this in the guest:

$ cat /proc/cpuinfo
processor       : 0
cpu             : POWER7 (architected), altivec supported
clock           : 4024.000000MHz
revision        : 2.0 (pvr 004d 0200)

timebase        : 512000000
platform        : pSeries
model           : IBM pSeries (emulated by qemu)
machine         : CHRP IBM pSeries (emulated by qemu)
MMU             : Hash

but the guest can still execute POWER8 instructions, and the following
program succeeds:

int main()
{
        asm("vncipher 0,0,0"); // ISA 2.07 instruction
}

Let's pass the new compat_pvr to kvmppc_set_compat(); the program then
fails with SIGILL as expected.

Reported-by: Nageswara R Sastry <rnsastry@linux.vnet.ibm.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit e4f0c6bb1a)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:38:59 -06:00
Daniel P. Berrange
e0809fcc4b io: monitor encoutput buffer size from websocket GSource
The websocket GSource is monitoring the size of the rawoutput
buffer to determine if the channel can accept more writes.
The rawoutput buffer, however, is merely a temporary staging
buffer before data is copied into the encoutput buffer. Thus
its size will always be zero when the GSource runs.

This flaw causes the encoutput buffer to grow without bound
if the other end of the underlying data channel doesn't
read data being sent. This can be seen with VNC if a client
is on a slow WAN link and the guest OS is sending many screen
updates. A malicious VNC client can act like it is on a slow
link by playing a video in the guest and then reading data
very slowly, causing QEMU host memory to expand arbitrarily.

This issue is assigned CVE-2017-15268, publicly reported in

  https://bugs.launchpad.net/qemu/+bug/1718964

(cherry picked from commit a7b20a8efa)

Reviewed-by: Eric Blake <eblake@redhat.com>

[Dan: Added extra checks to deal with code refactored in master but
 not stable 2.10]

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:38:20 -06:00
Paolo Bonzini
e31942b486 nios2: define tcg_env
This should be done by all targets and, since commit 53f6672bcf
("gen-icount: use tcg_ctx.tcg_env instead of cpu_env", 2017-06-30),
not doing so causes the NIOS2 target to hang.

This is because the test for "should I exit to the main loop"
was being done with the correct offset to the icount decrementer,
but using TCG temporary 0 (the frame pointer) rather than the
env pointer.

Cc: qemu-stable@nongnu.org
Cc: Marek Vasut <marex@denx.de>
Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 17bd9597be)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-06 09:32:04 -06:00
Max Reitz
5aa698ab5f iotests: Add cluster_size=64k to 125
Apparently it would be a good idea to test that, too.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171009215533.12530-4-mreitz@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit 4c112a397c)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-05 19:41:16 -06:00
Max Reitz
39475b8805 qcow2: Always execute preallocate() in a coroutine
Some qcow2 functions (at least perform_cow()) expect s->lock to be
taken.  Therefore, if we want to make use of them, we should execute
preallocate() (as "preallocate_co") in a coroutine so that we can use
the qemu_co_mutex_* functions.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171009215533.12530-3-mreitz@redhat.com
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit 572b07bea1)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-05 19:40:38 -06:00
Max Reitz
a25aca75f8 qcow2: Fix unaligned preallocated truncation
A qcow2 image file is not required to have a length that is a
multiple of the cluster size.  However, qcow2_refcount_area() expects an
aligned value for its @start_offset parameter, so we need to round
@old_file_size up to the next cluster boundary.

Reported-by: Ping Li <pingl@redhat.com>
Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1414049
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20171009215533.12530-2-mreitz@redhat.com
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit e400ad1e1f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-05 19:40:33 -06:00
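
The rounding itself is a one-liner; a standalone sketch with made-up
numbers (a 64 KiB cluster size and an unaligned file length):

    #include <stdint.h>
    #include <stdio.h>

    /* Round n up to the next multiple of d. */
    #define ROUND_UP(n, d) ((((n) + (d) - 1) / (d)) * (d))

    int main(void)
    {
        int64_t cluster_size = 64 * 1024;
        int64_t old_file_size = 3 * cluster_size + 123;  /* unaligned */

        printf("%lld -> %lld\n", (long long)old_file_size,
               (long long)ROUND_UP(old_file_size, cluster_size));
        return 0;
    }
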
Michael Olbrich
64f62e4e90 hw/sd: fix out-of-bounds check for multi block reads
The current code checks if the next block exceeds the size of the card.
This generates an error while reading the last block of the card.
Do the out-of-bounds check when starting to read a new block to fix this.

This issue became visible with increased error checking in Linux 4.13.

Cc: qemu-stable@nongnu.org
Signed-off-by: Michael Olbrich <m.olbrich@pengutronix.de>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 20170916091611.10241-1-m.olbrich@pengutronix.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 8573378e62)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-05 19:39:35 -06:00
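
The essence of the fix is where the bounds check runs; a standalone
sketch (not the hw/sd/sd.c code): checking the current block instead of
the next one lets a read of the very last block succeed.

    #include <stdint.h>
    #include <stdio.h>

    static int read_block_sketch(uint64_t addr, uint64_t blk_len,
                                 uint64_t card_size)
    {
        /* Check the block we are ABOUT to read.  The old code in effect
         * rejected when the NEXT block would be out of range, which
         * also failed a legitimate read of the last block. */
        if (addr + blk_len > card_size) {
            return -1;
        }
        /* ... transfer blk_len bytes starting at addr ... */
        return 0;
    }

    int main(void)
    {
        uint64_t size = 8 * 512;  /* tiny 8-block card */
        printf("%d\n", read_block_sketch(7 * 512, 512, size));  /*  0 */
        printf("%d\n", read_block_sketch(8 * 512, 512, size));  /* -1 */
        return 0;
    }
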
Maxime Coquelin
d765c5e577 memory: fix off-by-one error in memory_region_notify_one()
This patch fixes an off-by-one error that could lead to the
notifiee receiving notifications for ranges it is not
registered to.

The bug has been spotted by code review.

Fixes: bd2bfa4c52 ("memory: introduce memory_region_notify_one()")
Cc: qemu-stable@nongnu.org
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-4-maxime.coquelin@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b021d1c044)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:42:41 -06:00
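
An inclusive-range overlap test illustrates the class of bug
(hypothetical sketch, not the memory.c code): the IOTLB entry covers
[iova, iova + addr_mask] inclusive, so treating iova + addr_mask + 1 as
part of the entry notifies one unit too many.

    #include <stdint.h>
    #include <stdio.h>

    static int ranges_overlap_sketch(uint64_t start, uint64_t end,
                                     uint64_t iova, uint64_t addr_mask)
    {
        uint64_t entry_end = iova + addr_mask;  /* inclusive last byte */
        return !(start > entry_end || end < iova);
    }

    int main(void)
    {
        /* Entry [0x1000, 0x1fff] vs notifier [0x2000, 0x2fff]: adjacent
         * but NOT overlapping; an off-by-one on entry_end would claim
         * they overlap and leak a spurious notification. */
        printf("%d\n", ranges_overlap_sketch(0x2000, 0x2fff,
                                             0x1000, 0xfff));  /* 0 */
        return 0;
    }
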
Peter Xu
ae13e2cfa8 exec: simplify address_space_get_iotlb_entry
This patch lets address_space_get_iotlb_entry() use the newly
introduced page_mask parameter in flatview_do_translate(). Then we
can be sure the IOTLB is aligned to the page mask, and we also nicely
support huge pages, which commit a764040 introduced.

Fixes: a764040 ("exec: abstract address_space_do_translate()")
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20171010094247.10173-3-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 076a93d797)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:42:34 -06:00
Peter Xu
c9dbe3e0fc exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is the (xlat, plen) range. However, for iotlb
requests, we don't really care about "plen" but about the size of the
page that "xlat" is located on, and "plen" cannot carry this information.

A simple example to show why "plen" is not good for IOTLB translations:

E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:

  0x100000000 - 0x13fffffff

Then let's say we want to translate one IOVA that is finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:

  (xlat, plen) = (0x13ffffe00, 0x200)

So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.

Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().

This patch introduces an optional "page_mask" parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.

No functional change yet.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d5e5fafd11)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:42:28 -06:00
Alexey Kardashevskiy
496f97293e memory: Share special empty FlatView
This shares a cached empty FlatView among address spaces. The empty
FV is used every time a root MR renders into a FV without memory
sections, which happens when the MR or its children are not enabled or
are zero-sized. The empty_view is not NULL, to keep the rest of the
memory API intact; it also has a dispatch tree for the same reason.

On POWER8 with a 255-CPU, 255 virtio-net, 40 PCI bridge guest, this halves
the number of FlatViews in use (557 -> 260) and dispatch tables
(~800000 -> ~370000).  In an unrelated experiment with 112 non-virtio
devices on x86 ("-M pc"), only 4 FlatViews are alive, and about ~2000
are created at startup.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-16-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 092aa2fc65)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:42:02 -06:00
Paolo Bonzini
639701e4f2 memory: seek FlatView sharing candidates among children subregions
A container can be used instead of an alias to allow switching between
multiple subregions.  In this case we cannot directly share the
subregions (since they only belong to a single parent), but if the
subregions are aliases we can in turn walk those.

This is not enough to remove all sources of quadratic FlatView creation,
but it enables sharing of the PCI bus master FlatViews (and their
AddressSpaceDispatch structures) across all PCI devices.  For 112
virtio-net-pci devices, boot time is reduced from 25 to 10 seconds and
memory consumption from 1.4 to 1 G.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e673ba9af9)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:41:57 -06:00
Paolo Bonzini
5dbd1f7884 memory: trace FlatView creation and destruction
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 02d9651d6a)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:41:52 -06:00
Alexey Kardashevskiy
5b5e49ab5f memory: Create FlatView directly
This avoids usual memory_region_transaction_commit() which rebuilds
all FVs.

On POWER8 with a 255-CPU, 255 virtio-net, 40 PCI bridge guest, this brings
the boot time down from 25s to 20s and reduces the number of temporary FVs
allocated during machine construction (~800000 -> ~640000) and the number
of temporary dispatch trees (~370000 -> ~300000); the total memory
footprint goes down (18G -> 17G).

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-18-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 202fc01b05)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:32:47 -06:00
Alexey Kardashevskiy
a7bb94e784 memory: Get rid of address_space_init_shareable
Since FlatViews are now shared and ASes are not, this gets rid of
address_space_init_shareable().

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-17-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b516572f31)
 Conflicts:
	target/arm/cpu.c
* drop context deps on 1d2091bc and 1e577cc7
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:32:11 -06:00
Alexey Kardashevskiy
7dd7f7ef44 memory: Do not allocate FlatView in address_space_init
This creates a new AS object without any FlatView as
memory_region_transaction_commit() may want to reuse the empty FV.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-14-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 67ace39b25)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:50 -06:00
Alexey Kardashevskiy
e8c7ea3e75 memory: Share FlatView's and dispatch trees between address spaces
This allows sharing flat views between address spaces (AS) when
the same root memory region is used when creating a new address space.
This is done by walking through all ASes and caching one FlatView per
physical root MR (i.e. not aliased).

This removes the search for duplicates from address_space_init_shareable(),
as FlatViews are shared elsewhere and keeping as::ref_count correct seems
an unnecessary and useless complication.

This should cause no change in memory use or boot time yet.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-13-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 967dc9b119)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:44 -06:00
Alexey Kardashevskiy
c943efe8b5 memory: Move address_space_update_ioeventfds
So it is called (twice) from the same function. This is to make the next
patches a bit simpler.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-12-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0221848764)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:32 -06:00
Alexey Kardashevskiy
c14ce078b2 memory: Alloc dispatch tree where topology is generated
This is to make next patches simpler.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-11-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9bf561e36c)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:19 -06:00
Alexey Kardashevskiy
260d3646b0 memory: Store physical root MR in FlatView
Address spaces get to keep a root MR (alias or not), but FlatView stores
the actual MR, as this is going to be used later on to decide whether to
share a particular FlatView or not.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-10-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 89c177bbdd)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:14 -06:00
Alexey Kardashevskiy
08101db63b memory: Rename mem_begin/mem_commit/mem_add helpers
This renames some helpers to reflect better what they do.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-9-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8629d3fcb7)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:04:00 -06:00
Alexey Kardashevskiy
eff5ed4ae9 memory: Cleanup after switching to FlatView
We store AddressSpaceDispatch* in FlatView anyway so there is no need
to carry it from mem_add() to register_subpage/register_multipage.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-8-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9950322a59)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:03:54 -06:00
Alexey Kardashevskiy
f7774e329b memory: Switch memory from using AddressSpace to FlatView
FlatViews will be shared between AddressSpaces, and subpage_t
and MemoryRegionSection cannot store the AS anymore; hence this change.

In particular, for:

 typedef struct subpage_t {
     MemoryRegion iomem;
-    AddressSpace *as;
+    FlatView *fv;
     hwaddr base;
     uint16_t sub_section[];
 } subpage_t;

  struct MemoryRegionSection {
     MemoryRegion *mr;
-    AddressSpace *address_space;
+    FlatView *fv;
     hwaddr offset_within_region;
     Int128 size;
     hwaddr offset_within_address_space;
     bool readonly;
 };

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-7-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 166206845f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:03:46 -06:00
Paolo Bonzini
3568e11940 memory: avoid "resurrection" of dead FlatViews
It's possible for address_space_get_flatview() as it currently stands
to cause a use-after-free for the returned FlatView, if the reference
count is incremented after the FlatView has been replaced by a writer:

   thread 1             thread 2             RCU thread
  -------------------------------------------------------------
   rcu_read_lock
   read as->current_map
                        set as->current_map
                        flatview_unref
                           '--> call_rcu
   flatview_ref
     [ref=1]
   rcu_read_unlock
                                             flatview_destroy
   <badness>

Since FlatViews are not updated very often, we can just detect the
situation using a new atomic op atomic_fetch_inc_nonzero, similar to
Linux's atomic_inc_not_zero, which performs the refcount increment only if
it hasn't already hit zero.  This is similar to Linux commit de09a9771a53
("CRED: Fix get_task_cred() and task_state() to not resurrect dead
credentials", 2010-07-29).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 447b0d0b9e)
 Conflicts:
	docs/devel/atomics.txt
* drop documentation ref to atomic_fetch_xor
* prereq for 166206845f
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 22:03:33 -06:00
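
The algorithm mirrors Linux's atomic_inc_not_zero; a standalone sketch
using C11 atomics (qemu has its own atomic_* wrappers, so this shows
the idea rather than the actual code):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Increment the refcount only if it hasn't already hit zero;
     * return the old value so the caller can tell the cases apart. */
    static unsigned fetch_inc_nonzero_sketch(atomic_uint *refcnt)
    {
        unsigned old = atomic_load(refcnt);
        do {
            if (old == 0) {
                return 0;   /* already dead: do not resurrect */
            }
        } while (!atomic_compare_exchange_weak(refcnt, &old, old + 1));
        return old;
    }

    int main(void)
    {
        atomic_uint ref = 1;
        printf("%u\n", fetch_inc_nonzero_sketch(&ref));  /* 1, ref now 2  */
        atomic_store(&ref, 0);
        printf("%u\n", fetch_inc_nonzero_sketch(&ref));  /* 0, ref stays 0 */
        return 0;
    }
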
Alexey Kardashevskiy
d0136db812 memory: Remove AddressSpace pointer from AddressSpaceDispatch
The AS in the ASD is only used to pass the AS from mem_begin() to
register_subpage() to store it in MemoryRegionSection; we can do this
directly now.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-6-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c775252378)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:55:04 -06:00
Alexey Kardashevskiy
4d2f8abb22 memory: Move AddressSpaceDispatch from AddressSpace to FlatView
As we are going to share FlatViews between AddressSpaces,
and AddressSpaceDispatch is a structure to perform quick lookups
in a FlatView, this moves the ASD to the FlatView.

After the previously open-coded ASD rendering, we can also remove
as->next_dispatch, as the new FlatView pointer is stored
on the stack and set on an AS atomically.

flatview_destroy() is now executed under RCU instead of
address_space_dispatch_free().

This makes mem_begin/mem_commit work with the ASD and mem_add with the FV,
as later on mem_add will take the FV as an argument anyway.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-5-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 66a6df1dc6)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:54:59 -06:00
Alexey Kardashevskiy
de7e6815b8 memory: Move FlatView allocation to a helper
This moves FlatView allocation and initialization to a helper.
While we are here, replace g_new with g_new0 so we need not worry if we
add new fields in the future.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-4-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit cc94cd6d36)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:54:44 -06:00
Alexey Kardashevskiy
1b04a15809 memory: Open code FlatView rendering
We are going to share FlatViews between AddressSpaces, and per-AS
memory listeners won't suit the purpose anymore, so open code
the dispatch tree rendering.

Since there is a good chance that dispatch_listener was the only
listener, this avoids address_space_update_topology_pass() if there are
no registered listeners; this should improve startup time.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-3-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9a62e24f45)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:54:37 -06:00
Alexey Kardashevskiy
6424975ce9 exec: Explicitly export target AS from address_space_translate_internal
This adds an AS** parameter to address_space_do_translate()
to make it easier for the next patch to share FlatViews.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Message-Id: <20170921085110.25598-2-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e76bb18f7e)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:54:29 -06:00
Eric Blake
4af42e3cf1 block: Perform copy-on-read in loop
Improve our braindead copy-on-read implementation.  Pre-patch,
we have multiple issues:
- we create a bounce buffer and perform a write for the entire
  request, even if the active image already has 99% of the
  clusters occupied, and really only needs to copy-on-read the
  remaining 1% of the clusters
- our bounce buffer was as large as the read request, and can
  needlessly exhaust our memory by using double the memory of
  the request size (the original request plus our bounce buffer),
  rather than a capped maximum overhead beyond the original
- if a driver has a max_transfer limit, we are bypassing the
  normal code in bdrv_aligned_preadv() that fragments to that
  limit, and instead attempt to read the entire buffer from the
  driver in one go, which some drivers may assert on
- a client can make a large request of nearly 2G such that
  rounding the request out to cluster boundaries results in a
  byte count larger than 2G.  While this cannot exceed 32 bits,
  it DOES have some follow-on problems:
  -- the call to bdrv_driver_pread() can assert for exceeding
     BDRV_REQUEST_MAX_BYTES, if the driver is old and lacks
     .bdrv_co_preadv
  -- if the buffer is all zeroes, the subsequent call to
     bdrv_co_do_pwrite_zeroes is a no-op due to a negative size,
     which means we did not actually copy on read

Fix all of these issues by breaking up the action into a loop,
where each iteration is capped to sane limits.  Also, querying
the allocation status allows us to optimize: when data is
already present in the active layer, we don't need to bounce.

Note that the code has a telling comment that copy-on-read
should probably be a filter driver rather than a bolt-on hack
in io.c; but that remains a task for another day.

CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit cb2e28780c)
 Conflicts:
	block/io.c
* remove context dep on d855ebcd3
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 21:36:39 -06:00
Jim Somerville
26914ce48d kvmclock: use the updated system_timer_msr
Fixes e2b6c17 (kvmclock: update system_time_msr address forcibly)
which makes a call to get the latest value of the address
stored in system_timer_msr, but then uses the old address anyway.

Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
Message-Id: <59b67db0bd15a46ab47c3aa657c81a4c11f168ea.1506702472.git.Jim.Somerville@windriver.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 346b1215b1)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 20:42:20 -06:00
Vladimir Sementsov-Ogievskiy
49958d37e7 block/mirror: check backing in bdrv_mirror_top_flush
The backing pointer may be NULL after a failed bdrv_append() in
mirror_start_job(), which leads to a SIGSEGV.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170929152255.5431-1-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit ce960aa906)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 20:40:23 -06:00
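
A sketch of the guard with toy types (the real fix lives in
bdrv_mirror_top_flush() in block/mirror.c and uses qemu's block graph
types):

    #include <stdio.h>

    typedef struct { int dummy; } BdrvChildSketch;
    typedef struct { BdrvChildSketch *backing; } BDSSketch;

    static int mirror_top_flush_sketch(BDSSketch *bs)
    {
        if (!bs->backing) {
            return 0;   /* bdrv_append() failed: nothing to flush yet */
        }
        /* ... forward the flush to the backing node ... */
        return 0;
    }

    int main(void)
    {
        BDSSketch bs = { .backing = NULL };
        printf("%d\n", mirror_top_flush_sketch(&bs));  /* 0, no SIGSEGV */
        return 0;
    }
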
Thomas Huth
b234266086 hw/usb/bus: Remove bad object_unparent() from usb_try_create_simple()
Valgrind detects an invalid read operation when hot-plugging of a
USB device fails:

$ valgrind x86_64-softmmu/qemu-system-x86_64 -device usb-ehci -nographic -S
==30598== Memcheck, a memory error detector
==30598== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==30598== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==30598== Command: x86_64-softmmu/qemu-system-x86_64 -device usb-ehci -nographic -S
==30598==
QEMU 2.10.50 monitor - type 'help' for more information
(qemu) device_add usb-tablet
(qemu) device_add usb-tablet
(qemu) device_add usb-tablet
(qemu) device_add usb-tablet
(qemu) device_add usb-tablet
(qemu) device_add usb-tablet
==30598== Invalid read of size 8
==30598==    at 0x60EF50: object_unparent (object.c:445)
==30598==    by 0x580F0D: usb_try_create_simple (bus.c:346)
==30598==    by 0x581BEB: usb_claim_port (bus.c:451)
==30598==    by 0x582310: usb_qdev_realize (bus.c:257)
==30598==    by 0x4CB399: device_set_realized (qdev.c:914)
==30598==    by 0x60E26D: property_set_bool (object.c:1886)
==30598==    by 0x61235E: object_property_set_qobject (qom-qobject.c:27)
==30598==    by 0x61000F: object_property_set_bool (object.c:1162)
==30598==    by 0x4567C3: qdev_device_add (qdev-monitor.c:630)
==30598==    by 0x456D52: qmp_device_add (qdev-monitor.c:807)
==30598==    by 0x470A99: hmp_device_add (hmp.c:1933)
==30598==    by 0x3679C3: handle_hmp_command (monitor.c:3123)

The object_unparent() here is not necessary anymore since commit
69382d8b3e ("qdev: Fix object reference leak in case device.realize()
fails"), so let's remove it now.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-id: 1506526106-30971-1-git-send-email-thuth@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit f3b2bea3c7)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-12-04 20:37:19 -06:00
Daniel Henrique Barboza
62695f60c3 hw/ppc: CAS reset on early device hotplug
This patch is a follow-up to the discussion in patch
"hw/ppc: disable hotplug before CAS is completed", which can be
found at [1].

At this moment, we do not support CPU/memory hotplug in early
boot stages, before CAS. When a hotplug occurs, the event is logged
in an internal RTAS event log queue and an IRQ pulse is fired. In
regular conditions, the guest handles the interrupt by executing
check_exception, fetching the generated hotplug event and enabling
the device for use.

In early boot, this IRQ isn't caught (SLOF does not handle hotplug
events), leaving the event in the rtas event log queue. If the guest
executes check_exception due to another hotplug event, the re-assertion
of the IRQ ends up de-queuing the first hotplug event as well. In short,
a device hotplugged before CAS is considered coldplugged by SLOF.
This leads to device misbehavior and, in some cases, guest kernel
Ooops when trying to unplug the device.

A proper fix would be to treat every device hotplugged before CAS
as a coldplugged device. This is not trivial to do with the current
code base though - the FDT is written in the guest memory at
ppc_spapr_reset and can't be retrieved without adding extra state
(fdt_size for example) that will need to be managed and migrated. Adding
the hotplugged DT in the middle of CAS negotiation via the updated DT
tree works with CPU devs, but panics the guest kernel at boot. Additional
analysis would be necessary for LMBs and PCI devices. There are
questions to be addressed at the QEMU/SLOF/kernel level about how we can
make this change in a sustainable way.

With Linux guests, a fix would be the kernel executing check_exception
at boot time, de-queueing the events that happened in early boot and
processing them. However, even if/when the newer kernels start
fetching these events at boot time, we need to take care of older
kernels that won't be doing that.

This patch works around the situation by issuing a CAS reset if a hotplugged
device is detected during CAS:

- the DRC conditions that warrant a CAS reset are the same as those that
trigger a DRC migration - the DRC must have a device attached and
the DRC state must not be equal to its ready_state. With that in mind, this
patch makes use of 'spapr_drc_needed' to determine if a CAS reset
is needed.

- In the middle of CAS negotiations, the function
'spapr_hotplugged_dev_before_cas' goes through all the DRCs to see
if any DRC requires a reset, using spapr_drc_needed. If so,
'spapr_h_cas_compose_response' returns '1', which will set
spapr->cas_reboot to true, causing the machine to reboot.

No changes are made for coldplug devices.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2017-08/msg02855.html
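
A minimal C sketch of that check, with QEMU's internal types stubbed
out (the SpaprDrc fields and list linkage here are illustrative
assumptions, not the real structure):

    #include <stdbool.h>

    /* Stub standing in for QEMU's DRC object. */
    typedef struct SpaprDrc {
        bool has_dev;              /* a device is attached to this DRC */
        int state, ready_state;
        struct SpaprDrc *next;
    } SpaprDrc;

    /* Same condition that triggers a DRC migration. */
    static bool spapr_drc_needed(const SpaprDrc *drc)
    {
        return drc->has_dev && drc->state != drc->ready_state;
    }

    /* Walk all DRCs; if any needs a reset, the caller sets
     * spapr->cas_reboot and the machine reboots. */
    static bool spapr_hotplugged_dev_before_cas(const SpaprDrc *drcs)
    {
        for (const SpaprDrc *drc = drcs; drc; drc = drc->next) {
            if (spapr_drc_needed(drc)) {
                return true;
            }
        }
        return false;
    }

    int main(void)
    {
        SpaprDrc drc = { .has_dev = true, .state = 0, .ready_state = 1 };
        return spapr_hotplugged_dev_before_cas(&drc) ? 0 : 1;
    }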

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
(cherry picked from commit 10f12e6450)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-10-03 17:40:40 -05:00
Michael Roth
7851197b81 Update version for 2.10.1 release
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-10-02 12:42:58 -05:00
Peter Lieven
547435f550 migration: disable auto-converge during bulk block migration
auto-converge and block migration currently do not play well together.
During block migration the auto-converge logic detects that ram
migration makes no progress and thus throttles down the vm until
it nearly stalls completely. Avoid this by disabling the throttling
logic during the bulk phase of the block migration.
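
A minimal sketch of the idea, with the migration state stubbed out
(the stub bodies and names are assumptions for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stubs standing in for QEMU's migration state. */
    static bool migrate_auto_converge(void) { return true; }
    static bool blk_mig_bulk_active(void)   { return true; }

    /* Skip CPU throttling while the bulk phase of block migration runs,
     * since RAM migration makes no progress by design during that phase. */
    static void maybe_throttle(void)
    {
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            puts("RAM migration stalled: throttle vCPUs");
        } else {
            puts("bulk block migration active: leave vCPUs alone");
        }
    }

    int main(void)
    {
        maybe_throttle();
        return 0;
    }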

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <1506421996-12513-1-git-send-email-pl@kamp.de>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 9ac78b6171)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:55:44 -05:00
Christian Borntraeger
17cd46fbdf s390x/cpumodel: remove ais from z14 default model-> also for 2.10.1
We disabled ais for 2.10, so let's also remove it from the z14
default model.

Fixes: 3f2d07b3b0 ("s390x/ais: for 2.10 stable: disable ais facility")
CC: qemu-stable@nongnu.org
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170927072030.35737-2-borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 9dacc90846)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:55:08 -05:00
Anthony PERARD
6a903482b1 Revert "ACPI: don't call acpi_pcihp_device_plug_cb on xen"
This reverts commit 153eba4726.

This patch prevents PCI passthrough hotplug on Xen. Even if the Xen tool
stack prepares its own ACPI tables, we still rely on QEMU for hotplug
ACPI notifications.

The original issue is fixed by the two previous patches:
  hw/acpi: Limit hotplug to root bus on legacy mode
  hw/acpi: Move acpi_set_pci_info to pcihp

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 2bed1ba77f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:53:20 -05:00
Anthony PERARD
8edf4c6adc hw/acpi: Move acpi_set_pci_info to pcihp
The HW part of ACPI PCI hotplug in QEMU depends on ACPI_PCIHP_PROP_BSEL
being set on a PCI bus that supports ACPI hotplug. It should work
regardless of the source of the ACPI tables (QEMU generator/legacy
SeaBIOS/Xen). So move the ACPI_PCIHP_PROP_BSEL initialization from
QEMU's ACPI table generator into the HW ACPI implementation.

To do PCI passthrough with Xen, the property ACPI_PCIHP_PROP_BSEL needs
to be set, but this was done only when ACPI tables are built, which is
not needed for a Xen guest. The need for the property starts with commit
"pc: pcihp: avoid adding ACPI_PCIHP_PROP_BSEL twice"
(f0c9d64a68).

Add find_i440fx to the stubs so that the mips-softmmu target can still be built.

Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit ab938ae43f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:53:11 -05:00
Anthony PERARD
2c3a8cc581 hw/acpi: Limit hotplug to root bus on legacy mode
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit f5855994fe)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:53:04 -05:00
Stefan Hajnoczi
0691b70a2a nbd-client: avoid read_reply_co entry if send failed
The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:

  Program terminated with signal SIGSEGV, Segmentation fault.
  #0  aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
  441       QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
  (gdb) bt
  #0  0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
  #1  0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
  #2  0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
  #3  0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
  #4  0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, f
+lags=0) at block/io.c:1086
  #5  0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
  #6  0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
  #7  0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
  #8  0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
  #9  0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6

The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co).  Execution of read_reply_co is deferred to a BH
which doesn't run until later.

In the meantime blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co.  At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.

This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply().  The request is "ended"
if an error occurs at any point.  Callers no longer have to invoke
nbd_coroutine_end().

This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed.  It is only necessary to wake up s->read_reply_co if a reply was
received.

Note this only happens with UNIX domain sockets on Linux.  It doesn't
seem possible to reproduce this with TCP sockets.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 3c2d5183f9)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:52:37 -05:00
Alex Bennée
4d824886c8 accel/tcg/cputlb: avoid recursive BQL (fixes #1706296)
The mmio path (see exec.c:prepare_mmio_access) already protects itself
against recursive locking and it makes sense to do the same for
io_readx/writex. Otherwise any helper running in the BQL context will
assert when it attempts to write to device memory as in the case of
the bug report.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
CC: Richard Jones <rjones@redhat.com>
CC: Paolo Bonzini <bonzini@gnu.org>
CC: qemu-stable@nongnu.org
Message-Id: <20170921110625.9500-1-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 8b81253332)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:52:09 -05:00
Vladimir Sementsov-Ogievskiy
780fb4ce48 block/qcow2-bitmap: fix use of uninitialized pointer
Without initialization to zero, the dirty_bitmap field may be non-zero
for a bitmap which should not be stored, and
qcow2_store_persistent_dirty_bitmaps() will erroneously call
store_bitmap() for it, which leads to a SIGSEGV in bdrv_dirty_bitmap_name().
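
The shape of the bug in a self-contained sketch (struct and field
names abbreviated from the description above):

    #include <stdio.h>
    #include <stdlib.h>

    struct Qcow2Bitmap {
        void *dirty_bitmap;    /* non-NULL is read as "store this bitmap" */
    };

    int main(void)
    {
        /* With plain malloc(), dirty_bitmap is garbage, so a bitmap that
         * should not be stored can look storable; zero-allocation
         * (calloc() here, g_new0() in QEMU) keeps the field NULL. */
        struct Qcow2Bitmap *bm = calloc(1, sizeof(*bm));
        if (bm->dirty_bitmap != NULL) {
            puts("would call store_bitmap() on a stale pointer");
        }
        free(bm);
        return 0;
    }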

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20170922144353.4220-1-vsementsov@virtuozzo.com
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit 5330f32b71)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:51:42 -05:00
Manos Pitsidianakis
7496699ba6 block/throttle-groups.c: allocate RestartData on the heap
RestartData is the opaque data of the throttle_group_restart_queue_entry
coroutine. Because it is stack-allocated, it is no longer available if
aio_co_enter schedules the coroutine with a bottom half that runs after
throttle_group_restart_queue returns.
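
The lifetime hazard in miniature, with a hand-rolled scheduler standing
in for the bottom half (no QEMU APIs; everything here is illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct RestartData { int value; } RestartData;

    /* Stand-in for scheduling a coroutine via a bottom half: the
     * callback runs only after the caller has returned. */
    static void (*pending_fn)(void *);
    static void *pending_opaque;

    static void schedule(void (*fn)(void *), void *opaque)
    {
        pending_fn = fn;
        pending_opaque = opaque;
    }

    static void restart_queue_entry(void *opaque)
    {
        RestartData *d = opaque;
        printf("restarting queue %d\n", d->value);
        free(d);              /* heap allocation: the callee owns the data */
    }

    static void restart_queue(int value)
    {
        /* Broken variant: "RestartData d = { value }; schedule(..., &d);"
         * - this stack frame is gone by the time the callback runs. */
        RestartData *d = malloc(sizeof(*d));
        d->value = value;
        schedule(restart_queue_entry, d);
    }

    int main(void)
    {
        restart_queue(1);
        pending_fn(pending_opaque);   /* the "bottom half" runs later */
        return 0;
    }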

Cc: qemu-stable@nongnu.org
Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 43a5dc02fd)
 Conflicts:
	block/throttle-groups.c
* reworked to avoid functional dep on 022cdc9, since that involves
  refactoring for a feature not present in 2.10
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:49:39 -05:00
Eric Blake
33a599667a osdep: Fix ROUND_UP(64-bit, 32-bit)
When using bit-wise operations that exploit the power-of-two
nature of the second argument of ROUND_UP(), we still need to
ensure that the mask is as wide as the first argument (done
by using a ternary to force proper arithmetic promotion).
Unpatched, ROUND_UP(2ULL*1024*1024*1024*1024, 512U) produces 0,
instead of the intended 2TiB, because negation of an unsigned
32-bit quantity followed by widening to 64-bits does not
sign-extend the mask.

Broken since its introduction in commit 292c8e50 (v1.5.0).
Callers that passed the same width type to both macro parameters,
or that had other code to ensure the first parameter's maximum
runtime value did not exceed the second parameter's width, are
unaffected, but I did not audit to see which (if any) existing
clients of the macro could trigger incorrect behavior (I found
the bug while adding a new use of the macro).

While preparing the patch, checkpatch complained about poor
spacing, so I also fixed that here and in the nearby DIV_ROUND_UP.
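
The promotion bug is easy to demonstrate; the fixed variant below uses
the never-evaluated arm of a ternary purely for type promotion (a
sketch of the technique, not necessarily QEMU's exact macro):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Broken: ~(512U - 1) is a 32-bit mask, zero-extended to 64 bits,
     * so the upper half of a 64-bit first argument is cleared. */
    #define ROUND_UP_BROKEN(n, m) (((n) + (m) - 1) & ~((m) - 1))

    /* Fixed: the ternary never evaluates (n), but promotes the mask to
     * the wider of the two argument types before negation. */
    #define ROUND_UP_FIXED(n, m)  (((n) + (m) - 1) & -(0 ? (n) : (m)))

    int main(void)
    {
        uint64_t n = 2ULL * 1024 * 1024 * 1024 * 1024;  /* 2 TiB */
        printf("broken: %" PRIu64 "\n", ROUND_UP_BROKEN(n, 512U)); /* 0 */
        printf("fixed:  %" PRIu64 "\n", ROUND_UP_FIXED(n, 512U));  /* 2 TiB */
        return 0;
    }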

CC: qemu-trivial@nongnu.org
CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 2098b073f3)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-28 16:37:36 -05:00
Christian Borntraeger
a432f419ab s390x/ais: for 2.10 stable: disable ais facility
The migration interface for ais was introduced with kernel 4.13
but the capability itself had been active since 4.12. As migration
support is considered necessary, let's disable ais in the 2.10
stable version. A proper fix and re-enablement will be done
for qemu 2.11.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20170921140834.14233-2-borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 3f2d07b3b0)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:23:37 -05:00
Jan Dakinevich
a83858fdb5 9pfs: check the size of transport buffer before marshaling
v9fs_do_readdir_with_stat() should check the maximum buffer size
before attempting to marshal the gathered data. Otherwise, the buffer
is assumed to be misconfigured and the transport is broken.

The patch brings v9fs_do_readdir_with_stat() in conformity with
v9fs_do_readdir() behavior.

Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com>
[groug, regression caused by commit 8d37de41ca # 2.10]
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit 772a73692e)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:23:16 -05:00
Jan Dakinevich
d13a0bde83 9pfs: fix name_to_path assertion in v9fs_complete_rename()
The third parameter of v9fs_co_name_to_path() must not contain the `/'
character.

The issue is most likely related to 9p2000.u protocol only.

Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com>
[groug, regression caused by commit f57f587857 # 2.10]
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit 4d8bc7334b)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:23:11 -05:00
Jan Dakinevich
e90997dc8f 9pfs: fix readdir() for 9p2000.u
If the client is using 9p2000.u, the following occurs:

$ cd ${virtfs_shared_dir}
$ mkdir -p a/b/c
$ ls a/b
ls: cannot access 'a/b/a': No such file or directory
ls: cannot access 'a/b/b': No such file or directory
a  b  c

instead of the expected:

$ ls a/b
c

This is a regression introduced by commit f57f5878578a;
local_name_to_path() now resolves ".." and "." in paths,
and v9fs_do_readdir_with_stat()->stat_to_v9stat() then
copies the basename of the resulting path to the response.
With the example above, this means that "." and ".." are
turned into "b" and "a" respectively...

stat_to_v9stat() currently assumes it is passed a full
canonicalized path and uses it to do two different things:
1) to pass it to v9fs_co_readlink() in case the file is a symbolic
   link
2) to set the name field of the V9fsStat structure to the basename
   part of the given path

It only has two users: v9fs_stat() and v9fs_do_readdir_with_stat().

v9fs_stat() really needs 1) and 2) to be performed since it starts
with the full canonicalized path stored in the fid. It is different
for v9fs_do_readdir_with_stat() though because the name we want to
put into the V9fsStat structure is the d_name field of the dirent
actually (ie, we want to keep the "." and ".." special names). So,
we only need 1) in this case.

This patch hence adds a basename argument to stat_to_v9stat(), to
be used to set the name field of the V9fsStat structure, and moves
the basename logic to v9fs_stat().

Signed-off-by: Jan Dakinevich <jan.dakinevich@gmail.com>
(groug, renamed old name argument to path and updated changelog)
Signed-off-by: Greg Kurz <groug@kaod.org>

(cherry picked from commit 6069537f43)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:23:05 -05:00
Gerd Hoffmann
7e1288cd0c console: fix dpy_gfx_replace_surface assert
virtio-gpu can trigger the assert added by commit "6905b93447 console:
add same surface replace pre-condition" in multihead setups (where
surface can be NULL for secondary displays). Allow the surface to be NULL.

Fixes: 6905b93447
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20170906142109.2685-1-kraxel@redhat.com
(cherry picked from commit 1540008629)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:13:37 -05:00
Igor Mammedov
83b23fe55c ide: ahci: unparent children buses before freeing their memory
Fixes a read-after-free error reported at
  https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg04243.html
  Message-Id: <59a56959-ca12-ea75-33fa-ff07eba1b090@redhat.com>

The ich9-ahci device creates IDE buses and attaches them as QOM children
at realize time; however, it forgets to properly clean them up
at unrealize time and frees the memory containing these children,
with the following call chain:

   qdev_device_add()
     object_property_set_bool('realized', true)
       device_set_realized()
          ...
          pci_qdev_realize() -> pci_ich9_ahci_realize() -> ahci_realize()
               ...
               s->dev = g_new0(AHCIDevice, ports);
               ...
                  AHCIDevice *ad = &s->dev[i];
                  ide_bus_new(&ad->port, sizeof(ad->port), qdev, i, 1);
                   ^^^ creates the bus in memory allocated by the above g_new0()
                       and adds it as a child property of the ahci device
          ...
          hotplug_handler_plug(); -> goto post_realize_fail;
          pci_qdev_unrealize() -> pci_ich9_uninit() -> ahci_uninit()
              ...
               g_free(s->dev);
                ^^^ frees the memory that holds the child buses

          return with error from device_set_realized()

As a result, when qdev_device_add() later tries to unparent ich9-ahci
after the failed device_set_realized(),
    object_unparent() -> object_property_del_child()
iterates over the existing QOM children, including the buses added by
ide_bus_new(), and tries to unparent them, which causes access to the
freed memory where they were located.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1503938085-169486-1-git-send-email-imammedo@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit 955f5c7ba1)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:41 -05:00
Thomas Huth
e96002e0d1 hw/ide/microdrive: Mark the dscm1xxxx device with user_creatable = false
QEMU currently aborts with an assertion message when the user is trying
to remove a dscm1xxxx again:

$ aarch64-softmmu/qemu-system-aarch64 -S -M integratorcp -nographic
QEMU 2.9.93 monitor - type 'help' for more information
(qemu) device_add dscm1xxxx,id=xyz
(qemu) device_del xyz
**
ERROR:qemu/qdev-monitor.c:872:qdev_unplug: assertion failed: (hotplug_ctrl)
Aborted (core dumped)

Looks like this device has to be wired up in code and is not meant
to be hot-pluggable, so let's mark it with user_creatable = false.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1503543783-17192-1-git-send-email-thuth@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit 4c93950659)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:35 -05:00
Thomas Huth
cc7dd3ad3f hw/arm/aspeed_soc: Mark devices as user_creatable = false
QEMU currently aborts if the user is accidentally trying to
do something like this:

$ aarch64-softmmu/qemu-system-aarch64 -S -M integratorcp -nographic
QEMU 2.9.93 monitor - type 'help' for more information
(qemu) device_add ast2400
Unexpected error in error_set_from_qdev_prop_error()
 at hw/core/qdev-properties.c:1032:
Aborted (core dumped)

The ast2400 SoC devices are clearly not creatable by the user since
they are using the serial_hds and nd_table arrays directly in their
realize function, so mark them with user_creatable = false.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 469f3da42e)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:31 -05:00
Thomas Huth
de4ad17a8e hw/arm/digic: Mark device with user_creatable = false
QEMU currently shows some unexpected behavior when the user tries to
do a "device_add digic" on an unrelated ARM machine like integratorcp
in "-nographic" mode (the device_add command does not immediately
return to the monitor prompt), and trying to "device_del" the device
later results in a "qemu/qdev-monitor.c:872:qdev_unplug: assertion
failed: (hotplug_ctrl)" error condition.
Looking at the realize function of the device, it uses serial_hds
directly, and this means that the device cannot be added a second
time, so let's simply mark it with "user_creatable = false" now.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit f58f25599b)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:26 -05:00
Thomas Huth
8a9d7f3063 s390x/ipl: The s390-ipl device is not hot-pluggable
The s390-ipl device cannot be created by the user, since it is meant only
to be instantiated once internally to load the ROMs and kernel. If the user
tries to do a "device_add s390-ipl" via the monitor later, QEMU aborts with
a "ROM images must be loaded at startup" error message.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1502861458-30270-1-git-send-email-thuth@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 0d4fa4996f)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:09 -05:00
Thomas Huth
d3f05848fc watchdog/wdt_diag288: Mark diag288 watchdog as non-hotpluggable
QEMU currently aborts when the user tries to hot-unplug a diag288
device:

$ qemu-system-s390x -nographic -nodefaults -S -monitor stdio
QEMU 2.9.92 monitor - type 'help' for more information
(qemu) device_add diag288,id=x
(qemu) device_del x
**
ERROR:qemu/qdev-monitor.c:872:qdev_unplug: assertion failed: (hotplug_ctrl)
Aborted (core dumped)

The device is not designed as hot-pluggable (it should only be used
via the "-watchdog" parameter), so let's simply remove the possibility
of hotplugging it, to prevent users from running into this ugly situation.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1502892528-22618-1-git-send-email-thuth@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 84ebd3e8c7)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:12:04 -05:00
Prasad J Pandit
fca5f37fe9 multiboot: validate multiboot header address values
While loading a kernel via a multiboot-v1 image, (flags & 0x00010000)
indicates that the multiboot header contains valid addresses to load
the kernel image. These addresses are used to compute the kernel
size and the kernel text offset in the OS image. Validate these
address values to avoid an OOB access issue.
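
A hedged sketch of the kind of checks described (field names mirror the
multiboot header; the exact conditions in QEMU may differ):

    #include <stdint.h>
    #include <stdio.h>

    /* Reject header address combinations that would make the derived
     * kernel size or kernel text offset wrap or go out of bounds. */
    static int multiboot_addrs_valid(uint32_t header_addr, uint32_t load_addr,
                                     uint32_t load_end_addr)
    {
        if (load_addr > header_addr) {
            return 0;      /* kernel text offset would underflow */
        }
        if (load_end_addr && load_end_addr < load_addr) {
            return 0;      /* kernel size would underflow */
        }
        return 1;
    }

    int main(void)
    {
        printf("%d\n", multiboot_addrs_valid(0x100000, 0x100000, 0x200000));
        printf("%d\n", multiboot_addrs_valid(0x100000, 0x200000, 0x100000));
        return 0;
    }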

This is CVE-2017-14167.

Reported-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Message-Id: <20170907063256.7418-1-ppandit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ed4f86e8b6)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:11:49 -05:00
Gerd Hoffmann
2965be1f00 vga: stop passing pointers to vga_draw_line* functions
Instead, pass around the address (i.e. the offset into vga memory).
Add vga_read_* helper functions which apply vbe_size_mask to
the address, to make sure the address stays within the valid
range, similar to the cirrus blitter fixes (commits ffaf857778
and 026aeffcb4).
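
The helpers have roughly this shape (a sketch; the state structure is
reduced to the two fields the mask needs):

    #include <stdint.h>

    typedef struct VGAState {
        uint8_t *vram_ptr;
        uint32_t vbe_size_mask;  /* memory size rounded up to a power of
                                    two, minus 1 */
    } VGAState;

    /* Every read used for display updates goes through the mask, so a
     * miscomputed line offset wraps instead of running past the
     * allocation. */
    static inline uint8_t vga_read_byte(VGAState *s, uint32_t addr)
    {
        return s->vram_ptr[addr & s->vbe_size_mask];
    }

    int main(void)
    {
        uint8_t vram[16] = { 0 };
        VGAState s = { vram, sizeof(vram) - 1 };
        return vga_read_byte(&s, 0xdeadbeef);  /* stays within vram[] */
    }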

Impact: DoS for privileged guest users. QEMU crashes with
a segfault when hitting the guard page after the vga memory
allocation while reading vga memory for display updates.

Fixes: CVE-2017-13672
Cc: P J P <ppandit@redhat.com>
Reported-by: David Buchanan <d@vidbuchanan.co.uk>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170828122906.18993-1-kraxel@redhat.com
(cherry picked from commit 3d90c62548)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:11:23 -05:00
Gerd Hoffmann
d6f7f3b0cf vga: fix display update region calculation (split screen)
The vga display update miscalculated the region for the dirty bitmap
snapshot when split-screen mode is used. This can trigger an
assert in cpu_physical_memory_snapshot_get_dirty().

Impact:  DoS for privileged guest users.

Fixes: CVE-2017-13673
Fixes: fec5e8c92b
Cc: P J P <ppandit@redhat.com>
Reported-by: David Buchanan <d@vidbuchanan.co.uk>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-id: 20170828123307.15392-1-kraxel@redhat.com
(cherry picked from commit e65294157d)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-22 18:11:17 -05:00
Marc-André Lureau
2a2eab6660 vhost-user-bridge: fix resume regression (since 2.9)
Commit e10e798c85 switched to libvhost-user which lacked support
for resuming the avail_idx based on used_idx.

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=1485867

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 672339f7ef)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-19 17:48:37 -05:00
Marc-André Lureau
48f65ce837 libvhost-user: support resuming vq->last_avail_idx based on used_idx
This is the same workaround as commit 523b018dde, which was lost
with libvhost-user transition in commit e10e798c85.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 35480cbfcb)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-19 17:48:27 -05:00
Hannes Reinecke
b95fbe6f12 scsi-bus: correct responses for INQUIRY and REQUEST SENSE
According to SPC-3, INQUIRY and REQUEST SENSE should return GOOD
even on unsupported LUNs.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Message-Id: <1503049022-14749-1-git-send-email-hare@suse.de>
Reported-by: Laszlo Ersek <lersek@redhat.com>
Fixes: ded6ddc5a7
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
(cherry picked from commit b07fbce634)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-19 17:44:14 -05:00
Peter Maydell
b8cd978919 mps2-an511: Fix wiring of UART overflow interrupt lines
Fix an error that meant we were wiring every UART's overflow
interrupts into the same inputs 0 and 1 of the OR gate,
rather than giving each its own input.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 1505232834-20890-1-git-send-email-peter.maydell@linaro.org
(cherry picked from commit ce3bc112cd)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-17 15:02:04 -05:00
Alex Williamson
b24304ca13 vhost: Release memory references on cleanup
vhost registers a MemoryListener where it adds and removes references
to MemoryRegions as the MemoryRegionSections pass through.  The
region_add callback is invoked for each existing section when the
MemoryListener is registered, but unregistering the MemoryListener
performs no reciprocal region_del callback.  It's therefore the
owner of the MemoryListener's responsibility to cleanup any persistent
changes, such as these memory references, after unregistering.

The consequence of this bug is that if we have both a vhost device
and a vfio device, the vhost device will reference any mmap'd MMIO of
the vfio device via this MemoryListener.  If the vhost device is then
removed, those references remain outstanding.  If we then attempt to
remove the vfio device, it never gets finalized and the only way to
release the kernel file descriptors is to terminate the QEMU process.
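
The invariant in miniature, as a toy reference counter (no QEMU APIs;
the real cleanup releases MemoryRegion references instead of
decrementing an int):

    #include <assert.h>

    static int region_refcount;

    /* region_add callback: takes a reference on the region. */
    static void listener_region_add(void)
    {
        region_refcount++;
    }

    /* No reciprocal region_del is generated at unregister time, so the
     * owner must release the references itself. */
    static void listener_unregister_and_cleanup(int sections)
    {
        while (sections--) {
            region_refcount--;   /* dropping the reference region_add took */
        }
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {
            listener_region_add();
        }
        listener_unregister_and_cleanup(3);
        assert(region_refcount == 0);  /* region can now be finalized */
        return 0;
    }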

Fixes: dfde4e6e1a ("memory: add ref/unref calls")
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-stable@nongnu.org # v1.6.0+
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit ee4c112846)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-14 19:31:09 -05:00
Pavel Butsykin
c6841b112e qcow2: move qcow2_store_persistent_dirty_bitmaps() before cache flushing
After calling qcow2_inactivate(), all qcow2 caches must be flushed, but this
may not happen, because the last call, qcow2_store_persistent_dirty_bitmaps(),
can mark the l2/refcount cache as dirty.

Let's move qcow2_store_persistent_dirty_bitmaps() before the cache flushing
to fix it.

Cc: qemu-stable@nongnu.org
Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 83a8c775a8)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-14 19:29:40 -05:00
Thomas Huth
65a24b5c44 hw/arm/allwinner-a10: Mark the allwinner-a10 device with user_creatable = false
QEMU currently exits unexpectedly when the user accidentally
tries to do something like this:

$ aarch64-softmmu/qemu-system-aarch64 -S -M integratorcp -nographic
QEMU 2.9.93 monitor - type 'help' for more information
(qemu) device_add allwinner-a10
Unsupported NIC model: smc91c111

Exiting just due to a "device_add" should not happen. Looking closer
at the realize and instance_init functions of this device also
reveals that it is using serial_hds and nd_table directly there, so
this device is clearly not creatable by the user and should be marked
accordingly.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Message-id: 1503416789-32080-1-git-send-email-thuth@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit dc89a180ca)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-14 19:26:40 -05:00
Pranith Kumar
85cdc23e75 arm_gicv3_kvm: Fix compile warning
Fix the following warning:

/home/pranith/qemu/hw/intc/arm_gicv3_kvm.c:296:17: warning: logical not is only applied to the left hand side of this bitwise operator [-Wlogical-not-parentheses]
            if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
                ^             ~
/home/pranith/qemu/hw/intc/arm_gicv3_kvm.c:296:17: note: add parentheses after the '!' to evaluate the bitwise operator first
            if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
                ^
/home/pranith/qemu/hw/intc/arm_gicv3_kvm.c:296:17: note: add parentheses around left hand side expression to silence this warning
            if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
                ^

This logic error meant we were not setting the PTZ
bit when we should have -- luckily, as the comment suggests,
this wouldn't have had any effect beyond making GIC
initialization take a little longer.
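
The precedence trap in isolation: '!' binds tighter than '&' (the bit
position below is assumed for illustration):

    #include <assert.h>
    #include <stdint.h>

    #define GICR_CTLR_ENABLE_LPIS (1U << 0)

    int main(void)
    {
        uint32_t gicr_ctlr = 0x2;  /* nonzero, but ENABLE_LPIS clear */

        /* Broken: evaluates (!gicr_ctlr) & bit, i.e. 0 & 1 == 0; this is
         * exactly the line the warning above complains about. */
        int broken = !gicr_ctlr & GICR_CTLR_ENABLE_LPIS;
        /* Fixed: test the bit first, then negate. */
        int fixed = !(gicr_ctlr & GICR_CTLR_ENABLE_LPIS);

        assert(broken == 0 && fixed == 1);
        return 0;
    }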

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-id: 20170829173226.7625-1-bobby.prani@gmail.com
Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 7229ec5825)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-14 19:24:05 -05:00
Greg Kurz
168ff32c5d virtfs: error out gracefully when mandatory suboptions are missing
We internally convert -virtfs to -fsdev/-device. If the user doesn't
provide the path or security_model suboptions, and the fsdev backend
requires them, we hit an assertion when populating the internal -fsdev
option:

util/qemu-option.c:547: opt_set: Assertion `opt->str' failed.
Aborted (core dumped)

Let's test the suboption presence on the command line before trying
to set it in the internal -fsdev option, and let the backend code
error out gracefully (ie, like it already does when the user passes
-fsdev on the command line).

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 32b6943699)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-12 11:25:01 -05:00
Richard Henderson
728bfa3273 target/arm: Fix aa64 ldp register writeback
For "ldp x0, x1, [x0]", if the second load is on a second page and
the second page is unmapped, the exception would be raised with x0
already modified.  This means the instruction couldn't be restarted.

Cc: qemu-arm@nongnu.org
Cc: qemu-stable@nongnu.org
Reported-by: Andrew <andrew@fubar.geek.nz>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20170825224833.4463-1-richard.henderson@linaro.org
Fixes: https://bugs.launchpad.net/qemu/+bug/1713066
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
[PMM: tweaked comment format]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

(cherry picked from commit 3e4d91b94c)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-12 11:24:55 -05:00
Farhan Ali
e1b4750f06 s390-ccw: Fix alignment for CCW1
Commit 198c0d1f9d ("s390x/css: check ccw address validity")
exposes an alignment issue in the ccw bios.

According to the PoP, the CCW must be doubleword-aligned. Let's fix
this in the bios.
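
A compilable sketch of the idea (format-1 CCW layout; the aligned
attribute carries the fix):

    #include <assert.h>
    #include <stdint.h>

    /* Force doubleword (8-byte) alignment on the channel command word,
     * as the architecture requires. */
    struct ccw1 {
        uint8_t  cmd_code;
        uint8_t  flags;
        uint16_t count;
        uint32_t cda;            /* data address */
    } __attribute__((packed, aligned(8)));

    int main(void)
    {
        static struct ccw1 ccw;
        assert(((uintptr_t)&ccw & 7) == 0);  /* doubleword aligned */
        return 0;
    }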

Cc: qemu-stable@nongnu.org
Signed-off-by: Farhan Ali <alifm@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <3ed8b810b6592daee6a775037ce21f850e40647d.1503667215.git.alifm@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 3a1e4561ad)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-12 11:24:49 -05:00
Samuel Thibault
53d421dd9c slirp: fix clearing ifq_so from pending packets
The if_fastq and if_batchq contain not only packets, but queues of packets
for the same socket. When sofree frees a socket, it thus has to clear ifq_so
from all the packets in the queues, not just the first.
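
A minimal sketch of the two-dimensional walk (structures simplified
from slirp's mbuf; the real queues are circular):

    #include <stddef.h>

    struct socket;

    /* ifq_next links the queue of heads; ifs_next links packets batched
     * for the same socket behind each head. */
    struct mbuf {
        struct mbuf   *ifq_next;
        struct mbuf   *ifs_next;
        struct socket *ifq_so;
    };

    /* On sofree(), clear ifq_so from every packet belonging to the
     * socket, not just the queue head. */
    static void clear_queue_socket(struct mbuf *head, struct socket *so)
    {
        for (struct mbuf *ifq = head; ifq; ifq = ifq->ifq_next) {
            for (struct mbuf *ifm = ifq; ifm; ifm = ifm->ifs_next) {
                if (ifm->ifq_so == so) {
                    ifm->ifq_so = NULL;
                }
            }
        }
    }

    int main(void)
    {
        clear_queue_socket(NULL, NULL);
        return 0;
    }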

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 1201d30851)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
2017-09-12 11:24:43 -05:00
1416 changed files with 49960 additions and 89491 deletions

.gitignore

@@ -14,8 +14,6 @@
/trace/generated-tcg-tracers.h
/ui/shader/texture-blit-frag.h
/ui/shader/texture-blit-vert.h
/ui/shader/texture-blit-flip-vert.h
/ui/input-keymap-*.c
*-timestamp
/*-softmmu
/*-darwin-user
@@ -46,17 +44,14 @@
/qemu-io
/qemu-ga
/qemu-bridge-helper
/qemu-keymap
/qemu-monitor.texi
/qemu-monitor-info.texi
/qemu-version.h
/qemu-version.h.tmp
/module_block.h
/scsi/qemu-pr-helper
/vscclient
/vhost-user-scsi
/fsdev/virtfs-proxy-helper
*.tmp
*.[1-9]
*.a
*.aux
@@ -116,7 +111,6 @@
/docs/version.texi
*.tps
.stgit-*
.git-submodule-status
cscope.*
tags
TAGS

.gitmodules

@@ -22,6 +22,9 @@
[submodule "roms/sgabios"]
path = roms/sgabios
url = git://git.qemu-project.org/sgabios.git
[submodule "pixman"]
path = pixman
url = git://anongit.freedesktop.org/pixman
[submodule "dtc"]
path = dtc
url = git://git.qemu-project.org/dtc.git
@@ -34,9 +37,3 @@
[submodule "roms/QemuMacDrivers"]
path = roms/QemuMacDrivers
url = git://git.qemu.org/QemuMacDrivers.git
[submodule "ui/keycodemapdb"]
path = ui/keycodemapdb
url = git://git.qemu.org/keycodemapdb.git
[submodule "capstone"]
path = capstone
url = git://git.qemu.org/capstone.git

.mailmap

@@ -8,11 +8,8 @@ Aurelien Jarno <aurelien@aurel32.net> aurel32 <aurel32@c046a42c-6fe2-441c-8c8c-7
Blue Swirl <blauwirbel@gmail.com> blueswir1 <blueswir1@c046a42c-6fe2-441c-8c8c-71466251a162>
Edgar E. Iglesias <edgar.iglesias@gmail.com> edgar_igl <edgar_igl@c046a42c-6fe2-441c-8c8c-71466251a162>
Fabrice Bellard <fabrice@bellard.org> bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com>
Jocelyn Mayer <l_indien@magic.fr> j_mayer <j_mayer@c046a42c-6fe2-441c-8c8c-71466251a162>
Paul Brook <paul@codesourcery.com> pbrook <pbrook@c046a42c-6fe2-441c-8c8c-71466251a162>
Paul Burton <paul.burton@mips.com> <paul.burton@imgtec.com>
Paul Burton <paul.burton@mips.com> <paul@archlinuxmips.org>
Thiemo Seufer <ths@networkno.de> ths <ths@c046a42c-6fe2-441c-8c8c-71466251a162>
malc <av1474@comtv.ru> malc <malc@c046a42c-6fe2-441c-8c8c-71466251a162>
# There is also a:


@@ -1,270 +0,0 @@
A. HISTORY OF THE SOFTWARE
==========================
Python was created in the early 1990s by Guido van Rossum at Stichting
Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
as a successor of a language called ABC. Guido remains Python's
principal author, although it includes many contributions from others.
In 1995, Guido continued his work on Python at the Corporation for
National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
in Reston, Virginia where he released several versions of the
software.
In May 2000, Guido and the Python core development team moved to
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
year, the PythonLabs team moved to Digital Creations (now Zope
Corporation, see http://www.zope.com). In 2001, the Python Software
Foundation (PSF, see http://www.python.org/psf/) was formed, a
non-profit organization created specifically to own Python-related
Intellectual Property. Zope Corporation is a sponsoring member of
the PSF.
All Python releases are Open Source (see http://www.opensource.org for
the Open Source Definition). Historically, most, but not all, Python
releases have also been GPL-compatible; the table below summarizes
the various releases.
Release         Derived     Year        Owner       GPL-
                from                                compatible? (1)

0.9.0 thru 1.2              1991-1995   CWI         yes
1.3 thru 1.5.2  1.2         1995-1999   CNRI        yes
1.6             1.5.2       2000        CNRI        no
2.0             1.6         2000        BeOpen.com  no
1.6.1           1.6         2001        CNRI        yes (2)
2.1             2.0+1.6.1   2001        PSF         no
2.0.1           2.0+1.6.1   2001        PSF         yes
2.1.1           2.1+2.0.1   2001        PSF         yes
2.2             2.1.1       2001        PSF         yes
2.1.2           2.1.1       2002        PSF         yes
2.1.3           2.1.2       2002        PSF         yes
2.2.1           2.2         2002        PSF         yes
2.2.2           2.2.1       2002        PSF         yes
2.2.3           2.2.2       2003        PSF         yes
2.3             2.2.2       2002-2003   PSF         yes
2.3.1           2.3         2002-2003   PSF         yes
2.3.2           2.3.1       2002-2003   PSF         yes
2.3.3           2.3.2       2002-2003   PSF         yes
2.3.4           2.3.3       2004        PSF         yes
2.3.5           2.3.4       2005        PSF         yes
2.4             2.3         2004        PSF         yes
2.4.1           2.4         2005        PSF         yes
2.4.2           2.4.1       2005        PSF         yes
2.4.3           2.4.2       2006        PSF         yes
2.5             2.4         2006        PSF         yes
2.7             2.6         2010        PSF         yes
Footnotes:
(1) GPL-compatible doesn't mean that we're distributing Python under
the GPL. All Python licenses, unlike the GPL, let you distribute
a modified version without making your changes open source. The
GPL-compatible licenses make it possible to combine Python with
other software that is released under the GPL; the others don't.
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
because its license has a choice of law clause. According to
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
is "not incompatible" with the GPL.
Thanks to the many outside volunteers who have worked under Guido's
direction to make these releases possible.
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
===============================================================
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python
alone or in any derivative version, provided, however, that PSF's
License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
Individual or Organization ("Licensee") accessing and otherwise using
this software in source or binary form and its associated
documentation ("the Software").
2. Subject to the terms and conditions of this BeOpen Python License
Agreement, BeOpen hereby grants Licensee a non-exclusive,
royalty-free, world-wide license to reproduce, analyze, test, perform
and/or display publicly, prepare derivative works, distribute, and
otherwise use the Software alone or in any derivative version,
provided, however, that the BeOpen Python License is retained in the
Software, alone or in any derivative version prepared by Licensee.
3. BeOpen is making the Software available to Licensee on an "AS IS"
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
5. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
6. This License Agreement shall be governed by and interpreted in all
respects by the law of the State of California, excluding conflict of
law provisions. Nothing in this License Agreement shall be deemed to
create any relationship of agency, partnership, or joint venture
between BeOpen and Licensee. This License Agreement does not grant
permission to use BeOpen trademarks or trade names in a trademark
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.
7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the Internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the Internet
using the following URL: http://hdl.handle.net/1895.22/1013".
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.
8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.
ACCEPT
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Changelog

@@ -1,6 +1,6 @@
This file documents changes for QEMU releases 0.12 and earlier.
For changelog information for later releases, see
https://wiki.qemu.org/ChangeLog or look at the git history for
http://wiki.qemu-project.org/ChangeLog or look at the git history for
more detailed information.

MAINTAINERS

@@ -61,7 +61,7 @@ F: */
Responsible Disclosure, Reporting Security Issues
------------------------------
W: https://wiki.qemu.org/SecurityProcess
W: http://wiki.qemu.org/SecurityProcess
M: Michael S. Tsirkin <mst@redhat.com>
L: secalert@redhat.com
@@ -86,6 +86,7 @@ M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: cpus.c
F: exec.c
F: softmmu_template.h
F: accel/tcg/
F: include/exec/cpu*.h
F: include/exec/exec-all.h
@@ -162,7 +163,7 @@ F: disas/microblaze.c
MIPS
M: Aurelien Jarno <aurelien@aurel32.net>
M: Yongbok Kim <yongbok.kim@mips.com>
M: Yongbok Kim <yongbok.kim@imgtec.com>
S: Maintained
F: target/mips/
F: hw/mips/
@@ -216,7 +217,6 @@ S: Maintained
F: target/s390x/
F: hw/s390x/
F: disas/s390.c
L: qemu-s390x@nongnu.org
SH4
M: Aurelien Jarno <aurelien@aurel32.net>
@@ -250,7 +250,6 @@ S: Maintained
F: target/i386/
F: hw/i386/
F: disas/i386.c
T: git git://github.com/ehabkost/qemu.git x86-next
Xtensa
M: Max Filippov <jcmvbkbc@gmail.com>
@@ -285,7 +284,7 @@ S: Maintained
F: target/arm/kvm.c
MIPS
M: James Hogan <jhogan@kernel.org>
M: James Hogan <james.hogan@imgtec.com>
S: Maintained
F: target/mips/kvm.c
@@ -300,18 +299,14 @@ M: Cornelia Huck <cohuck@redhat.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target/s390x/kvm.c
F: target/s390x/kvm_s390x.h
F: target/s390x/kvm-stub.c
F: target/s390x/ioinst.[ch]
F: target/s390x/machine.c
F: target/s390x/sigp.c
F: hw/intc/s390_flic.c
F: hw/intc/s390_flic_kvm.c
F: include/hw/s390x/s390_flic.h
F: gdb-xml/s390*.xml
T: git git://github.com/cohuck/qemu.git s390-next
T: git git://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
X86
M: Paolo Bonzini <pbonzini@redhat.com>
@@ -385,7 +380,6 @@ M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/char/pl011.c
F: include/hw/char/pl011.h
F: hw/display/pl110*
F: hw/dma/pl080.c
F: hw/dma/pl330.c
@@ -409,15 +403,13 @@ F: hw/intc/gic_internal.h
F: hw/misc/a9scu.c
F: hw/misc/arm11scu.c
F: hw/timer/a9gtimer*
F: hw/timer/arm*
F: include/hw/arm/arm*.h
F: hw/timer/arm_*
F: include/hw/arm/arm.h
F: include/hw/intc/arm*
F: include/hw/misc/a9scu.h
F: include/hw/misc/arm11scu.h
F: include/hw/timer/a9gtimer.h
F: include/hw/timer/arm_mptimer.h
F: include/hw/timer/armv7m_systick.h
F: tests/test-arm-mptimer.c
Exynos
M: Igor Mitsyanko <i.mitsyanko@gmail.com>
@@ -520,7 +512,6 @@ M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/versatile*
F: hw/misc/arm_sysctl.c
Xilinx Zynq
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
@@ -557,30 +548,12 @@ F: hw/char/stm32f2xx_usart.c
F: hw/timer/stm32f2xx_timer.c
F: hw/adc/*
F: hw/ssi/stm32f2xx_spi.c
F: include/hw/*/stm32*.h
Netduino 2
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/arm/netduino2.c
SmartFusion2
M: Subbaraya Sundeep <sundeep.lkml@gmail.com>
S: Maintained
F: hw/arm/msf2-soc.c
F: hw/misc/msf2-sysreg.c
F: hw/timer/mss-timer.c
F: hw/ssi/mss-spi.c
F: include/hw/arm/msf2-soc.h
F: include/hw/misc/msf2-sysreg.h
F: include/hw/timer/mss-timer.h
F: include/hw/ssi/mss-spi.h
Emcraft M2S-FG484
M: Subbaraya Sundeep <sundeep.lkml@gmail.com>
S: Maintained
F: hw/arm/msf2-som.c
CRIS Machines
-------------
Axis Dev88
@@ -643,7 +616,7 @@ S: Maintained
F: hw/mips/mips_malta.c
Mipssim
M: Yongbok Kim <yongbok.kim@mips.com>
M: Yongbok Kim <yongbok.kim@imgtec.com>
S: Odd Fixes
F: hw/mips/mips_mipssim.c
F: hw/net/mipsnet.c
@@ -654,12 +627,12 @@ S: Maintained
F: hw/mips/mips_r4k.c
Fulong 2E
M: Yongbok Kim <yongbok.kim@mips.com>
M: Yongbok Kim <yongbok.kim@imgtec.com>
S: Odd Fixes
F: hw/mips/mips_fulong2e.c
Boston
M: Paul Burton <paul.burton@mips.com>
M: Paul Burton <paul.burton@imgtec.com>
S: Maintained
F: hw/core/loader-fit.c
F: hw/mips/boston.c
@@ -798,7 +771,7 @@ F: pc-bios/openbios-sparc64
Sun4v
M: Artyom Tarasenko <atar4qemu@gmail.com>
S: Maintained
F: hw/sparc64/niagara.c
F: hw/sparc64/sun4v.c
F: hw/timer/sun4v-rtc.c
F: include/hw/timer/sun4v-rtc.h
@@ -817,7 +790,6 @@ M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Supported
F: hw/char/sclp*.[hc]
F: hw/char/terminal3270.c
F: hw/s390x/
F: include/hw/s390x/
F: pc-bios/s390-ccw/
@@ -827,7 +799,6 @@ F: pc-bios/s390-ccw.img
F: default-configs/s390x-softmmu.mak
T: git git://github.com/cohuck/qemu.git s390-next
T: git git://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
UniCore32 Machines
-------------
@@ -887,7 +858,6 @@ S: Supported
F: hw/core/machine.c
F: hw/core/null-machine.c
F: include/hw/boards.h
T: git git://github.com/ehabkost/qemu.git machine-next
Xtensa Machines
---------------
@@ -953,9 +923,6 @@ F: include/hw/pci/*
F: hw/misc/pci-testdev.c
F: hw/pci/*
F: hw/pci-bridge/*
F: docs/pci*
F: docs/specs/*pci*
F: default-configs/pci.mak
ACPI/SMBIOS
M: Michael S. Tsirkin <mst@redhat.com>
@@ -1006,15 +973,16 @@ F: hw/scsi/*
F: tests/virtio-scsi-test.c
T: git git://github.com/bonzini/qemu.git scsi-next
LSI53C895A
S: Orphan
F: hw/scsi/lsi53c895a.c
SSI
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alistair Francis <alistair.francis@xilinx.com>
S: Maintained
F: hw/ssi/*
F: hw/block/m25p80.c
F: include/hw/ssi/ssi.h
X: hw/ssi/xilinx_*
F: tests/m25p80-test.c
Xilinx SPI
M: Alistair Francis <alistair.francis@xilinx.com>
@@ -1031,7 +999,6 @@ F: docs/usb2.txt
F: docs/usb-storage.txt
F: include/hw/usb.h
F: include/hw/usb/
F: default-configs/usb.mak
USB (serial adapter)
M: Gerd Hoffmann <kraxel@redhat.com>
@@ -1052,13 +1019,11 @@ F: hw/vfio/ccw.c
F: hw/s390x/s390-ccw.c
F: include/hw/s390x/s390-ccw.h
T: git git://github.com/cohuck/qemu.git s390-next
L: qemu-s390x@nongnu.org
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/*/*vhost*
F: docs/interop/vhost-user.txt
virtio
M: Michael S. Tsirkin <mst@redhat.com>
@@ -1096,7 +1061,6 @@ S: Supported
F: hw/s390x/virtio-ccw.[hc]
T: git git://github.com/cohuck/qemu.git s390-next
T: git git://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
virtio-input
M: Gerd Hoffmann <kraxel@redhat.com>
@@ -1137,12 +1101,11 @@ F: hw/block/nvme*
F: tests/nvme-test.c
megasas
M: Hannes Reinecke <hare@suse.com>
M: Hannes Reinecke <hare@suse.de>
L: qemu-block@nongnu.org
S: Supported
F: hw/scsi/megasas.c
F: hw/scsi/mfi.h
F: tests/megasas-test.c
Network packet abstractions
M: Dmitry Fleytman <dmitry@daynix.com>
@@ -1157,7 +1120,6 @@ M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
F: tests/vmxnet3-test.c
Rocker
M: Jiri Pirko <jiri@resnulli.us>
@@ -1167,7 +1129,7 @@ F: tests/rocker/
F: docs/specs/rocker.txt
NVDIMM
M: Xiao Guangrong <xiaoguangrong.eric@gmail.com>
M: Xiao Guangrong <guangrong.xiao@linux.intel.com>
S: Maintained
F: hw/acpi/nvdimm.c
F: hw/mem/nvdimm.c
@@ -1183,17 +1145,11 @@ M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
F: hw/net/e1000e*
eepro100
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: hw/net/eepro100.c
Generic Loader
M: Alistair Francis <alistair.francis@xilinx.com>
S: Maintained
F: hw/core/generic-loader.c
F: include/hw/core/generic-loader.h
F: docs/generic-loader.txt
CHRP NVRAM
M: Thomas Huth <thuth@redhat.com>
@@ -1255,16 +1211,8 @@ F: util/aio-*.c
F: block/io.c
F: migration/block*
F: include/block/aio.h
F: scripts/qemugdb/aio.py
T: git git://github.com/stefanha/qemu.git block
Block SCSI subsystem
M: Paolo Bonzini <pbonzini@redhat.com>
L: qemu-block@nongnu.org
S: Supported
F: include/scsi/*
F: scsi/*
Block Jobs
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org
@@ -1283,7 +1231,6 @@ S: Supported
F: blockdev.c
F: block/qapi.c
F: qapi/block*.json
F: qapi/transaction.json
T: git git://repo.or.cz/qemu/armbru.git block-next
Dirty Bitmaps
@@ -1296,7 +1243,7 @@ F: block/dirty-bitmap.c
F: include/qemu/hbitmap.h
F: include/block/dirty-bitmap.h
F: tests/test-hbitmap.c
F: docs/interop/bitmaps.rst
F: docs/bitmaps.md
T: git git://github.com/famz/qemu.git bitmaps
T: git git://github.com/jnsnow/qemu.git bitmaps
@@ -1306,7 +1253,6 @@ M: Marc-André Lureau <marcandre.lureau@redhat.com>
S: Maintained
F: chardev/
F: include/chardev/
F: qapi/char.json
Character Devices (Braille)
M: Samuel Thibault <samuel.thibault@ens-lyon.org>
@@ -1340,17 +1286,6 @@ S: Maintained
F: device_tree.c
F: include/sysemu/device_tree.h
Dump
S: Supported
M: Marc-André Lureau <marcandre.lureau@redhat.com>
F: dump.c
F: hw/misc/vmcoreinfo.c
F: include/hw/misc/vmcoreinfo.h
F: include/sysemu/dump-arch.h
F: include/sysemu/dump.h
F: scripts/dump-guest-memory.py
F: stubs/dump.c
Error reporting
M: Markus Armbruster <armbru@redhat.com>
S: Supported
@@ -1384,14 +1319,12 @@ F: include/ui/spice-display.h
F: ui/spice-*.c
F: audio/spiceaudio.c
F: hw/display/qxl*
F: qapi/ui.json
Graphics
M: Gerd Hoffmann <kraxel@redhat.com>
S: Odd Fixes
F: ui/
F: include/ui/
F: qapi/ui.json
Cocoa graphics
M: Peter Maydell <peter.maydell@linaro.org>
@@ -1405,7 +1338,6 @@ F: cpus.c
F: util/main-loop.c
F: util/qemu-timer.c
F: vl.c
F: qapi/run-state.json
Human Monitor (HMP)
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
@@ -1422,7 +1354,6 @@ S: Maintained
F: net/
F: include/net/
T: git git://github.com/jasowang/qemu.git net
F: qapi/net.json
Netmap network backend
M: Luigi Rizzo <rizzo@iet.unipi.it>
@@ -1437,7 +1368,7 @@ M: Eduardo Habkost <ehabkost@redhat.com>
S: Maintained
F: numa.c
F: include/sysemu/numa.h
T: git git://github.com/ehabkost/qemu.git machine-next
T: git git://github.com/ehabkost/qemu.git numa
Host Memory Backends
M: Eduardo Habkost <ehabkost@redhat.com>
@@ -1445,7 +1376,6 @@ M: Igor Mammedov <imammedo@redhat.com>
S: Maintained
F: backends/hostmem*.c
F: include/sysemu/hostmem.h
T: git git://github.com/ehabkost/qemu.git machine-next
Cryptodev Backends
M: Gonglei <arei.gonglei@huawei.com>
@@ -1453,14 +1383,6 @@ S: Maintained
F: include/sysemu/cryptodev*.h
F: backends/cryptodev*.c
Python scripts
M: Eduardo Habkost <ehabkost@redhat.com>
M: Cleber Rosa <crosa@redhat.com>
S: Odd fixes
F: scripts/qmp/*
F: scripts/*.py
F: tests/*.py
QAPI
M: Markus Armbruster <armbru@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1476,7 +1398,7 @@ F: tests/test-qapi-*.c
F: tests/test-qmp-*.c
F: tests/test-visitor-serialization.c
F: scripts/qapi*
F: docs/devel/qapi*
F: docs/qapi*
T: git git://repo.or.cz/qemu/armbru.git qapi-next
QAPI Schema
@@ -1505,10 +1427,6 @@ QEMU Guest Agent
M: Michael Roth <mdroth@linux.vnet.ibm.com>
S: Maintained
F: qga/
F: qemu-ga.texi
F: scripts/qemu-guest-agent/
F: tests/test-qga.c
F: docs/interop/qemu-ga-ref.texi
T: git git://github.com/mdroth/qemu.git qga
QOM
@@ -1528,7 +1446,7 @@ M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: qmp.c
F: monitor.c
F: docs/devel/*qmp-*
F: docs/*qmp-*
F: scripts/qmp/
F: tests/qmp-test.c
T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1559,20 +1477,9 @@ S: Maintained
F: trace/
F: scripts/tracetool.py
F: scripts/tracetool/
F: docs/devel/tracing.txt
F: docs/tracing.txt
T: git git://github.com/stefanha/qemu.git tracing
TPM
M: Stefan Berger <stefanb@linux.vnet.ibm.com>
S: Maintained
F: tpm.c
F: stubs/tpm.c
F: hw/tpm/*
F: include/hw/acpi/tpm.h
F: include/sysemu/tpm*
F: qapi/tpm.json
F: backends/tpm.c
Checkpatch
S: Odd Fixes
F: scripts/checkpatch.pl
@@ -1585,9 +1492,7 @@ F: include/migration/
F: migration/
F: scripts/vmstate-static-checker.py
F: tests/vmstate-static-checker-data/
F: tests/migration-test.c
F: docs/devel/migration.txt
F: qapi/migration.json
F: docs/migration.txt
Seccomp
M: Eduardo Otubo <otubo@redhat.com>
@@ -1601,7 +1506,6 @@ S: Maintained
F: crypto/
F: include/crypto/
F: tests/test-crypto-*
F: tests/benchmark-crypto-*
F: qemu.sasl
Coroutines
@@ -1631,17 +1535,14 @@ M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: include/qemu/sockets.h
F: util/qemu-sockets.c
F: qapi/sockets.json
Throttling infrastructure
M: Alberto Garcia <berto@igalia.com>
S: Supported
F: block/throttle-groups.c
F: include/block/throttle-groups.h
F: include/qemu/throttle*.h
F: include/qemu/throttle.h
F: util/throttle.c
F: docs/throttle.txt
F: tests/test-throttle.c
L: qemu-block@nongnu.org
UUID
@@ -1660,7 +1561,7 @@ F: include/migration/failover.h
F: docs/COLO-FT.txt
COLO Proxy
M: Zhang Chen <zhangckid@gmail.com>
M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
M: Li Zhijian <lizhijian@cn.fujitsu.com>
S: Supported
F: docs/colo-proxy.txt
@@ -1671,7 +1572,7 @@ F: net/filter-mirror.c
Record/replay
M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
R: Paolo Bonzini <pbonzini@redhat.com>
W: https://wiki.qemu.org/Features/record-replay
W: http://wiki.qemu.org/Features/record-replay
S: Supported
F: replay/*
F: block/blkreplay.c
@@ -1680,19 +1581,14 @@ F: include/sysemu/replay.h
F: docs/replay.txt
F: stubs/replay.c
IOVA Tree
M: Peter Xu <peterx@redhat.com>
S: Maintained
F: include/qemu/iova-tree.h
F: util/iova-tree.c
Usermode Emulation
------------------
Overall
M: Riku Voipio <riku.voipio@iki.fi>
S: Maintained
F: thunk.c
F: accel/tcg/user-exec*.c
F: user-exec.c
F: user-exec-stub.c
BSD user
S: Orphan
@@ -1735,6 +1631,12 @@ S: Maintained
F: tcg/i386/
F: disas/i386.c
IA64 target
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: tcg/ia64/
F: disas/ia64.c
MIPS target
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
@@ -1753,7 +1655,6 @@ M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/s390/
F: disas/s390.c
L: qemu-s390x@nongnu.org
SPARC target
S: Odd Fixes
@@ -1904,7 +1805,7 @@ M: Denis V. Lunev <den@openvz.org>
L: qemu-block@nongnu.org
S: Supported
F: block/parallels.c
F: docs/interop/parallels.txt
F: docs/specs/parallels.txt
qed
M: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1929,7 +1830,6 @@ M: Max Reitz <mreitz@redhat.com>
L: qemu-block@nongnu.org
S: Supported
F: block/qcow2*
F: docs/interop/qcow2.txt
qcow
M: Kevin Wolf <kwolf@redhat.com>
@@ -1973,7 +1873,6 @@ F: docs/block-replication.txt
Build and test automation
-------------------------
Build and test automation
M: Alex Bennée <alex.bennee@linaro.org>
M: Fam Zheng <famz@redhat.com>
R: Philippe Mathieu-Daudé <f4bug@amsat.org>
@@ -1982,7 +1881,6 @@ S: Maintained
F: .travis.yml
F: .shippable.yml
F: tests/docker/
F: tests/vm/
W: https://travis-ci.org/qemu/qemu
W: https://app.shippable.com/github/qemu/qemu
W: http://patchew.org/QEMU/
@@ -1992,11 +1890,5 @@ Documentation
Build system architecture
M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes
F: docs/devel/build-system.txt
F: docs/build-system.txt
Build System
------------
GIT submodules
M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes
F: scripts/git-submodule.sh
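For readers skimming these hunks: each MAINTAINERS entry is a block of tagged lines — M: maintainer to mail patches to, R: designated reviewer, L: relevant mailing list, W: web page, S: status (Supported, Maintained, Odd Fixes, Orphan), T: SCM tree URL, F: files and directories covered (glob patterns allowed), X: files excluded from a preceding F: pattern. A hypothetical entry exercising most tags:
Example Device
M: Jane Doe <jane@example.org>
R: John Roe <john@example.org>
L: qemu-devel@nongnu.org
S: Maintained
F: hw/misc/example-*.c
F: include/hw/misc/example.h
X: hw/misc/example-legacy.c
T: git git://example.org/qemu.git example-next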

Makefile

@@ -6,7 +6,7 @@ BUILD_DIR=$(CURDIR)
# Before including a proper config-host.mak, assume we are in the source tree
SRC_PATH=.
UNCHECKED_GOALS := %clean TAGS cscope ctags docker docker-% help
UNCHECKED_GOALS := %clean TAGS cscope ctags docker docker-%
# All following code might depend on configuration variables
ifneq ($(wildcard config-host.mak),)
@@ -14,36 +14,6 @@ ifneq ($(wildcard config-host.mak),)
all:
include config-host.mak
git-submodule-update:
.PHONY: git-submodule-update
git_module_status := $(shell \
cd '$(SRC_PATH)' && \
GIT="$(GIT)" ./scripts/git-submodule.sh status $(GIT_SUBMODULES); \
echo $$?; \
)
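# git_module_status captures the exit status of "git-submodule.sh status":
# 0 means the checked-out submodules are current, 1 (tested below) means
# the checkout is stale and an update is needed.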
ifeq (1,$(git_module_status))
ifeq (no,$(GIT_UPDATE))
git-submodule-update:
$(call quiet-command, \
echo && \
echo "GIT submodule checkout is out of date. Please run" && \
echo " scripts/git-submodule.sh update $(GIT_SUBMODULES)" && \
echo "from the source directory checkout $(SRC_PATH)" && \
echo && \
exit 1)
else
git-submodule-update:
$(call quiet-command, \
(cd $(SRC_PATH) && GIT="$(GIT)" ./scripts/git-submodule.sh update $(GIT_SUBMODULES)), \
"GIT","$(GIT_SUBMODULES)")
endif
endif
.git-submodule-status: git-submodule-update config-host.mak
# Check that we're not trying to do an out-of-tree build from
# a tree that's been used for an in-tree build.
ifneq ($(realpath $(SRC_PATH)),$(realpath .))
@@ -114,7 +84,6 @@ endif
GENERATED_FILES += $(TRACE_HEADERS)
GENERATED_FILES += $(TRACE_SOURCES)
GENERATED_FILES += $(BUILD_DIR)/trace-events-all
GENERATED_FILES += .git-submodule-status
trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
@@ -222,38 +191,13 @@ trace-dtrace-root.h: trace-dtrace-root.dtrace
trace-dtrace-root.o: trace-dtrace-root.dtrace
KEYCODEMAP_GEN = $(SRC_PATH)/ui/keycodemapdb/tools/keymap-gen
KEYCODEMAP_CSV = $(SRC_PATH)/ui/keycodemapdb/data/keymaps.csv
KEYCODEMAP_FILES = \
ui/input-keymap-linux-to-qcode.c \
ui/input-keymap-qcode-to-qnum.c \
ui/input-keymap-qnum-to-qcode.c \
$(NULL)
GENERATED_FILES += $(KEYCODEMAP_FILES)
ui/input-keymap-%.c: $(KEYCODEMAP_GEN) $(KEYCODEMAP_CSV) $(SRC_PATH)/ui/Makefile.objs
$(call quiet-command,\
src=$$(echo $@ | sed -E -e "s,^ui/input-keymap-(.+)-to-(.+)\.c$$,\1,") && \
dst=$$(echo $@ | sed -E -e "s,^ui/input-keymap-(.+)-to-(.+)\.c$$,\2,") && \
test -e $(KEYCODEMAP_GEN) && \
$(PYTHON) $(KEYCODEMAP_GEN) \
--lang glib2 \
--varname qemu_input_map_$${src}_to_$${dst} \
code-map $(KEYCODEMAP_CSV) $${src} $${dst} \
> $@ || rm -f $@, "GEN", "$@")
$(KEYCODEMAP_GEN): .git-submodule-status
$(KEYCODEMAP_CSV): .git-submodule-status
# Don't try to regenerate Makefile or configure
# We don't generate any of them
Makefile: ;
configure: ;
.PHONY: all clean cscope distclean html info install install-doc \
pdf txt recurse-all dist msi FORCE
pdf txt recurse-all speed test dist msi FORCE
$(call set-vpath, $(SRC_PATH))
@@ -265,7 +209,6 @@ ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7
DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7
DOCS+=docs/qemu-block-drivers.7
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -382,32 +325,26 @@ $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
subdir-%:
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $* V="$(V)" TARGET_DIR="$*/" all,)
subdir-pixman: pixman/Makefile
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pixman V="$(V)" all,)
pixman/Makefile: $(SRC_PATH)/pixman/configure
(cd pixman; CFLAGS="$(CFLAGS) -fPIC $(extra_cflags) $(extra_ldflags)" $(SRC_PATH)/pixman/configure $(AUTOCONF_HOST) --disable-gtk --disable-shared --enable-static)
$(SRC_PATH)/pixman/configure:
(cd $(SRC_PATH)/pixman; autoreconf -v --install)
DTC_MAKE_ARGS=-I$(SRC_PATH)/dtc VPATH=$(SRC_PATH)/dtc -C dtc V="$(V)" LIBFDT_srcdir=$(SRC_PATH)/dtc/libfdt
DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS)
DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
subdir-dtc: .git-submodule-status dtc/libfdt dtc/tests
subdir-dtc:dtc/libfdt dtc/tests
$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)
dtc/%: .git-submodule-status
dtc/%:
mkdir -p $@
# Overriding CFLAGS causes us to lose defines added in the sub-makefile.
# Not overriding CFLAGS leads to mis-matches between compilation modes.
# Therefore we replicate some of the logic in the sub-makefile.
# Remove all the extra -Warning flags that QEMU uses that Capstone doesn't;
# no need to annoy QEMU developers with such things.
CAP_CFLAGS = $(patsubst -W%,,$(CFLAGS) $(QEMU_CFLAGS))
CAP_CFLAGS += -DCAPSTONE_USE_SYS_DYN_MEM
CAP_CFLAGS += -DCAPSTONE_HAS_ARM
CAP_CFLAGS += -DCAPSTONE_HAS_ARM64
CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC
CAP_CFLAGS += -DCAPSTONE_HAS_X86
subdir-capstone: .git-submodule-status
$(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE))
$(SUBDIR_RULES): libqemuutil.a $(common-obj-y) $(chardev-obj-y) \
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
@@ -427,12 +364,12 @@ Makefile: $(version-obj-y)
######################################################################
# Build libraries
libqemuutil.a: $(util-obj-y) $(trace-obj-y) $(stub-obj-y)
libvhost-user.a: $(libvhost-user-obj-y)
libqemustub.a: $(stub-obj-y)
libqemuutil.a: $(util-obj-y) $(trace-obj-y)
######################################################################
COMMON_LDADDS = libqemuutil.a
COMMON_LDADDS = libqemuutil.a libqemustub.a
qemu-img.o: qemu-img-cmds.h
@@ -442,25 +379,15 @@ qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
qemu-keymap$(EXESUF): qemu-keymap.o ui/input-keymap.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
scsi/qemu-pr-helper$(EXESUF): scsi/qemu-pr-helper.o scsi/utils.o $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
ifdef CONFIG_MPATH
scsi/qemu-pr-helper$(EXESUF): LIBS += -ludev -lmultipath -lmpathpersist
endif
qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")
qemu-ga$(EXESUF): LIBS = $(LIBS_QGA)
qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated
qemu-keymap$(EXESUF): LIBS += $(XKBCOMMON_LIBS)
qemu-keymap$(EXESUF): QEMU_CFLAGS += $(XKBCOMMON_CFLAGS)
gen-out-type = $(subst .,-,$(suffix $@))
qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
@@ -483,18 +410,9 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
$(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
$(SRC_PATH)/qapi/char.json \
$(SRC_PATH)/qapi/crypto.json \
$(SRC_PATH)/qapi/introspect.json \
$(SRC_PATH)/qapi/migration.json \
$(SRC_PATH)/qapi/net.json \
$(SRC_PATH)/qapi/rocker.json \
$(SRC_PATH)/qapi/run-state.json \
$(SRC_PATH)/qapi/sockets.json \
$(SRC_PATH)/qapi/tpm.json \
$(SRC_PATH)/qapi/trace.json \
$(SRC_PATH)/qapi/transaction.json \
$(SRC_PATH)/qapi/ui.json
$(SRC_PATH)/qapi/event.json $(SRC_PATH)/qapi/introspect.json \
$(SRC_PATH)/qapi/crypto.json $(SRC_PATH)/qapi/rocker.json \
$(SRC_PATH)/qapi/trace.json
qapi-types.c qapi-types.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -556,7 +474,7 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
endif
vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a
vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y)
$(call LINK, $^)
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -571,7 +489,7 @@ clean:
rm -f *.msi
find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -f fsdev/*.pod scsi/*.pod
rm -f fsdev/*.pod
rm -f qemu-img-cmds.h
rm -f ui/shader/*-vert.h ui/shader/*-frag.h
@# May not be present in GENERATED_FILES
@@ -610,11 +528,11 @@ distclean: clean
rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
rm -f docs/qemu-block-drivers.7
for d in $(TARGET_DIRS); do \
rm -rf $$d || exit 1 ; \
done
rm -Rf .sdk
if test -f pixman/config.log; then $(MAKE) -C pixman distclean; fi
if test -f dtc/version_gen.h; then $(MAKE) $(DTC_MAKE_ARGS) clean; fi
KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
@@ -655,7 +573,6 @@ ifdef CONFIG_POSIX
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
$(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7"
ifneq ($(TOOLS),)
$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -716,6 +633,10 @@ endif
$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
done
# various test targets
test speed: all
$(MAKE) -C tests/tcg $@
.PHONY: ctags
ctags:
rm -f tags
@@ -744,10 +665,8 @@ ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclu
perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
"FRAG","$@")
ui/shader.o: $(SRC_PATH)/ui/shader.c \
ui/shader/texture-blit-vert.h \
ui/shader/texture-blit-flip-vert.h \
ui/shader/texture-blit-frag.h
ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h
# documentation
MAKEINFO=makeinfo
@@ -799,7 +718,6 @@ qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
qemu-ga.8: qemu-ga.texi
docs/qemu-block-drivers.7: docs/qemu-block-drivers.texi
html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info
@@ -809,7 +727,7 @@ txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt
qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
qemu-monitor-info.texi docs/qemu-block-drivers.texi
qemu-monitor-info.texi
docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \
docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \
@@ -895,7 +813,6 @@ endif
-include $(wildcard *.d tests/*.d)
include $(SRC_PATH)/tests/docker/Makefile.include
include $(SRC_PATH)/tests/vm/Makefile.include
.PHONY: help
help:
@@ -919,7 +836,6 @@ help:
@echo 'Test targets:'
@echo ' check - Run all tests (check-help for details)'
@echo ' docker - Help about targets running tests inside Docker containers'
@echo ' vm-test - Help about targets running tests inside VM'
@echo ''
@echo 'Documentation targets:'
@echo ' html info pdf txt'

Makefile.objs

@@ -11,7 +11,7 @@ chardev-obj-y = chardev/
block-obj-y += nbd/
block-obj-y += block.o blockjob.o
block-obj-y += block/ scsi/
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -62,7 +62,7 @@ bt-host.o-cflags := $(BLUEZ_CFLAGS)
common-obj-y += dma-helpers.o
common-obj-y += vl.o
vl.o-cflags := $(GPROF_CFLAGS) $(SDL_CFLAGS)
common-obj-$(CONFIG_TPM) += tpm.o
common-obj-y += tpm.o
common-obj-$(CONFIG_SLIRP) += slirp/
@@ -70,8 +70,6 @@ common-obj-y += backends/
common-obj-y += chardev/
common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
qemu-seccomp.o-cflags := $(SECCOMP_CFLAGS)
qemu-seccomp.o-libs := $(SECCOMP_LIBS)
common-obj-$(CONFIG_FDT) += device_tree.o
@@ -115,6 +113,7 @@ libvhost-user-obj-y = contrib/libvhost-user/
vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o
######################################################################
trace-events-subdirs =
@@ -154,7 +153,6 @@ trace-events-subdirs += hw/acpi
trace-events-subdirs += hw/arm
trace-events-subdirs += hw/alpha
trace-events-subdirs += hw/xen
trace-events-subdirs += hw/ide
trace-events-subdirs += ui
trace-events-subdirs += audio
trace-events-subdirs += net
@@ -170,7 +168,6 @@ trace-events-subdirs += qapi
trace-events-subdirs += accel/tcg
trace-events-subdirs += accel/kvm
trace-events-subdirs += nbd
trace-events-subdirs += scsi
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)

Makefile.target

@@ -22,7 +22,7 @@ QEMU_PROG_BUILD = $(QEMU_PROG)
else
# system emulator name
QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
ifneq (,$(findstring -mwindows,$(SDL_LIBS)))
ifneq (,$(findstring -mwindows,$(libs_softmmu)))
# Terminate program name with a 'w' because the linker builds a windows executable.
QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF)
$(QEMU_PROG): $(QEMU_PROGW)
@@ -94,13 +94,20 @@ all: $(PROGS) stap
obj-y += exec.o
obj-y += accel/
obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
obj-$(CONFIG_TCG) += tcg/tcg-common.o
obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/tcg-runtime.o
obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target/$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal32.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal64.o
obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal128.o
#########################################################
# Linux user emulator target
@@ -112,7 +119,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
-I$(SRC_PATH)/linux-user
obj-y += linux-user/
obj-y += gdbstub.o thunk.o
obj-y += gdbstub.o thunk.o user-exec.o user-exec-stub.o
endif #CONFIG_LINUX_USER
@@ -125,7 +132,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
-I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)
obj-y += bsd-user/
obj-y += gdbstub.o
obj-y += gdbstub.o user-exec.o user-exec-stub.o
endif #CONFIG_BSD_USER
@@ -186,7 +193,7 @@ all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)
$(QEMU_PROG_BUILD): config-devices.mak
COMMON_LDADDS = ../libqemuutil.a
COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a
# build either PROG or PROGW
$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)

README

@@ -44,9 +44,9 @@ of other UNIX targets. The simple steps to build QEMU are:
Additional information can also be found online via the QEMU website:
https://qemu.org/Hosts/Linux
https://qemu.org/Hosts/Mac
https://qemu.org/Hosts/W32
http://qemu-project.org/Hosts/Linux
http://qemu-project.org/Hosts/Mac
http://qemu-project.org/Hosts/W32
Submitting patches
@@ -54,7 +54,7 @@ Submitting patches
The QEMU source code is maintained under the GIT version control system.
git clone git://git.qemu.org/qemu.git
git clone git://git.qemu-project.org/qemu.git
When submitting patches, the preferred approach is to use 'git
format-patch' and/or 'git send-email' to format & send the mail to the
@@ -65,8 +65,8 @@ guidelines set out in the HACKING and CODING_STYLE files.
Additional information on submitting patches can be found online via
the QEMU website
https://qemu.org/Contribute/SubmitAPatch
https://qemu.org/Contribute/TrivialPatches
http://qemu-project.org/Contribute/SubmitAPatch
http://qemu-project.org/Contribute/TrivialPatches
Bug reporting
@@ -85,7 +85,7 @@ reported via launchpad.
For additional information on bug reporting consult:
https://qemu.org/Contribute/ReportABug
http://qemu-project.org/Contribute/ReportABug
Contact
@@ -95,12 +95,12 @@ The QEMU community can be contacted in a number of ways, with the two
main methods being email and IRC
- qemu-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/qemu-devel
http://lists.nongnu.org/mailman/listinfo/qemu-devel
- #qemu on irc.oftc.net
Information on additional methods of contacting the community can be
found online via the QEMU website:
https://qemu.org/Contribute/StartHere
http://qemu-project.org/Contribute/StartHere
-- End

VERSION

@@ -1 +1 @@
2.11.2
2.10.2

accel/kvm/kvm-all.c

@@ -79,6 +79,7 @@ struct KVMState
int coalesced_mmio;
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
bool coalesced_flush_in_progress;
int broken_set_mem_region;
int vcpu_events;
int robust_singlestep;
int debugregs;
@@ -87,7 +88,6 @@ struct KVMState
#endif
int many_ioeventfds;
int intx_set_mask;
bool sync_mmu;
/* The man page (and posix) say ioctl numbers are signed int, but
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
* unsigned, and treating them as signed here can break things */
@@ -127,7 +127,6 @@ static bool kvm_immediate_exit;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
KVM_CAP_INFO(USER_MEMORY),
KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
KVM_CAP_INFO(JOIN_MEMORY_REGIONS_WORKS),
KVM_CAP_LAST_INFO
};
@@ -173,7 +172,7 @@ static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
hwaddr start_addr,
hwaddr size)
hwaddr end_addr)
{
KVMState *s = kvm_state;
int i;
@@ -181,7 +180,8 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
for (i = 0; i < s->nr_slots; i++) {
KVMSlot *mem = &kml->slots[i];
if (start_addr == mem->start_addr && size == mem->memory_size) {
if (start_addr == mem->start_addr &&
end_addr == mem->start_addr + mem->memory_size) {
return mem;
}
}
@@ -190,27 +190,31 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
}
/*
* Calculate and align the start address and the size of the section.
* Return the size. If the size is 0, the aligned section is empty.
* Find overlapping slot with lowest start address
*/
static hwaddr kvm_align_section(MemoryRegionSection *section,
hwaddr *start)
static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
hwaddr start_addr,
hwaddr end_addr)
{
hwaddr size = int128_get64(section->size);
hwaddr delta, aligned;
KVMState *s = kvm_state;
KVMSlot *found = NULL;
int i;
/* kvm works in page size chunks, but the function may be called
with sub-page size and unaligned start address. Pad the start
address to next and truncate size to previous page boundary. */
aligned = ROUND_UP(section->offset_within_address_space,
qemu_real_host_page_size);
delta = aligned - section->offset_within_address_space;
*start = aligned;
if (delta > size) {
return 0;
for (i = 0; i < s->nr_slots; i++) {
KVMSlot *mem = &kml->slots[i];
if (mem->memory_size == 0 ||
(found && found->start_addr < mem->start_addr)) {
continue;
}
if (end_addr > mem->start_addr &&
start_addr < mem->start_addr + mem->memory_size) {
found = mem;
}
}
return (size - delta) & qemu_real_host_page_mask;
return found;
}
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
@@ -378,21 +382,15 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem,
static int kvm_section_update_flags(KVMMemoryListener *kml,
MemoryRegionSection *section)
{
hwaddr start_addr, size;
KVMSlot *mem;
hwaddr phys_addr = section->offset_within_address_space;
ram_addr_t size = int128_get64(section->size);
KVMSlot *mem = kvm_lookup_matching_slot(kml, phys_addr, phys_addr + size);
size = kvm_align_section(section, &start_addr);
if (!size) {
if (mem == NULL) {
return 0;
} else {
return kvm_slot_update_flags(kml, mem, section->mr);
}
mem = kvm_lookup_matching_slot(kml, start_addr, size);
if (!mem) {
/* We don't have a slot if we want to trap every access. */
return 0;
}
return kvm_slot_update_flags(kml, mem, section->mr);
}
static void kvm_log_start(MemoryListener *listener,
@@ -456,16 +454,18 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
MemoryRegionSection *section)
{
KVMState *s = kvm_state;
unsigned long size, allocated_size = 0;
struct kvm_dirty_log d = {};
KVMSlot *mem;
hwaddr start_addr, size;
int ret = 0;
hwaddr start_addr = section->offset_within_address_space;
hwaddr end_addr = start_addr + int128_get64(section->size);
size = kvm_align_section(section, &start_addr);
if (size) {
mem = kvm_lookup_matching_slot(kml, start_addr, size);
if (!mem) {
/* We don't have a slot if we want to trap every access. */
return 0;
d.dirty_bitmap = NULL;
while (start_addr < end_addr) {
mem = kvm_lookup_overlapping_slot(kml, start_addr, end_addr);
if (mem == NULL) {
break;
}
/* XXX bad kernel interface alert
@@ -482,20 +482,27 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
*/
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
/*HOST_LONG_BITS*/ 64) / 8;
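/* e.g. a 1 GiB slot with 4 KiB target pages: 1 GiB >> 12 = 262144 page
 * bits, already a multiple of 64, so the bitmap is 262144 / 8 = 32768
 * bytes */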
d.dirty_bitmap = g_malloc0(size);
if (!d.dirty_bitmap) {
d.dirty_bitmap = g_malloc(size);
} else if (size > allocated_size) {
d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
}
allocated_size = size;
memset(d.dirty_bitmap, 0, allocated_size);
d.slot = mem->slot | (kml->as_id << 16);
if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
DPRINTF("ioctl failed %d\n", errno);
g_free(d.dirty_bitmap);
return -1;
ret = -1;
break;
}
kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
g_free(d.dirty_bitmap);
start_addr = mem->start_addr + mem->memory_size;
}
g_free(d.dirty_bitmap);
return 0;
return ret;
}
static void kvm_coalesce_mmio_region(MemoryListener *listener,
@@ -689,12 +696,30 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
static void kvm_set_phys_mem(KVMMemoryListener *kml,
MemoryRegionSection *section, bool add)
{
KVMSlot *mem;
KVMState *s = kvm_state;
KVMSlot *mem, old;
int err;
MemoryRegion *mr = section->mr;
bool writeable = !mr->readonly && !mr->rom_device;
hwaddr start_addr, size;
void *ram;
hwaddr start_addr = section->offset_within_address_space;
ram_addr_t size = int128_get64(section->size);
void *ram = NULL;
unsigned delta;
/* kvm works in page size chunks, but the function may be called
with sub-page size and unaligned start address. Pad the start
address to next and truncate size to previous page boundary. */
delta = qemu_real_host_page_size - (start_addr & ~qemu_real_host_page_mask);
delta &= ~qemu_real_host_page_mask;
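/* delta is now the distance from start_addr up to the next host-page
 * boundary; the second mask folds a whole page_size back to 0 when
 * start_addr was already aligned */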
if (delta > size) {
return;
}
start_addr += delta;
size -= delta;
size &= qemu_real_host_page_mask;
if (!size || (start_addr & ~qemu_real_host_page_mask)) {
return;
}
if (!memory_region_is_ram(mr)) {
if (writeable || !kvm_readonly_mem_allowed) {
@@ -706,36 +731,115 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
}
}
size = kvm_align_section(section, &start_addr);
if (!size) {
return;
}
ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
/* use aligned delta to align the ram address */
ram = memory_region_get_ram_ptr(mr) + section->offset_within_region +
(start_addr - section->offset_within_address_space);
if (!add) {
mem = kvm_lookup_matching_slot(kml, start_addr, size);
while (1) {
mem = kvm_lookup_overlapping_slot(kml, start_addr, start_addr + size);
if (!mem) {
break;
}
if (add && start_addr >= mem->start_addr &&
(start_addr + size <= mem->start_addr + mem->memory_size) &&
(ram - start_addr == mem->ram - mem->start_addr)) {
/* The new slot fits into the existing one and comes with
* identical parameters - update flags and done. */
kvm_slot_update_flags(kml, mem, mr);
return;
}
old = *mem;
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
kvm_physical_sync_dirty_bitmap(kml, section);
}
/* unregister the slot */
/* unregister the overlapping slot */
mem->memory_size = 0;
err = kvm_set_user_memory_region(kml, mem);
if (err) {
fprintf(stderr, "%s: error unregistering slot: %s\n",
fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
__func__, strerror(-err));
abort();
}
return;
/* Workaround for older KVM versions: we can't join slots, even not by
* unregistering the previous ones and then registering the larger
* slot. We have to maintain the existing fragmentation. Sigh.
*
* This workaround assumes that the new slot starts at the same
* address as the first existing one. If not or if some overlapping
* slot comes around later, we will fail (not seen in practice so far)
* - and actually require a recent KVM version. */
if (s->broken_set_mem_region &&
old.start_addr == start_addr && old.memory_size < size && add) {
mem = kvm_alloc_slot(kml);
mem->memory_size = old.memory_size;
mem->start_addr = old.start_addr;
mem->ram = old.ram;
mem->flags = kvm_mem_flags(mr);
err = kvm_set_user_memory_region(kml, mem);
if (err) {
fprintf(stderr, "%s: error updating slot: %s\n", __func__,
strerror(-err));
abort();
}
start_addr += old.memory_size;
ram += old.memory_size;
size -= old.memory_size;
continue;
}
/* register prefix slot */
if (old.start_addr < start_addr) {
mem = kvm_alloc_slot(kml);
mem->memory_size = start_addr - old.start_addr;
mem->start_addr = old.start_addr;
mem->ram = old.ram;
mem->flags = kvm_mem_flags(mr);
err = kvm_set_user_memory_region(kml, mem);
if (err) {
fprintf(stderr, "%s: error registering prefix slot: %s\n",
__func__, strerror(-err));
#ifdef TARGET_PPC
fprintf(stderr, "%s: This is probably because your kernel's " \
"PAGE_SIZE is too big. Please try to use 4k " \
"PAGE_SIZE!\n", __func__);
#endif
abort();
}
}
/* register suffix slot */
if (old.start_addr + old.memory_size > start_addr + size) {
ram_addr_t size_delta;
mem = kvm_alloc_slot(kml);
mem->start_addr = start_addr + size;
size_delta = mem->start_addr - old.start_addr;
mem->memory_size = old.memory_size - size_delta;
mem->ram = old.ram + size_delta;
mem->flags = kvm_mem_flags(mr);
err = kvm_set_user_memory_region(kml, mem);
if (err) {
fprintf(stderr, "%s: error registering suffix slot: %s\n",
__func__, strerror(-err));
abort();
}
}
}
/* register the new slot */
/* in case the KVM bug workaround already "consumed" the new slot */
if (!size) {
return;
}
if (!add) {
return;
}
mem = kvm_alloc_slot(kml);
mem->memory_size = size;
mem->start_addr = start_addr;
@@ -1144,7 +1248,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
int virq;
MSIMessage msg = {0, 0};
if (pci_available && dev) {
if (dev) {
msg = pci_get_msi_message(dev, vector);
}
@@ -1167,7 +1271,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
kroute.u.msi.address_lo = (uint32_t)msg.address;
kroute.u.msi.address_hi = msg.address >> 32;
kroute.u.msi.data = le32_to_cpu(msg.data);
if (pci_available && kvm_msi_devid_required()) {
if (kvm_msi_devid_required()) {
kroute.flags = KVM_MSI_VALID_DEVID;
kroute.u.msi.devid = pci_requester_id(dev);
}
@@ -1205,7 +1309,7 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
kroute.u.msi.address_lo = (uint32_t)msg.address;
kroute.u.msi.address_hi = msg.address >> 32;
kroute.u.msi.data = le32_to_cpu(msg.data);
if (pci_available && kvm_msi_devid_required()) {
if (kvm_msi_devid_required()) {
kroute.flags = KVM_MSI_VALID_DEVID;
kroute.u.msi.devid = pci_requester_id(dev);
}
@@ -1429,7 +1533,7 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s)
*/
static int kvm_recommended_vcpus(KVMState *s)
{
int ret = kvm_vm_check_extension(s, KVM_CAP_NR_VCPUS);
int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
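/* kvm_check_extension() returns 0 when the kernel offers no
 * recommendation; fall back to 4 vCPUs in that case */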
return (ret) ? ret : 4;
}
@@ -1519,6 +1623,27 @@ static int kvm_init(MachineState *ms)
s->nr_slots = 32;
}
/* check the vcpu limits */
soft_vcpus_limit = kvm_recommended_vcpus(s);
hard_vcpus_limit = kvm_max_vcpus(s);
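/* exceeding the soft (recommended) limit only prints a warning;
 * exceeding the hard limit is fatal, see exit(1) below */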
while (nc->name) {
if (nc->num > soft_vcpus_limit) {
fprintf(stderr,
"Warning: Number of %s cpus requested (%d) exceeds "
"the recommended cpus supported by KVM (%d)\n",
nc->name, nc->num, soft_vcpus_limit);
if (nc->num > hard_vcpus_limit) {
fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
"the maximum cpus supported by KVM (%d)\n",
nc->name, nc->num, hard_vcpus_limit);
exit(1);
}
}
nc++;
}
kvm_type = qemu_opt_get(qemu_get_machine_opts(), "kvm-type");
if (mc->kvm_type) {
type = mc->kvm_type(kvm_type);
@@ -1553,27 +1678,6 @@ static int kvm_init(MachineState *ms)
}
s->vmfd = ret;
/* check the vcpu limits */
soft_vcpus_limit = kvm_recommended_vcpus(s);
hard_vcpus_limit = kvm_max_vcpus(s);
while (nc->name) {
if (nc->num > soft_vcpus_limit) {
warn_report("Number of %s cpus requested (%d) exceeds "
"the recommended cpus supported by KVM (%d)",
nc->name, nc->num, soft_vcpus_limit);
if (nc->num > hard_vcpus_limit) {
fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
"the maximum cpus supported by KVM (%d)\n",
nc->name, nc->num, hard_vcpus_limit);
exit(1);
}
}
nc++;
}
missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
if (!missing_cap) {
missing_cap =
@@ -1588,6 +1692,12 @@ static int kvm_init(MachineState *ms)
s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
s->broken_set_mem_region = 1;
ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
if (ret > 0) {
s->broken_set_mem_region = 0;
}
#ifdef KVM_CAP_VCPU_EVENTS
s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif
@@ -1655,8 +1765,6 @@ static int kvm_init(MachineState *ms)
s->many_ioeventfds = kvm_check_many_ioeventfds();
s->sync_mmu = !!kvm_vm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
return 0;
err:
@@ -2123,9 +2231,10 @@ int kvm_device_access(int fd, int group, uint64_t attr,
return err;
}
bool kvm_has_sync_mmu(void)
/* Return 1 on success, 0 on failure */
int kvm_has_sync_mmu(void)
{
return kvm_state->sync_mmu;
return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
}
int kvm_has_vcpu_events(void)
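Both versions of the slot-registration code above enforce the same page-alignment rule — kvm_align_section() on one side, the open-coded delta computation in kvm_set_phys_mem() on the other: round the section start up to a host-page boundary and truncate the size down to one, because KVM only accepts page-granular memory slots. A minimal standalone sketch of that arithmetic, assuming a 4 KiB host page (QEMU really uses qemu_real_host_page_size and qemu_real_host_page_mask):
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#define PAGE_SIZE  4096ULL
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
int main(void)
{
    uint64_t start = 0x100a00;   /* unaligned section start */
    uint64_t size  = 0x3000;     /* 12 KiB section */
    uint64_t aligned = ROUND_UP(start, PAGE_SIZE);
    uint64_t delta = aligned - start;
    if (delta > size) {
        printf("section smaller than one page: nothing to register\n");
        return 0;
    }
    size = (size - delta) & PAGE_MASK;
    printf("aligned start 0x%" PRIx64 ", size 0x%" PRIx64 "\n", aligned, size);
    return 0;
}
This prints "aligned start 0x101000, size 0x2000": the sub-page head and tail of the section are simply never handed to KVM.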

accel/stubs/Makefile.objs

@@ -1,3 +1,2 @@
obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o

accel/stubs/kvm-stub.c

@@ -64,9 +64,9 @@ int kvm_cpu_exec(CPUState *cpu)
abort();
}
bool kvm_has_sync_mmu(void)
int kvm_has_sync_mmu(void)
{
return false;
return 0;
}
int kvm_has_many_ioeventfds(void)

accel/stubs/tcg-stub.c

@@ -20,11 +20,3 @@
void tb_flush(CPUState *cpu)
{
}
void tb_unlock(void)
{
}
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
}

accel/tcg/Makefile.objs

@@ -1,8 +1,3 @@
obj-$(CONFIG_SOFTMMU) += tcg-all.o
obj-$(CONFIG_SOFTMMU) += cputlb.o
obj-y += tcg-runtime.o
obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
obj-y += translator.o
obj-$(CONFIG_USER_ONLY) += user-exec.o
obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o

accel/tcg/cpu-exec.c

@@ -28,7 +28,6 @@
#include "exec/address-spaces.h"
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/tb-lookup.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
@@ -143,11 +142,11 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
uintptr_t ret;
TranslationBlock *last_tb;
int tb_exit;
uint8_t *tb_ptr = itb->tc.ptr;
uint8_t *tb_ptr = itb->tc_ptr;
qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
"Trace %p [%d: " TARGET_FMT_lx "] %s\n",
itb->tc.ptr, cpu->cpu_index, itb->pc,
itb->tc_ptr, cpu->cpu_index, itb->pc,
lookup_symbol(itb->pc));
#if defined(DEBUG_DISAS)
@@ -179,7 +178,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
"Stopped execution of TB chain before %p ["
TARGET_FMT_lx "] %s\n",
last_tb->tc.ptr, last_tb->pc,
last_tb->tc_ptr, last_tb->pc,
lookup_symbol(last_tb->pc));
if (cc->synchronize_from_tb) {
cc->synchronize_from_tb(cpu, last_tb);
@@ -198,19 +197,16 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
TranslationBlock *orig_tb, bool ignore_icount)
{
TranslationBlock *tb;
uint32_t cflags = curr_cflags() | CF_NOCACHE;
if (ignore_icount) {
cflags &= ~CF_USE_ICOUNT;
}
/* Should never happen.
We only end up here when an existing TB is too long. */
cflags |= MIN(max_cycles, CF_COUNT_MASK);
if (max_cycles > CF_COUNT_MASK)
max_cycles = CF_COUNT_MASK;
tb_lock();
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base,
orig_tb->flags, cflags);
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
tb->orig_tb = orig_tb;
tb_unlock();
@@ -220,45 +216,39 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb_lock();
tb_phys_invalidate(tb, -1);
tb_remove(tb);
tb_free(tb);
tb_unlock();
}
#endif
void cpu_exec_step_atomic(CPUState *cpu)
static void cpu_exec_step(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
uint32_t cflags = 1;
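/* a count field of 1 in cflags asks for a TB containing exactly one
 * guest instruction, so the exclusive step executes a single insn */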
uint32_t cf_mask = cflags & CF_HASH_MASK;
/* volatile because we modify it between setjmp and longjmp */
volatile bool in_exclusive_region = false;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
if (tb == NULL) {
mmap_lock();
tb_lock();
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
if (likely(tb == NULL)) {
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
}
tb_unlock();
mmap_unlock();
}
mmap_lock();
tb_lock();
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
tb_unlock();
mmap_unlock();
start_exclusive();
/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
in_exclusive_region = true;
cc->cpu_exec_enter(cpu);
/* execute the generated code */
trace_exec_tb(tb, pc);
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
cc->cpu_exec_exit(cpu);
tb_lock();
tb_phys_invalidate(tb, -1);
tb_free(tb);
tb_unlock();
} else {
/* We may have exited due to another problem here, so we need
* to reset any tb_locks we may have taken but didn't release.
@@ -270,15 +260,18 @@ void cpu_exec_step_atomic(CPUState *cpu)
#endif
tb_lock_reset();
}
}
if (in_exclusive_region) {
/* We might longjump out of either the codegen or the
* execution, so must make sure we only end the exclusive
* region if we started it.
*/
parallel_cpus = true;
end_exclusive();
}
void cpu_exec_step_atomic(CPUState *cpu)
{
start_exclusive();
/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
cpu_exec_step(cpu);
parallel_cpus = true;
end_exclusive();
}
struct tb_desc {
@@ -287,7 +280,6 @@ struct tb_desc {
CPUArchState *env;
tb_page_addr_t phys_page1;
uint32_t flags;
uint32_t cf_mask;
uint32_t trace_vcpu_dstate;
};
@@ -301,7 +293,7 @@ static bool tb_cmp(const void *p, const void *d)
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
(tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
!atomic_read(&tb->invalid)) {
/* check next page if needed */
if (tb->page_addr[1] == -1) {
return true;
@@ -320,8 +312,7 @@ static bool tb_cmp(const void *p, const void *d)
}
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
uint32_t cf_mask)
target_ulong cs_base, uint32_t flags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
@@ -330,79 +321,55 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
desc.cf_mask = cf_mask;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
return qht_lookup(&tb_ctx.htable, tb_cmp, &desc, h);
}
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
if (TCG_TARGET_HAS_direct_jump) {
uintptr_t offset = tb->jmp_target_arg[n];
uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr;
tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr);
} else {
tb->jmp_target_arg[n] = addr;
}
}
/* Called with tb_lock held. */
static inline void tb_add_jump(TranslationBlock *tb, int n,
TranslationBlock *tb_next)
{
assert(n < ARRAY_SIZE(tb->jmp_list_next));
if (tb->jmp_list_next[n]) {
/* Another thread has already done this while we were
* outside of the lock; nothing to do in this case */
return;
}
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
"Linking TBs %p [" TARGET_FMT_lx
"] index %d -> %p [" TARGET_FMT_lx "]\n",
tb->tc.ptr, tb->pc, n,
tb_next->tc.ptr, tb_next->pc);
/* patch the native jump address */
tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
/* add in TB jmp circular list */
tb->jmp_list_next[n] = tb_next->jmp_list_first;
tb_next->jmp_list_first = (uintptr_t)tb | n;
h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
}
static inline TranslationBlock *tb_find(CPUState *cpu,
TranslationBlock *last_tb,
int tb_exit, uint32_t cf_mask)
int tb_exit)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
bool acquired_tb_lock = false;
bool have_tb_lock = false;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
if (tb == NULL) {
/* mmap_lock is needed by tb_gen_code, and mmap_lock must be
* taken outside tb_lock. As system emulation is currently
* single threaded the locks are NOPs.
*/
mmap_lock();
tb_lock();
acquired_tb_lock = true;
/* we record a subset of the CPU state. It will
always be the same before a given translated block
is executed. */
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags ||
tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
tb = tb_htable_lookup(cpu, pc, cs_base, flags);
if (!tb) {
/* There's a chance that our desired tb has been translated while
* taking the locks so we check again inside the lock.
*/
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
if (likely(tb == NULL)) {
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
/* mmap_lock is needed by tb_gen_code, and mmap_lock must be
* taken outside tb_lock. As system emulation is currently
* single threaded the locks are NOPs.
*/
mmap_lock();
tb_lock();
have_tb_lock = true;
/* There's a chance that our desired tb has been translated while
* taking the locks so we check again inside the lock.
*/
tb = tb_htable_lookup(cpu, pc, cs_base, flags);
if (!tb) {
/* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
}
mmap_unlock();
}
mmap_unlock();
/* We add the TB in the virtual pc hash table for the fast lookup */
atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
}
@@ -417,15 +384,15 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
#endif
/* See if we can patch the calling TB. */
if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
if (!acquired_tb_lock) {
if (!have_tb_lock) {
tb_lock();
acquired_tb_lock = true;
have_tb_lock = true;
}
if (!(tb->cflags & CF_INVALID)) {
if (!tb->invalid) {
tb_add_jump(last_tb, tb_exit, tb);
}
}
if (acquired_tb_lock) {
if (have_tb_lock) {
tb_unlock();
}
return tb;
@@ -470,51 +437,48 @@ static inline void cpu_handle_debug_exception(CPUState *cpu)
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
if (replay_has_exception()
&& cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
/* try to cause an exception pending in the log */
cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true);
}
#endif
if (cpu->exception_index < 0) {
return false;
}
}
if (cpu->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */
*ret = cpu->exception_index;
if (*ret == EXCP_DEBUG) {
cpu_handle_debug_exception(cpu);
}
cpu->exception_index = -1;
return true;
} else {
#if defined(CONFIG_USER_ONLY)
/* if user mode only, we simulate a fake exception
which will be handled outside the cpu execution
loop */
#if defined(TARGET_I386)
CPUClass *cc = CPU_GET_CLASS(cpu);
cc->do_interrupt(cpu);
#endif
*ret = cpu->exception_index;
cpu->exception_index = -1;
return true;
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
if (cpu->exception_index >= 0) {
if (cpu->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */
*ret = cpu->exception_index;
if (*ret == EXCP_DEBUG) {
cpu_handle_debug_exception(cpu);
}
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
*ret = EXCP_INTERRUPT;
return true;
} else {
#if defined(CONFIG_USER_ONLY)
/* if user mode only, we simulate a fake exception
which will be handled outside the cpu execution
loop */
#if defined(TARGET_I386)
CPUClass *cc = CPU_GET_CLASS(cpu);
cc->do_interrupt(cpu);
#endif
*ret = cpu->exception_index;
cpu->exception_index = -1;
return true;
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
*ret = EXCP_INTERRUPT;
return true;
}
#endif
}
#ifndef CONFIG_USER_ONLY
} else if (replay_has_exception()
&& cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
/* try to cause an exception pending in the log */
cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true);
*ret = -1;
return true;
#endif
}
@@ -525,19 +489,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
int32_t insns_left;
/* Clear the interrupt flag now since we're processing
* cpu->interrupt_request and cpu->exit_request.
*/
insns_left = atomic_read(&cpu->icount_decr.u32);
atomic_set(&cpu->icount_decr.u16.high, 0);
if (unlikely(insns_left < 0)) {
/* Ensure the zeroing of icount_decr comes before the next read
* of cpu->exit_request or cpu->interrupt_request.
*/
smp_mb();
}
if (unlikely(atomic_read(&cpu->interrupt_request))) {
int interrupt_request;
@@ -634,14 +585,17 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
*last_tb = NULL;
insns_left = atomic_read(&cpu->icount_decr.u32);
atomic_set(&cpu->icount_decr.u16.high, 0);
if (insns_left < 0) {
/* Something asked us to stop executing chained TBs; just
* continue round the main loop. Whatever requested the exit
* will also have set something else (eg exit_request or
* interrupt_request) which will be handled by
* cpu_handle_interrupt. cpu_handle_interrupt will also
* clear cpu->icount_decr.u16.high.
* interrupt_request) which we will handle next time around
* the loop. But we need to ensure the zeroing of icount_decr
* comes before the next read of cpu->exit_request
* or cpu->interrupt_request.
*/
smp_mb();
return;
}
@@ -718,21 +672,7 @@ int cpu_exec(CPUState *cpu)
int tb_exit = 0;
while (!cpu_handle_interrupt(cpu, &last_tb)) {
uint32_t cflags = cpu->cflags_next_tb;
TranslationBlock *tb;
/* When requested, use an exact setting for cflags for the next
execution. This is used for icount, precise smc, and stop-
after-access watchpoints. Since this request should never
have CF_INVALID set, -1 is a convenient invalid value that
does not require tcg headers for cpu_common_reset. */
if (cflags == -1) {
cflags = curr_cflags();
} else {
cpu->cflags_next_tb = -1;
}
tb = tb_find(cpu, last_tb, tb_exit, cflags);
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
/* Try to align the host and virtual clocks
if the guest is in advance */
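Both versions of tb_find() above share the same two-level lookup: a direct-mapped per-vCPU jump cache indexed by a cheap hash of the guest PC, then the global hash table (re-checked under mmap_lock/tb_lock), then translation on a true miss. A condensed, self-contained sketch — the types, cache size and hash here are illustrative, and the real code also matches trace_vcpu_dstate (and cflags on one side):
#include <stdint.h>
#define JMP_CACHE_SIZE (1u << 12)
typedef struct TB {
    uint64_t pc, cs_base;
    uint32_t flags;
} TB;
static TB *jmp_cache[JMP_CACHE_SIZE];
static unsigned jc_hash(uint64_t pc)
{
    return (pc >> 2) & (JMP_CACHE_SIZE - 1);
}
static TB *htable_lookup(uint64_t pc, uint64_t cs_base, uint32_t flags)
{
    (void)pc; (void)cs_base; (void)flags;
    return NULL;                              /* stand-in for the QHT lookup */
}
static TB *gen_code(uint64_t pc, uint64_t cs_base, uint32_t flags)
{
    static TB tb;                             /* stand-in for the translator */
    tb = (TB){ .pc = pc, .cs_base = cs_base, .flags = flags };
    return &tb;
}
TB *find_tb(uint64_t pc, uint64_t cs_base, uint32_t flags)
{
    TB *tb = jmp_cache[jc_hash(pc)];          /* lock-free fast path */
    if (!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags) {
        tb = htable_lookup(pc, cs_base, flags);   /* slow path, under locks */
        if (!tb) {
            tb = gen_code(pc, cs_base, flags);    /* translate on miss */
        }
        jmp_cache[jc_hash(pc)] = tb;              /* refill the fast path */
    }
    return tb;
}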

accel/tcg/cputlb.c

@@ -92,18 +92,8 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
}
}
size_t tlb_flush_count(void)
{
CPUState *cpu;
size_t count = 0;
CPU_FOREACH(cpu) {
CPUArchState *env = cpu->env_ptr;
count += atomic_read(&env->tlb_flush_count);
}
return count;
}
/* statistics */
int tlb_flush_count;
/* This is OK because CPU architectures generally permit an
* implementation to drop entries from the TLB at any time, so
@@ -122,8 +112,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
}
assert_cpu_is_self(cpu);
atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
tlb_debug("(count: %zu)\n", tlb_flush_count());
tlb_debug("(count: %d)\n", tlb_flush_count++);
tb_lock();
@@ -694,9 +683,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
} else {
tn.addr_write = address;
}
if (prot & PAGE_WRITE_INV) {
tn.addr_write |= TLB_INVALID_MASK;
}
}
/* Pairs with flag setting in tlb_reset_dirty_range */
@@ -761,7 +747,6 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
}
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
int mmu_idx,
target_ulong addr, uintptr_t retaddr, int size)
{
CPUState *cpu = ENV_GET_CPU(env);
@@ -769,7 +754,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
bool locked = false;
MemTxResult r;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
@@ -783,12 +767,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
r = memory_region_dispatch_read(mr, physaddr,
&val, size, iotlbentry->attrs);
if (r != MEMTX_OK) {
cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_LOAD,
mmu_idx, iotlbentry->attrs, r, retaddr);
}
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
@@ -797,7 +776,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
int mmu_idx,
uint64_t val, target_ulong addr,
uintptr_t retaddr, int size)
{
@@ -805,7 +783,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
bool locked = false;
MemTxResult r;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
@@ -818,12 +795,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
r = memory_region_dispatch_write(mr, physaddr,
val, size, iotlbentry->attrs);
if (r != MEMTX_OK) {
cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
mmu_idx, iotlbentry->attrs, r, retaddr);
}
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
if (locked) {
qemu_mutex_unlock_iothread();
}
@@ -873,7 +845,6 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
MemoryRegion *mr;
CPUState *cpu = ENV_GET_CPU(env);
CPUIOTLBEntry *iotlbentry;
hwaddr physaddr;
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env, true);
@@ -897,19 +868,6 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
}
qemu_mutex_unlock_iothread();
/* Give the new-style cpu_transaction_failed() hook first chance
* to handle this.
* This is not the ideal place to detect and generate CPU
* exceptions for instruction fetch failure (for instance
* we don't know the length of the access that the CPU would
* use, and it would be better to go ahead and try the access
* and use the MemTXResult it produced). However it is the
* simplest place we have currently available for the check.
*/
physaddr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
cpu_transaction_failed(cpu, physaddr, addr, 0, MMU_INST_FETCH, mmu_idx,
iotlbentry->attrs, MEMTX_DECODE_ERROR, 0);
cpu_unassigned_access(cpu, addr, false, true, 0, 4);
/* The CPU's unassigned access hook might have longjumped out
* with an exception. If it didn't (or there was no hook) then
@@ -946,8 +904,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
/* Probe for a read-modify-write atomic operation. Do not allow unaligned
* operations, or io operations to proceed. Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr,
NotDirtyInfo *ndi)
TCGMemOpIdx oi, uintptr_t retaddr)
{
size_t mmu_idx = get_mmuidx(oi);
size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
@@ -956,7 +913,6 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
TCGMemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
int s_bits = mop & MO_SIZE;
void *hostaddr;
/* Adjust the given return address. */
retaddr -= GETPC_ADJ;
@@ -983,18 +939,24 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
if (!VICTIM_TLB_HIT(addr_write, addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
}
tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
tlb_addr = tlbe->addr_write;
}
/* Check notdirty */
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
tlb_set_dirty(ENV_GET_CPU(env), addr);
tlb_addr = tlb_addr & ~TLB_NOTDIRTY;
}
/* Notice an IO access */
if (unlikely(tlb_addr & TLB_MMIO)) {
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
/* There's really nothing that can be done to
support this apart from stop-the-world. */
goto stop_the_world;
}
/* Let the guest notice RMW on a write-only page. */
if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
if (unlikely(tlbe->addr_read != tlb_addr)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr);
/* Since we don't support reads and writes to different addresses,
and we do have the proper page loaded for write, this shouldn't
@@ -1002,17 +964,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
goto stop_the_world;
}
hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
ndi->active = false;
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
ndi->active = true;
memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
qemu_ram_addr_from_host_nofail(hostaddr),
1 << s_bits);
}
return hostaddr;
return (void *)((uintptr_t)addr + tlbe->addend);
stop_the_world:
cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
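
The unaligned case in atomic_mmu_lookup() is caught by a simple mask test: with a_bits alignment bits (from get_alignment_bits(mop)), the low (1 << a_bits) - 1 bits of the address must be zero. A standalone sketch of that check, where exit() stands in for raising the alignment fault:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Reject an access whose address is not aligned to 1 << a_bits bytes. */
    static void check_alignment(uint64_t addr, int a_bits)
    {
        if (addr & ((1ull << a_bits) - 1)) {
            fprintf(stderr, "unaligned atomic at %#llx (need %d-byte alignment)\n",
                    (unsigned long long)addr, 1 << a_bits);
            exit(1);   /* stand-in for raising the alignment fault */
        }
    }

    int main(void)
    {
        check_alignment(0x1000, 3);   /* 8-byte aligned: fine */
        check_alignment(0x1004, 3);   /* not 8-byte aligned: faults */
        return 0;
    }
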
@@ -1046,14 +998,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS NotDirtyInfo ndi
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
#define ATOMIC_MMU_CLEANUP \
do { \
if (unlikely(ndi.active)) { \
memory_notdirty_write_complete(&ndi); \
} \
} while (0)
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define DATA_SIZE 1
#include "atomic_template.h"
@@ -1081,7 +1026,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
#define DATA_SIZE 1
#include "atomic_template.h"

File diff suppressed because it is too large

View File

@@ -1,138 +0,0 @@
/*
* Generic intermediate code generation.
*
* Copyright (C) 2016-2017 Lluís Vilanova <vilanova@ac.upc.edu>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "exec/log.h"
#include "exec/translator.h"
/* Pairs with tcg_clear_temp_count.
To be called by #TranslatorOps.{translate_insn,tb_stop} if
(1) the target is sufficiently clean to support reporting,
(2) as and when all temporaries are known to be consumed.
For most targets, (2) is at the end of translate_insn. */
void translator_loop_temp_check(DisasContextBase *db)
{
if (tcg_check_temp_count()) {
qemu_log("warning: TCG temporary leaks before "
TARGET_FMT_lx "\n", db->pc_next);
}
}
void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
CPUState *cpu, TranslationBlock *tb)
{
int max_insns;
/* Initialize DisasContext */
db->tb = tb;
db->pc_first = tb->pc;
db->pc_next = db->pc_first;
db->is_jmp = DISAS_NEXT;
db->num_insns = 0;
db->singlestep_enabled = cpu->singlestep_enabled;
/* Instruction counting */
max_insns = tb_cflags(db->tb) & CF_COUNT_MASK;
if (max_insns == 0) {
max_insns = CF_COUNT_MASK;
}
if (max_insns > TCG_MAX_INSNS) {
max_insns = TCG_MAX_INSNS;
}
if (db->singlestep_enabled || singlestep) {
max_insns = 1;
}
max_insns = ops->init_disas_context(db, cpu, max_insns);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
/* Reset the temp count so that we can identify leaks */
tcg_clear_temp_count();
/* Start translating. */
gen_tb_start(db->tb);
ops->tb_start(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
while (true) {
db->num_insns++;
ops->insn_start(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
/* Pass breakpoint hits to target for further processing */
if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
CPUBreakpoint *bp;
QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
if (bp->pc == db->pc_next) {
if (ops->breakpoint_check(db, cpu, bp)) {
break;
}
}
}
/* The breakpoint_check hook may use DISAS_TOO_MANY to indicate
that only one more instruction is to be executed. Otherwise
it should use DISAS_NORETURN when generating an exception,
but may use a DISAS_TARGET_* value for Something Else. */
if (db->is_jmp > DISAS_TOO_MANY) {
break;
}
}
/* Disassemble one instruction. The translate_insn hook should
update db->pc_next and db->is_jmp to indicate what should be
done next -- either exiting this loop or locating the start of
the next instruction. */
if (db->num_insns == max_insns && (tb_cflags(db->tb) & CF_LAST_IO)) {
/* Accept I/O on the last instruction. */
gen_io_start();
ops->translate_insn(db, cpu);
gen_io_end();
} else {
ops->translate_insn(db, cpu);
}
/* Stop translation if translate_insn so indicated. */
if (db->is_jmp != DISAS_NEXT) {
break;
}
/* Stop translation if the output buffer is full,
or we have executed all of the allowed instructions. */
if (tcg_op_buf_full() || db->num_insns >= max_insns) {
db->is_jmp = DISAS_TOO_MANY;
break;
}
}
/* Emit code to exit the TB, as indicated by db->is_jmp. */
ops->tb_stop(db, cpu);
gen_tb_end(db->tb, db->num_insns);
/* The disas_log hook may use these values rather than recompute. */
db->tb->size = db->pc_next - db->pc_first;
db->tb->icount = db->num_insns;
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) {
qemu_log_lock();
qemu_log("----------------\n");
ops->disas_log(db, cpu);
qemu_log("\n");
qemu_log_unlock();
}
#endif
}
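
This loop is entirely hook-driven: the target supplies TranslatorOps callbacks and the generic code owns the control flow. A toy standalone analogue of the same inversion of control, with invented hook names and a trivial "target" that stops at a pretend branch:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct Ctx { unsigned pc; bool stop; } Ctx;

    typedef struct Ops {
        void (*init)(Ctx *ctx);   /* cf. init_disas_context */
        void (*insn)(Ctx *ctx);   /* cf. translate_insn; may set ctx->stop */
    } Ops;

    static void translator_loop_toy(const Ops *ops, Ctx *ctx, int max_insns)
    {
        int n = 0;
        ops->init(ctx);
        while (!ctx->stop && ++n <= max_insns) {
            ops->insn(ctx);       /* generic code drives, target fills in */
        }
    }

    static void my_init(Ctx *c) { c->pc = 0x1000; c->stop = false; }

    static void my_insn(Ctx *c)
    {
        printf("translating insn at %#x\n", c->pc);
        c->pc += 4;
        if (c->pc >= 0x1010) {    /* pretend we hit a branch */
            c->stop = true;
        }
    }

    int main(void)
    {
        Ctx c;
        translator_loop_toy(&(Ops){ my_init, my_insn }, &c, 64);
        return 0;
    }
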

View File

@@ -61,52 +61,39 @@
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
ATOMIC_MMU_CLEANUP;
return ret;
return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
}
#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
ATOMIC_MMU_CLEANUP;
return val;
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
ATOMIC_MMU_CLEANUP;
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ret = atomic_xchg__nocheck(haddr, val);
ATOMIC_MMU_CLEANUP;
return ret;
return atomic_xchg__nocheck(haddr, val);
}
#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
ATOMIC_MMU_DECLS; \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
DATA_TYPE ret = atomic_##X(haddr, val); \
ATOMIC_MMU_CLEANUP; \
return ret; \
}
return atomic_##X(haddr, val); \
} \
GEN_ATOMIC_HELPER(fetch_add)
GEN_ATOMIC_HELPER(fetch_and)
@@ -135,52 +122,39 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
ATOMIC_MMU_CLEANUP;
return BSWAP(ret);
return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
}
#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
ATOMIC_MMU_CLEANUP;
return BSWAP(val);
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
val = BSWAP(val);
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
ATOMIC_MMU_CLEANUP;
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
ABI_TYPE ret = atomic_xchg__nocheck(haddr, BSWAP(val));
ATOMIC_MMU_CLEANUP;
return BSWAP(ret);
return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val)));
}
#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
ATOMIC_MMU_DECLS; \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
DATA_TYPE ret = atomic_##X(haddr, BSWAP(val)); \
ATOMIC_MMU_CLEANUP; \
return BSWAP(ret); \
return BSWAP(atomic_##X(haddr, BSWAP(val))); \
}
GEN_ATOMIC_HELPER(fetch_and)
@@ -197,7 +171,6 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;
@@ -207,7 +180,6 @@ ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
sto = BSWAP(ret + val);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
ATOMIC_MMU_CLEANUP;
return ret;
}
ldo = ldn;
@@ -217,7 +189,6 @@ ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
ATOMIC_MMU_DECLS;
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;
@@ -227,7 +198,6 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
sto = BSWAP(ret);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
ATOMIC_MMU_CLEANUP;
return ret;
}
ldo = ldn;
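
These reverse-endian read-modify-write helpers cannot map to a single host atomic, so they loop on compare-and-swap: load, byte-swap, add, swap back, try to install, retry on a lost race. A standalone sketch of the same loop for a 32-bit byte-swapped counter, using the GCC/Clang __atomic builtins (function names invented):

    #include <stdint.h>
    #include <stdio.h>

    /* Atomically add 'val' to a counter stored byte-swapped in memory and
     * return the previous value in guest (logical) byte order. */
    static uint32_t fetch_add_swapped(uint32_t *haddr, uint32_t val)
    {
        uint32_t ldo = __atomic_load_n(haddr, __ATOMIC_RELAXED);
        for (;;) {
            uint32_t ret = __builtin_bswap32(ldo);        /* old value, guest order */
            uint32_t sto = __builtin_bswap32(ret + val);  /* new value, memory order */
            uint32_t ldn = ldo;
            if (__atomic_compare_exchange_n(haddr, &ldn, sto, 0,
                                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
                return ret;                               /* swap landed */
            }
            ldo = ldn;                                    /* lost the race, retry */
        }
    }

    int main(void)
    {
        uint32_t mem = __builtin_bswap32(41);
        uint32_t old = fetch_add_swapped(&mem, 1);
        printf("old=%u new=%u\n", old, __builtin_bswap32(mem));
        return 0;
    }
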

View File

@@ -11,9 +11,3 @@ common-obj-$(CONFIG_AUDIO_WIN_INT) += audio_win_int.o
common-obj-y += wavcapture.o
sdlaudio.o-cflags := $(SDL_CFLAGS)
sdlaudio.o-libs := $(SDL_LIBS)
alsaaudio.o-libs := $(ALSA_LIBS)
paaudio.o-libs := $(PULSE_LIBS)
coreaudio.o-libs := $(COREAUDIO_LIBS)
dsoundaudio.o-libs := $(DSOUND_LIBS)
ossaudio.o-libs := $(OSS_LIBS)

View File

@@ -215,7 +215,7 @@ bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
}
static bool
cryptodev_backend_can_be_deleted(UserCreatable *uc)
cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
{
return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
}

View File

@@ -32,7 +32,6 @@ struct HostMemoryBackendFile {
HostMemoryBackend parent_obj;
bool share;
bool discard_data;
char *mem_path;
};
@@ -104,44 +103,16 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
fb->share = value;
}
static bool file_memory_backend_get_discard_data(Object *o, Error **errp)
{
return MEMORY_BACKEND_FILE(o)->discard_data;
}
static void file_memory_backend_set_discard_data(Object *o, bool value,
Error **errp)
{
MEMORY_BACKEND_FILE(o)->discard_data = value;
}
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
if (host_memory_backend_mr_inited(backend) && fb->discard_data) {
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
qemu_madvise(ptr, sz, QEMU_MADV_REMOVE);
}
}
static void
file_backend_class_init(ObjectClass *oc, void *data)
{
HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
bc->alloc = file_backend_memory_alloc;
oc->unparent = file_backend_unparent;
object_class_property_add_bool(oc, "share",
file_memory_backend_get_share, file_memory_backend_set_share,
&error_abort);
object_class_property_add_bool(oc, "discard-data",
file_memory_backend_get_discard_data, file_memory_backend_set_discard_data,
&error_abort);
object_class_property_add_str(oc, "mem-path",
get_mem_path, set_mem_path,
&error_abort);
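
The discard-data teardown above boils down to madvise(QEMU_MADV_REMOVE) over the backing region, which punches the pages out so the kernel can reclaim them. A standalone Linux sketch of the underlying call (MADV_REMOVE needs a hole-punch-capable backing, e.g. tmpfs; shared anonymous memory stands in here for the file-backed guest RAM):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        size_t sz = 4096;
        /* stand-in for the file-backed guest RAM region */
        char *ptr = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (ptr == MAP_FAILED) { perror("mmap"); return 1; }
        memset(ptr, 0xaa, sz);

        /* discard the contents: backing blocks are freed, reads give zeroes */
        if (madvise(ptr, sz, MADV_REMOVE) != 0) {
            perror("madvise(MADV_REMOVE)");
        }
        printf("first byte after discard: %#x\n", (unsigned char)ptr[0]);
        munmap(ptr, sz);
        return 0;
    }
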

View File

@@ -304,7 +304,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
return;
} else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
error_setg(errp, "host-nodes must be set for policy %s",
HostMemPolicy_str(backend->policy));
HostMemPolicy_lookup[backend->policy]);
return;
}
@@ -342,7 +342,7 @@ out:
}
static bool
host_memory_backend_can_be_deleted(UserCreatable *uc)
host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
{
if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
return false;
@@ -395,7 +395,7 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
host_memory_backend_set_host_nodes,
NULL, NULL, &error_abort);
object_class_property_add_enum(oc, "policy", "HostMemPolicy",
&HostMemPolicy_lookup,
HostMemPolicy_lookup,
host_memory_backend_get_policy,
host_memory_backend_set_policy, &error_abort);
object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);

View File

@@ -17,128 +17,99 @@
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm.h"
#include "hw/tpm/tpm_int.h"
#include "qemu/thread.h"
static void tpm_backend_worker_thread(gpointer data, gpointer user_data)
{
TPMBackend *s = TPM_BACKEND(user_data);
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
assert(k->handle_request != NULL);
k->handle_request(s, (TPMBackendCmd *)data);
}
static void tpm_backend_thread_end(TPMBackend *s)
{
if (s->thread_pool) {
g_thread_pool_free(s->thread_pool, FALSE, TRUE);
s->thread_pool = NULL;
}
}
#include "sysemu/tpm_backend_int.h"
enum TpmType tpm_backend_get_type(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->type;
return k->ops->type;
}
int tpm_backend_init(TPMBackend *s, TPMState *state)
const char *tpm_backend_get_desc(TPMBackend *s)
{
s->tpm_state = state;
s->had_startup_error = false;
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return 0;
return k->ops->desc();
}
void tpm_backend_destroy(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->destroy(s);
}
int tpm_backend_init(TPMBackend *s, TPMState *state,
TPMRecvDataCB *datacb)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->init(s, state, datacb);
}
int tpm_backend_startup_tpm(TPMBackend *s)
{
int res = 0;
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
/* terminate a running TPM */
tpm_backend_thread_end(s);
s->thread_pool = g_thread_pool_new(tpm_backend_worker_thread, s, 1, TRUE,
NULL);
res = k->startup_tpm ? k->startup_tpm(s) : 0;
s->had_startup_error = (res != 0);
return res;
return k->ops->startup_tpm(s);
}
bool tpm_backend_had_startup_error(TPMBackend *s)
{
return s->had_startup_error;
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->had_startup_error(s);
}
void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
size_t tpm_backend_realloc_buffer(TPMBackend *s, TPMSizedBuffer *sb)
{
g_thread_pool_push(s->thread_pool, cmd, NULL);
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->realloc_buffer(sb);
}
void tpm_backend_deliver_request(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->deliver_request(s);
}
void tpm_backend_reset(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
if (k->reset) {
k->reset(s);
}
tpm_backend_thread_end(s);
s->had_startup_error = false;
k->ops->reset(s);
}
void tpm_backend_cancel_cmd(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
assert(k->cancel_cmd);
k->cancel_cmd(s);
k->ops->cancel_cmd(s);
}
bool tpm_backend_get_tpm_established_flag(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->get_tpm_established_flag ?
k->get_tpm_established_flag(s) : false;
return k->ops->get_tpm_established_flag(s);
}
int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->reset_tpm_established_flag ?
k->reset_tpm_established_flag(s, locty) : 0;
return k->ops->reset_tpm_established_flag(s, locty);
}
TPMVersion tpm_backend_get_tpm_version(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
assert(k->get_tpm_version);
return k->get_tpm_version(s);
}
TPMInfo *tpm_backend_query_tpm(TPMBackend *s)
{
TPMInfo *info = g_new0(TPMInfo, 1);
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
info->id = g_strdup(s->id);
info->model = s->fe_model;
if (k->get_tpm_options) {
info->options = k->get_tpm_options(s);
}
return info;
return k->ops->get_tpm_version(s);
}
static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
@@ -181,21 +152,33 @@ static void tpm_backend_prop_set_opened(Object *obj, bool value, Error **errp)
static void tpm_backend_instance_init(Object *obj)
{
TPMBackend *s = TPM_BACKEND(obj);
object_property_add_bool(obj, "opened",
tpm_backend_prop_get_opened,
tpm_backend_prop_set_opened,
NULL);
s->fe_model = -1;
}
static void tpm_backend_instance_finalize(Object *obj)
void tpm_backend_thread_deliver_request(TPMBackendThread *tbt)
{
TPMBackend *s = TPM_BACKEND(obj);
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_PROCESS_CMD, NULL);
}
g_free(s->id);
tpm_backend_thread_end(s);
void tpm_backend_thread_create(TPMBackendThread *tbt,
GFunc func, gpointer user_data)
{
if (!tbt->pool) {
tbt->pool = g_thread_pool_new(func, user_data, 1, TRUE, NULL);
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_INIT, NULL);
}
}
void tpm_backend_thread_end(TPMBackendThread *tbt)
{
if (tbt->pool) {
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_END, NULL);
g_thread_pool_free(tbt->pool, FALSE, TRUE);
tbt->pool = NULL;
}
}
static const TypeInfo tpm_backend_info = {
@@ -203,21 +186,13 @@ static const TypeInfo tpm_backend_info = {
.parent = TYPE_OBJECT,
.instance_size = sizeof(TPMBackend),
.instance_init = tpm_backend_instance_init,
.instance_finalize = tpm_backend_instance_finalize,
.class_size = sizeof(TPMBackendClass),
.abstract = true,
};
static const TypeInfo tpm_if_info = {
.name = TYPE_TPM_IF,
.parent = TYPE_INTERFACE,
.class_size = sizeof(TPMIfClass),
};
static void register_types(void)
{
type_register_static(&tpm_backend_info);
type_register_static(&tpm_if_info);
}
type_init(register_types);
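
Both sides of this diff serialize TPM commands through a single-threaded GThreadPool created with g_thread_pool_new(..., 1, TRUE, ...) and fed with g_thread_pool_push(). A minimal standalone sketch of that pattern (requires GLib; the worker and payload are invented):

    /* build: gcc demo.c $(pkg-config --cflags --libs glib-2.0) */
    #include <glib.h>
    #include <stdio.h>

    static void worker(gpointer data, gpointer user_data)
    {
        printf("handling request %d\n", GPOINTER_TO_INT(data));
    }

    int main(void)
    {
        /* one exclusive thread, so requests are processed in order */
        GThreadPool *pool = g_thread_pool_new(worker, NULL, 1, TRUE, NULL);
        for (int i = 1; i <= 3; i++) {
            g_thread_pool_push(pool, GINT_TO_POINTER(i), NULL);
        }
        /* FALSE: let queued work finish; TRUE: wait for it before returning */
        g_thread_pool_free(pool, FALSE, TRUE);
        return 0;
    }
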

block.c (506 changed lines)
View File

@@ -42,6 +42,7 @@
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
#include "qapi/util.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
@@ -239,6 +240,12 @@ bool bdrv_is_read_only(BlockDriverState *bs)
return bs->read_only;
}
/* Returns whether the image file can be written to right now */
bool bdrv_is_writable(BlockDriverState *bs)
{
return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE);
}
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp)
{
@@ -261,11 +268,6 @@ int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
return 0;
}
/* TODO Remove (deprecated since 2.11)
* Block drivers are not supposed to automatically change bs->read_only.
* Instead, they should just check whether they can provide what the user
* explicitly requested and error out if read-write is requested, but they can
* only provide read-only access. */
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
{
int ret = 0;
@@ -495,8 +497,6 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
if (drv && drv->bdrv_probe_blocksizes) {
return drv->bdrv_probe_blocksizes(bs, bsz);
} else if (drv && drv->is_filter && bs->file) {
return bdrv_probe_blocksizes(bs->file->bs, bsz);
}
return -ENOTSUP;
@@ -514,8 +514,6 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
if (drv && drv->bdrv_probe_geometry) {
return drv->bdrv_probe_geometry(bs, geo);
} else if (drv && drv->is_filter && bs->file) {
return bdrv_probe_geometry(bs->file->bs, geo);
}
return -ENOTSUP;
@@ -720,10 +718,6 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
/* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
if (bdrv_is_sg(bs))
return 0;
@@ -990,33 +984,6 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
*child_flags = flags;
}
static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
const char *filename, Error **errp)
{
BlockDriverState *parent = c->opaque;
int orig_flags = bdrv_get_flags(parent);
int ret;
if (!(orig_flags & BDRV_O_RDWR)) {
ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp);
if (ret < 0) {
return ret;
}
}
ret = bdrv_change_backing_file(parent, filename,
base->drv ? base->drv->format_name : "");
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update backing file link");
}
if (!(orig_flags & BDRV_O_RDWR)) {
bdrv_reopen(parent, orig_flags, NULL);
}
return ret;
}
const BdrvChildRole child_backing = {
.get_parent_desc = bdrv_child_get_parent_desc,
.attach = bdrv_backing_attach,
@@ -1025,7 +992,6 @@ const BdrvChildRole child_backing = {
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
.inactivate = bdrv_child_cb_inactivate,
.update_filename = bdrv_backing_update_filename,
};
static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -1367,8 +1333,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
if (detect_zeroes) {
BlockdevDetectZeroesOptions value =
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
detect_zeroes,
BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
&local_err);
if (local_err) {
@@ -1562,70 +1529,22 @@ static int bdrv_fill_options(QDict **options, const char *filename,
return 0;
}
static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
uint64_t perm, uint64_t shared,
static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
GSList *ignore_children, Error **errp);
static void bdrv_child_abort_perm_update(BdrvChild *c);
static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
typedef struct BlockReopenQueueEntry {
bool prepared;
BDRVReopenState state;
QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
/*
* Return the flags that @bs will have after the reopens in @q have
* successfully completed. If @q is NULL (or @bs is not contained in @q),
* return the current flags.
*/
static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
{
BlockReopenQueueEntry *entry;
if (q != NULL) {
QSIMPLEQ_FOREACH(entry, q, entry) {
if (entry->state.bs == bs) {
return entry->state.flags;
}
}
}
return bs->open_flags;
}
/* Returns whether the image file can be written to after the reopen queue @q
* has been successfully applied, or right now if @q is NULL. */
static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
BlockReopenQueue *q)
{
int flags = bdrv_reopen_get_flags(q, bs);
return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
}
/*
* Return whether the BDS can be written to. This is not necessarily
* the same as !bdrv_is_read_only(bs), as inactivated images may not
* be written to but do not count as read-only images.
*/
bool bdrv_is_writable(BlockDriverState *bs)
{
return bdrv_is_writable_after_reopen(bs, NULL);
}
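
bdrv_reopen_get_flags() above answers "what flags will this node have once the queue is applied" by scanning the queue for an entry for this node and falling back to the current flags; bdrv_is_writable_after_reopen() is then just a predicate over that result. A standalone sketch of the lookup, with invented types:

    #include <stdio.h>

    typedef struct Node { int flags; } Node;
    typedef struct QueueEntry {
        Node *node;
        int new_flags;
        struct QueueEntry *next;
    } QueueEntry;

    /* Flags the node will have after the queued reopens complete, or its
     * current flags if it is not queued (or there is no queue at all). */
    static int reopen_get_flags(QueueEntry *q, Node *n)
    {
        for (QueueEntry *e = q; e; e = e->next) {
            if (e->node == n) {
                return e->new_flags;
            }
        }
        return n->flags;
    }

    int main(void)
    {
        Node n = { .flags = 0x1 };
        QueueEntry e = { .node = &n, .new_flags = 0x3, .next = NULL };
        printf("now=%#x queued=%#x\n",
               reopen_get_flags(NULL, &n), reopen_get_flags(&e, &n));
        return 0;
    }
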
static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
BdrvChild *c, const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
BdrvChild *c,
const BdrvChildRole *role,
uint64_t parent_perm, uint64_t parent_shared,
uint64_t *nperm, uint64_t *nshared)
{
if (bs->drv && bs->drv->bdrv_child_perm) {
bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
bs->drv->bdrv_child_perm(bs, c, role,
parent_perm, parent_shared,
nperm, nshared);
}
/* TODO Take force_share from reopen_queue */
if (child_bs && child_bs->force_share) {
*nshared = BLK_PERM_ALL;
}
@@ -1640,8 +1559,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
* A call to this function must always be followed by a call to bdrv_set_perm()
* or bdrv_abort_perm_update().
*/
static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
uint64_t cumulative_perms,
static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
uint64_t cumulative_shared_perms,
GSList *ignore_children, Error **errp)
{
@@ -1651,7 +1569,7 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
!bdrv_is_writable_after_reopen(bs, q))
!bdrv_is_writable(bs))
{
error_setg(errp, "Block node is read-only");
return -EPERM;
@@ -1676,11 +1594,11 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
/* Check all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
bdrv_child_perm(bs, c->bs, c, c->role, q,
bdrv_child_perm(bs, c->bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared,
ignore_children, errp);
ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children,
errp);
if (ret < 0) {
return ret;
}
@@ -1738,7 +1656,7 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
/* Update all children */
QLIST_FOREACH(c, &bs->children, next) {
uint64_t cur_perm, cur_shared;
bdrv_child_perm(bs, c->bs, c, c->role, NULL,
bdrv_child_perm(bs, c->bs, c, c->role,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
bdrv_child_set_perm(c, cur_perm, cur_shared);
@@ -1806,8 +1724,7 @@ char *bdrv_perm_names(uint64_t perm)
*
* Needs to be followed by a call to either bdrv_set_perm() or
* bdrv_abort_perm_update(). */
static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
uint64_t new_used_perm,
static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
uint64_t new_shared_perm,
GSList *ignore_children, Error **errp)
{
@@ -1849,20 +1766,19 @@ static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
cumulative_shared_perms &= c->shared_perm;
}
return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms,
ignore_children, errp);
}
/* Needs to be followed by a call to either bdrv_child_set_perm() or
* bdrv_child_abort_perm_update(). */
static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
uint64_t perm, uint64_t shared,
static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
GSList *ignore_children, Error **errp)
{
int ret;
ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp);
ret = bdrv_check_update_perm(c->bs, perm, shared, ignore_children, errp);
g_slist_free(ignore_children);
return ret;
@@ -1890,7 +1806,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
{
int ret;
ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp);
ret = bdrv_child_check_perm(c, perm, shared, NULL, errp);
if (ret < 0) {
bdrv_child_abort_perm_update(c);
return ret;
@@ -1909,7 +1825,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
@@ -1927,7 +1842,6 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
@@ -1937,11 +1851,10 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
if (!backing) {
/* Apart from the modifications below, the same permissions are
* forwarded and left alone as for filters */
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
&perm, &shared);
bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
/* Format drivers may touch metadata even if the guest doesn't write */
if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
if (bdrv_is_writable(bs)) {
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
}
@@ -2030,7 +1943,7 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
* because we're just taking a parent away, so we're loosening
* restrictions. */
bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort);
bdrv_check_perm(old_bs, perm, shared_perm, NULL, &error_abort);
bdrv_set_perm(old_bs, perm, shared_perm);
}
@@ -2049,7 +1962,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *child;
int ret;
ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
if (ret < 0) {
bdrv_abort_perm_update(child_bs);
return NULL;
@@ -2084,7 +1997,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
assert(parent_bs->drv);
assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
bdrv_child_perm(parent_bs, child_bs, NULL, child_role,
perm, shared_perm, &perm, &shared_perm);
child = bdrv_root_attach_child(child_bs, child_name, child_role,
@@ -2265,8 +2178,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
goto free_exit;
}
if (!reference &&
bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
qdict_put_str(options, "driver", bs->backing_format);
}
@@ -2590,10 +2502,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
goto fail;
}
if (file_bs != NULL) {
/* Not requesting BLK_PERM_CONSISTENT_READ because we're only
* looking at the header to guess the image format. This works even
* in cases where a guest would not see a consistent state. */
file = blk_new(0, BLK_PERM_ALL);
file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
if (local_err) {
@@ -2722,6 +2631,12 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
NULL, errp);
}
typedef struct BlockReopenQueueEntry {
bool prepared;
BDRVReopenState state;
QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
/*
* Adds a BlockDriverState to a simple queue for an atomic, transactional
* reopen of multiple devices.
@@ -2820,23 +2735,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
flags |= BDRV_O_ALLOW_RDWR;
}
if (!bs_entry) {
bs_entry = g_new0(BlockReopenQueueEntry, 1);
QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
} else {
QDECREF(bs_entry->state.options);
QDECREF(bs_entry->state.explicit_options);
}
bs_entry->state.bs = bs;
bs_entry->state.options = options;
bs_entry->state.explicit_options = explicit_options;
bs_entry->state.flags = flags;
/* This needs to be overwritten in bdrv_reopen_prepare() */
bs_entry->state.perm = UINT64_MAX;
bs_entry->state.shared_perm = 0;
QLIST_FOREACH(child, &bs->children, next) {
QDict *new_child_options;
char *child_key_dot;
@@ -2856,6 +2754,19 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
child->role, options, flags);
}
if (!bs_entry) {
bs_entry = g_new0(BlockReopenQueueEntry, 1);
QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
} else {
QDECREF(bs_entry->state.options);
QDECREF(bs_entry->state.explicit_options);
}
bs_entry->state.bs = bs;
bs_entry->state.options = options;
bs_entry->state.explicit_options = explicit_options;
bs_entry->state.flags = flags;
return bs_queue;
}
@@ -2943,52 +2854,6 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
return ret;
}
static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
BdrvChild *c)
{
BlockReopenQueueEntry *entry;
QSIMPLEQ_FOREACH(entry, q, entry) {
BlockDriverState *bs = entry->state.bs;
BdrvChild *child;
QLIST_FOREACH(child, &bs->children, next) {
if (child == c) {
return entry;
}
}
}
return NULL;
}
static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
uint64_t *perm, uint64_t *shared)
{
BdrvChild *c;
BlockReopenQueueEntry *parent;
uint64_t cumulative_perms = 0;
uint64_t cumulative_shared_perms = BLK_PERM_ALL;
QLIST_FOREACH(c, &bs->parents, next_parent) {
parent = find_parent_in_reopen_queue(q, c);
if (!parent) {
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
} else {
uint64_t nperm, nshared;
bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
parent->state.perm, parent->state.shared_perm,
&nperm, &nshared);
cumulative_perms |= nperm;
cumulative_shared_perms &= nshared;
}
}
*perm = cumulative_perms;
*shared = cumulative_shared_perms;
}
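
The cumulative-permission computation is a fold over all parents: permissions somebody needs accumulate with OR, while permissions everybody is willing to share accumulate with AND. A tiny standalone illustration with invented permission bits:

    #include <stdint.h>
    #include <stdio.h>

    #define PERM_READ  (1u << 0)
    #define PERM_WRITE (1u << 1)
    #define PERM_ALL   0xffffffffu

    int main(void)
    {
        struct { uint32_t perm, shared; } parents[] = {
            { PERM_READ,              PERM_ALL },                /* a reader */
            { PERM_READ | PERM_WRITE, PERM_ALL & ~PERM_WRITE },  /* an exclusive writer */
        };
        uint32_t cum_perm = 0, cum_shared = PERM_ALL;

        for (unsigned i = 0; i < 2; i++) {
            cum_perm   |= parents[i].perm;    /* someone needs it */
            cum_shared &= parents[i].shared;  /* everyone must allow it */
        }
        printf("perm=%#x shared=%#x\n", cum_perm, cum_shared);
        return 0;
    }
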
/*
* Prepares a BlockDriverState for reopen. All changes are staged in the
@@ -3054,9 +2919,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
goto error;
}
/* Calculate required permissions after reopening */
bdrv_reopen_perm(queue, reopen_state->bs,
&reopen_state->perm, &reopen_state->shared_perm);
ret = bdrv_flush(reopen_state->bs);
if (ret) {
@@ -3092,26 +2954,19 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
const QDictEntry *entry = qdict_first(reopen_state->options);
do {
QObject *new = entry->value;
QObject *old = qdict_get(reopen_state->bs->options, entry->key);
QString *new_obj = qobject_to_qstring(entry->value);
const char *new = qstring_get_str(new_obj);
/*
* TODO: When using -drive to specify blockdev options, all values
* will be strings; however, when using -blockdev, blockdev-add or
* filenames using the json:{} pseudo-protocol, they will be
* correctly typed.
* In contrast, reopening options are (currently) always strings
* (because you can only specify them through qemu-io; all other
* callers do not specify any options).
* Therefore, when using anything other than -drive to create a BDS,
* this cannot detect non-string options as unchanged, because
* qobject_is_equal() always returns false for objects of different
* type. In the future, this should be remedied by correctly typing
* all options. For now, this is not too big of an issue because
* the user can simply omit options which cannot be changed anyway,
* so they will stay unchanged.
* Caution: while qdict_get_try_str() is fine, getting
* non-string types would require more care. When
* bs->options come from -blockdev or blockdev_add, its
* members are typed according to the QAPI schema, but
* when they come from -drive, they're all QString.
*/
if (!qobject_is_equal(new, old)) {
const char *old = qdict_get_try_str(reopen_state->bs->options,
entry->key);
if (!old || strcmp(new, old)) {
error_setg(errp, "Cannot change the option '%s'", entry->key);
ret = -EINVAL;
goto error;
@@ -3119,12 +2974,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
} while ((entry = qdict_next(reopen_state->options, entry)));
}
ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm,
reopen_state->shared_perm, NULL, errp);
if (ret < 0) {
goto error;
}
ret = 0;
error:
@@ -3165,9 +3014,6 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
bdrv_refresh_limits(bs, NULL);
bdrv_set_perm(reopen_state->bs, reopen_state->perm,
reopen_state->shared_perm);
new_can_write =
!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
@@ -3201,15 +3047,12 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
}
QDECREF(reopen_state->explicit_options);
bdrv_abort_perm_update(reopen_state->bs);
}
static void bdrv_close(BlockDriverState *bs)
{
BdrvAioNotifier *ban, *ban_next;
BdrvChild *child, *next;
assert(!bs->job);
assert(!bs->refcnt);
@@ -3219,40 +3062,42 @@ static void bdrv_close(BlockDriverState *bs)
bdrv_drain(bs); /* in case flush left pending I/O */
if (bs->drv) {
BdrvChild *child, *next;
bs->drv->bdrv_close(bs);
bs->drv = NULL;
}
bdrv_set_backing_hd(bs, NULL, &error_abort);
bdrv_set_backing_hd(bs, NULL, &error_abort);
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
bs->file = NULL;
}
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
/* TODO Remove bdrv_unref() from drivers' close function and use
* bdrv_unref_child() here */
if (child->bs->inherits_from == bs) {
child->bs->inherits_from = NULL;
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
bs->file = NULL;
}
bdrv_detach_child(child);
}
g_free(bs->opaque);
bs->opaque = NULL;
atomic_set(&bs->copy_on_read, 0);
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
bs->encrypted = false;
bs->sg = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
bs->options = NULL;
bs->explicit_options = NULL;
QDECREF(bs->full_open_options);
bs->full_open_options = NULL;
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
/* TODO Remove bdrv_unref() from drivers' close function and use
* bdrv_unref_child() here */
if (child->bs->inherits_from == bs) {
child->bs->inherits_from = NULL;
}
bdrv_detach_child(child);
}
g_free(bs->opaque);
bs->opaque = NULL;
atomic_set(&bs->copy_on_read, 0);
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
bs->encrypted = false;
bs->sg = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
bs->options = NULL;
bs->explicit_options = NULL;
QDECREF(bs->full_open_options);
bs->full_open_options = NULL;
}
bdrv_release_named_dirty_bitmaps(bs);
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
@@ -3332,7 +3177,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
/* Check whether the required permissions can be granted on @to, ignoring
* all BdrvChild in @list so that they can't block themselves. */
ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp);
ret = bdrv_check_update_perm(to, perm, shared, list, errp);
if (ret < 0) {
bdrv_abort_perm_update(to);
goto out;
@@ -3448,10 +3293,6 @@ int bdrv_change_backing_file(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
int ret;
if (!drv) {
return -ENOMEDIUM;
}
/* Backing file format doesn't make sense without a backing file */
if (backing_fmt && !backing_file) {
return -EINVAL;
@@ -3525,62 +3366,53 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
* if active == top, that is considered an error
*
*/
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
const char *backing_file_str)
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
BdrvChild *c, *next;
BlockDriverState *new_top_bs = NULL;
Error *local_err = NULL;
int ret = -EIO;
bdrv_ref(top);
if (!top->drv || !base->drv) {
goto exit;
}
new_top_bs = bdrv_find_overlay(active, top);
if (new_top_bs == NULL) {
/* we could not find the image above 'top', this is an error */
goto exit;
}
/* special case of new_top_bs->backing->bs already pointing to base - nothing
* to do, no intermediate images */
if (backing_bs(new_top_bs) == base) {
ret = 0;
goto exit;
}
/* Make sure that base is in the backing chain of top */
if (!bdrv_chain_contains(top, base)) {
goto exit;
}
/* success - we can delete the intermediate states, and link top->base */
/* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
* we've figured out how they should work. */
backing_file_str = backing_file_str ? backing_file_str : base->filename;
ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
base->drv ? base->drv->format_name : "");
if (ret) {
goto exit;
}
QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) {
/* Check whether we are allowed to switch c from top to base */
GSList *ignore_children = g_slist_prepend(NULL, c);
bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm,
ignore_children, &local_err);
if (local_err) {
ret = -EPERM;
error_report_err(local_err);
goto exit;
}
g_slist_free(ignore_children);
/* If so, update the backing file path in the image file */
if (c->role->update_filename) {
ret = c->role->update_filename(c, base, backing_file_str,
&local_err);
if (ret < 0) {
bdrv_abort_perm_update(base);
error_report_err(local_err);
goto exit;
}
}
/* Do the actual switch in the in-memory graph.
* Completes bdrv_check_update_perm() transaction internally. */
bdrv_ref(base);
bdrv_replace_child(c, base);
bdrv_unref(top);
bdrv_set_backing_hd(new_top_bs, base, &local_err);
if (local_err) {
ret = -EPERM;
error_report_err(local_err);
goto exit;
}
ret = 0;
exit:
bdrv_unref(top);
return ret;
}
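
Conceptually, dropping intermediates re-points every reference that went through top at base, releasing the [top, base) subchain; the real code also rewrites the backing-file string in the parent image and checks permissions first. A standalone linked-list sketch of just the re-pointing step, with invented structures:

    #include <stdio.h>

    typedef struct Img {
        const char *name;
        struct Img *backing;      /* next image down the chain */
    } Img;

    /* Make a reference that pointed at 'top' point at 'base' instead;
     * in QEMU this is bdrv_replace_child(c, base) for each parent of top. */
    static void repoint(Img **link, Img *top, Img *base)
    {
        if (*link == top) {
            *link = base;
        }
    }

    int main(void)
    {
        Img base = { "base", NULL };
        Img mid  = { "mid",  &base };
        Img top  = { "top",  &mid };
        Img *active = &top;       /* a parent reference into the chain */

        repoint(&active, &top, &base);   /* drops top and mid from this view */
        printf("active now backed directly by: %s\n", active->name);
        return 0;
    }
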
@@ -3596,15 +3428,11 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
assert(child->perm & BLK_PERM_RESIZE);
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
if (!drv->bdrv_truncate) {
if (bs->file && drv->is_filter) {
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
error_setg(errp, "Image format driver does not support resize");
return -ENOTSUP;
}
@@ -3616,18 +3444,12 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
assert(!(bs->open_flags & BDRV_O_INACTIVE));
ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
if (ret < 0) {
return ret;
if (ret == 0) {
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
atomic_inc(&bs->write_gen);
}
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
} else {
offset = bs->total_sectors * BDRV_SECTOR_SIZE;
}
bdrv_dirty_bitmap_truncate(bs, offset);
bdrv_parent_cb_resize(bs);
atomic_inc(&bs->write_gen);
return ret;
}
@@ -3937,9 +3759,7 @@ int bdrv_has_zero_init_1(BlockDriverState *bs)
int bdrv_has_zero_init(BlockDriverState *bs)
{
if (!bs->drv) {
return 0;
}
assert(bs->drv);
/* If BS is a copy on write image, it is initialized to
the contents of the base image, which may not be zeroes. */
@@ -3949,9 +3769,6 @@ int bdrv_has_zero_init(BlockDriverState *bs)
if (bs->drv->bdrv_has_zero_init) {
return bs->drv->bdrv_has_zero_init(bs);
}
if (bs->file && bs->drv->is_filter) {
return bdrv_has_zero_init(bs->file->bs);
}
/* safe default */
return 0;
@@ -4006,16 +3823,10 @@ void bdrv_get_backing_filename(BlockDriverState *bs,
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BlockDriver *drv = bs->drv;
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
if (!drv)
return -ENOMEDIUM;
}
if (!drv->bdrv_get_info) {
if (bs->file && drv->is_filter) {
return bdrv_get_info(bs->file->bs, bdi);
}
if (!drv->bdrv_get_info)
return -ENOTSUP;
}
memset(bdi, 0, sizeof(*bdi));
return drv->bdrv_get_info(bs, bdi);
}
@@ -4204,29 +4015,7 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
}
}
/*
* Update permissions, they may differ for inactive nodes.
*
* Note that the required permissions of inactive images are always a
* subset of the permissions required after activating the image. This
* allows us to just get the permissions upfront without restricting
* drv->bdrv_invalidate_cache().
*
* It also means that in error cases, we don't have to try and revert to
* the old permissions (which is an operation that could fail, too). We can
* just keep the extended permissions for the next time that an activation
* of the image is tried.
*/
bs->open_flags &= ~BDRV_O_INACTIVE;
bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err);
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
return;
}
bdrv_set_perm(bs, perm, shared_perm);
if (bs->drv->bdrv_invalidate_cache) {
bs->drv->bdrv_invalidate_cache(bs, &local_err);
if (local_err) {
@@ -4243,6 +4032,16 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
return;
}
/* Update permissions, they may differ for inactive nodes */
bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err);
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
return;
}
bdrv_set_perm(bs, perm, shared_perm);
QLIST_FOREACH(parent, &bs->parents, next_parent) {
if (parent->role->activate) {
parent->role->activate(parent, &local_err);
@@ -4268,7 +4067,6 @@ void bdrv_invalidate_cache_all(Error **errp)
aio_context_release(aio_context);
if (local_err) {
error_propagate(errp, local_err);
bdrv_next_cleanup(&it);
return;
}
}
@@ -4280,10 +4078,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
BdrvChild *child, *parent;
int ret;
if (!bs->drv) {
return -ENOMEDIUM;
}
if (!setting_flag && bs->drv->bdrv_inactivate) {
ret = bs->drv->bdrv_inactivate(bs);
if (ret < 0) {
@@ -4307,7 +4101,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
/* Update permissions, they may differ for inactive nodes */
bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort);
bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort);
bdrv_set_perm(bs, perm, shared_perm);
}
@@ -4344,7 +4138,6 @@ int bdrv_inactivate_all(void)
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ret = bdrv_inactivate_recurse(bs, pass);
if (ret < 0) {
bdrv_next_cleanup(&it);
goto out;
}
}
@@ -4383,6 +4176,20 @@ bool bdrv_is_inserted(BlockDriverState *bs)
return true;
}
/**
* Return whether the media changed since the last call to this
* function, or -ENOTSUP if we don't know. Most drivers don't know.
*/
int bdrv_media_changed(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_media_changed) {
return drv->bdrv_media_changed(bs);
}
return -ENOTSUP;
}
/**
* If eject_flag is TRUE, eject the media. Otherwise, close the tray
*/
@@ -4585,7 +4392,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
/* The size for the image must always be specified, unless we have a backing
* file and we have not been forbidden from opening it. */
size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
BlockDriverState *bs;
char *full_backing = g_new0(char, PATH_MAX);
@@ -4604,11 +4411,10 @@ void bdrv_img_create(const char *filename, const char *fmt,
back_flags = flags;
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
backing_options = qdict_new();
if (backing_fmt) {
backing_options = qdict_new();
qdict_put_str(backing_options, "driver", backing_fmt);
}
qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
&local_err);
@@ -4820,9 +4626,6 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
{
if (!bs->drv) {
return -ENOMEDIUM;
}
if (!bs->drv->bdrv_amend_options) {
return -ENOTSUP;
}
@@ -4880,7 +4683,6 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
/* candidate is the first non filter */
if (perm) {
bdrv_next_cleanup(&it);
return true;
}
}

View File

@@ -25,7 +25,6 @@ block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
block-obj-y += throttle.o
block-obj-y += crypto.o

View File

@@ -346,9 +346,9 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
job->bytes_read);
job->bytes_read = 0;
block_job_sleep_ns(&job->common, delay_ns);
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
} else {
block_job_sleep_ns(&job->common, 0);
block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
}
if (block_job_is_cancelled(&job->common)) {
@@ -372,10 +372,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
clusters_per_iter = MAX((granularity / job->cluster_size), 1);
dbi = bdrv_dirty_iter_new(job->sync_bitmap);
dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
/* Find the next dirty sector(s) */
while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
cluster = offset / job->cluster_size;
/* Fake progress updates for any clusters we skipped */
@@ -403,7 +403,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
if (granularity < job->cluster_size) {
bdrv_set_dirty_iter(dbi, cluster * job->cluster_size);
bdrv_set_dirty_iter(dbi,
cluster * job->cluster_size / BDRV_SECTOR_SIZE);
}
last_cluster = cluster - 1;
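
The cluster skipping here is plain integer arithmetic: a dirty byte offset maps to a backup cluster by division, and when the bitmap granularity is finer than the cluster size the iterator is bumped past the cluster just copied so it is not copied twice. A worked standalone example with invented sizes:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t granularity  = 4096;      /* dirty-bitmap granularity */
        uint64_t cluster_size = 65536;     /* backup copy unit */
        uint64_t offset       = 200000;    /* a dirty byte offset */

        uint64_t cluster = offset / cluster_size;         /* -> cluster 3 */
        uint64_t next    = (cluster + 1) * cluster_size;  /* next cluster start */

        if (granularity < cluster_size) {
            printf("dirty offset %llu -> cluster %llu, advance iterator to %llu\n",
                   (unsigned long long)offset,
                   (unsigned long long)cluster,
                   (unsigned long long)next);
        }
        return 0;
    }
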
@@ -595,7 +596,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
error_setg(errp,
"a sync_bitmap was provided to backup_run, "
"but received an incompatible sync_mode (%s)",
MirrorSyncMode_str(sync_mode));
MirrorSyncMode_lookup[sync_mode]);
return NULL;
}

View File

@@ -149,6 +149,20 @@ static QemuOptsList *config_groups[] = {
NULL
};
static int get_event_by_name(const char *name, BlkdebugEvent *event)
{
int i;
for (i = 0; i < BLKDBG__MAX; i++) {
if (!strcmp(BlkdebugEvent_lookup[i], name)) {
*event = i;
return 0;
}
}
return -1;
}
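
get_event_by_name() is the classic reverse lookup over a generated name table: walk the enum range and strcmp each entry. A standalone miniature of the same technique, with an invented table:

    #include <stdio.h>
    #include <string.h>

    typedef enum { EV_READ, EV_WRITE, EV_FLUSH, EV__MAX } Event;

    static const char *const event_name[EV__MAX] = {
        [EV_READ]  = "read",
        [EV_WRITE] = "write",
        [EV_FLUSH] = "flush",
    };

    static int event_by_name(const char *name, Event *ev)
    {
        for (int i = 0; i < EV__MAX; i++) {
            if (!strcmp(event_name[i], name)) {
                *ev = i;
                return 0;
            }
        }
        return -1;   /* unknown name */
    }

    int main(void)
    {
        Event ev;
        if (event_by_name("flush", &ev) == 0) {
            printf("flush -> %d\n", ev);
        }
        return 0;
    }
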
struct add_rule_data {
BDRVBlkdebugState *s;
int action;
@@ -159,7 +173,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
struct add_rule_data *d = opaque;
BDRVBlkdebugState *s = d->s;
const char* event_name;
int event;
BlkdebugEvent event;
struct BlkdebugRule *rule;
int64_t sector;
@@ -168,9 +182,8 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
if (!event_name) {
error_setg(errp, "Missing event name for rule");
return -1;
}
event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
if (event < 0) {
} else if (get_event_by_name(event_name, &event) < 0) {
error_setg(errp, "Invalid event name \"%s\"", event_name);
return -1;
}
@@ -244,6 +257,7 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
ret = qemu_config_parse(f, config_groups, filename);
if (ret < 0) {
error_setg(errp, "Could not parse blkdebug config file");
ret = -EINVAL;
goto fail;
}
}
@@ -631,11 +645,10 @@ static int64_t coroutine_fn blkdebug_co_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
assert(QEMU_IS_ALIGNED(sector_num | nb_sectors,
DIV_ROUND_UP(bs->bl.request_alignment,
BDRV_SECTOR_SIZE)));
return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors,
pnum, file);
*pnum = nb_sectors;
*file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
static void blkdebug_close(BlockDriverState *bs)
@@ -730,13 +743,13 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule;
int blkdebug_event;
BlkdebugEvent blkdebug_event;
blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
if (blkdebug_event < 0) {
if (get_event_by_name(event, &blkdebug_event) < 0) {
return -ENOENT;
}
rule = g_malloc(sizeof(*rule));
*rule = (struct BlkdebugRule) {
.event = blkdebug_event,
@@ -808,6 +821,12 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
return bdrv_getlength(bs->file->bs);
}
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkdebugState *s = bs->opaque;
@@ -890,7 +909,6 @@ static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
.instance_size = sizeof(BDRVBlkdebugState),
.is_filter = true,
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
@@ -899,6 +917,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
.bdrv_refresh_limits = blkdebug_refresh_limits,

View File

@@ -273,6 +273,9 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
blk->shared_perm = shared_perm;
blk_set_enable_write_cache(blk, true);
qemu_co_mutex_init(&blk->public.throttled_reqs_lock);
qemu_co_queue_init(&blk->public.throttled_reqs[0]);
qemu_co_queue_init(&blk->public.throttled_reqs[1]);
block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers);
@@ -299,7 +302,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm = 0;
uint64_t perm;
/* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a concern because the BDS stays private, so we
@@ -309,11 +312,9 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
* guest devices will add their own blockers if they can't share. */
if ((flags & BDRV_O_NO_IO) == 0) {
perm |= BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
perm |= BLK_PERM_WRITE;
}
perm = BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
perm |= BLK_PERM_WRITE;
}
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
@@ -342,7 +343,7 @@ static void blk_delete(BlockBackend *blk)
assert(!blk->refcnt);
assert(!blk->name);
assert(!blk->dev);
if (blk->public.throttle_group_member.throttle_state) {
if (blk->public.throttle_state) {
blk_io_limits_disable(blk);
}
if (blk->root) {
@@ -444,37 +445,21 @@ BlockBackend *blk_next(BlockBackend *blk)
* the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
BlockDriverState *bs, *old_bs;
/* Must be called from the main loop */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
BlockDriverState *bs;
/* First, return all root nodes of BlockBackends. In order to avoid
* returning a BDS twice when multiple BBs refer to it, we only return it
* if the BB is the first one in the parent list of the BDS. */
if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
BlockBackend *old_blk = it->blk;
old_bs = old_blk ? blk_bs(old_blk) : NULL;
do {
it->blk = blk_all_next(it->blk);
bs = it->blk ? blk_bs(it->blk) : NULL;
} while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
if (it->blk) {
blk_ref(it->blk);
}
blk_unref(old_blk);
if (bs) {
bdrv_ref(bs);
bdrv_unref(old_bs);
return bs;
}
it->phase = BDRV_NEXT_MONITOR_OWNED;
} else {
old_bs = it->bs;
}
/* Then return the monitor-owned BDSes without a BB attached. Ignore all
@@ -485,46 +470,18 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it)
bs = it->bs;
} while (bs && bdrv_has_blk(bs));
if (bs) {
bdrv_ref(bs);
}
bdrv_unref(old_bs);
return bs;
}
static void bdrv_next_reset(BdrvNextIterator *it)
{
*it = (BdrvNextIterator) {
.phase = BDRV_NEXT_BACKEND_ROOTS,
};
}
BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
bdrv_next_reset(it);
*it = (BdrvNextIterator) {
.phase = BDRV_NEXT_BACKEND_ROOTS,
};
return bdrv_next(it);
}
/* Must be called when aborting a bdrv_next() iteration before
* bdrv_next() returns NULL */
void bdrv_next_cleanup(BdrvNextIterator *it)
{
/* Must be called from the main loop */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
if (it->blk) {
bdrv_unref(blk_bs(it->blk));
blk_unref(it->blk);
}
} else {
bdrv_unref(it->bs);
}
bdrv_next_reset(it);
}
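
The iterator holds a strong reference on whatever it last returned, so running the loop to completion releases everything, while an early break must call bdrv_next_cleanup() to drop the reference still held. A standalone sketch of that ownership pattern with invented refcounted objects:

    #include <stdio.h>

    typedef struct Obj { const char *name; int refcnt; } Obj;

    static void obj_ref(Obj *o)   { o->refcnt++; }
    static void obj_unref(Obj *o) { if (--o->refcnt == 0) printf("freed %s\n", o->name); }

    typedef struct It { Obj **pos; Obj *held; } It;

    /* Return the next object with a reference held on it, dropping the
     * reference on the previously returned one; NULL at the end. */
    static Obj *it_next(It *it)
    {
        Obj *old = it->held;
        Obj *cur = *it->pos;
        if (cur) {
            obj_ref(cur);
            it->pos++;
        }
        it->held = cur;
        if (old) {
            obj_unref(old);
        }
        return cur;
    }

    /* For early exits only: drop the reference the iterator still holds. */
    static void it_cleanup(It *it)
    {
        if (it->held) {
            obj_unref(it->held);
            it->held = NULL;
        }
    }

    int main(void)
    {
        Obj a = { "a", 1 }, b = { "b", 1 };
        Obj *list[] = { &a, &b, NULL };
        It it = { list, NULL };

        for (Obj *o = it_next(&it); o; o = it_next(&it)) {
            if (o == &b) {        /* bail out early, like an error path */
                it_cleanup(&it);
                break;
            }
        }
        return 0;
    }
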
/*
* Add a BlockBackend into the list of backends referenced by the monitor, with
* the given @name acting as the handle for the monitor.
@@ -701,16 +658,9 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
*/
void blk_remove_bs(BlockBackend *blk)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
BlockDriverState *bs;
notifier_list_notify(&blk->remove_bs_notifiers, blk);
if (tgm->throttle_state) {
bs = blk_bs(blk);
bdrv_drained_begin(bs);
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
bdrv_drained_end(bs);
if (blk->public.throttle_state) {
throttle_timers_detach_aio_context(&blk->public.throttle_timers);
}
blk_update_root_state(blk);
@@ -724,7 +674,6 @@ void blk_remove_bs(BlockBackend *blk)
*/
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
@@ -733,9 +682,9 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
bdrv_ref(bs);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (tgm->throttle_state) {
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(
&blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
return 0;
@@ -1097,9 +1046,8 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
bdrv_inc_in_flight(bs);
/* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
bytes, false);
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, false);
}
ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
@@ -1122,10 +1070,10 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
}
bdrv_inc_in_flight(bs);
/* throttling disk I/O */
if (blk->public.throttle_group_member.throttle_state) {
throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
bytes, true);
if (blk->public.throttle_state) {
throttle_group_co_io_limits_intercept(blk, bytes, true);
}
if (!blk->enable_write_cache) {
@@ -1794,16 +1742,16 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
if (bs) {
if (tgm->throttle_state) {
bdrv_drained_begin(bs);
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, new_context);
bdrv_drained_end(bs);
if (blk->public.throttle_state) {
throttle_timers_detach_aio_context(&blk->public.throttle_timers);
}
bdrv_set_aio_context(bs, new_context);
if (blk->public.throttle_state) {
throttle_timers_attach_aio_context(&blk->public.throttle_timers,
new_context);
}
}
}
@@ -2021,41 +1969,33 @@ int blk_commit_all(void)
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
throttle_group_config(&blk->public.throttle_group_member, cfg);
throttle_group_config(blk, cfg);
}
void blk_io_limits_disable(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
assert(tgm->throttle_state);
if (bs) {
bdrv_drained_begin(bs);
}
throttle_group_unregister_tgm(tgm);
if (bs) {
bdrv_drained_end(bs);
}
assert(blk->public.throttle_state);
bdrv_drained_begin(blk_bs(blk));
throttle_group_unregister_blk(blk);
bdrv_drained_end(blk_bs(blk));
}
/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
assert(!blk->public.throttle_group_member.throttle_state);
throttle_group_register_tgm(&blk->public.throttle_group_member,
group, blk_get_aio_context(blk));
assert(!blk->public.throttle_state);
throttle_group_register_blk(blk, group);
}
void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
/* this BB is not part of any group */
if (!blk->public.throttle_group_member.throttle_state) {
if (!blk->public.throttle_state) {
return;
}
/* this BB is a part of the same group than the one we want */
if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
group)) {
if (!g_strcmp0(throttle_group_get_name(blk), group)) {
return;
}
@@ -2077,8 +2017,8 @@ static void blk_root_drained_begin(BdrvChild *child)
/* Note that blk->root may not be accessible here yet if we are just
* attaching to a BlockDriverState that is drained. Use child instead. */
if (atomic_fetch_inc(&blk->public.throttle_group_member.io_limits_disabled) == 0) {
throttle_group_restart_tgm(&blk->public.throttle_group_member);
if (atomic_fetch_inc(&blk->public.io_limits_disabled) == 0) {
throttle_group_restart_blk(blk);
}
}
@@ -2087,8 +2027,8 @@ static void blk_root_drained_end(BdrvChild *child)
BlockBackend *blk = child->opaque;
assert(blk->quiesce_counter);
assert(blk->public.throttle_group_member.io_limits_disabled);
atomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
assert(blk->public.io_limits_disabled);
atomic_dec(&blk->public.io_limits_disabled);
if (--blk->quiesce_counter == 0) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
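
Note: the block-backend.c hunks above move the throttling state between an embedded ThrottleGroupMember (blk->public.throttle_group_member, the newer API) and plain fields in blk->public (the 2.10-era API). A minimal standalone sketch of the embedded-member pattern follows; all names in it are illustrative stand-ins, not the QEMU API.

/*
 * Sketch of the embedded-member pattern behind the ThrottleGroupMember
 * refactoring: group-wide helpers take a pointer to the embedded member
 * rather than the whole backend, and recover the owner with
 * container_of().  All names here are illustrative.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, field) \
    ((type *) ((char *) (ptr) - offsetof(type, field)))

typedef struct ThrottleMember {
    int io_limits_disabled;          /* state shared with the group code */
} ThrottleMember;

typedef struct Backend {
    const char *name;
    ThrottleMember member;           /* like blk->public.throttle_group_member */
} Backend;

static void member_restart(ThrottleMember *m)
{
    /* The helper sees only the member, but can reach its owner. */
    Backend *owner = container_of(m, Backend, member);
    printf("restarting requests of %s (disabled=%d)\n",
           owner->name, m->io_limits_disabled);
}

int main(void)
{
    Backend blk = { .name = "drive0" };
    member_restart(&blk.member);
    return 0;
}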

block/bochs.c

@@ -28,7 +28,6 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
/**************************************************************/
@@ -111,15 +110,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
if (!bdrv_is_read_only(bs)) {
error_report("Opening bochs images without an explicit read-only=on "
"option is deprecated. Future versions will refuse to "
"open the image instead of automatically marking the "
"image read-only.");
ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
if (ret < 0) {
return ret;
}
ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
if (ret < 0) {
return ret;
}
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
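
Note: one side of this hunk (and the matching hunks in cloop and dmg below) warns that opening these read-only formats without an explicit read-only=on is deprecated before forcing the image read-only; the other side forces it silently. A toy sketch of the check-warn-force pattern, with hypothetical stand-ins for bdrv_is_read_only() and bdrv_set_read_only():

#include <stdbool.h>
#include <stdio.h>

static bool opened_read_only;    /* stands in for bdrv_is_read_only(bs) */

static int force_read_only(void) /* stands in for bdrv_set_read_only() */
{
    opened_read_only = true;
    return 0;
}

static int format_open(void)
{
    if (!opened_read_only) {
        fprintf(stderr, "warning: opening this image without read-only=on "
                        "is deprecated\n");
    }
    return force_read_only();    /* no write support yet */
}

int main(void)
{
    return format_open();
}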

block/cloop.c

@@ -23,7 +23,6 @@
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -73,15 +72,9 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
if (!bdrv_is_read_only(bs)) {
error_report("Opening cloop images without an explicit read-only=on "
"option is deprecated. Future versions will refuse to "
"open the image instead of automatically marking the "
"image read-only.");
ret = bdrv_set_read_only(bs, true, errp);
if (ret < 0) {
return ret;
}
ret = bdrv_set_read_only(bs, true, errp);
if (ret < 0) {
return ret;
}
/* read header */

block/commit.c

@@ -36,11 +36,13 @@ enum {
typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
int base_flags;
int orig_overlay_flags;
char *backing_file_str;
} CommitBlockJob;
@@ -79,15 +81,18 @@ static void commit_complete(BlockJob *job, void *opaque)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
CommitCompleteData *data = opaque;
BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
BlockDriverState *commit_top_bs = s->commit_top_bs;
BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
int ret = data->ret;
bool remove_commit_top_bs = false;
/* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
/* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
bdrv_ref(top);
bdrv_ref(commit_top_bs);
if (overlay_bs) {
bdrv_ref(overlay_bs);
}
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
@@ -95,9 +100,9 @@ static void commit_complete(BlockJob *job, void *opaque)
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
ret = bdrv_drop_intermediate(s->commit_top_bs, base,
ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
s->backing_file_str);
} else {
} else if (overlay_bs) {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
@@ -110,6 +115,9 @@ static void commit_complete(BlockJob *job, void *opaque)
if (s->base_flags != bdrv_get_flags(base)) {
bdrv_reopen(base, s->base_flags, NULL);
}
if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
}
g_free(s->backing_file_str);
blk_unref(s->top);
@@ -126,13 +134,10 @@ static void commit_complete(BlockJob *job, void *opaque)
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
&error_abort);
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
bdrv_unref(commit_top_bs);
bdrv_unref(overlay_bs);
bdrv_unref(top);
}
@@ -174,7 +179,7 @@ static void coroutine_fn commit_run(void *opaque)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, delay_ns);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
@@ -239,6 +244,16 @@ static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
static int64_t coroutine_fn bdrv_commit_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
{
bdrv_refresh_filename(bs->backing->bs);
@@ -252,7 +267,6 @@ static void bdrv_commit_top_close(BlockDriverState *bs)
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
@@ -265,7 +279,7 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
static BlockDriver bdrv_commit_top = {
.format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv,
.bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
.bdrv_co_get_block_status = bdrv_commit_top_get_block_status,
.bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
.bdrv_close = bdrv_commit_top_close,
.bdrv_child_perm = bdrv_commit_top_child_perm,
@@ -278,8 +292,10 @@ void commit_start(const char *job_id, BlockDriverState *bs,
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
int orig_overlay_flags;
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *overlay_bs;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
int ret;
@@ -290,19 +306,31 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
overlay_bs = bdrv_find_overlay(bs, top);
if (overlay_bs == NULL) {
error_setg(errp, "Could not find overlay image for %s:", top->filename);
return;
}
s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
/* convert base to r/w, if necessary */
orig_base_flags = bdrv_get_flags(base);
orig_base_flags = bdrv_get_flags(base);
orig_overlay_flags = bdrv_get_flags(overlay_bs);
/* convert base & overlay_bs to r/w, if necessary */
if (!(orig_base_flags & BDRV_O_RDWR)) {
reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
orig_base_flags | BDRV_O_RDWR);
}
if (!(orig_overlay_flags & BDRV_O_RDWR)) {
reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
orig_overlay_flags | BDRV_O_RDWR);
}
if (reopen_queue) {
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
@@ -331,7 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
error_propagate(errp, local_err);
goto fail;
}
bdrv_replace_node(top, commit_top_bs, &local_err);
bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
if (local_err) {
bdrv_unref(commit_top_bs);
commit_top_bs = NULL;
@@ -363,6 +391,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
/* overlay_bs must be blocked because it needs to be modified to
* update the backing image string. */
ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
}
s->base = blk_new(BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
@@ -381,8 +417,13 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
s->base_flags = orig_base_flags;
s->active = bs;
s->base_flags = orig_base_flags;
s->orig_overlay_flags = orig_overlay_flags;
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
trace_commit_start(bs, base, top, s);
@@ -397,7 +438,7 @@ fail:
blk_unref(s->top);
}
if (commit_top_bs) {
bdrv_replace_node(commit_top_bs, top, &error_abort);
bdrv_set_backing_hd(overlay_bs, top, &error_abort);
}
block_job_early_fail(&s->common);
}
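
Note: the commit job on the 2.10 side tracks the overlay of 'top', located via bdrv_find_overlay(), so it can reopen that node read-write and later repoint its backing file. A standalone sketch of that backing-chain walk; the Node type below is a stand-in for BlockDriverState.

#include <stdio.h>

typedef struct Node {
    const char *name;
    struct Node *backing;
} Node;

/* Walk from the active layer down and return the node directly above
 * 'target', i.e. the one whose backing file is 'target'. */
static Node *find_overlay(Node *active, Node *target)
{
    for (Node *p = active; p && p->backing; p = p->backing) {
        if (p->backing == target) {
            return p;
        }
    }
    return NULL;
}

int main(void)
{
    Node base = { "base", NULL };
    Node mid  = { "mid", &base };
    Node top  = { "top", &mid };
    Node *ovl = find_overlay(&top, &mid);
    printf("overlay of mid: %s\n", ovl ? ovl->name : "(none)");
    return 0;
}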

block/crypto.c

@@ -279,9 +279,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
return -EINVAL;
}
bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -367,9 +364,8 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
uint64_t payload_offset =
size_t payload_offset =
qcrypto_block_get_payload_offset(crypto->block);
assert(payload_offset < (INT64_MAX - offset));
offset += payload_offset;
@@ -383,65 +379,66 @@ static void block_crypto_close(BlockDriverState *bs)
}
/*
* 1 MB bounce buffer gives good performance / memory tradeoff
* when using cache=none|directsync.
*/
#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
#define BLOCK_CRYPTO_MAX_SECTORS 32
static coroutine_fn int
block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
BlockCrypto *crypto = bs->opaque;
uint64_t cur_bytes; /* number of bytes in current iteration */
int cur_nr_sectors; /* number of sectors in current iteration */
uint64_t bytes_done = 0;
uint8_t *cipher_data = NULL;
QEMUIOVector hd_qiov;
int ret = 0;
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
assert(!flags);
assert(payload_offset < INT64_MAX);
assert(QEMU_IS_ALIGNED(offset, sector_size));
assert(QEMU_IS_ALIGNED(bytes, sector_size));
size_t payload_offset =
qcrypto_block_get_payload_offset(crypto->block) / 512;
qemu_iovec_init(&hd_qiov, qiov->niov);
/* Bounce buffer because we don't wish to expose cipher text
* in qiov which points to guest memory.
/* Bounce buffer so we have a linear mem region for
* entire sector. XXX optimize so we avoid bounce
* buffer in case that qiov->niov == 1
*/
cipher_data =
qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
qiov->size));
if (cipher_data == NULL) {
ret = -ENOMEM;
goto cleanup;
}
while (bytes) {
cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
while (remaining_sectors) {
cur_nr_sectors = remaining_sectors;
if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
}
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done,
cur_bytes, &hd_qiov, 0);
ret = bdrv_co_readv(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
goto cleanup;
}
if (qcrypto_block_decrypt(crypto->block, offset + bytes_done,
cipher_data, cur_bytes, NULL) < 0) {
if (qcrypto_block_decrypt(crypto->block,
sector_num,
cipher_data, cur_nr_sectors * 512,
NULL) < 0) {
ret = -EIO;
goto cleanup;
}
qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
qemu_iovec_from_buf(qiov, bytes_done,
cipher_data, cur_nr_sectors * 512);
bytes -= cur_bytes;
bytes_done += cur_bytes;
remaining_sectors -= cur_nr_sectors;
sector_num += cur_nr_sectors;
bytes_done += cur_nr_sectors * 512;
}
cleanup:
@@ -453,58 +450,63 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
static coroutine_fn int
block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
BlockCrypto *crypto = bs->opaque;
uint64_t cur_bytes; /* number of bytes in current iteration */
int cur_nr_sectors; /* number of sectors in current iteration */
uint64_t bytes_done = 0;
uint8_t *cipher_data = NULL;
QEMUIOVector hd_qiov;
int ret = 0;
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
assert(!(flags & ~BDRV_REQ_FUA));
assert(payload_offset < INT64_MAX);
assert(QEMU_IS_ALIGNED(offset, sector_size));
assert(QEMU_IS_ALIGNED(bytes, sector_size));
size_t payload_offset =
qcrypto_block_get_payload_offset(crypto->block) / 512;
qemu_iovec_init(&hd_qiov, qiov->niov);
/* Bounce buffer because we're not permitted to touch
* contents of qiov - it points to guest memory.
/* Bounce buffer so we have a linear mem region for
* entire sector. XXX optimize so we avoid bounce
* buffer in case that qiov->niov == 1
*/
cipher_data =
qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
qiov->size));
if (cipher_data == NULL) {
ret = -ENOMEM;
goto cleanup;
}
while (bytes) {
cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
while (remaining_sectors) {
cur_nr_sectors = remaining_sectors;
qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes);
if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
}
if (qcrypto_block_encrypt(crypto->block, offset + bytes_done,
cipher_data, cur_bytes, NULL) < 0) {
qemu_iovec_to_buf(qiov, bytes_done,
cipher_data, cur_nr_sectors * 512);
if (qcrypto_block_encrypt(crypto->block,
sector_num,
cipher_data, cur_nr_sectors * 512,
NULL) < 0) {
ret = -EIO;
goto cleanup;
}
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done,
cur_bytes, &hd_qiov, flags);
ret = bdrv_co_writev(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
goto cleanup;
}
bytes -= cur_bytes;
bytes_done += cur_bytes;
remaining_sectors -= cur_nr_sectors;
sector_num += cur_nr_sectors;
bytes_done += cur_nr_sectors * 512;
}
cleanup:
@@ -514,22 +516,13 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return ret;
}
static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
bs->bl.request_alignment = sector_size; /* No sub-sector I/O */
}
static int64_t block_crypto_getlength(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
int64_t len = bdrv_getlength(bs->file->bs);
uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
assert(offset < INT64_MAX);
assert(offset < len);
ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
len -= offset;
@@ -620,9 +613,8 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,
.bdrv_refresh_limits = block_crypto_refresh_limits,
.bdrv_co_preadv = block_crypto_co_preadv,
.bdrv_co_pwritev = block_crypto_co_pwritev,
.bdrv_co_readv = block_crypto_co_readv,
.bdrv_co_writev = block_crypto_co_writev,
.bdrv_getlength = block_crypto_getlength,
.bdrv_get_info = block_crypto_get_info_luks,
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
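
Note: both versions of the crypto read/write paths run the data through a bounded bounce buffer so the cipher never touches guest memory directly; they differ in whether the loop counts bytes or 512-byte sectors. A standalone sketch of the sector-based loop, with xor_cipher() as a toy stand-in for qcrypto_block_encrypt():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SECTOR_SIZE 512
#define MAX_SECTORS 32

static void xor_cipher(uint8_t *buf, size_t len, uint8_t key)
{
    for (size_t i = 0; i < len; i++) {
        buf[i] ^= key;
    }
}

static void encrypt_range(const uint8_t *src, uint8_t *dst,
                          int64_t sector_num, int remaining_sectors)
{
    uint8_t bounce[MAX_SECTORS * SECTOR_SIZE];
    uint64_t done = 0;

    while (remaining_sectors) {
        int cur = remaining_sectors > MAX_SECTORS ? MAX_SECTORS
                                                  : remaining_sectors;
        size_t len = (size_t)cur * SECTOR_SIZE;

        memcpy(bounce, src + done, len);   /* like qemu_iovec_to_buf() */
        xor_cipher(bounce, len, 0x5a);     /* like qcrypto_block_encrypt() */
        memcpy(dst + done, bounce, len);   /* like bdrv_co_writev() */

        remaining_sectors -= cur;
        sector_num += cur;
        done += len;
    }
}

int main(void)
{
    static uint8_t in[64 * SECTOR_SIZE] = { 1, 2, 3 }, out[sizeof(in)];
    encrypt_range(in, out, 0, 64);
    printf("first byte: %02x\n", out[0]);
    return 0;
}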

block/dirty-bitmap.c

@@ -1,7 +1,7 @@
/*
* Block Dirty Bitmap
*
* Copyright (c) 2016-2017 Red Hat. Inc
* Copyright (c) 2016 Red Hat. Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -38,11 +38,11 @@
*/
struct BdrvDirtyBitmap {
QemuMutex *mutex;
HBitmap *bitmap; /* Dirty bitmap implementation */
HBitmap *bitmap; /* Dirty sector bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
char *name; /* Optional non-empty unique ID */
int64_t size; /* Size of the bitmap, in bytes */
int64_t size; /* Size of the bitmap (Number of sectors) */
bool disabled; /* Bitmap is disabled. It ignores all writes to
the device */
int active_iterators; /* How many iterators are active */
@@ -115,14 +115,17 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
{
int64_t bitmap_size;
BdrvDirtyBitmap *bitmap;
uint32_t sector_granularity;
assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE);
assert((granularity & (granularity - 1)) == 0);
if (name && bdrv_find_dirty_bitmap(bs, name)) {
error_setg(errp, "Bitmap already exists: %s", name);
return NULL;
}
bitmap_size = bdrv_getlength(bs);
sector_granularity = granularity >> BDRV_SECTOR_BITS;
assert(sector_granularity);
bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
@@ -130,7 +133,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
bitmap->mutex = &bs->dirty_bitmap_mutex;
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
@@ -170,6 +173,45 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
qemu_mutex_unlock(bitmap->mutex);
}
int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
uint64_t i;
int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
/* To optimize: we can make hbitmap to internally check the range in a
* coarse level, or at least do it word by word. */
for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) {
if (hbitmap_get(bitmap->meta, i)) {
return true;
}
}
return false;
}
int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
bool dirty;
qemu_mutex_lock(bitmap->mutex);
dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
qemu_mutex_unlock(bitmap->mutex);
return dirty;
}
void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap, int64_t sector,
int nb_sectors)
{
qemu_mutex_lock(bitmap->mutex);
hbitmap_reset(bitmap->meta, sector, nb_sectors);
qemu_mutex_unlock(bitmap->mutex);
}
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
{
return bitmap->size;
@@ -299,16 +341,17 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
* Truncates _all_ bitmaps attached to a BDS.
* Called with BQL taken.
*/
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
BdrvDirtyBitmap *bitmap;
uint64_t size = bdrv_nb_sectors(bs);
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
assert(!bitmap->active_iterators);
hbitmap_truncate(bitmap->bitmap, bytes);
bitmap->size = bytes;
hbitmap_truncate(bitmap->bitmap, size);
bitmap->size = size;
}
bdrv_dirty_bitmaps_unlock(bs);
}
@@ -418,7 +461,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
info->count = bdrv_get_dirty_count(bm);
info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS;
info->granularity = bdrv_dirty_bitmap_granularity(bm);
info->has_name = !!bm->name;
info->name = g_strdup(bm->name);
@@ -433,13 +476,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t offset)
int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t sector)
{
if (bitmap) {
return hbitmap_get(bitmap->bitmap, offset);
return hbitmap_get(bitmap->bitmap, sector);
} else {
return false;
return 0;
}
}
@@ -465,13 +508,19 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
{
return 1U << hbitmap_granularity(bitmap->bitmap);
return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap)
uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap)
{
return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta);
}
BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
uint64_t first_sector)
{
BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0);
hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector);
iter->bitmap = bitmap;
bitmap->active_iterators++;
return iter;
@@ -503,35 +552,35 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_set(bitmap->bitmap, offset, bytes);
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
int64_t cur_sector, int64_t nr_sectors)
{
bdrv_dirty_bitmap_lock(bitmap);
bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes);
bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
bdrv_dirty_bitmap_unlock(bitmap);
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_reset(bitmap->bitmap, offset, bytes);
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
int64_t cur_sector, int64_t nr_sectors)
{
bdrv_dirty_bitmap_lock(bitmap);
bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes);
bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
bdrv_dirty_bitmap_unlock(bitmap);
}
@@ -561,42 +610,42 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
}
uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
uint64_t offset, uint64_t bytes)
uint64_t start, uint64_t count)
{
return hbitmap_serialization_size(bitmap->bitmap, offset, bytes);
return hbitmap_serialization_size(bitmap->bitmap, start, count);
}
uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
{
return hbitmap_serialization_align(bitmap->bitmap);
return hbitmap_serialization_granularity(bitmap->bitmap);
}
void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
uint8_t *buf, uint64_t offset,
uint64_t bytes)
uint8_t *buf, uint64_t start,
uint64_t count)
{
hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes);
hbitmap_serialize_part(bitmap->bitmap, buf, start, count);
}
void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
uint8_t *buf, uint64_t offset,
uint64_t bytes, bool finish)
uint8_t *buf, uint64_t start,
uint64_t count, bool finish)
{
hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish);
hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish);
}
void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
uint64_t offset, uint64_t bytes,
uint64_t start, uint64_t count,
bool finish)
{
hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish);
hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
}
void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
uint64_t offset, uint64_t bytes,
uint64_t start, uint64_t count,
bool finish)
{
hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish);
hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
}
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
@@ -604,7 +653,8 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
hbitmap_deserialize_finish(bitmap->bitmap);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
@@ -618,7 +668,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
continue;
}
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_set(bitmap->bitmap, offset, bytes);
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
bdrv_dirty_bitmaps_unlock(bs);
}
@@ -626,9 +676,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
/**
* Advance a BdrvDirtyBitmapIter to an arbitrary offset.
*/
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset)
void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num)
{
hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset);
hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num);
}
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
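
Note: the 2.10 dirty bitmap tracks sectors rather than bytes, so byte granularities are shifted down by BDRV_SECTOR_BITS before allocating the HBitmap and dirty counts are shifted back up for reporting. A sketch of just those conversions; the values are made up, BDRV_SECTOR_BITS matches QEMU's definition.

#include <stdint.h>
#include <stdio.h>

#define BDRV_SECTOR_BITS 9

int main(void)
{
    uint32_t granularity = 65536;                   /* bytes, power of 2 */
    uint32_t sector_granularity = granularity >> BDRV_SECTOR_BITS;
    int64_t image_bytes = 10 * 1024 * 1024;
    int64_t bitmap_size = image_bytes >> BDRV_SECTOR_BITS;   /* sectors */
    int64_t dirty_sectors = 256;
    int64_t dirty_bytes = dirty_sectors << BDRV_SECTOR_BITS; /* reporting */

    printf("granularity=%u sectors, size=%lld sectors, dirty=%lld bytes\n",
           sector_granularity, (long long)bitmap_size,
           (long long)dirty_bytes);
    return 0;
}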

block/dmg.c

@@ -111,7 +111,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
uncompressed_sectors = s->sectorcounts[chunk];
break;
case 1: /* copy */
uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512);
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
/* as the all-zeroes block may be large, it is treated specially: the
@@ -419,15 +419,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
if (!bdrv_is_read_only(bs)) {
error_report("Opening dmg images without an explicit read-only=on "
"option is deprecated. Future versions will refuse to "
"open the image instead of automatically marking the "
"image read-only.");
ret = bdrv_set_read_only(bs, true, errp);
if (ret < 0) {
return ret;
}
ret = bdrv_set_read_only(bs, true, errp);
if (ret < 0) {
return ret;
}
block_module_load_one("dmg-bz2");

block/file-posix.c

@@ -31,11 +31,9 @@
#include "block/thread-pool.h"
#include "qemu/iov.h"
#include "block/raw-aio.h"
#include "qapi/util.h"
#include "qapi/qmp/qstring.h"
#include "scsi/pr-manager.h"
#include "scsi/constants.h"
#if defined(__APPLE__) && (__MACH__)
#include <paths.h>
#include <sys/param.h>
@@ -158,8 +156,6 @@ typedef struct BDRVRawState {
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
PRManager *pr_mgr;
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -407,11 +403,6 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "file locking mode (on/off/auto, default: auto)",
},
{
.name = "pr-manager",
.type = QEMU_OPT_STRING,
.help = "id of persistent reservation manager object (default: none)",
},
{ /* end of list */ }
},
};
@@ -423,7 +414,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename = NULL;
const char *str;
BlockdevAioOptions aio, aio_default;
int fd, ret;
struct stat st;
@@ -448,9 +438,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
? BLOCKDEV_AIO_OPTIONS_NATIVE
: BLOCKDEV_AIO_OPTIONS_THREADS;
aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
qemu_opt_get(opts, "aio"),
aio_default, &local_err);
aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -458,9 +447,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
locking = qapi_enum_parse(&OnOffAuto_lookup,
qemu_opt_get(opts, "locking"),
ON_OFF_AUTO_AUTO, &local_err);
locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"),
ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -487,16 +475,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
abort();
}
str = qemu_opt_get(opts, "pr-manager");
if (str) {
s->pr_mgr = pr_manager_lookup(str, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
}
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@@ -1694,7 +1672,6 @@ static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
case PREALLOC_MODE_FULL:
{
int64_t num = 0, left = offset - current_length;
off_t seek_result;
/*
* Knowing the final size from the beginning could allow the file
@@ -1709,8 +1686,8 @@ static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
buf = g_malloc0(65536);
seek_result = lseek(fd, current_length, SEEK_SET);
if (seek_result < 0) {
result = lseek(fd, current_length, SEEK_SET);
if (result < 0) {
result = -errno;
error_setg_errno(errp, -result,
"Failed to seek to the old end of file");
@@ -1748,7 +1725,7 @@ static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
default:
result = -ENOTSUP;
error_setg(errp, "Unsupported preallocation mode: %s",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return result;
}
@@ -1783,7 +1760,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Preallocation mode '%s' unsupported for this "
"non-regular file", PreallocMode_str(prealloc));
"non-regular file", PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
@@ -1997,8 +1974,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
PREALLOC_MODE_OFF, &local_err);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(buf);
if (local_err) {
error_propagate(errp, local_err);
@@ -2619,15 +2597,6 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
if (fd_open(bs) < 0)
return NULL;
if (req == SG_IO && s->pr_mgr) {
struct sg_io_hdr *io_hdr = buf;
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
s->fd, io_hdr, cb, opaque);
}
}
acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;
@@ -2731,16 +2700,6 @@ static int hdev_create(const char *filename, QemuOpts *opts,
ret = -ENOSPC;
}
if (!ret && total_size) {
uint8_t buf[BDRV_SECTOR_SIZE] = { 0 };
int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size);
if (lseek(fd, 0, SEEK_SET) == -1) {
ret = -errno;
} else {
ret = qemu_write_full(fd, buf, zero_size);
ret = ret == zero_size ? 0 : -errno;
}
}
qemu_close(fd);
return ret;
}
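
Note: the file-posix hunks show qapi_enum_parse() changing shape, from a plain string array plus an explicit _MAX bound to a lookup object that carries its own size. A standalone sketch of the newer shape; the QEnumLookup name matches QEMU, the rest is simplified and illustrative.

#include <stdio.h>
#include <string.h>

typedef struct QEnumLookup {
    const char *const *array;
    int size;
} QEnumLookup;

static int enum_parse(const QEnumLookup *lookup, const char *buf, int def)
{
    if (!buf) {
        return def;         /* option not given: fall back to the default */
    }
    for (int i = 0; i < lookup->size; i++) {
        if (!strcmp(buf, lookup->array[i])) {
            return i;
        }
    }
    return -1;              /* the real code reports an Error instead */
}

static const char *const aio_names[] = { "threads", "native" };
static const QEnumLookup aio_lookup = { aio_names, 2 };

int main(void)
{
    printf("aio=%d\n", enum_parse(&aio_lookup, "native", 0));   /* 1 */
    printf("aio=%d\n", enum_parse(&aio_lookup, NULL, 0));       /* 0 */
    return 0;
}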

block/file-win32.c

@@ -31,6 +31,7 @@
#include "block/thread-pool.h"
#include "qemu/iov.h"
#include "qapi/qmp/qstring.h"
#include "qapi/util.h"
#include <windows.h>
#include <winioctl.h>
@@ -302,8 +303,8 @@ static bool get_aio_option(QemuOpts *opts, int flags, Error **errp)
aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE
: BLOCKDEV_AIO_OPTIONS_THREADS;
aio = qapi_enum_parse(&BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
aio_default, errp);
aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp);
switch (aio) {
case BLOCKDEV_AIO_OPTIONS_NATIVE:
@@ -469,7 +470,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}

block/gluster.c

@@ -12,6 +12,7 @@
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/util.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
@@ -164,12 +165,7 @@ static QemuOptsList runtime_unix_opts = {
{
.name = GLUSTER_OPT_SOCKET,
.type = QEMU_OPT_STRING,
.help = "socket file path (legacy)",
},
{
.name = GLUSTER_OPT_PATH,
.type = QEMU_OPT_STRING,
.help = "socket file path (QAPI)",
.help = "socket file path)",
},
{ /* end of list */ }
},
@@ -548,7 +544,8 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
if (!strcmp(ptr, "tcp")) {
ptr = "inet"; /* accept legacy "tcp" */
}
type = qapi_enum_parse(&SocketAddressType_lookup, ptr, -1, NULL);
type = qapi_enum_parse(SocketAddressType_lookup, ptr,
SOCKET_ADDRESS_TYPE__MAX, -1, NULL);
if (type != SOCKET_ADDRESS_TYPE_INET
&& type != SOCKET_ADDRESS_TYPE_UNIX) {
error_setg(&local_err,
@@ -617,18 +614,10 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
if (!ptr) {
ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
} else if (qemu_opt_get(opts, GLUSTER_OPT_SOCKET)) {
error_setg(&local_err,
"Conflicting parameters 'path' and 'socket'");
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
if (!ptr) {
error_setg(&local_err, QERR_MISSING_PARAMETER,
GLUSTER_OPT_PATH);
GLUSTER_OPT_SOCKET);
error_append_hint(&local_err, GERR_INDEX_HINT, i);
goto out;
}
@@ -693,7 +682,7 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
"file.server.0.host=1.2.3.4,"
"file.server.0.port=24007,"
"file.server.1.transport=unix,"
"file.server.1.path=/var/run/glusterd.socket ..."
"file.server.1.socket=/var/run/glusterd.socket ..."
"\n");
errno = -ret;
return NULL;
@@ -1013,7 +1002,8 @@ static int qemu_gluster_create(const char *filename,
BDRV_SECTOR_SIZE);
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, tmp, PREALLOC_MODE_OFF,
prealloc = qapi_enum_parse(PreallocMode_lookup, tmp,
PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(tmp);
if (local_err) {
@@ -1060,7 +1050,7 @@ static int qemu_gluster_create(const char *filename,
default:
ret = -EINVAL;
error_setg(errp, "Unsupported preallocation mode: %s",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
break;
}
@@ -1112,7 +1102,7 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}

block/io.c

@@ -42,9 +42,9 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
void bdrv_parent_drained_begin(BlockDriverState *bs)
{
BdrvChild *c, *next;
BdrvChild *c;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->role->drained_begin) {
c->role->drained_begin(c);
}
@@ -53,9 +53,9 @@ void bdrv_parent_drained_begin(BlockDriverState *bs)
void bdrv_parent_drained_end(BlockDriverState *bs)
{
BdrvChild *c, *next;
BdrvChild *c;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->role->drained_end) {
c->role->drained_end(c);
}
@@ -156,7 +156,6 @@ typedef struct {
Coroutine *co;
BlockDriverState *bs;
bool done;
bool begin;
} BdrvCoDrainData;
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
@@ -164,45 +163,36 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
BdrvCoDrainData *data = opaque;
BlockDriverState *bs = data->bs;
if (data->begin) {
bs->drv->bdrv_co_drain_begin(bs);
} else {
bs->drv->bdrv_co_drain_end(bs);
}
bs->drv->bdrv_co_drain(bs);
/* Set data->done before reading bs->wakeup. */
atomic_mb_set(&data->done, true);
bdrv_wakeup(bs);
}
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
static void bdrv_drain_invoke(BlockDriverState *bs)
{
BdrvChild *child, *tmp;
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
BdrvCoDrainData data = { .bs = bs, .done = false };
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
(!begin && !bs->drv->bdrv_co_drain_end)) {
if (!bs->drv || !bs->drv->bdrv_co_drain) {
return;
}
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
bdrv_coroutine_enter(bs, data.co);
BDRV_POLL_WHILE(bs, !data.done);
QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
bdrv_drain_invoke(child->bs, begin);
}
}
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
static bool bdrv_drain_recurse(BlockDriverState *bs)
{
BdrvChild *child, *tmp;
bool waited;
/* Wait for drained requests to finish */
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
/* Ensure any pending metadata writes are submitted to bs->file. */
bdrv_drain_invoke(bs);
QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
BlockDriverState *bs = child->bs;
bool in_main_loop =
@@ -218,7 +208,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
*/
bdrv_ref(bs);
}
waited |= bdrv_drain_recurse(bs, begin);
waited |= bdrv_drain_recurse(bs);
if (in_main_loop) {
bdrv_unref(bs);
}
@@ -234,18 +224,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
BlockDriverState *bs = data->bs;
bdrv_dec_in_flight(bs);
if (data->begin) {
bdrv_drained_begin(bs);
} else {
bdrv_drained_end(bs);
}
bdrv_drained_begin(bs);
data->done = true;
aio_co_wake(co);
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin)
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
{
BdrvCoDrainData data;
@@ -258,7 +242,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.co = qemu_coroutine_self(),
.bs = bs,
.done = false,
.begin = begin,
};
bdrv_inc_in_flight(bs);
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
@@ -273,7 +256,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
void bdrv_drained_begin(BlockDriverState *bs)
{
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, true);
bdrv_co_yield_to_drain(bs);
return;
}
@@ -282,24 +265,17 @@ void bdrv_drained_begin(BlockDriverState *bs)
bdrv_parent_drained_begin(bs);
}
bdrv_drain_invoke(bs, true);
bdrv_drain_recurse(bs, true);
bdrv_drain_recurse(bs);
}
void bdrv_drained_end(BlockDriverState *bs)
{
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, false);
return;
}
assert(bs->quiesce_counter > 0);
if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
return;
}
bdrv_parent_drained_end(bs);
bdrv_drain_invoke(bs, false);
bdrv_drain_recurse(bs, false);
aio_enable_external(bdrv_get_aio_context(bs));
}
@@ -355,7 +331,6 @@ void bdrv_drain_all_begin(void)
aio_context_acquire(aio_context);
bdrv_parent_drained_begin(bs);
aio_disable_external(aio_context);
bdrv_drain_invoke(bs, true);
aio_context_release(aio_context);
if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -378,7 +353,7 @@ void bdrv_drain_all_begin(void)
aio_context_acquire(aio_context);
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
if (aio_context == bdrv_get_aio_context(bs)) {
waited |= bdrv_drain_recurse(bs, true);
waited |= bdrv_drain_recurse(bs);
}
}
aio_context_release(aio_context);
@@ -399,8 +374,6 @@ void bdrv_drain_all_end(void)
aio_context_acquire(aio_context);
aio_enable_external(aio_context);
bdrv_parent_drained_end(bs);
bdrv_drain_invoke(bs, false);
bdrv_drain_recurse(bs, false);
aio_context_release(aio_context);
}
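
Note: on one side of the drain hunks above, bdrv_drain_invoke() takes a bool and recurses into children for both the begin and end driver callbacks; the 2.10 side has a single bdrv_co_drain callback and no recursion at this point. A toy sketch of the recursive begin/end dispatch; the Node tree stands in for the BDS graph.

#include <stdbool.h>
#include <stdio.h>

typedef struct Node {
    const char *name;
    struct Node *children[2];
} Node;

/* Invoke the begin or end callback on a node and all of its children,
 * the shape of the two-phase bdrv_drain_invoke(bs, begin). */
static void drain_invoke(Node *n, bool begin)
{
    if (!n) {
        return;
    }
    printf("%s: drain_%s\n", n->name, begin ? "begin" : "end");
    for (int i = 0; i < 2; i++) {
        drain_invoke(n->children[i], begin);
    }
}

int main(void)
{
    Node file = { "file", { NULL, NULL } };
    Node backing = { "backing", { NULL, NULL } };
    Node top = { "qcow2", { &file, &backing } };
    drain_invoke(&top, true);
    drain_invoke(&top, false);
    return 0;
}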
@@ -476,9 +449,9 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
* Round a region to cluster boundaries
*/
void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
int64_t offset, unsigned int bytes,
int64_t *cluster_offset,
int64_t *cluster_bytes)
unsigned int *cluster_bytes)
{
BlockDriverInfo bdi;
@@ -723,37 +696,39 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
*/
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
int ret;
int64_t target_size, bytes, offset = 0;
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
BlockDriverState *bs = child->bs;
BlockDriverState *file;
int n;
target_size = bdrv_getlength(bs);
if (target_size < 0) {
return target_size;
target_sectors = bdrv_nb_sectors(bs);
if (target_sectors < 0) {
return target_sectors;
}
for (;;) {
bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES);
if (bytes <= 0) {
nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (nb_sectors <= 0) {
return 0;
}
ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
if (ret < 0) {
error_report("error getting block status at offset %" PRId64 ": %s",
offset, strerror(-ret));
error_report("error getting block status at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
return ret;
}
if (ret & BDRV_BLOCK_ZERO) {
offset += bytes;
sector_num += n;
continue;
}
ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
n << BDRV_SECTOR_BITS, flags);
if (ret < 0) {
error_report("error writing zeroes at offset %" PRId64 ": %s",
offset, strerror(-ret));
error_report("error writing zeroes at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
return ret;
}
offset += bytes;
sector_num += n;
}
}
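
Note: both versions of bdrv_make_zero() above walk the image in bounded steps, skip ranges the status query already reports as zero, and explicitly zero the rest; they differ only in whether the cursor counts bytes or sectors. A standalone sketch of the byte-based walk, with a toy status function standing in for bdrv_block_status() in which even 1 MiB chunks report as already zero:

#include <stdint.h>
#include <stdio.h>

#define MAX_BYTES (1 << 20)

static int block_is_zero(int64_t offset, int64_t *pnum)
{
    int64_t run = MAX_BYTES - (offset % MAX_BYTES);
    if (run < *pnum) {
        *pnum = run;        /* a status run never extends the request */
    }
    return ((offset / MAX_BYTES) % 2) == 0;
}

static void make_zero(int64_t target_size)
{
    int64_t offset = 0;
    while (offset < target_size) {
        int64_t bytes = target_size - offset;
        if (bytes > MAX_BYTES) {
            bytes = MAX_BYTES;   /* clamp each step, like the real loop */
        }
        if (block_is_zero(offset, &bytes)) {
            offset += bytes;     /* already zero: skip it */
            continue;
        }
        printf("write zeroes at %lld, %lld bytes\n",
               (long long)offset, (long long)bytes);
        offset += bytes;
    }
}

int main(void)
{
    make_zero(4LL * MAX_BYTES);
    return 0;
}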
@@ -860,10 +835,6 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
assert(!(flags & ~BDRV_REQ_MASK));
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_co_preadv) {
return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
}
@@ -905,10 +876,6 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
assert(!(flags & ~BDRV_REQ_MASK));
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_co_pwritev) {
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
flags & bs->supported_write_flags);
@@ -960,10 +927,6 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (!drv->bdrv_co_pwritev_compressed) {
return -ENOTSUP;
}
@@ -987,21 +950,17 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
struct iovec iov;
QEMUIOVector local_qiov;
int64_t cluster_offset;
int64_t cluster_bytes;
unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
BDRV_REQUEST_MAX_BYTES);
unsigned int progress = 0;
if (!drv) {
return -ENOMEDIUM;
}
/* FIXME We cannot require callers to have write permissions when all they
* are doing is a read request. If we did things right, write permissions
* would be obtained anyway, but internally by the copy-on-read code. As
* long as it is implemented here rather than in a separate filter driver,
* long as it is implemented here rather than in a separat filter driver,
* the copy-on-read code doesn't have its own BdrvChild, however, for which
* it could request permissions. Therefore we have to bypass the permission
* system for the moment. */
@@ -1053,7 +1012,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
goto err;
}
bdrv_debug_event(bs, BLKDBG_COR_WRITE);
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, pnum)) {
/* FIXME: Should we (perhaps conditionally) be setting
@@ -1147,14 +1105,18 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
/* TODO: Simplify further once bdrv_is_allocated no longer
* requires sector alignment */
int64_t start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
int64_t end = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE);
int64_t pnum;
ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
ret = bdrv_is_allocated(bs, start, end - start, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != bytes) {
if (!ret || pnum != end - start) {
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
@@ -1314,10 +1276,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
bs->bl.request_alignment);
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
if (!drv) {
return -ENOMEDIUM;
}
assert(alignment % bs->bl.request_alignment == 0);
head = offset % alignment;
tail = (offset + bytes) % alignment;
@@ -1420,14 +1378,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
bool waited;
int ret;
int64_t start_sector = offset >> BDRV_SECTOR_BITS;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
uint64_t bytes_remaining = bytes;
int max_transfer;
if (!drv) {
return -ENOMEDIUM;
}
if (bdrv_has_readonly_bitmaps(bs)) {
return -EPERM;
}
@@ -1498,7 +1453,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, offset, bytes);
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
stat64_max(&bs->wr_highest_offset, offset + bytes);
@@ -1792,203 +1747,111 @@ int bdrv_flush_all(void)
}
typedef struct BdrvCoBlockStatusData {
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
bool want_zero;
int64_t offset;
int64_t bytes;
int64_t *pnum;
int64_t *map;
BlockDriverState **file;
int ret;
int64_t sector_num;
int nb_sectors;
int *pnum;
int64_t ret;
bool done;
} BdrvCoBlockStatusData;
int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
int *pnum,
BlockDriverState **file)
{
assert(bs->file && bs->file->bs);
*pnum = nb_sectors;
*file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
int *pnum,
BlockDriverState **file)
{
assert(bs->backing && bs->backing->bs);
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
} BdrvCoGetBlockStatusData;
/*
* Returns the allocation status of the specified sectors.
* Drivers not implementing the functionality are assumed to not support
* backing files, hence all their sectors are reported as allocated.
*
* If 'want_zero' is true, the caller is querying for mapping purposes,
* and the result should include BDRV_BLOCK_OFFSET_VALID and
* BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those
* bits particularly if it allows for a larger value in 'pnum'.
*
* If 'offset' is beyond the end of the disk image the return value is
* If 'sector_num' is beyond the end of the disk image the return value is
* BDRV_BLOCK_EOF and 'pnum' is set to 0.
*
* 'bytes' is the max value 'pnum' should be set to. If bytes goes
* 'pnum' is set to the number of sectors (including and immediately following
* the specified sector) that are known to be in the same
* allocated/unallocated state.
*
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped; if 'pnum' is set to
* the end of the image, then the returned value will include BDRV_BLOCK_EOF.
*
* 'pnum' is set to the number of bytes (including and immediately
* following the specified offset) that are easily known to be in the
* same allocated/unallocated state. Note that a second call starting
* at the original offset plus returned pnum may have the same status.
* The returned value is non-zero on success except at end-of-file.
*
* Returns negative errno on failure. Otherwise, if the
* BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
* set to the host mapping and BDS corresponding to the guest offset.
* If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
* points to the BDS which the sector range is allocated in.
*/
static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
bool want_zero,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
BlockDriverState **file)
{
int64_t total_size;
int64_t n; /* bytes */
int ret;
int64_t local_map = 0;
BlockDriverState *local_file = NULL;
int64_t aligned_offset, aligned_bytes;
uint32_t align;
int64_t total_sectors;
int64_t n;
int64_t ret, ret2;
assert(pnum);
*pnum = 0;
total_size = bdrv_getlength(bs);
if (total_size < 0) {
ret = total_size;
goto early_out;
*file = NULL;
total_sectors = bdrv_nb_sectors(bs);
if (total_sectors < 0) {
return total_sectors;
}
if (offset >= total_size) {
ret = BDRV_BLOCK_EOF;
goto early_out;
}
if (!bytes) {
ret = 0;
goto early_out;
if (sector_num >= total_sectors) {
*pnum = 0;
return BDRV_BLOCK_EOF;
}
n = total_size - offset;
if (n < bytes) {
bytes = n;
n = total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
/* Must be non-NULL or bdrv_getlength() would have failed */
assert(bs->drv);
if (!bs->drv->bdrv_co_get_block_status) {
*pnum = bytes;
*pnum = nb_sectors;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
if (offset + bytes == total_size) {
if (sector_num + nb_sectors == total_sectors) {
ret |= BDRV_BLOCK_EOF;
}
if (bs->drv->protocol_name) {
ret |= BDRV_BLOCK_OFFSET_VALID;
local_map = offset;
local_file = bs;
ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
*file = bs;
}
goto early_out;
return ret;
}
bdrv_inc_in_flight(bs);
/* Round out to request_alignment boundaries */
/* TODO: until we have a byte-based driver callback, we also have to
* round out to sectors, even if that is bigger than request_alignment */
align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
aligned_offset = QEMU_ALIGN_DOWN(offset, align);
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
{
int count; /* sectors */
int64_t longret;
assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
BDRV_SECTOR_SIZE));
/*
* The contract allows us to return pnum smaller than bytes, even
* if the next query would see the same status; we truncate the
* request to avoid overflowing the driver's 32-bit interface.
*/
longret = bs->drv->bdrv_co_get_block_status(
bs, aligned_offset >> BDRV_SECTOR_BITS,
MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count,
&local_file);
if (longret < 0) {
assert(INT_MIN <= longret);
ret = longret;
goto out;
}
if (longret & BDRV_BLOCK_OFFSET_VALID) {
local_map = longret & BDRV_BLOCK_OFFSET_MASK;
}
ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
*pnum = count * BDRV_SECTOR_SIZE;
}
/*
* The driver's result must be a multiple of request_alignment.
* Clamp pnum and adjust map to original request.
*/
assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset);
*pnum -= offset - aligned_offset;
if (*pnum > bytes) {
*pnum = bytes;
}
if (ret & BDRV_BLOCK_OFFSET_VALID) {
local_map += offset - aligned_offset;
ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
file);
if (ret < 0) {
*pnum = 0;
goto out;
}
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
ret = bdrv_co_block_status(local_file, want_zero, local_map,
*pnum, pnum, &local_map, &local_file);
assert(ret & BDRV_BLOCK_OFFSET_VALID && *file);
ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
*pnum, pnum, file);
goto out;
}
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
ret |= BDRV_BLOCK_ALLOCATED;
} else if (want_zero) {
} else {
if (bdrv_unallocated_blocks_are_zero(bs)) {
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing) {
BlockDriverState *bs2 = bs->backing->bs;
int64_t size2 = bdrv_getlength(bs2);
if (size2 >= 0 && offset >= size2) {
int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
}
}
}
if (want_zero && local_file && local_file != bs &&
if (*file && *file != bs &&
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
(ret & BDRV_BLOCK_OFFSET_VALID)) {
int64_t file_pnum;
int ret2;
BlockDriverState *file2;
int file_pnum;
ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
*pnum, &file_pnum, NULL, NULL);
ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
*pnum, &file_pnum, &file2);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information, it
* is useful but not necessary.
@@ -2011,36 +1874,26 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
out:
bdrv_dec_in_flight(bs);
if (ret >= 0 && offset + *pnum == total_size) {
if (ret >= 0 && sector_num + *pnum == total_sectors) {
ret |= BDRV_BLOCK_EOF;
}
early_out:
if (file) {
*file = local_file;
}
if (map) {
*map = local_map;
}
return ret;
}
static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file)
static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
int nb_sectors,
int *pnum,
BlockDriverState **file)
{
BlockDriverState *p;
int ret = 0;
int64_t ret = 0;
bool first = true;
assert(bs != base);
for (p = bs; p != base; p = backing_bs(p)) {
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
file);
ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
if (ret < 0) {
break;
}
@@ -2051,94 +1904,94 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
* unallocated length we learned from an earlier
* iteration.
*/
*pnum = bytes;
*pnum = nb_sectors;
}
if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
break;
}
/* [offset, pnum] unallocated on this layer, which could be only
* the first part of [offset, bytes]. */
bytes = MIN(bytes, *pnum);
/* [sector_num, pnum] unallocated on this layer, which could be only
* the first part of [sector_num, nb_sectors]. */
nb_sectors = MIN(nb_sectors, *pnum);
first = false;
}
return ret;
}
/* Coroutine wrapper for bdrv_block_status_above() */
static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
/* Coroutine wrapper for bdrv_get_block_status_above() */
static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
{
BdrvCoBlockStatusData *data = opaque;
BdrvCoGetBlockStatusData *data = opaque;
data->ret = bdrv_co_block_status_above(data->bs, data->base,
data->want_zero,
data->offset, data->bytes,
data->pnum, data->map, data->file);
data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
data->sector_num,
data->nb_sectors,
data->pnum,
data->file);
data->done = true;
}
/*
* Synchronous wrapper around bdrv_co_block_status_above().
* Synchronous wrapper around bdrv_co_get_block_status_above().
*
* See bdrv_co_block_status_above() for details.
* See bdrv_co_get_block_status_above() for details.
*/
static int bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero, int64_t offset,
int64_t bytes, int64_t *pnum,
int64_t *map,
BlockDriverState **file)
int64_t bdrv_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
int nb_sectors, int *pnum,
BlockDriverState **file)
{
Coroutine *co;
BdrvCoBlockStatusData data = {
BdrvCoGetBlockStatusData data = {
.bs = bs,
.base = base,
.want_zero = want_zero,
.offset = offset,
.bytes = bytes,
.pnum = pnum,
.map = map,
.file = file,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.pnum = pnum,
.done = false,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_block_status_above_co_entry(&data);
bdrv_get_block_status_above_co_entry(&data);
} else {
co = qemu_coroutine_create(bdrv_block_status_above_co_entry, &data);
co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
&data);
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, !data.done);
}
return data.ret;
}
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
int64_t bdrv_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
BlockDriverState **file)
{
return bdrv_common_block_status_above(bs, base, true, offset, bytes,
pnum, map, file);
}
int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
return bdrv_block_status_above(bs, backing_bs(bs),
offset, bytes, pnum, map, file);
return bdrv_get_block_status_above(bs, backing_bs(bs),
sector_num, nb_sectors, pnum, file);
}
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum)
{
int ret;
int64_t dummy;
BlockDriverState *file;
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
int nb_sectors = bytes >> BDRV_SECTOR_BITS;
int64_t ret;
int psectors;
ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
bytes, pnum ? pnum : &dummy, NULL,
NULL);
assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
&file);
if (ret < 0) {
return ret;
}
if (pnum) {
*pnum = psectors * BDRV_SECTOR_SIZE;
}
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
@@ -2406,12 +2259,6 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
}
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
if (!bs->drv) {
/* bs->drv->bdrv_co_flush() might have ejected the BDS
* (even in case of apparent success) */
ret = -ENOMEDIUM;
goto out;
}
if (bs->drv->bdrv_co_flush_to_disk) {
ret = bs->drv->bdrv_co_flush_to_disk(bs);
} else if (bs->drv->bdrv_aio_flush) {
@@ -2581,10 +2428,6 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
num = max_pdiscard;
}
if (!bs->drv) {
ret = -ENOMEDIUM;
goto out;
}
if (bs->drv->bdrv_co_pdiscard) {
ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
} else {
@@ -2613,7 +2456,8 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
ret = 0;
out:
atomic_inc(&bs->write_gen);
bdrv_set_dirty(bs, req.offset, req.bytes);
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
return ret;
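The block/io.c hunks above interleave the byte-based block-status code with its sector-based counterpart; every conversion between the two units is a shift by BDRV_SECTOR_BITS. Below is a minimal standalone sketch of that round-trip (not QEMU source; the two macro values mirror QEMU's 512-byte sector constants):

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define BDRV_SECTOR_BITS 9
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)   /* 512 bytes */

int main(void)
{
    int64_t offset = 1048576 + 512;                   /* byte-based request */
    int64_t bytes = 65536;

    /* The sector-based API only accepts 512-byte-aligned requests... */
    assert(offset % BDRV_SECTOR_SIZE == 0 && bytes % BDRV_SECTOR_SIZE == 0);
    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    /* ...and a sector count converts back by the inverse shift. */
    int64_t pnum = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
    printf("sector_num=%" PRId64 " nb_sectors=%d pnum=%" PRId64 "\n",
           sector_num, nb_sectors, pnum);
    return 0;
}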

block/iscsi.c

@@ -2,7 +2,7 @@
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
* Copyright (c) 2012-2017 Peter Lieven <pl@kamp.de>
* Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -34,20 +34,15 @@
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "block/block_int.h"
#include "scsi/constants.h"
#include "block/scsi.h"
#include "qemu/iov.h"
#include "qemu/uuid.h"
#include "qmp-commands.h"
#include "qapi/qmp/qstring.h"
#include "crypto/secret.h"
#include "scsi/utils.h"
/* Conflict between scsi/utils.h and libiscsi! :( */
#define SCSI_XFER_NONE ISCSI_XFER_NONE
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
#undef SCSI_XFER_NONE
QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
#ifdef __linux__
#include <scsi/sg.h>
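The include hunk above trades the #define/#undef workaround for the SCSI_XFER_NONE name clash against a compile-time equality check. A hedged sketch of the same idea using C11 static_assert, with stand-in enums rather than the real scsi/utils.h and libiscsi definitions (note QEMU_BUILD_BUG_ON fires when its condition is true, so asserting equality is the equivalent form):

#include <assert.h>

enum qemu_xfer { QEMU_XFER_NONE = 0 };        /* stand-in enums, not the */
enum iscsi_xfer { ISCSI_XFER_NONE = 0 };      /* real library definitions */

static_assert((int)QEMU_XFER_NONE == (int)ISCSI_XFER_NONE,
              "the two headers must agree on XFER_NONE");

int main(void)
{
    return 0;
}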
@@ -214,9 +209,47 @@ static inline unsigned exp_random(double mean)
static int iscsi_translate_sense(struct scsi_sense *sense)
{
return - scsi_sense_to_errno(sense->key,
(sense->ascq & 0xFF00) >> 8,
sense->ascq & 0xFF);
int ret;
switch (sense->key) {
case SCSI_SENSE_NOT_READY:
return -EBUSY;
case SCSI_SENSE_DATA_PROTECTION:
return -EACCES;
case SCSI_SENSE_COMMAND_ABORTED:
return -ECANCELED;
case SCSI_SENSE_ILLEGAL_REQUEST:
/* Parse ASCQ */
break;
default:
return -EIO;
}
switch (sense->ascq) {
case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
ret = -EINVAL;
break;
case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
ret = -ENOSPC;
break;
case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
ret = -ENOTSUP;
break;
case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
ret = -ENOMEDIUM;
break;
case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
ret = -EACCES;
break;
default:
ret = -EIO;
break;
}
return ret;
}
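iscsi_translate_sense above collapses a SCSI sense key plus a combined ASC/ASCQ code into a negative errno. A self-contained sketch of that mapping shape, with stand-in constants (values assumed for illustration; the real ones come from libiscsi's scsi-lowlevel.h):

#include <errno.h>
#include <stdio.h>

enum { KEY_NOT_READY = 0x02, KEY_ILLEGAL_REQUEST = 0x05 };
enum { ASCQ_LBA_OUT_OF_RANGE = 0x2100 };      /* assumed: ASC 0x21, ASCQ 0x00 */

struct sense { int key; int ascq; };

static int translate(const struct sense *s)
{
    if (s->key == KEY_NOT_READY) {
        return -EBUSY;
    }
    if (s->key != KEY_ILLEGAL_REQUEST) {
        return -EIO;                          /* catch-all, as above */
    }
    return s->ascq == ASCQ_LBA_OUT_OF_RANGE ? -ENOSPC : -EINVAL;
}

int main(void)
{
    struct sense s = { KEY_ILLEGAL_REQUEST, ASCQ_LBA_OUT_OF_RANGE };
    printf("mapped to %d\n", translate(&s)); /* -ENOSPC, i.e. -28 on Linux */
    return 0;
}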
/* Called (via iscsi_service) with QemuMutex held. */
@@ -1128,9 +1161,6 @@ retry:
goto retry;
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
bytes >> BDRV_SECTOR_BITS);
if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
/* the target might fail with a check condition if it
is not happy with the alignment of the UNMAP request
@@ -1143,6 +1173,9 @@ retry:
goto out_unlock;
}
iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
bytes >> BDRV_SECTOR_BITS);
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
return r;
@@ -2054,7 +2087,7 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}

block/mirror.c

@@ -141,7 +141,8 @@ static void mirror_write_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
op->bytes >> BDRV_SECTOR_BITS);
action = mirror_error_action(s, false, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -160,7 +161,8 @@ static void mirror_read_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
op->bytes >> BDRV_SECTOR_BITS);
action = mirror_error_action(s, true, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -190,9 +192,10 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
bool need_cow;
int ret = 0;
int64_t align_offset = *offset;
int64_t align_bytes = *bytes;
unsigned int align_bytes = *bytes;
int max_bytes = s->granularity * s->max_iov;
assert(*bytes < INT_MAX);
need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
s->cow_bitmap);
@@ -328,15 +331,17 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
offset = bdrv_dirty_iter_next(s->dbi);
offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
if (offset < 0) {
bdrv_set_dirty_iter(s->dbi, 0);
offset = bdrv_dirty_iter_next(s->dbi);
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
BDRV_SECTOR_SIZE);
assert(offset >= 0);
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
@@ -357,36 +362,39 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int64_t next_offset = offset + nb_chunks * s->granularity;
int64_t next_chunk = next_offset / s->granularity;
if (next_offset >= s->bdev_length ||
!bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) {
!bdrv_get_dirty_locked(source, s->dirty_bitmap,
next_offset >> BDRV_SECTOR_BITS)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
break;
}
next_dirty = bdrv_dirty_iter_next(s->dbi);
next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
if (next_dirty > next_offset || next_dirty < 0) {
/* The bitmap iterator's cache is stale, refresh it */
bdrv_set_dirty_iter(s->dbi, next_offset);
next_dirty = bdrv_dirty_iter_next(s->dbi);
bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS);
next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
}
assert(next_dirty == next_offset);
nb_chunks++;
}
/* Clear dirty bits before querying the block status, because
* calling bdrv_block_status_above could yield - if some blocks are
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
nb_chunks * s->granularity);
bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS,
nb_chunks * sectors_per_chunk);
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
while (nb_chunks > 0 && offset < s->bdev_length) {
int ret;
int64_t io_bytes;
int64_t ret;
int io_sectors;
unsigned int io_bytes;
int64_t io_bytes_acct;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
MIRROR_METHOD_ZERO,
@@ -394,9 +402,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} mirror_method = MIRROR_METHOD_COPY;
assert(!(offset % s->granularity));
ret = bdrv_block_status_above(source, NULL, offset,
nb_chunks * s->granularity,
&io_bytes, NULL, NULL);
ret = bdrv_get_block_status_above(source, NULL,
offset >> BDRV_SECTOR_BITS,
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
io_bytes = io_sectors * BDRV_SECTOR_SIZE;
if (ret < 0) {
io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
} else if (ret & BDRV_BLOCK_DATA) {
@@ -408,7 +418,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
io_bytes = s->granularity;
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_offset;
int64_t target_bytes;
unsigned int target_bytes;
bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
&target_offset, &target_bytes);
if (target_offset == offset &&
@@ -598,7 +608,7 @@ static void mirror_throttle(MirrorBlockJob *s)
if (now - s->last_pause_ns > SLICE_TIME) {
s->last_pause_ns = now;
block_job_sleep_ns(&s->common, 0);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
} else {
block_job_pause_point(&s->common);
}
@@ -606,23 +616,25 @@ static void mirror_throttle(MirrorBlockJob *s)
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
int64_t offset;
int64_t sector_num, end;
BlockDriverState *base = s->base;
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
int ret;
int ret, n;
int64_t count;
end = s->bdev_length / BDRV_SECTOR_SIZE;
if (base == NULL && !bdrv_has_zero_init(target_bs)) {
if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
return 0;
}
s->initial_zeroing_ongoing = true;
for (offset = 0; offset < s->bdev_length; ) {
int bytes = MIN(s->bdev_length - offset,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
for (sector_num = 0; sector_num < end; ) {
int nb_sectors = MIN(end - sector_num,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
mirror_throttle(s);
@@ -638,8 +650,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
continue;
}
mirror_do_zero_or_discard(s, offset, bytes, false);
offset += bytes;
mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, false);
sector_num += nb_sectors;
}
mirror_wait_for_all_io(s);
@@ -647,10 +660,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
for (offset = 0; offset < s->bdev_length; ) {
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int bytes = MIN(s->bdev_length - offset,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
end - sector_num);
mirror_throttle(s);
@@ -658,16 +671,21 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count);
ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, &count);
if (ret < 0) {
return ret;
}
assert(count);
/* TODO: Relax this once bdrv_is_allocated_above and dirty
* bitmaps no longer require sector alignment. */
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
n = count >> BDRV_SECTOR_BITS;
assert(n > 0);
if (ret == 1) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
offset += count;
sector_num += n;
}
return 0;
}
@@ -778,7 +796,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
assert(!s->dbi);
s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0);
for (;;) {
uint64_t delay_ns = 0;
int64_t cnt, delta;
@@ -793,10 +811,11 @@ static void coroutine_fn mirror_run(void *opaque)
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
* far, cnt is the number of dirty bytes remaining and
* far, cnt is the number of dirty sectors remaining and
* s->bytes_in_flight is the number of bytes currently being
* processed; together those are the current total operation length */
s->common.len = s->common.offset + s->bytes_in_flight + cnt;
s->common.len = s->common.offset + s->bytes_in_flight +
cnt * BDRV_SECTOR_SIZE;
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -808,7 +827,8 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
s->buf_free_count, s->in_flight);
mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
@@ -849,7 +869,7 @@ static void coroutine_fn mirror_run(void *opaque)
* whether to switch to target check one last time if I/O has
* come in the meanwhile, and if not flush the data to disk.
*/
trace_mirror_before_drain(s, cnt);
trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);
bdrv_drained_begin(bs);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -868,15 +888,16 @@ static void coroutine_fn mirror_run(void *opaque)
}
ret = 0;
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
s->synced, delay_ns);
if (!s->synced) {
block_job_sleep_ns(&s->common, delay_ns);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
} else if (!should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
block_job_sleep_ns(&s->common, delay_ns);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
}
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
@@ -1042,6 +1063,16 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
return bdrv_co_flush(bs->backing->bs);
}
static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file)
{
*pnum = nb_sectors;
*file = bs->backing->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
(sector_num << BDRV_SECTOR_BITS);
}
static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
@@ -1056,11 +1087,6 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
{
if (bs->backing == NULL) {
/* we can be here after failed bdrv_attach_child in
* bdrv_set_backing_hd */
return;
}
bdrv_refresh_filename(bs->backing->bs);
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
bs->backing->bs->filename);
@@ -1072,7 +1098,6 @@ static void bdrv_mirror_top_close(BlockDriverState *bs)
static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
@@ -1094,7 +1119,7 @@ static BlockDriver bdrv_mirror_top = {
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
.bdrv_co_get_block_status = bdrv_mirror_top_get_block_status,
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
@@ -1127,7 +1152,9 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
granularity = bdrv_get_default_bitmap_granularity(target);
}
assert(is_power_of_2(granularity));
assert ((granularity & (granularity - 1)) == 0);
/* Granularity must be large enough for sector-based dirty bitmap */
assert(granularity >= BDRV_SECTOR_SIZE);
if (buf_size < 0) {
error_setg(errp, "Invalid parameter 'buf-size'");
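The granularity assertions a few hunks up check the same power-of-two property two ways: is_power_of_2(granularity) and (granularity & (granularity - 1)) == 0. A small sketch of why the bit trick works:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_power_of_2(uint64_t g)
{
    /* g - 1 flips the lowest set bit and everything below it, so the
     * AND is zero exactly when g has a single set bit. */
    return g && (g & (g - 1)) == 0;
}

int main(void)
{
    assert(is_power_of_2(512) && is_power_of_2(65536));
    assert(!is_power_of_2(0) && !is_power_of_2(513));
    return 0;
}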

block/nbd-client.c

@@ -31,10 +31,10 @@
#include "qapi/error.h"
#include "nbd-client.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ (uint64_t)(intptr_t)(bs))
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
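The two macro variants above differ only in parenthesization; both XOR a request index with the BlockDriverState pointer, which makes encoding and decoding the same self-inverse operation. A sketch demonstrating the round-trip:

#include <assert.h>
#include <stdint.h>

#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
#define INDEX_TO_HANDLE(bs, index)  ((index) ^ (uint64_t)(intptr_t)(bs))

int main(void)
{
    int dummy_bs;                             /* stands in for a real BDS */
    for (uint64_t i = 0; i < 16; i++) {
        uint64_t handle = INDEX_TO_HANDLE(&dummy_bs, i);
        assert(HANDLE_TO_INDEX(&dummy_bs, handle) == i);
    }
    return 0;
}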
static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
int i;
@@ -78,7 +78,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
while (!s->quit) {
assert(s->reply.handle == 0);
ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
if (local_err) {
if (ret < 0) {
error_report_err(local_err);
}
if (ret <= 0) {
@@ -92,9 +92,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
i = HANDLE_TO_INDEX(s, s->reply.handle);
if (i >= MAX_NBD_REQUESTS ||
!s->requests[i].coroutine ||
!s->requests[i].receiving ||
(nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
{
!s->requests[i].receiving) {
break;
}
@@ -114,7 +112,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
}
s->quit = true;
nbd_recv_coroutines_wake_all(s);
nbd_recv_coroutines_enter_all(s);
s->read_reply_co = NULL;
}
@@ -123,7 +121,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
QEMUIOVector *qiov)
{
NBDClientSession *s = nbd_get_client_session(bs);
int rc, i;
int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
while (s->in_flight == MAX_NBD_REQUESTS) {
@@ -141,7 +139,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
assert(i < MAX_NBD_REQUESTS);
s->requests[i].coroutine = qemu_coroutine_self();
s->requests[i].offset = request->from;
s->requests[i].receiving = false;
request->handle = INDEX_TO_HANDLE(s, i);
@@ -159,12 +156,11 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0 && !s->quit) {
if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
NULL) < 0) {
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
NULL);
if (ret != request->len) {
rc = -EIO;
}
} else if (rc >= 0) {
rc = -EIO;
}
qio_channel_set_cork(s->ioc, false);
} else {
@@ -182,519 +178,70 @@ err:
return rc;
}
static inline uint16_t payload_advance16(uint8_t **payload)
static void nbd_co_receive_reply(NBDClientSession *s,
NBDRequest *request,
NBDReply *reply,
QEMUIOVector *qiov)
{
*payload += 2;
return lduw_be_p(*payload - 2);
}
static inline uint32_t payload_advance32(uint8_t **payload)
{
*payload += 4;
return ldl_be_p(*payload - 4);
}
static inline uint64_t payload_advance64(uint8_t **payload)
{
*payload += 8;
return ldq_be_p(*payload - 8);
}
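The payload_advance* helpers above read one big-endian field and move the cursor past it, using QEMU's ld*_be_p loads. A standalone sketch of the pattern, with a hand-rolled big-endian read standing in for ldl_be_p:

#include <stdint.h>
#include <stdio.h>

static inline uint32_t advance32(uint8_t **payload)
{
    uint8_t *p = *payload;
    uint32_t v = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                 ((uint32_t)p[2] << 8) | p[3];
    *payload += 4;
    return v;
}

int main(void)
{
    uint8_t buf[] = { 0x00, 0x00, 0x21, 0x00, 0xde, 0xad, 0xbe, 0xef };
    uint8_t *cursor = buf;
    uint32_t first = advance32(&cursor);      /* separate statements: the */
    uint32_t second = advance32(&cursor);     /* calls must stay ordered  */
    printf("first=%#x second=%#x\n", first, second); /* 0x2100 0xdeadbeef */
    return 0;
}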
static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
uint8_t *payload, uint64_t orig_offset,
QEMUIOVector *qiov, Error **errp)
{
uint64_t offset;
uint32_t hole_size;
if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_OFFSET_HOLE");
return -EINVAL;
}
offset = payload_advance64(&payload);
hole_size = payload_advance32(&payload);
if (!hole_size || offset < orig_offset || hole_size > qiov->size ||
offset > orig_offset + qiov->size - hole_size) {
error_setg(errp, "Protocol error: server sent chunk exceeding requested"
" region");
return -EINVAL;
}
qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
return 0;
}
/* nbd_parse_error_payload
* on success @errp contains message describing nbd error reply
*/
static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
uint8_t *payload, int *request_ret,
Error **errp)
{
uint32_t error;
uint16_t message_size;
assert(chunk->type & (1 << 15));
if (chunk->length < sizeof(error) + sizeof(message_size)) {
error_setg(errp,
"Protocol error: invalid payload for structured error");
return -EINVAL;
}
error = nbd_errno_to_system_errno(payload_advance32(&payload));
if (error == 0) {
error_setg(errp, "Protocol error: server sent structured error chunk "
"with error = 0");
return -EINVAL;
}
*request_ret = -error;
message_size = payload_advance16(&payload);
if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
error_setg(errp, "Protocol error: server sent structured error chunk "
"with incorrect message size");
return -EINVAL;
}
/* TODO: Add a trace point to mention the server complaint */
/* TODO handle ERROR_OFFSET */
return 0;
}
static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
uint64_t orig_offset,
QEMUIOVector *qiov, Error **errp)
{
QEMUIOVector sub_qiov;
uint64_t offset;
size_t data_size;
int i = HANDLE_TO_INDEX(s, request->handle);
int ret;
NBDStructuredReplyChunk *chunk = &s->reply.structured;
assert(nbd_reply_is_structured(&s->reply));
/* The NBD spec requires at least one byte of payload */
if (chunk->length <= sizeof(offset)) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_OFFSET_DATA");
return -EINVAL;
}
if (nbd_read(s->ioc, &offset, sizeof(offset), errp) < 0) {
return -EIO;
}
be64_to_cpus(&offset);
data_size = chunk->length - sizeof(offset);
assert(data_size);
if (offset < orig_offset || data_size > qiov->size ||
offset > orig_offset + qiov->size - data_size) {
error_setg(errp, "Protocol error: server sent chunk exceeding requested"
" region");
return -EINVAL;
}
qemu_iovec_init(&sub_qiov, qiov->niov);
qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
qemu_iovec_destroy(&sub_qiov);
return ret < 0 ? -EIO : 0;
}
#define NBD_MAX_MALLOC_PAYLOAD 1000
/* nbd_co_receive_structured_payload
*/
static coroutine_fn int nbd_co_receive_structured_payload(
NBDClientSession *s, void **payload, Error **errp)
{
int ret;
uint32_t len;
assert(nbd_reply_is_structured(&s->reply));
len = s->reply.structured.length;
if (len == 0) {
return 0;
}
if (payload == NULL) {
error_setg(errp, "Unexpected structured payload");
return -EINVAL;
}
if (len > NBD_MAX_MALLOC_PAYLOAD) {
error_setg(errp, "Payload too large");
return -EINVAL;
}
*payload = g_new(char, len);
ret = nbd_read(s->ioc, *payload, len, errp);
if (ret < 0) {
g_free(*payload);
*payload = NULL;
return ret;
}
return 0;
}
/* nbd_co_do_receive_one_chunk
* for simple reply:
* set request_ret to received reply error
* if qiov is not NULL: read payload to @qiov
* for structured reply chunk:
* if error chunk: read payload, set @request_ret, do not set @payload
* else if offset_data chunk: read payload data to @qiov, do not set @payload
* else: read payload to @payload
*
* If function fails, @errp contains corresponding error message, and the
* connection with the server is suspect. If it returns 0, then the
* transaction succeeded (although @request_ret may be a negative errno
* corresponding to the server's error reply), and errp is unchanged.
*/
static coroutine_fn int nbd_co_do_receive_one_chunk(
NBDClientSession *s, uint64_t handle, bool only_structured,
int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
{
int ret;
int i = HANDLE_TO_INDEX(s, handle);
void *local_payload = NULL;
NBDStructuredReplyChunk *chunk;
if (payload) {
*payload = NULL;
}
*request_ret = 0;
/* Wait until we're woken up by nbd_read_reply_entry. */
s->requests[i].receiving = true;
qemu_coroutine_yield();
s->requests[i].receiving = false;
if (!s->ioc || s->quit) {
error_setg(errp, "Connection closed");
return -EIO;
}
assert(s->reply.handle == handle);
if (nbd_reply_is_simple(&s->reply)) {
if (only_structured) {
error_setg(errp, "Protocol error: simple reply when structured "
"reply chunk was expected");
return -EINVAL;
}
*request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
if (*request_ret < 0 || !qiov) {
return 0;
}
return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
errp) < 0 ? -EIO : 0;
}
/* handle structured reply chunk */
assert(s->info.structured_reply);
chunk = &s->reply.structured;
if (chunk->type == NBD_REPLY_TYPE_NONE) {
if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
" NBD_REPLY_FLAG_DONE flag set");
return -EINVAL;
}
if (chunk->length) {
error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with"
" nonzero length");
return -EINVAL;
}
return 0;
}
if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
if (!qiov) {
error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
return -EINVAL;
}
return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
qiov, errp);
}
if (nbd_reply_type_is_error(chunk->type)) {
payload = &local_payload;
}
ret = nbd_co_receive_structured_payload(s, payload, errp);
if (ret < 0) {
return ret;
}
if (nbd_reply_type_is_error(chunk->type)) {
ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
g_free(local_payload);
return ret;
}
return 0;
}
/* nbd_co_receive_one_chunk
* Read reply, wake up read_reply_co and set s->quit if needed.
* Return value is a fatal error code or normal nbd reply error code
*/
static coroutine_fn int nbd_co_receive_one_chunk(
NBDClientSession *s, uint64_t handle, bool only_structured,
QEMUIOVector *qiov, NBDReply *reply, void **payload, Error **errp)
{
int request_ret;
int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
&request_ret, qiov, payload, errp);
if (ret < 0) {
s->quit = true;
*reply = s->reply;
if (reply->handle != request->handle || !s->ioc || s->quit) {
reply->error = EIO;
} else {
/* For assert at loop start in nbd_read_reply_entry */
if (reply) {
*reply = s->reply;
if (qiov && reply->error == 0) {
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
NULL);
if (ret != request->len) {
reply->error = EIO;
s->quit = true;
}
}
/* Tell the read handler to read another header. */
s->reply.handle = 0;
ret = request_ret;
}
s->requests[i].coroutine = NULL;
/* Kick the read_reply_co to get the next reply. */
if (s->read_reply_co) {
aio_co_wake(s->read_reply_co);
}
return ret;
}
typedef struct NBDReplyChunkIter {
int ret;
Error *err;
bool done, only_structured;
} NBDReplyChunkIter;
static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
int ret, Error **local_err)
{
assert(ret < 0);
if (fatal || iter->ret == 0) {
if (iter->ret != 0) {
error_free(iter->err);
iter->err = NULL;
}
iter->ret = ret;
error_propagate(&iter->err, *local_err);
} else {
error_free(*local_err);
}
*local_err = NULL;
}
/* NBD_FOREACH_REPLY_CHUNK
*/
#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
qiov, reply, payload) \
for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
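NBD_FOREACH_REPLY_CHUNK above packs the whole receive loop into a for(;;) header: the init clause resets the iterator and the condition clause receives one chunk, returning false to fall out of the loop. A minimal sketch of that iterator-macro pattern (all names here are illustrative, not the NBD types):

#include <stdbool.h>
#include <stdio.h>

typedef struct Iter {
    int ret;                         /* error code, unused in this sketch */
    bool done;
    int next;
} Iter;

static bool iter_receive(Iter *it, int limit, int *item)
{
    if (it->done || it->next >= limit) {
        return false;                /* ends the FOREACH loop */
    }
    *item = it->next++;
    return true;
}

#define FOREACH_ITEM(it, limit, item) \
    for (it = (Iter) { 0 }; iter_receive(&it, limit, &item);)

int main(void)
{
    Iter it;
    int item;
    FOREACH_ITEM(it, 4, item) {
        printf("%d ", item);         /* 0 1 2 3 */
    }
    printf("\n");
    return 0;
}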
/* nbd_reply_chunk_iter_receive
*/
static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
NBDReplyChunkIter *iter,
uint64_t handle,
QEMUIOVector *qiov, NBDReply *reply,
void **payload)
{
int ret;
NBDReply local_reply;
NBDStructuredReplyChunk *chunk;
Error *local_err = NULL;
if (s->quit) {
error_setg(&local_err, "Connection closed");
nbd_iter_error(iter, true, -EIO, &local_err);
goto break_loop;
}
if (iter->done) {
/* Previous iteration was last. */
goto break_loop;
}
if (reply == NULL) {
reply = &local_reply;
}
ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
qiov, reply, payload, &local_err);
if (ret < 0) {
/* If it is a fatal error s->quit is set by nbd_co_receive_one_chunk */
nbd_iter_error(iter, s->quit, ret, &local_err);
}
/* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
if (nbd_reply_is_simple(&s->reply) || s->quit) {
goto break_loop;
}
chunk = &reply->structured;
iter->only_structured = true;
if (chunk->type == NBD_REPLY_TYPE_NONE) {
/* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
assert(chunk->flags & NBD_REPLY_FLAG_DONE);
goto break_loop;
}
if (chunk->flags & NBD_REPLY_FLAG_DONE) {
/* This iteration is last. */
iter->done = true;
}
/* Execute the loop body */
return true;
break_loop:
s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
qemu_co_mutex_lock(&s->send_mutex);
s->in_flight--;
qemu_co_queue_next(&s->free_sema);
qemu_co_mutex_unlock(&s->send_mutex);
return false;
}
static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
Error **errp)
{
NBDReplyChunkIter iter;
NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
/* nbd_reply_chunk_iter_receive does all the work */
}
error_propagate(errp, iter.err);
return iter.ret;
}
static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
uint64_t offset, QEMUIOVector *qiov,
Error **errp)
{
NBDReplyChunkIter iter;
NBDReply reply;
void *payload = NULL;
Error *local_err = NULL;
NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
qiov, &reply, &payload)
{
int ret;
NBDStructuredReplyChunk *chunk = &reply.structured;
assert(nbd_reply_is_structured(&reply));
switch (chunk->type) {
case NBD_REPLY_TYPE_OFFSET_DATA:
/* special cased in nbd_co_receive_one_chunk, data is already
* in qiov */
break;
case NBD_REPLY_TYPE_OFFSET_HOLE:
ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
offset, qiov, &local_err);
if (ret < 0) {
s->quit = true;
nbd_iter_error(&iter, true, ret, &local_err);
}
break;
default:
if (!nbd_reply_type_is_error(chunk->type)) {
/* not allowed reply type */
s->quit = true;
error_setg(&local_err,
"Unexpected reply type: %d (%s) for CMD_READ",
chunk->type, nbd_reply_type_lookup(chunk->type));
nbd_iter_error(&iter, true, -EINVAL, &local_err);
}
}
g_free(payload);
payload = NULL;
}
error_propagate(errp, iter.err);
return iter.ret;
}
static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
QEMUIOVector *write_qiov)
{
int ret;
Error *local_err = NULL;
NBDClientSession *client = nbd_get_client_session(bs);
assert(request->type != NBD_CMD_READ);
if (write_qiov) {
assert(request->type == NBD_CMD_WRITE);
assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
} else {
assert(request->type != NBD_CMD_WRITE);
}
ret = nbd_co_send_request(bs, request, write_qiov);
if (ret < 0) {
return ret;
}
ret = nbd_co_receive_return_code(client, request->handle, &local_err);
if (local_err) {
error_report_err(local_err);
}
return ret;
}
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
int ret;
Error *local_err = NULL;
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_READ,
.from = offset,
.len = bytes,
};
NBDReply reply;
ssize_t ret;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
assert(!flags);
if (!bytes) {
return 0;
}
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
return ret;
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, qiov);
}
ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
&local_err);
if (local_err) {
error_report_err(local_err);
}
return ret;
return -reply.error;
}
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -706,6 +253,8 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
.from = offset,
.len = bytes,
};
NBDReply reply;
ssize_t ret;
assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
if (flags & BDRV_REQ_FUA) {
@@ -715,21 +264,26 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
assert(bytes <= NBD_MAX_BUFFER_SIZE);
if (!bytes) {
return 0;
ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
return nbd_co_request(bs, &request, qiov);
return -reply.error;
}
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
ssize_t ret;
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_WRITE_ZEROES,
.from = offset,
.len = bytes,
};
NBDReply reply;
assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
@@ -744,16 +298,21 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
request.flags |= NBD_CMD_FLAG_NO_HOLE;
}
if (!bytes) {
return 0;
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
return nbd_co_request(bs, &request, NULL);
return -reply.error;
}
int nbd_client_co_flush(BlockDriverState *bs)
{
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = { .type = NBD_CMD_FLUSH };
NBDReply reply;
ssize_t ret;
if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
return 0;
@@ -762,7 +321,13 @@ int nbd_client_co_flush(BlockDriverState *bs)
request.from = 0;
request.len = 0;
return nbd_co_request(bs, &request, NULL);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
return -reply.error;
}
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
@@ -773,13 +338,22 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
.from = offset,
.len = bytes,
};
NBDReply reply;
ssize_t ret;
assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
return nbd_co_request(bs, &request, NULL);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
return -reply.error;
}
void nbd_client_detach_aio_context(BlockDriverState *bs)
@@ -825,7 +399,6 @@ int nbd_client_init(BlockDriverState *bs,
qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
client->info.request_sizes = true;
client->info.structured_reply = true;
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
tlscreds, hostname,
&client->ioc, &client->info, errp);
@@ -846,6 +419,9 @@ int nbd_client_init(BlockDriverState *bs,
if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
}
if (client->info.min_block > bs->bl.request_alignment) {
bs->bl.request_alignment = client->info.min_block;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_queue_init(&client->free_sema);

block/nbd-client.h

@@ -19,7 +19,6 @@
typedef struct {
Coroutine *coroutine;
uint64_t offset; /* original offset of the request */
bool receiving; /* waiting for read_reply_co? */
} NBDClientRequest;

block/nbd.c

@@ -388,7 +388,6 @@ static QemuOptsList nbd_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "ID of the TLS credentials to use",
},
{ /* end of list */ }
},
};
@@ -474,10 +473,8 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
NBDClientSession *s = nbd_get_client_session(bs);
uint32_t min = s->info.min_block;
uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
bs->bl.request_alignment = min ? min : BDRV_SECTOR_SIZE;
bs->bl.max_pdiscard = max;
bs->bl.max_pwrite_zeroes = max;
bs->bl.max_transfer = max;

block/nfs.c

@@ -771,7 +771,7 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}

block/parallels.c

@@ -35,7 +35,7 @@
#include "qemu/module.h"
#include "qemu/bswap.h"
#include "qemu/bitmap.h"
#include "migration/blocker.h"
#include "qapi/util.h"
/**************************************************************/
@@ -69,14 +69,13 @@ typedef enum ParallelsPreallocMode {
PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;
static QEnumLookup prealloc_mode_lookup = {
.array = (const char *const[]) {
"falloc",
"truncate",
},
.size = PRL_PREALLOC_MODE__MAX
static const char *prealloc_mode_lookup[] = {
"falloc",
"truncate",
NULL,
};
typedef struct BDRVParallelsState {
/** Locking is conservative, the lock protects
* - image file extending (truncate, fallocate)
@@ -101,7 +100,6 @@ typedef struct BDRVParallelsState {
unsigned int tracks;
unsigned int off_multiplier;
Error *migration_blocker;
} BDRVParallelsState;
@@ -698,9 +696,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
PRL_PREALLOC_MODE_FALLOCATE,
&local_err);
s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
g_free(buf);
if (local_err != NULL) {
goto fail_options;
@@ -710,7 +707,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
if (flags & BDRV_O_RDWR) {
s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
ret = parallels_update_header(bs);
if (ret < 0) {
@@ -722,16 +719,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_dirty_bmap =
bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
/* Disable migration until bdrv_invalidate_cache method is added */
error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
ret = migrate_add_blocker(s->migration_blocker, &local_err);
if (local_err) {
error_propagate(errp, local_err);
error_free(s->migration_blocker);
goto fail;
}
qemu_co_mutex_init(&s->lock);
return 0;
@@ -753,18 +740,18 @@ static void parallels_close(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) {
if (bs->open_flags & BDRV_O_RDWR) {
s->header->inuse = 0;
parallels_update_header(bs);
}
if (bs->open_flags & BDRV_O_RDWR) {
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
PREALLOC_MODE_OFF, NULL);
}
g_free(s->bat_dirty_bmap);
qemu_vfree(s->header);
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
}
static QemuOptsList parallels_create_opts = {

block/qapi.c

@@ -39,14 +39,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
{
ImageInfo **p_image_info;
BlockDriverState *bs0;
BlockDeviceInfo *info;
BlockDeviceInfo *info = g_malloc0(sizeof(*info));
if (!bs->drv) {
error_setg(errp, "Block device %s is ejected", bs->node_name);
return NULL;
}
info = g_malloc0(sizeof(*info));
info->file = g_strdup(bs->filename);
info->ro = bs->read_only;
info->drv = g_strdup(bs->drv->format_name);
@@ -72,11 +66,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->detect_zeroes = bs->detect_zeroes;
if (blk && blk_get_public(blk)->throttle_group_member.throttle_state) {
if (blk && blk_get_public(blk)->throttle_state) {
ThrottleConfig cfg;
BlockBackendPublic *blkp = blk_get_public(blk);
throttle_group_get_config(&blkp->throttle_group_member, &cfg);
throttle_group_get_config(blk, &cfg);
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
@@ -124,8 +117,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->iops_size = cfg.op_size;
info->has_group = true;
info->group =
g_strdup(throttle_group_get_name(&blkp->throttle_group_member));
info->group = g_strdup(throttle_group_get_name(blk));
}
info->write_threshold = bdrv_write_threshold_get(bs);

block/qcow.c

@@ -347,22 +347,19 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
* 'compressed_size'. 'compressed_size' must be > 0 and <
* cluster_size
*
* return 0 if not allocated, 1 if *result is assigned, and negative
* errno on failure.
* return 0 if not allocated.
*/
static int get_cluster_offset(BlockDriverState *bs,
uint64_t offset, int allocate,
int compressed_size,
int n_start, int n_end, uint64_t *result)
static uint64_t get_cluster_offset(BlockDriverState *bs,
uint64_t offset, int allocate,
int compressed_size,
int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
int min_index, i, j, l1_index, l2_index, ret;
int64_t l2_offset;
uint64_t *l2_table, cluster_offset, tmp;
int min_index, i, j, l1_index, l2_index;
uint64_t l2_offset, *l2_table, cluster_offset, tmp;
uint32_t min_count;
int new_l2_table;
*result = 0;
l1_index = offset >> (s->l2_bits + s->cluster_bits);
l2_offset = s->l1_table[l1_index];
new_l2_table = 0;
@@ -371,20 +368,15 @@ static int get_cluster_offset(BlockDriverState *bs,
return 0;
/* allocate a new l2 entry */
l2_offset = bdrv_getlength(bs->file->bs);
if (l2_offset < 0) {
return l2_offset;
}
/* round to cluster size */
l2_offset = QEMU_ALIGN_UP(l2_offset, s->cluster_size);
l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
ret = bdrv_pwrite_sync(bs->file,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp));
if (ret < 0) {
return ret;
}
if (bdrv_pwrite_sync(bs->file,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
new_l2_table = 1;
}
for(i = 0; i < L2_CACHE_SIZE; i++) {
@@ -411,17 +403,14 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
return ret;
}
if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
ret = bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
return ret;
}
if (bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
@@ -438,36 +427,24 @@ static int get_cluster_offset(BlockDriverState *bs,
/* if the cluster is already compressed, we must
decompress it in the case it is not completely
overwritten */
if (decompress_cluster(bs, cluster_offset) < 0) {
return -EIO;
}
if (decompress_cluster(bs, cluster_offset) < 0)
return 0;
cluster_offset = bdrv_getlength(bs->file->bs);
if ((int64_t) cluster_offset < 0) {
return cluster_offset;
}
cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
ret = bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
s->cluster_size);
if (ret < 0) {
return ret;
}
if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
s->cluster_size) !=
s->cluster_size)
return -1;
} else {
cluster_offset = bdrv_getlength(bs->file->bs);
if ((int64_t) cluster_offset < 0) {
return cluster_offset;
}
if (allocate == 1) {
/* round to cluster size */
cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
if (cluster_offset + s->cluster_size > INT64_MAX) {
return -E2BIG;
}
ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
PREALLOC_MODE_OFF, NULL);
if (ret < 0) {
return ret;
}
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
PREALLOC_MODE_OFF, NULL);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -477,21 +454,20 @@ static int get_cluster_offset(BlockDriverState *bs,
start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
for(i = 0; i < s->cluster_sectors; i++) {
if (i < n_start || i >= n_end) {
Error *err = NULL;
memset(s->cluster_data, 0x00, 512);
if (qcrypto_block_encrypt(s->crypto,
(start_sect + i) *
BDRV_SECTOR_SIZE,
if (qcrypto_block_encrypt(s->crypto, start_sect + i,
s->cluster_data,
BDRV_SECTOR_SIZE,
NULL) < 0) {
return -EIO;
}
ret = bdrv_pwrite(bs->file,
cluster_offset + i * 512,
s->cluster_data, 512);
if (ret < 0) {
return ret;
&err) < 0) {
error_free(err);
errno = EIO;
return -1;
}
if (bdrv_pwrite(bs->file,
cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
}
}
}
@@ -503,29 +479,23 @@ static int get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
ret = bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp));
if (ret < 0) {
return ret;
}
if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
}
*result = cluster_offset;
return 1;
return cluster_offset;
}
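The two get_cluster_offset signatures above differ in error convention: the variant returning uint64_t must use 0 for both "unallocated" and "I/O error", while the int-returning variant reports a negative errno and delivers the offset through an out-parameter, keeping the two cases distinct. A hedged sketch of the contrast (toy functions, not the qcow logic):

#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t lookup_old(int fail)
{
    return fail ? 0 : 0x10000;                /* 0 is ambiguous on failure */
}

static int lookup_new(int fail, uint64_t *result)
{
    *result = 0;
    if (fail) {
        return -EIO;                          /* error, distinguishable... */
    }
    *result = 0x10000;
    return 1;                                 /* ...from "unallocated" (0) */
}

int main(void)
{
    uint64_t off = lookup_old(0);
    printf("old: %#" PRIx64 "\n", off);
    int ret = lookup_new(0, &off);
    printf("new: ret=%d off=%#" PRIx64 "\n", ret, off);
    return 0;
}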
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n, ret;
int index_in_cluster, n;
uint64_t cluster_offset;
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset);
cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -602,6 +572,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector hd_qiov;
uint8_t *buf;
void *orig_buf;
Error *err = NULL;
if (qiov->niov > 1) {
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
@@ -617,11 +588,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
while (nb_sectors != 0) {
/* prepare next request */
ret = get_cluster_offset(bs, sector_num << 9,
0, 0, 0, 0, &cluster_offset);
if (ret < 0) {
break;
}
cluster_offset = get_cluster_offset(bs, sector_num << 9,
0, 0, 0, 0);
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
@@ -638,7 +606,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
break;
goto fail;
}
} else {
/* Note: in this case, no need to wait */
@@ -647,15 +615,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(bs, cluster_offset) < 0) {
ret = -EIO;
break;
goto fail;
}
memcpy(buf,
s->cluster_cache + index_in_cluster * 512, 512 * n);
} else {
if ((cluster_offset & 511) != 0) {
ret = -EIO;
break;
goto fail;
}
hd_iov.iov_base = (void *)buf;
hd_iov.iov_len = n * 512;
@@ -670,11 +636,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
if (bs->encrypted) {
assert(s->crypto);
if (qcrypto_block_decrypt(s->crypto,
sector_num * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE, NULL) < 0) {
ret = -EIO;
break;
if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
n * BDRV_SECTOR_SIZE, &err) < 0) {
goto fail;
}
}
}
@@ -685,6 +649,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
buf += n * 512;
}
done:
qemu_co_mutex_unlock(&s->lock);
if (qiov->niov > 1) {
@@ -693,6 +658,11 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
return ret;
fail:
error_free(err);
ret = -EIO;
goto done;
}
static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -731,20 +701,19 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
if (n > nb_sectors) {
n = nb_sectors;
}
ret = get_cluster_offset(bs, sector_num << 9, 1, 0,
index_in_cluster,
index_in_cluster + n, &cluster_offset);
if (ret < 0) {
break;
}
cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
index_in_cluster,
index_in_cluster + n);
if (!cluster_offset || (cluster_offset & 511) != 0) {
ret = -EIO;
break;
}
if (bs->encrypted) {
Error *err = NULL;
assert(s->crypto);
if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE,
buf, n * BDRV_SECTOR_SIZE, NULL) < 0) {
if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
n * BDRV_SECTOR_SIZE, &err) < 0) {
error_free(err);
ret = -EIO;
break;
}
@@ -1032,11 +1001,8 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
goto success;
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, offset, 2, out_len, 0, 0, &cluster_offset);
cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
goto fail;
}
if (cluster_offset == 0) {
ret = -EIO;
goto fail;

block/qcow2-bitmap.c

@@ -269,16 +269,15 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
return 0;
}
/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
const BdrvDirtyBitmap *bitmap)
/* This function returns the number of disk sectors covered by a single qcow2
* cluster of bitmap data. */
static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
const BdrvDirtyBitmap *bitmap)
{
uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
uint64_t limit = granularity * (s->cluster_size << 3);
uint32_t sector_granularity =
bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
assert(QEMU_IS_ALIGNED(limit,
bdrv_dirty_bitmap_serialization_align(bitmap)));
return limit;
return (uint64_t)sector_granularity * (s->cluster_size << 3);
}
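Both helpers above size the same quantity in different units: a qcow2 bitmap cluster holds cluster_size << 3 bits, and each bit covers one granularity-sized chunk of guest data. A worked example with 64 KiB clusters and 64 KiB granularity (typical defaults, assumed here for illustration):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t cluster_size = 65536;            /* bytes of bitmap data */
    uint64_t granularity = 65536;             /* guest bytes per bitmap bit */

    /* 65536 bytes * 8 = 524288 bits, each covering 64 KiB of disk. */
    uint64_t limit = granularity * (cluster_size << 3);
    printf("one bitmap cluster covers %" PRIu64 " bytes (%" PRIu64 " GiB)\n",
           limit, limit >> 30);               /* 34359738368 bytes = 32 GiB */
    return 0;
}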
/* load_bitmap_data
@@ -291,7 +290,7 @@ static int load_bitmap_data(BlockDriverState *bs,
{
int ret = 0;
BDRVQcow2State *s = bs->opaque;
uint64_t offset, limit;
uint64_t sector, sbc;
uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
uint8_t *buf = NULL;
uint64_t i, tab_size =
@@ -303,28 +302,28 @@ static int load_bitmap_data(BlockDriverState *bs,
}
buf = g_malloc(s->cluster_size);
limit = bytes_covered_by_bitmap_cluster(s, bitmap);
for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
uint64_t count = MIN(bm_size - offset, limit);
sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
uint64_t count = MIN(bm_size - sector, sbc);
uint64_t entry = bitmap_table[i];
uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
assert(check_table_entry(entry, s->cluster_size) == 0);
if (data_offset == 0) {
if (offset == 0) {
if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count,
bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count,
false);
} else {
/* No need to deserialize zeros because the dirty bitmap is
* already cleared */
}
} else {
ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size);
ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
if (ret < 0) {
goto finish;
}
bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count,
false);
}
}
@@ -1072,8 +1071,8 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
{
int ret;
BDRVQcow2State *s = bs->opaque;
int64_t offset;
uint64_t limit;
int64_t sector;
uint64_t sbc;
uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
uint8_t *buf = NULL;
@@ -1096,25 +1095,20 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
return NULL;
}
dbi = bdrv_dirty_iter_new(bitmap);
dbi = bdrv_dirty_iter_new(bitmap, 0);
buf = g_malloc(s->cluster_size);
limit = bytes_covered_by_bitmap_cluster(s, bitmap);
assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
assert(DIV_ROUND_UP(bm_size, sbc) == tb_size);
while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
uint64_t cluster = offset / limit;
while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
uint64_t cluster = sector / sbc;
uint64_t end, write_size;
int64_t off;
/*
* We found the first dirty offset, but want to write out the
* entire cluster of the bitmap that includes that offset,
* including any leading zero bits.
*/
offset = QEMU_ALIGN_DOWN(offset, limit);
end = MIN(bm_size, offset + limit);
write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset,
end - offset);
sector = cluster * sbc;
end = MIN(bm_size, sector + sbc);
write_size =
bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector);
assert(write_size <= s->cluster_size);
off = qcow2_alloc_clusters(bs, s->cluster_size);
@@ -1126,7 +1120,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
}
tb[cluster] = off;
bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset);
bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector);
if (write_size < s->cluster_size) {
memset(buf + write_size, 0, s->cluster_size - write_size);
}

block/qcow2-cache.c

@@ -62,18 +62,6 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
return idx;
}
static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
{
if (c == s->refcount_block_cache) {
return "refcount block";
} else if (c == s->l2_table_cache) {
return "L2 table";
} else {
/* Do not abort, because this is not critical */
return "unknown";
}
}
static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
int i, int num_tables)
{
@@ -85,7 +73,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
size_t mem_size = (size_t) s->cluster_size * num_tables;
size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
if (mem_size > offset && length > 0) {
if (length > 0) {
madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
}
#endif
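qcow2_cache_table_release above trims the buffer to its largest page-aligned interior region before madvise(MADV_DONTNEED); the variant with the extra mem_size > offset test also guards the unsigned subtraction against wrapping when the buffer is smaller than one alignment step. A sketch of the arithmetic (hypothetical buffer address and sizes):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)   (((x) + (a) - 1) / (a) * (a))
#define ALIGN_DOWN(x, a) ((x) / (a) * (a))

int main(void)
{
    uintptr_t t = 0x1234;                     /* hypothetical buffer start */
    size_t mem_size = 3 * 4096, align = 4096;

    size_t offset = ALIGN_UP(t, align) - t;   /* skip up to a page boundary */
    if (mem_size > offset) {                  /* avoid unsigned wraparound */
        size_t length = ALIGN_DOWN(mem_size - offset, align);
        printf("skip %zu bytes, release %zu bytes\n", offset, length);
    }
    return 0;
}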
@@ -326,18 +314,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
uint64_t min_lru_counter = UINT64_MAX;
int min_lru_index = -1;
assert(offset != 0);
trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
offset, read_from_disk);
if (offset_into_cluster(s, offset)) {
qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
"cache: Offset %#" PRIx64 " is unaligned",
qcow2_cache_get_name(s, c), offset);
return -EIO;
}
/* Check if the table is already cached */
i = lookup_index = (offset / s->cluster_size * 4) % c->size;
do {
@@ -432,29 +411,3 @@ void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
assert(c->entries[i].offset != 0);
c->entries[i].dirty = true;
}
void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset)
{
int i;
for (i = 0; i < c->size; i++) {
if (c->entries[i].offset == offset) {
return qcow2_cache_get_table_addr(bs, c, i);
}
}
return NULL;
}
void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
{
int i = qcow2_cache_get_table_idx(bs, c, table);
assert(c->entries[i].ref == 0);
c->entries[i].offset = 0;
c->entries[i].lru_counter = 0;
c->entries[i].dirty = false;
qcow2_cache_table_release(bs, c, i, 1);
}


@@ -32,56 +32,6 @@
#include "qemu/bswap.h"
#include "trace.h"
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size)
{
BDRVQcow2State *s = bs->opaque;
int new_l1_size, i, ret;
if (exact_size >= s->l1_size) {
return 0;
}
new_l1_size = exact_size;
#ifdef DEBUG_ALLOC2
fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
#endif
BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset +
new_l1_size * sizeof(uint64_t),
(s->l1_size - new_l1_size) * sizeof(uint64_t), 0);
if (ret < 0) {
goto fail;
}
ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
continue;
}
qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK,
s->cluster_size, QCOW2_DISCARD_ALWAYS);
s->l1_table[i] = 0;
}
return 0;
fail:
/*
* If the write to the l1_table failed, the image may contain a partially
* overwritten l1_table. In this case it would be better to clear the
* l1_table in memory to avoid possible image corruption.
*/
memset(s->l1_table + new_l1_size, 0,
(s->l1_size - new_l1_size) * sizeof(uint64_t));
return ret;
}
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
@@ -111,7 +61,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
new_l1_size = 1;
}
while (min_size > new_l1_size) {
new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2);
new_l1_size = (new_l1_size * 3 + 1) / 2;
}
}
@@ -278,14 +228,6 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
goto fail;
}
/* If we're allocating the table at offset 0 then something is wrong */
if (l2_offset == 0) {
qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
"allocation of L2 table at offset 0");
ret = -EIO;
goto fail;
}
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail;
@@ -454,13 +396,15 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
{
if (bytes && bs->encrypted) {
BDRVQcow2State *s = bs->opaque;
int64_t offset = (s->crypt_physical_offset ?
int64_t sector = (s->crypt_physical_offset ?
(cluster_offset + offset_in_cluster) :
(src_cluster_offset + offset_in_cluster));
(src_cluster_offset + offset_in_cluster))
>> BDRV_SECTOR_BITS;
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
assert(s->crypto);
if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
if (qcrypto_block_encrypt(s->crypto, sector, buffer,
bytes, NULL) < 0) {
return false;
}
}
@@ -1308,21 +1252,10 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
(!*host_offset ||
start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
{
int preallocated_nb_clusters;
if (offset_into_cluster(s, entry & L2E_OFFSET_MASK)) {
qcow2_signal_corruption(bs, true, -1, -1, "Preallocated zero "
"cluster offset %#llx unaligned (guest "
"offset: %#" PRIx64 ")",
entry & L2E_OFFSET_MASK, guest_offset);
ret = -EIO;
goto fail;
}
/* Try to reuse preallocated zero clusters; contiguous normal clusters
* would be fine, too, but count_cow_clusters() above has limited
* nb_clusters already to a range of COW clusters */
preallocated_nb_clusters =
int preallocated_nb_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], QCOW_OFLAG_COPIED);
assert(preallocated_nb_clusters > 0);
@@ -1583,23 +1516,6 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
/* Allocate buffers on first decompress operation, most images are
* uncompressed and the memory overhead can be avoided. The buffers
* are freed in .bdrv_close().
*/
if (!s->cluster_data) {
/* one more sector for decompressed data alignment */
s->cluster_data = qemu_try_blockalign(bs->file->bs,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
if (!s->cluster_data) {
return -ENOMEM;
}
}
if (!s->cluster_cache) {
s->cluster_cache = g_malloc(s->cluster_size);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
nb_csectors);
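The comment removed above describes a lazy-allocation pattern: the decompression buffers are created only on the first compressed-cluster read, since most images never need them, and are torn down in .bdrv_close(); the 2.10 code instead allocates them unconditionally at open time (see the qcow2_do_open hunk further down). A generic sketch of the lazy pattern, with a hypothetical state struct standing in for BDRVQcow2State:

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical driver state; buf stands in for s->cluster_data. */
    struct state {
        unsigned char *buf;
        size_t buf_size;
    };

    /* Allocate on first use so images that never need the feature pay
     * no memory cost. */
    static int ensure_buffer(struct state *s, size_t size)
    {
        if (!s->buf) {
            s->buf = malloc(size);
            if (!s->buf) {
                return -1;          /* -ENOMEM in the real driver */
            }
            s->buf_size = size;
        }
        return 0;
    }

    /* Counterpart of .bdrv_close(): free whatever was lazily created. */
    static void state_close(struct state *s)
    {
        free(s->buf);
        s->buf = NULL;
    }

    int main(void)
    {
        struct state s = { 0 };
        if (ensure_buffer(&s, 65536) == 0) {
            memset(s.buf, 0, s.buf_size);
        }
        state_close(&s);
        return 0;
    }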
@@ -1651,7 +1567,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
* cluster is already marked as zero, or if it's unallocated and we
* don't have a backing file.
*
* TODO We might want to use bdrv_block_status(bs) here, but we're
* TODO We might want to use bdrv_get_block_status(bs) here, but we're
* holding s->lock, so that doesn't work today.
*
* If full_discard is true, the sector should not read back as zeroes,


@@ -29,7 +29,6 @@
#include "block/qcow2.h"
#include "qemu/range.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -367,13 +366,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
return new_block;
}
/* If we're allocating the block at offset 0 then something is wrong */
if (new_block == 0) {
qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
"allocation of refcount block at offset 0");
return -EIO;
}
#ifdef DEBUG_ALLOC2
fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
" at %" PRIx64 "\n",
@@ -869,24 +861,8 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
}
s->set_refcount(refcount_block, block_index, refcount);
if (refcount == 0) {
void *table;
table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
offset);
if (table != NULL) {
qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
qcow2_cache_discard(bs, s->refcount_block_cache, table);
}
table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
if (table != NULL) {
qcow2_cache_discard(bs, s->l2_table_cache, table);
}
if (s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
if (refcount == 0 && s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
}
@@ -1082,13 +1058,6 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
return new_cluster;
}
if (new_cluster == 0) {
qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
"allocation of compressed cluster "
"at offset 0");
return -EIO;
}
if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
offset = new_cluster;
free_in_cluster = s->cluster_size;
@@ -3076,168 +3045,3 @@ done:
qemu_vfree(new_refblock);
return ret;
}
static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
{
BDRVQcow2State *s = bs->opaque;
uint32_t index = offset_to_reftable_index(s, offset);
int64_t covering_refblock_offset = 0;
if (index < s->refcount_table_size) {
covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
}
if (!covering_refblock_offset) {
qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
"not covered by the refcount structures",
offset);
return -EIO;
}
return covering_refblock_offset;
}
static int qcow2_discard_refcount_block(BlockDriverState *bs,
uint64_t discard_block_offs)
{
BDRVQcow2State *s = bs->opaque;
int64_t refblock_offs;
uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
void *refblock;
int ret;
refblock_offs = get_refblock_offset(bs, discard_block_offs);
if (refblock_offs < 0) {
return refblock_offs;
}
assert(discard_block_offs != 0);
ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
&refblock);
if (ret < 0) {
return ret;
}
if (s->get_refcount(refblock, block_index) != 1) {
qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
" refblock offset %#" PRIx64
", reftable index %u"
", block offset %#" PRIx64
", refcount %#" PRIx64,
refblock_offs,
offset_to_reftable_index(s, discard_block_offs),
discard_block_offs,
s->get_refcount(refblock, block_index));
qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
return -EINVAL;
}
s->set_refcount(refblock, block_index, 0);
qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock);
qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
if (cluster_index < s->free_cluster_index) {
s->free_cluster_index = cluster_index;
}
refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
discard_block_offs);
if (refblock) {
/* discard refblock from the cache if refblock is cached */
qcow2_cache_discard(bs, s->refcount_block_cache, refblock);
}
update_refcount_discard(bs, discard_block_offs, s->cluster_size);
return 0;
}
int qcow2_shrink_reftable(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *reftable_tmp =
g_malloc(s->refcount_table_size * sizeof(uint64_t));
int i, ret;
for (i = 0; i < s->refcount_table_size; i++) {
int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
void *refblock;
bool unused_block;
if (refblock_offs == 0) {
reftable_tmp[i] = 0;
continue;
}
ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
&refblock);
if (ret < 0) {
goto out;
}
/* the refblock has its own reference */
if (i == offset_to_reftable_index(s, refblock_offs)) {
uint64_t block_index = (refblock_offs >> s->cluster_bits) &
(s->refcount_block_size - 1);
uint64_t refcount = s->get_refcount(refblock, block_index);
s->set_refcount(refblock, block_index, 0);
unused_block = buffer_is_zero(refblock, s->cluster_size);
s->set_refcount(refblock, block_index, refcount);
} else {
unused_block = buffer_is_zero(refblock, s->cluster_size);
}
qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
}
ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
s->refcount_table_size * sizeof(uint64_t));
/*
* If the write to the reftable failed, the image may contain a partially
* overwritten reftable. In this case it would be better to clear the
* reftable in memory to avoid possible image corruption.
*/
for (i = 0; i < s->refcount_table_size; i++) {
if (s->refcount_table[i] && !reftable_tmp[i]) {
if (ret == 0) {
ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
REFT_OFFSET_MASK);
}
s->refcount_table[i] = 0;
}
}
if (!s->cache_discards) {
qcow2_process_discards(bs, ret);
}
out:
g_free(reftable_tmp);
return ret;
}
int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
{
BDRVQcow2State *s = bs->opaque;
int64_t i;
for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
uint64_t refcount;
int ret = qcow2_get_refcount(bs, i, &refcount);
if (ret < 0) {
fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
i, strerror(-ret));
return ret;
}
if (refcount > 0) {
return i;
}
}
qcow2_signal_corruption(bs, true, -1, -1,
"There are no references in the refcount table.");
return -EIO;
}


@@ -30,6 +30,7 @@
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qbool.h"
#include "qapi/util.h"
#include "qapi/qmp/types.h"
#include "qapi-event.h"
#include "trace.h"
@@ -126,7 +127,6 @@ static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
/* Zero fill remaining space in cluster so it has predictable
* content in case of future spec changes */
clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0);
ret = bdrv_pwrite_zeroes(bs->file,
ret + headerlen,
clusterlen - headerlen, 0);
@@ -302,11 +302,10 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
}
if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
warn_report("a program lacking bitmap support "
"modified this file, so all bitmaps are now "
"considered inconsistent");
error_printf("Some clusters may be leaked, "
"run 'qemu-img check -r' on the image "
error_report("WARNING: a program lacking bitmap support "
"modified this file, so all bitmaps are now "
"considered inconsistent. Some clusters may be "
"leaked, run 'qemu-img check -r' on the image "
"file to fix.");
if (need_update_header != NULL) {
/* Updating is needed to drop invalid bitmap extension. */
@@ -376,8 +375,6 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
default:
/* unknown magic - save it in case we need to rewrite the header */
/* If you add a new feature, make sure to also update the fast
* path of qcow2_make_empty() to deal with it. */
{
Qcow2UnknownHeaderExtension *uext;
@@ -1142,7 +1139,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);
s->cluster_sectors = 1 << (s->cluster_bits - 9);
/* Initialise version 3 header fields */
if (header.version == 2) {
@@ -1283,12 +1280,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
error_setg(errp, "Image does not contain a reference count table");
ret = -EINVAL;
goto fail;
}
ret = validate_table_offset(bs, s->refcount_table_offset,
s->refcount_table_size, sizeof(uint64_t));
if (ret < 0) {
@@ -1369,6 +1360,16 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size + 512);
if (s->cluster_data == NULL) {
error_setg(errp, "Could not allocate temporary cluster buffer");
ret = -ENOMEM;
goto fail;
}
s->cluster_cache_offset = -1;
s->flags = flags;
@@ -1477,10 +1478,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
BdrvCheckResult result = {0};
ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
if (ret < 0 || result.check_errors) {
if (ret >= 0) {
ret = -EIO;
}
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not repair dirty image");
goto fail;
}
@@ -1509,6 +1507,8 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (s->refcount_block_cache) {
qcow2_cache_destroy(bs, s->refcount_block_cache);
}
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcrypto_block_free(s->crypto);
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
return ret;
@@ -1648,7 +1648,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes,
ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
&cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
@@ -1820,13 +1820,15 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
assert(s->crypto);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Error *err = NULL;
if (qcrypto_block_decrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
offset),
offset) >> BDRV_SECTOR_BITS,
cluster_data,
cur_bytes,
NULL) < 0) {
&err) < 0) {
error_free(err);
ret = -EIO;
goto fail;
}
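Both sides of this hunk derive the cipher's IV base from either the physical (host cluster) offset or the logical (guest) offset, depending on s->crypt_physical_offset; the 2.10 code additionally shifts it down to a 512-byte sector number before calling qcrypto_block_decrypt. A sketch of that selection with invented values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_BITS 9                 /* 512-byte sectors */

    int main(void)
    {
        int crypt_physical_offset = 1;        /* per-image flag, value invented */
        uint64_t cluster_offset = 0x50000;    /* host offset of the data cluster */
        uint64_t offset_in_cluster = 0x1200;
        uint64_t guest_offset = 0x9001200;    /* logical (guest-visible) offset */

        /* IV base: either the physical byte offset of the data in the image
         * file or the guest-visible offset, depending on the format. */
        uint64_t iv_base = crypt_physical_offset
                         ? cluster_offset + offset_in_cluster
                         : guest_offset;
        printf("IV sector: %" PRIu64 "\n", iv_base >> SECTOR_BITS);
        return 0;
    }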
@@ -1940,6 +1942,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
if (bs->encrypted) {
Error *err = NULL;
assert(s->crypto);
if (!cluster_data) {
cluster_data = qemu_try_blockalign(bs->file->bs,
@@ -1958,9 +1961,10 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
if (qcrypto_block_encrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
offset),
offset) >> BDRV_SECTOR_BITS,
cluster_data,
cur_bytes, NULL) < 0) {
cur_bytes, &err) < 0) {
error_free(err);
ret = -EIO;
goto fail;
}
@@ -2755,7 +2759,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
int64_t prealloc_size =
qcow2_calc_prealloc_size(total_size, cluster_size, refcount_order);
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, prealloc_size, &error_abort);
qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_str(prealloc),
qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc],
&error_abort);
}
@@ -2955,8 +2959,9 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
goto finish;
}
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
PREALLOC_MODE_OFF, &local_err);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -3011,21 +3016,23 @@ finish:
}
static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
uint32_t count)
{
int64_t nr;
int res;
int nr;
BlockDriverState *file;
int64_t res;
/* Clamp to image length, before checking status of underlying sectors */
if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
if (start + count > bs->total_sectors) {
count = bs->total_sectors - start;
}
if (!bytes) {
if (!count) {
return true;
}
res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
res = bdrv_get_block_status_above(bs, NULL, start, count,
&nr, &file);
return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
}
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
@@ -3043,21 +3050,24 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
}
if (head || tail) {
int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
uint64_t off;
unsigned int nr;
assert(head + bytes <= s->cluster_size);
/* check whether remainder of cluster already reads as zero */
if (!(is_zero(bs, offset - head, head) &&
is_zero(bs, offset + bytes,
tail ? s->cluster_size - tail : 0))) {
if (!(is_zero_sectors(bs, cl_start,
DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
is_zero_sectors(bs, (offset + bytes) >> BDRV_SECTOR_BITS,
DIV_ROUND_UP(-tail & (s->cluster_size - 1),
BDRV_SECTOR_SIZE)))) {
return -ENOTSUP;
}
qemu_co_mutex_lock(&s->lock);
/* A new write may have occurred since the previous check */
offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
offset = cl_start << BDRV_SECTOR_BITS;
bytes = s->cluster_size;
nr = s->cluster_size;
ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
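The logic above widens an unaligned zero-write to its whole cluster, but only when the untouched head and tail of that cluster already read back as zeroes. A worked sketch of the head/tail arithmetic (cluster size and request chosen arbitrarily):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size = 65536;    /* assumed */
        uint64_t offset = 4096;           /* zero-write request start */
        uint64_t bytes = 61440;           /* covers 4 KiB..64 KiB of the cluster */

        uint64_t head = offset % cluster_size;            /* 4096 */
        uint64_t tail = (offset + bytes) % cluster_size;  /* 0 */

        /* Ranges that must already be zero before widening the request: */
        printf("check [%" PRIu64 ", %" PRIu64 ") before the write\n",
               offset - head, offset);
        if (tail) {
            printf("check [%" PRIu64 ", %" PRIu64 ") after it\n",
                   offset + bytes, offset + bytes + (cluster_size - tail));
        }
        /* If both read as zeroes, zero the aligned cluster instead: */
        printf("write zeroes to [%" PRIu64 ", %" PRIu64 ")\n",
               offset - head, offset - head + cluster_size);
        return 0;
    }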
@@ -3115,7 +3125,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
{
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
@@ -3138,67 +3148,18 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
}
old_length = bs->total_sectors * 512;
new_l1_size = size_to_l1(s, offset);
/* shrinking is currently not supported */
if (offset < old_length) {
int64_t last_cluster, old_file_size;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp,
"Preallocation can't be used for shrinking an image");
return -EINVAL;
}
error_setg(errp, "qcow2 doesn't support shrinking images yet");
return -ENOTSUP;
}
ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
old_length - ROUND_UP(offset,
s->cluster_size),
QCOW2_DISCARD_ALWAYS, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
return ret;
}
ret = qcow2_shrink_l1_table(bs, new_l1_size);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to reduce the number of L2 tables");
return ret;
}
ret = qcow2_shrink_reftable(bs);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Failed to discard unused refblocks");
return ret;
}
old_file_size = bdrv_getlength(bs->file->bs);
if (old_file_size < 0) {
error_setg_errno(errp, -old_file_size,
"Failed to inquire current file length");
return old_file_size;
}
last_cluster = qcow2_get_last_cluster(bs, old_file_size);
if (last_cluster < 0) {
error_setg_errno(errp, -last_cluster,
"Failed to find the last cluster");
return last_cluster;
}
if ((last_cluster + 1) * s->cluster_size < old_file_size) {
Error *local_err = NULL;
bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
PREALLOC_MODE_OFF, &local_err);
if (local_err) {
warn_reportf_err(local_err,
"Failed to truncate the tail of the image: ");
}
}
} else {
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
return ret;
}
new_l1_size = size_to_l1(s, offset);
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to grow the L1 table");
return ret;
}
switch (prealloc) {
@@ -3225,7 +3186,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
if (old_file_size < 0) {
error_setg_errno(errp, -old_file_size,
"Failed to inquire current file length");
return old_file_size;
return ret;
}
old_file_size = ROUND_UP(old_file_size, s->cluster_size);
@@ -3255,7 +3216,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
if (allocation_start < 0) {
error_setg_errno(errp, -allocation_start,
"Failed to resize refcount structures");
return allocation_start;
return -allocation_start;
}
clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
@@ -3361,10 +3322,6 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL);
}
if (offset_into_cluster(s, offset)) {
return -EINVAL;
}
buf = qemu_blockalign(bs, s->cluster_size);
if (bytes != s->cluster_size) {
if (bytes > s->cluster_size ||
@@ -3609,16 +3566,13 @@ static int qcow2_make_empty(BlockDriverState *bs)
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
3 + l1_clusters <= s->refcount_block_size &&
s->crypt_method_header != QCOW_CRYPT_LUKS) {
/* The following function only works for qcow2 v3 images (it
* requires the dirty flag) and only as long as there are no
* features that reserve extra clusters (such as snapshots,
* LUKS header, or persistent bitmaps), because it completely
* empties the image. Furthermore, the L1 table and three
* additional clusters (image header, refcount table, one
* refcount block) have to fit inside one refcount block. */
if (s->qcow_version >= 3 && !s->snapshots &&
3 + l1_clusters <= s->refcount_block_size) {
/* The following function only works for qcow2 v3 images (it requires
* the dirty flag) and only as long as there are no snapshots (because
* it completely empties the image). Furthermore, the L1 table and three
* additional clusters (image header, refcount table, one refcount
* block) have to fit inside one refcount block. */
return make_completely_empty(bs);
}
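For a sense of scale in the check above: the image header, refcount table, and one refcount block, plus all L1 clusters, must be trackable by a single refcount block. A quick computation under assumed defaults (64 KiB clusters, 16-bit refcount entries):

    #include <stdio.h>

    int main(void)
    {
        int cluster_bits = 16;          /* 64 KiB clusters (assumed) */
        int refcount_order = 4;         /* 16-bit refcount entries */

        /* Entries per refcount block: cluster bits plus 3 (bits per byte),
         * minus the log2 of the per-entry width in bits. */
        int refcount_block_bits = cluster_bits + 3 - refcount_order;
        long refcount_block_size = 1L << refcount_block_bits;   /* 32768 */

        /* So 3 + l1_clusters <= 32768 permits up to 32765 L1 clusters,
         * i.e. 32765 * 8192 L1 entries with these sizes. */
        printf("one refcount block covers %ld clusters\n", refcount_block_size);
        return 0;
    }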
@@ -3697,8 +3651,9 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
}
optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
PREALLOC_MODE_OFF, &local_err);
prealloc = qapi_enum_parse(PreallocMode_lookup, optstr,
PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(optstr);
if (local_err) {
goto err;
@@ -3739,15 +3694,21 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
*/
required = virtual_size;
} else {
int64_t offset;
int64_t pnum = 0;
int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE;
int64_t sector_num;
int pnum = 0;
for (offset = 0; offset < ssize; offset += pnum) {
int ret;
for (sector_num = 0;
sector_num < ssize / BDRV_SECTOR_SIZE;
sector_num += pnum) {
int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num,
BDRV_REQUEST_MAX_SECTORS);
BlockDriverState *file;
int64_t ret;
ret = bdrv_block_status_above(in_bs, NULL, offset,
ssize - offset, &pnum, NULL,
NULL);
ret = bdrv_get_block_status_above(in_bs, NULL,
sector_num, nb_sectors,
&pnum, &file);
if (ret < 0) {
error_setg_errno(&local_err, -ret,
"Unable to get block status");
@@ -3759,10 +3720,12 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
} else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
(BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
/* Extend pnum to end of cluster for next iteration */
pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
pnum = ROUND_UP(sector_num + pnum, cluster_sectors) -
sector_num;
/* Count clusters we've seen */
required += offset % cluster_size + pnum;
required += (sector_num % cluster_sectors + pnum) *
BDRV_SECTOR_SIZE;
}
}
}
@@ -4081,9 +4044,6 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
error_report("Changing the encryption format is not supported");
return -ENOTSUP;
}
} else if (g_str_has_prefix(desc->name, "encrypt.")) {
error_report("Changing the encryption parameters is not supported");
return -ENOTSUP;
} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
cluster_size);
@@ -4235,7 +4195,7 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
char *message;
va_list ap;
fatal = fatal && bdrv_is_writable(bs);
fatal = fatal && !bs->read_only;
if (s->signaled_corruption &&
(!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))


@@ -521,12 +521,6 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
return r1 > r2 ? r1 - r2 : r2 - r1;
}
static inline
uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
{
return offset >> (s->refcount_block_bits + s->cluster_bits);
}
/* qcow2.c functions */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
@@ -590,13 +584,10 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
BlockDriverAmendStatusCB *status_cb,
void *cb_opaque, Error **errp);
int qcow2_shrink_reftable(BlockDriverState *bs);
int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
@@ -658,9 +649,6 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset);
void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);
/* qcow2-bitmap.c functions */
int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,


@@ -265,7 +265,7 @@ static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
assert(!s->allocating_write_reqs_plugged);
if (s->allocating_acb != NULL) {
/* Another allocating write came concurrently. This cannot happen
* from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
* from bdrv_qed_co_drain, but it can happen when the timer runs.
*/
qemu_co_mutex_unlock(&s->table_lock);
return false;
@@ -358,7 +358,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
static void coroutine_fn bdrv_qed_co_drain(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -1399,7 +1399,7 @@ static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}
@@ -1608,7 +1608,7 @@ static BlockDriver bdrv_qed = {
.bdrv_check = bdrv_qed_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
.bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
.bdrv_co_drain = bdrv_qed_co_drain,
};
static void bdrv_qed_init(void)


@@ -867,13 +867,30 @@ static QemuOptsList quorum_runtime_opts = {
},
};
static int parse_read_pattern(const char *opt)
{
int i;
if (!opt) {
/* Set quorum as default */
return QUORUM_READ_PATTERN_QUORUM;
}
for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
return i;
}
}
return -EINVAL;
}
static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQuorumState *s = bs->opaque;
Error *local_err = NULL;
QemuOpts *opts = NULL;
const char *pattern_str;
bool *opened;
int i;
int ret = 0;
@@ -908,13 +925,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
goto exit;
}
pattern_str = qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN);
if (!pattern_str) {
ret = QUORUM_READ_PATTERN_QUORUM;
} else {
ret = qapi_enum_parse(&QuorumReadPattern_lookup, pattern_str,
-EINVAL, NULL);
}
ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
if (ret < 0) {
error_setg(&local_err, "Please set read-pattern as fifo or quorum");
goto exit;


@@ -167,37 +167,16 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
/* Check and adjust the offset against the 'offset' and 'size' options. */
static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
uint64_t bytes, bool is_write)
{
BDRVRawState *s = bs->opaque;
if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
/* There's not enough space for the write, or the read request is
* out-of-range. Don't read/write anything to prevent leaking out of
* the size specified in options. */
return is_write ? -ENOSPC : -EINVAL;
}
if (*offset > INT64_MAX - s->offset) {
return -EINVAL;
}
*offset += s->offset;
return 0;
}
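raw_adjust_offset above centralizes what the older code repeated in each read/write path: reject requests that fall outside the window defined by the driver's 'offset' and 'size' options, guard the translation against overflow, then shift into the underlying file. A self-contained sketch of the same check (the struct is a stand-in for BDRVRawState):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct raw_state {          /* stand-in for BDRVRawState */
        uint64_t offset;        /* start of the window in the file */
        uint64_t size;          /* window length, if has_size */
        bool has_size;
    };

    static int adjust_offset(const struct raw_state *s, uint64_t *offset,
                             uint64_t bytes, bool is_write)
    {
        /* Request must stay inside the configured window... */
        if (s->has_size && (*offset > s->size || bytes > s->size - *offset)) {
            return is_write ? -ENOSPC : -EINVAL;
        }
        /* ...and translating it must not overflow the file offset. */
        if (*offset > INT64_MAX - s->offset) {
            return -EINVAL;
        }
        *offset += s->offset;
        return 0;
    }

    int main(void)
    {
        struct raw_state s = { .offset = 4096, .size = 1 << 20, .has_size = true };
        uint64_t off = 512 * 1024;
        return adjust_offset(&s, &off, 4096, false);   /* off becomes 528384 */
    }

In the real driver the translated offset is then handed straight to bdrv_co_preadv()/bdrv_co_pwritev() on bs->file, as the surrounding hunks show.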
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
int ret;
BDRVRawState *s = bs->opaque;
ret = raw_adjust_offset(bs, &offset, bytes, false);
if (ret) {
return ret;
if (offset > UINT64_MAX - s->offset) {
return -EINVAL;
}
offset += s->offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -207,11 +186,23 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
BDRVRawState *s = bs->opaque;
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
/* There's not enough space for the data. Don't write anything and just
* fail to prevent leaking out of the size specified in options. */
return -ENOSPC;
}
if (offset > UINT64_MAX - s->offset) {
ret = -EINVAL;
goto fail;
}
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
/* Handling partial writes would be a pain - so we just
* require that guests have 512-byte request alignment if
@@ -246,10 +237,7 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
qiov = &local_qiov;
}
ret = raw_adjust_offset(bs, &offset, bytes, true);
if (ret) {
goto fail;
}
offset += s->offset;
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -279,24 +267,22 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
int ret;
ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
if (ret) {
return ret;
BDRVRawState *s = bs->opaque;
if (offset > UINT64_MAX - s->offset) {
return -EINVAL;
}
offset += s->offset;
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes)
{
int ret;
ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
if (ret) {
return ret;
BDRVRawState *s = bs->opaque;
if (offset > UINT64_MAX - s->offset) {
return -EINVAL;
}
offset += s->offset;
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
@@ -386,6 +372,11 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset,
return bdrv_truncate(bs->file, offset, prealloc, errp);
}
static int raw_media_changed(BlockDriverState *bs)
{
return bdrv_media_changed(bs->file->bs);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_eject(bs->file->bs, eject_flag);
@@ -519,6 +510,7 @@ BlockDriver bdrv_raw = {
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
.bdrv_probe_geometry = &raw_probe_geometry,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
.bdrv_co_ioctl = &raw_co_ioctl,


@@ -265,14 +265,13 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
key = qstring_get_str(name);
ret = rados_conf_set(cluster, key, qstring_get_str(value));
QDECREF(name);
QDECREF(value);
if (ret < 0) {
error_setg_errno(errp, -ret, "invalid conf option %s", key);
QDECREF(name);
ret = -EINVAL;
break;
}
QDECREF(name);
}
QDECREF(keypairs);
@@ -666,16 +665,10 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
/* If we are using an rbd snapshot, we must be r/o, otherwise
* leave as-is */
if (s->snap != NULL) {
if (!bdrv_is_read_only(bs)) {
error_report("Opening rbd snapshots without an explicit "
"read-only=on option is deprecated. Future versions "
"will refuse to open the image instead of "
"automatically marking the image read-only.");
r = bdrv_set_read_only(bs, true, &local_err);
if (r < 0) {
error_propagate(errp, local_err);
goto failed_open;
}
r = bdrv_set_read_only(bs, true, &local_err);
if (r < 0) {
error_propagate(errp, local_err);
goto failed_open;
}
}
@@ -951,7 +944,7 @@ static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}


@@ -157,17 +157,13 @@ static void replication_close(BlockDriverState *bs)
static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
*nperm = BLK_PERM_CONSISTENT_READ;
if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
*nperm |= BLK_PERM_WRITE;
}
*nshared = BLK_PERM_CONSISTENT_READ \
| BLK_PERM_WRITE \
| BLK_PERM_WRITE_UNCHANGED;
*nperm = *nshared = BLK_PERM_CONSISTENT_READ \
| BLK_PERM_WRITE \
| BLK_PERM_WRITE_UNCHANGED;
return;
}
@@ -342,24 +338,12 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
return;
}
if (!s->active_disk->bs->drv) {
error_setg(errp, "Active disk %s is ejected",
s->active_disk->bs->node_name);
return;
}
ret = s->active_disk->bs->drv->bdrv_make_empty(s->active_disk->bs);
if (ret < 0) {
error_setg(errp, "Cannot make active disk empty");
return;
}
if (!s->hidden_disk->bs->drv) {
error_setg(errp, "Hidden disk %s is ejected",
s->hidden_disk->bs->node_name);
return;
}
ret = s->hidden_disk->bs->drv->bdrv_make_empty(s->hidden_disk->bs);
if (ret < 0) {
error_setg(errp, "Cannot make hidden disk empty");
@@ -523,9 +507,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
return;
}
/* Must be true, or the bdrv_getlength() calls would have failed */
assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
if (!s->active_disk->bs->drv->bdrv_make_empty ||
!s->hidden_disk->bs->drv->bdrv_make_empty) {
error_setg(errp,


@@ -591,7 +591,7 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
{
int fd;
fd = socket_connect(s->addr, errp);
fd = socket_connect(s->addr, NULL, NULL, errp);
if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) {
int ret = socket_set_nodelay(fd);
@@ -2176,7 +2176,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
PreallocMode_lookup[prealloc]);
return -ENOTSUP;
}


@@ -177,35 +177,22 @@ int bdrv_snapshot_create(BlockDriverState *bs,
}
int bdrv_snapshot_goto(BlockDriverState *bs,
const char *snapshot_id,
Error **errp)
const char *snapshot_id)
{
BlockDriver *drv = bs->drv;
int ret, open_ret;
if (!drv) {
error_setg(errp, "Block driver is closed");
return -ENOMEDIUM;
}
if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
error_setg(errp, "Device has active dirty bitmaps");
return -EBUSY;
}
if (drv->bdrv_snapshot_goto) {
ret = drv->bdrv_snapshot_goto(bs, snapshot_id);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to load snapshot");
}
return ret;
return drv->bdrv_snapshot_goto(bs, snapshot_id);
}
if (bs->file) {
BlockDriverState *file;
QDict *options = qdict_clone_shallow(bs->options);
QDict *file_options;
Error *local_err = NULL;
file = bs->file->bs;
/* Prevent it from getting deleted when detached from bs */
@@ -219,15 +206,13 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
bdrv_unref_child(bs, bs->file);
bs->file = NULL;
ret = bdrv_snapshot_goto(file, snapshot_id, errp);
open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
ret = bdrv_snapshot_goto(file, snapshot_id);
open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL);
QDECREF(options);
if (open_ret < 0) {
bdrv_unref(file);
bs->drv = NULL;
/* A bdrv_snapshot_goto() error takes precedence */
error_propagate(errp, local_err);
return ret < 0 ? ret : open_ret;
return open_ret;
}
assert(bs->file->bs == file);
@@ -235,7 +220,6 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
return ret;
}
error_setg(errp, "Block driver does not support snapshots");
return -ENOTSUP;
}
@@ -419,7 +403,6 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
}
aio_context_release(ctx);
if (!ok) {
bdrv_next_cleanup(&it);
goto fail;
}
}
@@ -447,7 +430,6 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
}
aio_context_release(ctx);
if (ret < 0) {
bdrv_next_cleanup(&it);
goto fail;
}
}
@@ -458,10 +440,9 @@ fail:
}
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
Error **errp)
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
{
int ret = 0;
int err = 0;
BlockDriverState *bs;
BdrvNextIterator it;
@@ -470,18 +451,17 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
aio_context_acquire(ctx);
if (bdrv_can_snapshot(bs)) {
ret = bdrv_snapshot_goto(bs, name, errp);
err = bdrv_snapshot_goto(bs, name);
}
aio_context_release(ctx);
if (ret < 0) {
bdrv_next_cleanup(&it);
if (err < 0) {
goto fail;
}
}
fail:
*first_bad_bs = bs;
return ret;
return err;
}
int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
@@ -500,7 +480,6 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
}
aio_context_release(ctx);
if (err < 0) {
bdrv_next_cleanup(&it);
goto fail;
}
}
@@ -532,7 +511,6 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
}
aio_context_release(ctx);
if (err < 0) {
bdrv_next_cleanup(&it);
goto fail;
}
}
@@ -556,7 +534,6 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void)
aio_context_release(ctx);
if (found) {
bdrv_next_cleanup(&it);
break;
}
}


@@ -556,7 +556,6 @@ static QemuOptsList ssh_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Defines how and what to check the host key against",
},
{ /* end of list */ }
},
};
@@ -679,7 +678,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
/* Open the socket and connect. */
s->sock = inet_connect_saddr(s->inet, errp);
s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp);
if (s->sock < 0) {
ret = -EIO;
goto err;


@@ -141,7 +141,7 @@ static void coroutine_fn stream_run(void *opaque)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_sleep_ns(&s->common, delay_ns);
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}

File diff suppressed because it is too large.


@@ -1,267 +0,0 @@
/*
* QEMU block throttling filter driver infrastructure
*
* Copyright (c) 2017 Manos Pitsidianakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "block/throttle-groups.h"
#include "qemu/throttle-options.h"
#include "qapi/error.h"
static QemuOptsList throttle_opts = {
.name = "throttle",
.head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
.desc = {
{
.name = QEMU_OPT_THROTTLE_GROUP_NAME,
.type = QEMU_OPT_STRING,
.help = "Name of the throttle group",
},
{ /* end of list */ }
},
};
/*
* If this function succeeds then the throttle group name is stored in
* @group and must be freed by the caller.
* If there's an error then @group remains unmodified.
*/
static int throttle_parse_options(QDict *options, char **group, Error **errp)
{
int ret;
const char *group_name;
Error *local_err = NULL;
QemuOpts *opts = qemu_opts_create(&throttle_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fin;
}
group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
if (!group_name) {
error_setg(errp, "Please specify a throttle group");
ret = -EINVAL;
goto fin;
} else if (!throttle_group_exists(group_name)) {
error_setg(errp, "Throttle group '%s' does not exist", group_name);
ret = -EINVAL;
goto fin;
}
*group = g_strdup(group_name);
ret = 0;
fin:
qemu_opts_del(opts);
return ret;
}
static int throttle_open(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
{
ThrottleGroupMember *tgm = bs->opaque;
char *group;
int ret;
bs->file = bdrv_open_child(NULL, options, "file", bs,
&child_file, false, errp);
if (!bs->file) {
return -EINVAL;
}
bs->supported_write_flags = bs->file->bs->supported_write_flags;
bs->supported_zero_flags = bs->file->bs->supported_zero_flags;
ret = throttle_parse_options(options, &group, errp);
if (ret == 0) {
/* Register membership to group with name group_name */
throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs));
g_free(group);
}
return ret;
}
static void throttle_close(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_unregister_tgm(tgm);
}
static int64_t throttle_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file->bs);
}
static int coroutine_fn throttle_co_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, false);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
static int throttle_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file->bs);
}
static void throttle_detach_aio_context(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_detach_aio_context(tgm);
}
static void throttle_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_attach_aio_context(tgm, new_context);
}
static int throttle_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
int ret;
char *group = NULL;
assert(reopen_state != NULL);
assert(reopen_state->bs != NULL);
ret = throttle_parse_options(reopen_state->options, &group, errp);
reopen_state->opaque = group;
return ret;
}
static void throttle_reopen_commit(BDRVReopenState *reopen_state)
{
BlockDriverState *bs = reopen_state->bs;
ThrottleGroupMember *tgm = bs->opaque;
char *group = reopen_state->opaque;
assert(group);
if (strcmp(group, throttle_group_get_name(tgm))) {
throttle_group_unregister_tgm(tgm);
throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs));
}
g_free(reopen_state->opaque);
reopen_state->opaque = NULL;
}
static void throttle_reopen_abort(BDRVReopenState *reopen_state)
{
g_free(reopen_state->opaque);
reopen_state->opaque = NULL;
}
static bool throttle_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate)
{
return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
}
static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
if (atomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
throttle_group_restart_tgm(tgm);
}
}
static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
assert(tgm->io_limits_disabled);
atomic_dec(&tgm->io_limits_disabled);
}
static BlockDriver bdrv_throttle = {
.format_name = "throttle",
.protocol_name = "throttle",
.instance_size = sizeof(ThrottleGroupMember),
.bdrv_file_open = throttle_open,
.bdrv_close = throttle_close,
.bdrv_co_flush = throttle_co_flush,
.bdrv_child_perm = bdrv_filter_default_perms,
.bdrv_getlength = throttle_getlength,
.bdrv_co_preadv = throttle_co_preadv,
.bdrv_co_pwritev = throttle_co_pwritev,
.bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes,
.bdrv_co_pdiscard = throttle_co_pdiscard,
.bdrv_recurse_is_first_non_filter = throttle_recurse_is_first_non_filter,
.bdrv_attach_aio_context = throttle_attach_aio_context,
.bdrv_detach_aio_context = throttle_detach_aio_context,
.bdrv_reopen_prepare = throttle_reopen_prepare,
.bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort,
.bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,
.bdrv_co_drain_begin = throttle_co_drain_begin,
.bdrv_co_drain_end = throttle_co_drain_end,
.is_filter = true,
};
static void bdrv_throttle_init(void)
{
bdrv_register(&bdrv_throttle);
}
block_init(bdrv_throttle_init);


@@ -12,7 +12,7 @@ blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flag
bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x"
bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %"PRId64
bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
# block/stream.c
stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"


@@ -902,7 +902,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
}
sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
aligned_length = length;


@@ -1008,6 +1008,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (flags & BDRV_O_RDWR) {
ret = vhdx_update_headers(bs, s, false, NULL);
if (ret < 0) {
goto fail;
}
}
/* TODO: differencing files */
return 0;


@@ -783,7 +783,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
} else {
*secs_per_cyl = 17;
cyls_times_heads = total_sectors / *secs_per_cyl;
*heads = DIV_ROUND_UP(cyls_times_heads, 1024);
*heads = (cyls_times_heads + 1023) / 1024;
if (*heads < 4) {
*heads = 4;
@@ -836,7 +836,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
ret = blk_pwrite(blk, offset, buf, 512, 0);
if (ret < 0) {
goto fail;
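Several hunks in this comparison (here in vpc.c, and in vvfat.c below) replace open-coded ceiling divisions such as (n + 1023) / 1024 with the DIV_ROUND_UP macro; the two spellings are equivalent for non-negative integers. A quick demonstration:

    #include <assert.h>
    #include <stdint.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* ceil(n/d) two ways, as in the geometry and BAT loops above. */
        for (uint64_t n = 0; n < 5000; n++) {
            assert(DIV_ROUND_UP(n, 1024) == (n + 1023) / 1024);
            assert(DIV_ROUND_UP(n, 512)  == (n + 511) / 512);
        }
        return 0;
    }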


@@ -32,7 +32,6 @@
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@@ -57,6 +56,15 @@
static void checkpoint(void);
#ifdef __MINGW32__
void nonono(const char* file, int line, const char* msg) {
fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg);
exit(-5);
}
#undef assert
#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0)
#endif
#else
#define DLOG(a)
@@ -441,7 +449,7 @@ static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename)
return NULL;
}
number_of_entries = DIV_ROUND_UP(length * 2, 26);
number_of_entries = (length * 2 + 25) / 26;
for(i=0;i<number_of_entries;i++) {
entry=array_get_next(&(s->directory));
@@ -1218,7 +1226,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
switch (s->fat_type) {
case 32:
warn_report("FAT32 has not been tested. You are welcome to do so!");
fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. "
"You are welcome to do so!\n");
break;
case 16:
case 12:
@@ -1259,11 +1268,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
"Unable to set VVFAT to 'rw' when drive is read-only");
goto fail;
}
} else if (!bdrv_is_read_only(bs)) {
error_report("Opening non-rw vvfat images without an explicit "
"read-only=on option is deprecated. Future versions "
"will refuse to open the image instead of "
"automatically marking the image read-only.");
} else {
/* read only is the default for safety */
ret = bdrv_set_read_only(bs, true, &local_err);
if (ret < 0) {
@@ -2549,7 +2554,7 @@ static int commit_one_file(BDRVVVFATState* s,
(size > offset && c >=2 && !fat_eof(s, c)));
ret = vvfat_read(s->bs, cluster2sector(s, c),
(uint8_t*)cluster, DIV_ROUND_UP(rest_size, 0x200));
(uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
if (ret < 0) {
qemu_close(fd);
@@ -2947,7 +2952,7 @@ static int do_commit(BDRVVVFATState* s)
return ret;
}
if (s->qcow->bs->drv && s->qcow->bs->drv->bdrv_make_empty) {
if (s->qcow->bs->drv->bdrv_make_empty) {
s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
}
@@ -3023,8 +3028,7 @@ DLOG(checkpoint());
if (memcmp(direntries + k,
array_get(&(s->directory), dir_index + k),
sizeof(direntry_t))) {
warn_report("tried to write to write-protected "
"file");
fprintf(stderr, "Warning: tried to write to write-protected file\n");
return -1;
}
}
@@ -3206,7 +3210,6 @@ err:
static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
@@ -3266,11 +3269,24 @@ static void bdrv_vvfat_init(void)
block_init(bdrv_vvfat_init);
#ifdef DEBUG
static void checkpoint(void)
{
static void checkpoint(void) {
assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2);
check1(vvv);
check2(vvv);
assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY));
#if 0
if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf)
fprintf(stderr, "Nonono!\n");
mapping_t* mapping;
direntry_t* direntry;
assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next);
assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next);
if (vvv->mapping.next<47)
return;
assert((mapping = array_get(&(vvv->mapping), 47)));
assert(mapping->dir_index < vvv->directory.next);
direntry = array_get(&(vvv->directory), mapping->dir_index);
assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0);
#endif
}
#endif


@@ -44,6 +44,7 @@
#include "qapi-visit.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qobject-output-visitor.h"
#include "qapi/util.h"
#include "sysemu/sysemu.h"
#include "block/block_int.h"
#include "qmp-commands.h"
@@ -437,8 +438,9 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
if (detect_zeroes) {
*detect_zeroes =
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
qemu_opt_get(opts, "detect-zeroes"),
BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
&local_error);
if (local_error) {
@@ -1466,8 +1468,8 @@ static int action_check_completion_mode(BlkActionState *s, Error **errp)
error_setg(errp,
"Action '%s' does not support Transaction property "
"completion-mode = %s",
TransactionActionKind_str(s->action->type),
ActionCompletionMode_str(s->txn_props->completion_mode));
TransactionActionKind_lookup[s->action->type],
ActionCompletionMode_lookup[s->txn_props->completion_mode]);
return -1;
}
return 0;
@@ -2686,7 +2688,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
if (throttle_enabled(&cfg)) {
/* Enable I/O limits if they're not enabled yet, otherwise
* just update the throttling group. */
if (!blk_get_public(blk)->throttle_group_member.throttle_state) {
if (!blk_get_public(blk)->throttle_state) {
blk_io_limits_enable(blk,
arg->has_group ? arg->group :
arg->has_device ? arg->device :
@@ -2696,7 +2698,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
}
/* Set the new throttling configuration */
blk_set_io_limits(blk, &cfg);
} else if (blk_get_public(blk)->throttle_group_member.throttle_state) {
} else if (blk_get_public(blk)->throttle_state) {
/* If all throttling settings are set to 0, disable I/O limits */
blk_io_limits_disable(blk);
}


@@ -37,26 +37,6 @@
#include "qemu/timer.h"
#include "qapi-event.h"
/* Right now, this mutex is only needed to synchronize accesses to job->busy
* and job->sleep_timer, such as concurrent calls to block_job_do_yield and
* block_job_enter. */
static QemuMutex block_job_mutex;
static void block_job_lock(void)
{
qemu_mutex_lock(&block_job_mutex);
}
static void block_job_unlock(void)
{
qemu_mutex_unlock(&block_job_mutex);
}
static void __attribute__((__constructor__)) block_job_init(void)
{
qemu_mutex_init(&block_job_mutex);
}
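The block removed above introduces a file-scope mutex initialized from a GCC/Clang __attribute__((constructor)) function, so it is valid before any block job can run. A minimal standalone version of that pattern, using plain pthreads instead of QEMU's wrappers:

    #include <pthread.h>

    /* Protects the flags that both the job coroutine and its callers touch
     * (job->busy and job->sleep_timer in the original). */
    static pthread_mutex_t job_mutex;

    static void job_lock(void)   { pthread_mutex_lock(&job_mutex); }
    static void job_unlock(void) { pthread_mutex_unlock(&job_mutex); }

    /* Runs before main(), so the mutex is usable from the first call. */
    static void __attribute__((__constructor__)) job_mutex_init(void)
    {
        pthread_mutex_init(&job_mutex, NULL);
    }

    int main(void)
    {
        job_lock();
        job_unlock();
        return 0;
    }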
static void block_job_event_cancelled(BlockJob *job);
static void block_job_event_completed(BlockJob *job, const char *msg);
@@ -172,7 +152,6 @@ void block_job_unref(BlockJob *job)
{
if (--job->refcnt == 0) {
BlockDriverState *bs = blk_bs(job->blk);
QLIST_REMOVE(job, job_list);
bs->job = NULL;
block_job_remove_all_bdrv(job);
blk_remove_aio_context_notifier(job->blk,
@@ -181,7 +160,7 @@ void block_job_unref(BlockJob *job)
blk_unref(job->blk);
error_free(job->blocker);
g_free(job->id);
assert(!timer_pending(&job->sleep_timer));
QLIST_REMOVE(job, job_list);
g_free(job);
}
}
@@ -229,7 +208,7 @@ static char *child_job_get_parent_desc(BdrvChild *c)
{
BlockJob *job = c->opaque;
return g_strdup_printf("%s job '%s'",
BlockJobType_str(job->driver->job_type),
BlockJobType_lookup[job->driver->job_type],
job->id);
}
@@ -308,13 +287,6 @@ static void coroutine_fn block_job_co_entry(void *opaque)
job->driver->start(job);
}
static void block_job_sleep_timer_cb(void *opaque)
{
BlockJob *job = opaque;
block_job_enter(job);
}
void block_job_start(BlockJob *job)
{
assert(job && !block_job_started(job) && job->paused &&
@@ -581,10 +553,10 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
return NULL;
}
info = g_new0(BlockJobInfo, 1);
info->type = g_strdup(BlockJobType_str(job->driver->job_type));
info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
info->device = g_strdup(job->id);
info->len = job->len;
info->busy = atomic_read(&job->busy);
info->busy = job->busy;
info->paused = job->pause_count > 0;
info->offset = job->offset;
info->speed = job->speed;
@@ -692,12 +664,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
job->paused = true;
job->pause_count = 1;
job->refcnt = 1;
aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
block_job_sleep_timer_cb, job);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_str(driver->job_type));
BlockJobType_lookup[driver->job_type]);
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bs->job = job;
@@ -730,7 +699,6 @@ void block_job_pause_all(void)
AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
block_job_ref(job);
block_job_pause(job);
aio_context_release(aio_context);
}
@@ -761,26 +729,6 @@ static bool block_job_should_pause(BlockJob *job)
return job->pause_count > 0;
}
/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
* Reentering the job coroutine with block_job_enter() before the timer has
* expired is allowed and cancels the timer.
*
* If @ns is (uint64_t) -1, no timer is scheduled and block_job_enter() must be
* called explicitly. */
static void block_job_do_yield(BlockJob *job, uint64_t ns)
{
block_job_lock();
if (ns != -1) {
timer_mod(&job->sleep_timer, ns);
}
job->busy = false;
block_job_unlock();
qemu_coroutine_yield();
/* Set by block_job_enter before re-entering the coroutine. */
assert(job->busy);
}
void coroutine_fn block_job_pause_point(BlockJob *job)
{
assert(job && block_job_started(job));
@@ -798,7 +746,9 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
job->paused = true;
block_job_do_yield(job, -1);
job->busy = false;
qemu_coroutine_yield(); /* wait for block_job_resume() */
job->busy = true;
job->paused = false;
}
@@ -809,14 +759,12 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
void block_job_resume_all(void)
{
BlockJob *job, *next;
QLIST_FOREACH_SAFE(job, &block_jobs, job_list, next) {
BlockJob *job = NULL;
while ((job = block_job_next(job))) {
AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
block_job_resume(job);
block_job_unref(job);
aio_context_release(aio_context);
}
}
@@ -830,17 +778,9 @@ void block_job_enter(BlockJob *job)
return;
}
block_job_lock();
if (job->busy) {
block_job_unlock();
return;
if (!job->busy) {
bdrv_coroutine_enter(blk_bs(job->blk), job->co);
}
assert(!job->deferred_to_main_loop);
timer_del(&job->sleep_timer);
job->busy = true;
block_job_unlock();
aio_co_wake(job->co);
}
bool block_job_is_cancelled(BlockJob *job)
@@ -848,7 +788,7 @@ bool block_job_is_cancelled(BlockJob *job)
return job->cancelled;
}
void block_job_sleep_ns(BlockJob *job, int64_t ns)
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
{
assert(job->busy);
@@ -857,9 +797,11 @@ void block_job_sleep_ns(BlockJob *job, int64_t ns)
return;
}
job->busy = false;
if (!block_job_should_pause(job)) {
block_job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
}
job->busy = true;
block_job_pause_point(job);
}
@@ -873,9 +815,11 @@ void block_job_yield(BlockJob *job)
return;
}
job->busy = false;
if (!block_job_should_pause(job)) {
block_job_do_yield(job, -1);
qemu_coroutine_yield();
}
job->busy = true;
block_job_pause_point(job);
}
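
The blockjob.c hunks trade co_aio_sleep_ns() plus a bare busy flag for a sleep timer and a dedicated block_job_mutex guarding job->busy, so block_job_enter() can race safely with a concurrent yield. A toy model of that locking pattern, using plain pthreads rather than QEMU's coroutine and AIO machinery:

/* Toy model (assumption: pthreads, not QEMU's coroutines): a mutex
 * guards the busy flag so enter either sees the job still running or
 * atomically claims it before waking it up. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t job_lock = PTHREAD_MUTEX_INITIALIZER;

struct job { bool busy; };

static void job_yield(struct job *j)     /* block_job_do_yield analogue */
{
    pthread_mutex_lock(&job_lock);
    j->busy = false;                     /* only cleared under the lock */
    pthread_mutex_unlock(&job_lock);
    /* ...the coroutine would suspend here... */
}

static bool job_enter(struct job *j)     /* block_job_enter analogue */
{
    bool entered = false;

    pthread_mutex_lock(&job_lock);
    if (!j->busy) {
        j->busy = true;                  /* claim the job before waking it */
        entered = true;
    }
    pthread_mutex_unlock(&job_lock);
    return entered;
}

int main(void)
{
    struct job j = { .busy = true };

    job_yield(&j);
    printf("first enter: %d\n", job_enter(&j));   /* 1: wakes the job */
    printf("second enter: %d\n", job_enter(&j));  /* 0: already busy */
    return 0;
}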

View File

@@ -902,6 +902,10 @@ int main(int argc, char **argv)
/* NOTE: we need to init the CPU at this stage to get
qemu_host_page_size */
cpu = cpu_init(cpu_model);
if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
env = cpu->env_ptr;
#if defined(TARGET_SPARC) || defined(TARGET_PPC)
cpu_reset(cpu);
@@ -977,8 +981,7 @@ int main(int argc, char **argv)
/* Now that we've loaded the binary, GUEST_BASE is fixed. Delay
generating the prologue until now so that the prologue can take
the real value of GUEST_BASE into account. */
tcg_prologue_init(tcg_ctx);
tcg_region_init();
tcg_prologue_init(&tcg_ctx);
/* build Task State */
memset(ts, 0, sizeof(TaskState));

Submodule capstone deleted from 22ead3e0bf

View File

@@ -20,6 +20,5 @@ chardev-obj-$(CONFIG_WIN32) += char-win-stdio.o
common-obj-y += msmouse.o wctablet.o testdev.o
common-obj-$(CONFIG_BRLAPI) += baum.o
baum.o-cflags := $(SDL_CFLAGS)
baum.o-libs := $(BRLAPI_LIBS)
common-obj-$(CONFIG_SPICE) += spice.o

View File

@@ -643,7 +643,6 @@ static void baum_chr_open(Chardev *chr,
error_setg(errp, "brlapi__openConnection: %s",
brlapi_strerror(brlapi_error_location()));
g_free(handle);
baum->brlapi = NULL;
return;
}
baum->deferred_init = 0;
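
The one-line baum.c change nulls the handle after freeing it on the error path, so a later finalizer cannot double-free it. The underlying free-and-null idiom, in a hypothetical standalone form:

/* The pattern behind the one-line change above: null a pointer right
 * after freeing it so any later cleanup path sees "nothing to free". */
#include <stdlib.h>

struct baum { void *brlapi; };

static void open_failed(struct baum *b)
{
    free(b->brlapi);
    b->brlapi = NULL;    /* later finalizers may free(NULL) safely */
}

int main(void)
{
    struct baum b = { .brlapi = malloc(16) };

    open_failed(&b);     /* error path frees and nulls the handle */
    free(b.brlapi);      /* finalizer: free(NULL) is a no-op */
    return 0;
}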

View File

@@ -84,7 +84,8 @@ static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond)
return qio_channel_create_watch(s->ioc_out, cond);
}
static void fd_chr_update_read_handler(Chardev *chr)
static void fd_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
FDChardev *s = FD_CHARDEV(chr);
@@ -93,7 +94,7 @@ static void fd_chr_update_read_handler(Chardev *chr)
chr->gsource = io_add_watch_poll(chr, s->ioc_in,
fd_chr_read_poll,
fd_chr_read, chr,
chr->gcontext);
context);
}
}

View File

@@ -253,6 +253,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
bool set_open)
{
Chardev *s;
ChardevClass *cc;
int fe_open;
s = b->chr;
@@ -260,6 +261,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
return;
}
cc = CHARDEV_GET_CLASS(s);
if (!opaque && !fd_can_read && !fd_read && !fd_event) {
fe_open = 0;
remove_fd_in_watch(s);
@@ -271,8 +273,9 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
b->chr_event = fd_event;
b->chr_be_change = be_change;
b->opaque = opaque;
qemu_chr_be_update_read_handlers(s, context);
if (cc->chr_update_read_handler) {
cc->chr_update_read_handler(s, context);
}
if (set_open) {
qemu_chr_fe_set_open(b, fe_open);

View File

@@ -112,7 +112,8 @@ static void pty_chr_update_read_handler_locked(Chardev *chr)
}
}
static void pty_chr_update_read_handler(Chardev *chr)
static void pty_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
qemu_mutex_lock(&chr->chr_write_lock);
pty_chr_update_read_handler_locked(chr);
@@ -218,7 +219,7 @@ static void pty_chr_state(Chardev *chr, int connected)
chr->gsource = io_add_watch_poll(chr, s->ioc,
pty_chr_read_poll,
pty_chr_read,
chr, chr->gcontext);
chr, NULL);
}
}
}

View File

@@ -332,6 +332,10 @@ static void tcp_chr_free_connection(Chardev *chr)
SocketChardev *s = SOCKET_CHARDEV(chr);
int i;
if (!s->connected) {
return;
}
if (s->read_msgfds_num) {
for (i = 0; i < s->read_msgfds_num; i++) {
close(s->read_msgfds[i]);
@@ -390,25 +394,22 @@ static void update_disconnected_filename(SocketChardev *s)
s->is_listen, s->is_telnet);
}
/* NB may be called even if tcp_chr_connect has not been
* reached, due to TLS or telnet initialization failure,
* so can *not* assume s->connected == true
*/
static void tcp_chr_disconnect(Chardev *chr)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
bool emit_close = s->connected;
if (!s->connected) {
return;
}
tcp_chr_free_connection(chr);
if (s->listen_ioc && s->listen_tag == 0) {
if (s->listen_ioc) {
s->listen_tag = qio_channel_add_watch(
QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
}
update_disconnected_filename(s);
if (emit_close) {
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
if (s->reconnect_time) {
qemu_chr_socket_restart_timer(chr);
}
@@ -515,12 +516,13 @@ static void tcp_chr_connect(void *opaque)
chr->gsource = io_add_watch_poll(chr, s->ioc,
tcp_chr_read_poll,
tcp_chr_read,
chr, chr->gcontext);
chr, NULL);
}
qemu_chr_be_event(chr, CHR_EVENT_OPENED);
}
static void tcp_chr_update_read_handler(Chardev *chr)
static void tcp_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -533,7 +535,7 @@ static void tcp_chr_update_read_handler(Chardev *chr)
chr->gsource = io_add_watch_poll(chr, s->ioc,
tcp_chr_read_poll,
tcp_chr_read, chr,
chr->gcontext);
context);
}
}
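
The char-socket.c hunks make disconnect idempotent: the newer side drops the early "not connected" return, samples s->connected into emit_close before tearing the connection down, and only emits CHR_EVENT_CLOSED when the channel had actually opened (the added comment notes the function can run after a failed TLS or telnet setup). A schematic of that guard, with a hypothetical struct:

/* Sketch of the guard the newer branch adds (hypothetical type):
 * teardown may run before the connection ever completed, so remember
 * whether we were connected and only emit CLOSED in that case. */
#include <stdbool.h>
#include <stdio.h>

struct sock_chr { bool connected; };

static void chr_disconnect(struct sock_chr *s)
{
    bool emit_close = s->connected;   /* sample before teardown */

    s->connected = false;             /* free_connection analogue */
    if (emit_close) {
        puts("CHR_EVENT_CLOSED");     /* only if we were ever open */
    }
}

int main(void)
{
    struct sock_chr s = { .connected = true };

    chr_disconnect(&s);   /* prints CLOSED */
    chr_disconnect(&s);   /* second call is a harmless no-op */
    return 0;
}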

View File

@@ -100,7 +100,8 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
return TRUE;
}
static void udp_chr_update_read_handler(Chardev *chr)
static void udp_chr_update_read_handler(Chardev *chr,
GMainContext *context)
{
UdpChardev *s = UDP_CHARDEV(chr);
@@ -109,7 +110,7 @@ static void udp_chr_update_read_handler(Chardev *chr)
chr->gsource = io_add_watch_poll(chr, s->ioc,
udp_chr_read_poll,
udp_chr_read, chr,
chr->gcontext);
context);
}
}

View File

@@ -180,17 +180,6 @@ void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len)
}
}
void qemu_chr_be_update_read_handlers(Chardev *s,
GMainContext *context)
{
ChardevClass *cc = CHARDEV_GET_CLASS(s);
s->gcontext = context;
if (cc->chr_update_read_handler) {
cc->chr_update_read_handler(s);
}
}
int qemu_chr_add_client(Chardev *s, int fd)
{
return CHARDEV_GET_CLASS(s)->chr_add_client ?
@@ -942,7 +931,7 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
ChardevReturn *ret;
Chardev *chr;
cc = char_get_class(ChardevBackendKind_str(backend->type), errp);
cc = char_get_class(ChardevBackendKind_lookup[backend->type], errp);
if (!cc) {
return NULL;
}
@@ -1000,7 +989,7 @@ ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend,
return NULL;
}
cc = char_get_class(ChardevBackendKind_str(backend->type), errp);
cc = char_get_class(ChardevBackendKind_lookup[backend->type], errp);
if (!cc) {
return NULL;
}
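
The chardev hunks in this stretch all follow one refactor: instead of threading a GMainContext argument through every backend's chr_update_read_handler() hook, the newer branch stores the context once on the Chardev (s->gcontext) and lets each hook read it back. A compressed model of that shape, with a hypothetical struct layout:

/* Minimal model of the refactor (hypothetical layout, not QEMU's):
 * the context is remembered centrally, so per-backend hooks no
 * longer take it as a parameter. */
#include <stdio.h>

typedef struct GMainContext GMainContext;   /* opaque, as in glib */

typedef struct Chardev {
    GMainContext *gcontext;
    void (*chr_update_read_handler)(struct Chardev *s);
} Chardev;

static void be_update_read_handlers(Chardev *s, GMainContext *context)
{
    s->gcontext = context;                /* stored once, centrally */
    if (s->chr_update_read_handler) {
        s->chr_update_read_handler(s);    /* hook reads s->gcontext */
    }
}

static void fd_update_read_handler(Chardev *s)
{
    printf("rearming watch on context %p\n", (void *)s->gcontext);
}

int main(void)
{
    Chardev chr = { .chr_update_read_handler = fd_update_read_handler };

    be_update_read_handlers(&chr, NULL);  /* NULL = default context */
    return 0;
}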

configure
View File

@@ -240,11 +240,6 @@ supported_target() {
return 1
}
ld_has() {
$ld --help 2>/dev/null | grep ".$1" >/dev/null 2>&1
}
# default parameters
source_path=$(dirname "$0")
cpu=""
@@ -265,16 +260,6 @@ libs_qga=""
debug_info="yes"
stack_protector=""
if test -e "$source_path/.git"
then
git_update=yes
git_submodules="ui/keycodemapdb"
else
git_update=no
git_submodules=""
fi
git="git"
# Don't accept a target_list environment variable.
unset target_list
@@ -297,17 +282,16 @@ curses=""
docs=""
fdt=""
netmap="no"
pixman=""
sdl=""
sdlabi=""
virtfs=""
mpath=""
vnc="yes"
sparse="no"
vde=""
vnc_sasl=""
vnc_jpeg=""
vnc_png=""
xkbcommon=""
xen=""
xen_ctrl_version=""
xen_pv_domain_build="no"
@@ -343,7 +327,6 @@ modules="no"
prefix="/usr/local"
mandir="\${prefix}/share/man"
datadir="\${prefix}/share"
firmwarepath="\${prefix}/share/qemu-firmware"
qemu_docdir="\${prefix}/share/doc/qemu"
bindir="\${prefix}/bin"
libdir="\${prefix}/lib"
@@ -362,6 +345,7 @@ cocoa="no"
softmmu="yes"
linux_user="no"
bsd_user="no"
aix="no"
blobs="yes"
pkgversion=""
pie=""
@@ -378,7 +362,6 @@ opengl_dmabuf="no"
cpuid_h="no"
avx2_opt="no"
zlib="yes"
capstone=""
lzo=""
snappy=""
bzip2=""
@@ -482,7 +465,6 @@ ccas="${CCAS-$cc}"
cpp="${CPP-$cc -E}"
objcopy="${OBJCOPY-${cross_prefix}objcopy}"
ld="${LD-${cross_prefix}ld}"
ranlib="${RANLIB-${cross_prefix}ranlib}"
nm="${NM-${cross_prefix}nm}"
strip="${STRIP-${cross_prefix}strip}"
windres="${WINDRES-${cross_prefix}windres}"
@@ -567,6 +549,8 @@ elif check_define __NetBSD__; then
targetos='NetBSD'
elif check_define __APPLE__; then
targetos='Darwin'
elif check_define _AIX; then
targetos='AIX'
else
# This is a fatal error, but don't report it yet, because we
# might be going to just print the --help text, or it might
@@ -645,6 +629,9 @@ case "$cpu" in
cpu="$cpu"
supported_cpu="yes"
;;
ia64)
cpu="$cpu"
;;
i386|i486|i586|i686|i86pc|BePC)
cpu="i386"
supported_cpu="yes"
@@ -760,6 +747,7 @@ SunOS)
solaris="yes"
make="${MAKE-gmake}"
install="${INSTALL-ginstall}"
ld="gld"
smbd="${SMBD-/usr/sfw/sbin/smbd}"
if test -f /usr/include/sys/soundcard.h ; then
audio_drv_list="oss"
@@ -774,6 +762,10 @@ SunOS)
LIBS="$solarisnetlibs $LIBS"
libs_qga="$solarisnetlibs $libs_qga"
;;
AIX)
aix="yes"
make="${MAKE-gmake}"
;;
Haiku)
haiku="yes"
QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS $QEMU_CFLAGS"
@@ -832,7 +824,7 @@ if test "$mingw32" = "yes" ; then
sysconfdir="\${prefix}"
local_statedir=
confsuffix=""
libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -lwininet -liphlpapi -lnetapi32 $libs_qga"
libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -liphlpapi -lnetapi32 $libs_qga"
fi
werror=""
@@ -928,10 +920,6 @@ for opt do
;;
--localstatedir=*) local_statedir="$optarg"
;;
--firmwarepath=*) firmwarepath="$optarg"
;;
--host=*|--build=*|\
--disable-dependency-tracking|\
--sbindir=*|--sharedstatedir=*|\
--oldincludedir=*|--datarootdir=*|--infodir=*|--localedir=*|\
--htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*)
@@ -940,6 +928,12 @@ for opt do
# configure to be used by RPM and similar macros that set
# lots of directory switches by default.
;;
--with-system-pixman) pixman="system"
;;
--without-system-pixman) pixman="internal"
;;
--without-pixman) pixman="none"
;;
--disable-sdl) sdl="no"
;;
--enable-sdl) sdl="yes"
@@ -954,10 +948,6 @@ for opt do
;;
--enable-virtfs) virtfs="yes"
;;
--disable-mpath) mpath="no"
;;
--enable-mpath) mpath="yes"
;;
--disable-vnc) vnc="no"
;;
--enable-vnc) vnc="yes"
@@ -1301,20 +1291,6 @@ for opt do
error_exit "vhost-user isn't available on win32"
fi
;;
--disable-capstone) capstone="no"
;;
--enable-capstone) capstone="yes"
;;
--enable-capstone=git) capstone="git"
;;
--enable-capstone=system) capstone="system"
;;
--with-git=*) git="$optarg"
;;
--enable-git-update) git_update=yes
;;
--disable-git-update) git_update=no
;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@@ -1447,7 +1423,6 @@ Advanced options (experts only):
--libdir=PATH install libraries in PATH
--sysconfdir=PATH install config in PATH$confsuffix
--localstatedir=PATH install local state in PATH (set at runtime on win32)
--firmwarepath=PATH search PATH for firmware files
--with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix]
--enable-debug enable common debug build options
--disable-strip disable stripping binaries
@@ -1516,7 +1491,6 @@ disabled with --disable-FEATURE, default is enabled if available:
vnc-png PNG compression for VNC server
cocoa Cocoa UI (Mac OS X only)
virtfs VirtFS
mpath Multipath persistent reservation passthrough
xen xen backend driver support
xen-pci-passthrough
brlapi BrlAPI (Braille)
@@ -1562,7 +1536,6 @@ disabled with --disable-FEATURE, default is enabled if available:
vxhs Veritas HyperScale vDisk backend support
crypto-afalg Linux AF_ALG crypto backend driver
vhost-user vhost-user support
capstone capstone disassembler support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1681,19 +1654,6 @@ EOF
fi
fi
# Disable -Wmissing-braces on older compilers that warn even for
# the "universal" C zero initializer {0}.
cat > $TMPC << EOF
struct {
int a[2];
} x = {0};
EOF
if compile_object "-Werror" "" ; then
:
else
QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
fi
# Workaround for http://gcc.gnu.org/PR55489. Happens with -fPIE/-fPIC and
# large functions that use global variables. The bug is in all releases of
# GCC, but it became particularly acute in 4.6.x and 4.7.x. It is fixed in
@@ -2077,7 +2037,7 @@ if test "$seccomp" != "no" ; then
arm|aarch64)
libseccomp_minver="2.2.3"
;;
ppc|ppc64|s390x)
ppc|ppc64)
libseccomp_minver="2.3.0"
;;
*)
@@ -2087,8 +2047,8 @@ if test "$seccomp" != "no" ; then
if test "$libseccomp_minver" != "" &&
$pkg_config --atleast-version=$libseccomp_minver libseccomp ; then
seccomp_cflags="$($pkg_config --cflags libseccomp)"
seccomp_libs="$($pkg_config --libs libseccomp)"
libs_softmmu="$libs_softmmu $($pkg_config --libs libseccomp)"
QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags libseccomp)"
seccomp="yes"
else
if test "$seccomp" = "yes" ; then
@@ -2790,7 +2750,6 @@ if test "$sdl" != "no" ; then
int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
EOF
sdl_cflags=$($sdlconfig --cflags 2>/dev/null)
sdl_cflags="$sdl_cflags -Wno-undef" # workaround 2.0.8 bug
if test "$static" = "yes" ; then
if $pkg_config $sdlname --exists; then
sdl_libs=$($pkg_config $sdlname --static --libs 2>/dev/null)
@@ -2841,6 +2800,7 @@ EOF
sdl_cflags="$sdl_cflags $x11_cflags"
sdl_libs="$sdl_libs $x11_libs"
fi
libs_softmmu="$sdl_libs $libs_softmmu"
fi
##########################################
@@ -2853,6 +2813,7 @@ EOF
rdma_libs="-lrdmacm -libverbs"
if compile_prog "" "$rdma_libs" ; then
rdma="yes"
libs_softmmu="$libs_softmmu $rdma_libs"
else
if test "$rdma" = "yes" ; then
error_exit \
@@ -2944,21 +2905,6 @@ EOF
fi
fi
##########################################
# xkbcommon probe
if test "$xkbcommon" != "no" ; then
if $pkg_config xkbcommon --exists; then
xkbcommon_cflags=$($pkg_config xkbcommon --cflags)
xkbcommon_libs=$($pkg_config xkbcommon --libs)
xkbcommon=yes
else
if test "$xkbcommon" = "yes" ; then
feature_not_found "xkbcommon" "Install libxkbcommon-devel"
fi
xkbcommon=no
fi
fi
##########################################
# fnmatch() probe, used for ACL routines
fnmatch="no"
@@ -3012,6 +2958,8 @@ int main(void)
EOF
if compile_prog "" "$vde_libs" ; then
vde=yes
libs_softmmu="$vde_libs $libs_softmmu"
libs_tools="$vde_libs $libs_tools"
else
if test "$vde" = "yes" ; then
feature_not_found "vde" "Install vde (Virtual Distributed Ethernet) devel"
@@ -3099,13 +3047,13 @@ for drv in $audio_drv_list; do
alsa)
audio_drv_probe $drv alsa/asoundlib.h -lasound \
"return snd_pcm_close((snd_pcm_t *)0);"
alsa_libs="-lasound"
libs_softmmu="-lasound $libs_softmmu"
;;
pa)
audio_drv_probe $drv pulse/pulseaudio.h "-lpulse" \
"pa_context_set_source_output_volume(NULL, 0, NULL, NULL, NULL); return 0;"
pulse_libs="-lpulse"
libs_softmmu="-lpulse $libs_softmmu"
audio_pt_int="yes"
;;
@@ -3116,16 +3064,16 @@ for drv in $audio_drv_list; do
;;
coreaudio)
coreaudio_libs="-framework CoreAudio"
libs_softmmu="-framework CoreAudio $libs_softmmu"
;;
dsound)
dsound_libs="-lole32 -ldxguid"
libs_softmmu="-lole32 -ldxguid $libs_softmmu"
audio_win_int="yes"
;;
oss)
oss_libs="$oss_lib"
libs_softmmu="$oss_lib $libs_softmmu"
;;
wav)
@@ -3153,6 +3101,7 @@ int main( void ) { return brlapi__openConnection (NULL, NULL, NULL); }
EOF
if compile_prog "" "$brlapi_libs" ; then
brlapi=yes
libs_softmmu="$brlapi_libs $libs_softmmu"
else
if test "$brlapi" = "yes" ; then
feature_not_found "brlapi" "Install brlapi devel"
@@ -3351,47 +3300,39 @@ fi
##########################################
# pixman support probe
if test "$want_tools" = "no" -a "$softmmu" = "no"; then
if test "$pixman" = ""; then
if test "$want_tools" = "no" -a "$softmmu" = "no"; then
pixman="none"
elif $pkg_config --atleast-version=0.21.8 pixman-1 > /dev/null 2>&1; then
pixman="system"
else
pixman="internal"
fi
fi
if test "$pixman" = "none"; then
if test "$want_tools" != "no" -o "$softmmu" != "no"; then
error_exit "pixman disabled but system emulation or tools build" \
"enabled. You can turn off pixman only if you also" \
"disable all system emulation targets and the tools" \
"build with '--disable-tools --disable-system'."
fi
pixman_cflags=
pixman_libs=
elif $pkg_config --atleast-version=0.21.8 pixman-1 > /dev/null 2>&1; then
elif test "$pixman" = "system"; then
# pixman version has been checked above
pixman_cflags=$($pkg_config --cflags pixman-1)
pixman_libs=$($pkg_config --libs pixman-1)
else
error_exit "pixman >= 0.21.8 not present." \
"Please install the pixman devel package."
fi
##########################################
# libmpathpersist probe
if test "$mpath" != "no" ; then
cat > $TMPC <<EOF
#include <libudev.h>
#include <mpath_persist.h>
unsigned mpath_mx_alloc_len = 1024;
int logsink;
static struct config *multipath_conf;
extern struct udev *udev;
extern struct config *get_multipath_config(void);
extern void put_multipath_config(struct config *conf);
struct udev *udev;
struct config *get_multipath_config(void) { return multipath_conf; }
void put_multipath_config(struct config *conf) { }
int main(void) {
udev = udev_new();
multipath_conf = mpath_lib_init();
return 0;
}
EOF
if compile_prog "" "-ludev -lmultipath -lmpathpersist" ; then
mpathpersist=yes
else
mpathpersist=no
if test ! -d ${source_path}/pixman/pixman; then
error_exit "pixman >= 0.21.8 not present. Your options:" \
" (1) Preferred: Install the pixman devel package (any recent" \
" distro should have packages as Xorg needs pixman too)." \
" (2) Fetch the pixman submodule, using:" \
" git submodule update --init pixman"
fi
else
mpathpersist=no
mkdir -p pixman/pixman
pixman_cflags="-I\$(SRC_PATH)/pixman/pixman -I\$(BUILD_DIR)/pixman/pixman"
pixman_libs="-L\$(BUILD_DIR)/pixman/pixman/.libs -lpixman-1"
fi
##########################################
@@ -3562,12 +3503,6 @@ else
tpm_passthrough=no
fi
# TPM emulator is for all posix systems
if test "$mingw32" != "yes"; then
tpm_emulator=$tpm
else
tpm_emulator=no
fi
##########################################
# attr probe
@@ -3657,30 +3592,27 @@ EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
fdt=yes
elif test -d ${source_path}/dtc/libfdt ; then
# have submodule DTC - use it
fdt=yes
dtc_internal="yes"
mkdir -p dtc
if [ "$pwd_is_source_path" != "y" ] ; then
symlink "$source_path/dtc/Makefile" "dtc/Makefile"
symlink "$source_path/dtc/scripts" "dtc/scripts"
fi
fdt_cflags="-I\$(SRC_PATH)/dtc/libfdt"
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"
else
# have GIT checkout, so activate dtc submodule
if test -e "${source_path}/.git" ; then
git_submodules="${git_submodules} dtc"
fi
if test -d "${source_path}/dtc/libfdt" || test -e "${source_path}/.git" ; then
fdt=yes
dtc_internal="yes"
mkdir -p dtc
if [ "$pwd_is_source_path" != "y" ] ; then
symlink "$source_path/dtc/Makefile" "dtc/Makefile"
symlink "$source_path/dtc/scripts" "dtc/scripts"
fi
fdt_cflags="-I\$(SRC_PATH)/dtc/libfdt"
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# Not a git build & no libfdt found, prompt for system install
error_exit "DTC (libfdt) version >= 1.4.2 not present." \
"Please install the DTC (libfdt) devel package"
else
# don't have and don't want
fdt_libs=
fdt=no
fi
# don't have and don't want
fdt_libs=
fdt=no
fi
fi
@@ -3923,7 +3855,7 @@ fi
# check if memfd is supported
memfd=no
cat > $TMPC << EOF
#include <sys/mman.h>
#include <sys/memfd.h>
int main(void)
{
@@ -4277,7 +4209,7 @@ elif compile_prog "" "$pthread_lib -lrt" ; then
fi
if test "$darwin" != "yes" -a "$mingw32" != "yes" -a "$solaris" != yes -a \
"$haiku" != "yes" ; then
"$aix" != "yes" -a "$haiku" != "yes" ; then
libs_softmmu="-lutil $libs_softmmu"
fi
@@ -4308,10 +4240,13 @@ EOF
fi
# check for smartcard support
smartcard_cflags=""
if test "$smartcard" != "no"; then
if $pkg_config libcacard; then
libcacard_cflags=$($pkg_config --cflags libcacard)
libcacard_libs=$($pkg_config --libs libcacard)
QEMU_CFLAGS="$QEMU_CFLAGS $libcacard_cflags"
libs_softmmu="$libs_softmmu $libcacard_libs"
smartcard="yes"
else
if test "$smartcard" = "yes"; then
@@ -4327,6 +4262,8 @@ if test "$libusb" != "no" ; then
libusb="yes"
libusb_cflags=$($pkg_config --cflags libusb-1.0)
libusb_libs=$($pkg_config --libs libusb-1.0)
QEMU_CFLAGS="$QEMU_CFLAGS $libusb_cflags"
libs_softmmu="$libs_softmmu $libusb_libs"
else
if test "$libusb" = "yes"; then
feature_not_found "libusb" "Install libusb devel >= 1.0.13"
@@ -4341,6 +4278,8 @@ if test "$usb_redir" != "no" ; then
usb_redir="yes"
usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5)
usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5)
QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags"
libs_softmmu="$libs_softmmu $usb_redir_libs"
else
if test "$usb_redir" = "yes"; then
feature_not_found "usb-redir" "Install usbredir devel"
@@ -4446,58 +4385,6 @@ EOF
fi
fi
##########################################
# capstone
case "$capstone" in
"" | yes)
if $pkg_config capstone; then
capstone=system
elif test -e "${source_path}/.git" ; then
capstone=git
elif test -e "${source_path}/capstone/Makefile" ; then
capstone=internal
elif test -z "$capstone" ; then
capstone=no
else
feature_not_found "capstone" "Install capstone devel or git submodule"
fi
;;
system)
if ! $pkg_config capstone; then
feature_not_found "capstone" "Install capstone devel"
fi
;;
esac
case "$capstone" in
git | internal)
if test "$capstone" = git; then
git_submodules="${git_submodules} capstone"
fi
mkdir -p capstone
QEMU_CFLAGS="$QEMU_CFLAGS -I\$(SRC_PATH)/capstone/include"
if test "$mingw32" = "yes"; then
LIBCAPSTONE=capstone.lib
else
LIBCAPSTONE=libcapstone.a
fi
LIBS="-L\$(BUILD_DIR)/capstone -lcapstone $LIBS"
;;
system)
QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags capstone)"
LIBS="$($pkg_config --libs capstone) $LIBS"
;;
no)
;;
*)
error_exit "Unknown state for capstone: $capstone"
;;
esac
##########################################
# check if we have fdatasync
@@ -4555,18 +4442,6 @@ if compile_prog "" "" ; then
posix_syslog=yes
fi
##########################################
# check if we have sem_timedwait
sem_timedwait=no
cat > $TMPC << EOF
#include <semaphore.h>
int main(void) { return sem_timedwait(0, 0); }
EOF
if compile_prog "" "" ; then
sem_timedwait=yes
fi
##########################################
# check if trace backend exists
@@ -5168,7 +5043,7 @@ fi
# Use ASLR, no-SEH and DEP if available
if test "$mingw32" = "yes" ; then
for flag in --dynamicbase --no-seh --nxcompat; do
if ld_has $flag ; then
if $ld --help 2>/dev/null | grep ".$flag" >/dev/null 2>/dev/null ; then
LDFLAGS="-Wl,$flag $LDFLAGS"
fi
done
@@ -5195,37 +5070,16 @@ if test "$want_tools" = "yes" ; then
fi
fi
if test "$softmmu" = yes ; then
if test "$linux" = yes; then
if test "$virtfs" != no && test "$cap" = yes && test "$attr" = yes ; then
if test "$virtfs" != no ; then
if test "$cap" = yes && test "$linux" = yes && test "$attr" = yes ; then
virtfs=yes
tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
else
if test "$virtfs" = yes; then
error_exit "VirtFS requires libcap devel and libattr devel"
error_exit "VirtFS is supported only on Linux and requires libcap devel and libattr devel"
fi
virtfs=no
fi
if test "$mpath" != no && test "$mpathpersist" = yes ; then
mpath=yes
else
if test "$mpath" = yes; then
error_exit "Multipath requires libmpathpersist devel"
fi
mpath=no
fi
tools="$tools scsi/qemu-pr-helper\$(EXESUF)"
else
if test "$virtfs" = yes; then
error_exit "VirtFS is supported only on Linux"
fi
virtfs=no
if test "$mpath" = yes; then
error_exit "Multipath is supported only on Linux"
fi
mpath=no
fi
if test "$xkbcommon" = "yes"; then
tools="qemu-keymap\$(EXESUF) $tools"
fi
fi
@@ -5305,9 +5159,9 @@ if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
"$targetos" != "Darwin" -a "$targetos" != "SunOS" -a \
"$softmmu" = yes ; then
# Different host OS linkers have different ideas about the name of the ELF
# emulation. Linux and OpenBSD/amd64 use 'elf_i386'; FreeBSD uses the _fbsd
# variant; OpenBSD/i386 uses the _obsd variant; and Windows uses i386pe.
for emu in elf_i386 elf_i386_fbsd elf_i386_obsd i386pe; do
# emulation. Linux and OpenBSD use 'elf_i386'; FreeBSD uses the _fbsd
# variant; and Windows uses i386pe.
for emu in elf_i386 elf_i386_fbsd i386pe; do
if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
ld_i386_emulation="$emu"
roms="optionrom"
@@ -5410,7 +5264,6 @@ libs_softmmu="$pixman_libs $libs_softmmu"
echo "Install prefix $prefix"
echo "BIOS directory $(eval echo $qemu_datadir)"
echo "firmware path $(eval echo $firmwarepath)"
echo "binary directory $(eval echo $bindir)"
echo "library directory $(eval echo $libdir)"
echo "module directory $(eval echo $qemu_moddir)"
@@ -5426,8 +5279,6 @@ echo "local state directory queried at runtime"
echo "Windows SDK $win_sdk"
fi
echo "Source path $source_path"
echo "GIT binary $git"
echo "GIT submodules $git_submodules"
echo "C compiler $cc"
echo "Host C compiler $host_cc"
echo "C++ compiler $cxx"
@@ -5454,6 +5305,7 @@ echo "static build $static"
if test "$darwin" = "yes" ; then
echo "Cocoa support $cocoa"
fi
echo "pixman $pixman"
echo "SDL support $sdl $(echo_version $sdl $sdlversion)"
echo "GTK support $gtk $(echo_version $gtk $gtk_version)"
echo "GTK GL support $gtk_gl"
@@ -5474,7 +5326,6 @@ echo "Audio drivers $audio_drv_list"
echo "Block whitelist (rw) $block_drv_rw_whitelist"
echo "Block whitelist (ro) $block_drv_ro_whitelist"
echo "VirtFS support $virtfs"
echo "Multipath support $mpath"
echo "VNC support $vnc"
if test "$vnc" = "yes" ; then
echo "VNC SASL support $vnc_sasl"
@@ -5545,7 +5396,6 @@ echo "gcov enabled $gcov"
echo "TPM support $tpm"
echo "libssh2 support $libssh2"
echo "TPM passthrough $tpm_passthrough"
echo "TPM emulator $tpm_emulator"
echo "QOM debugging $qom_cast_debug"
echo "Live block migration $live_block_migration"
echo "lzo support $lzo"
@@ -5557,7 +5407,6 @@ echo "jemalloc support $jemalloc"
echo "avx2 optimization $avx2_opt"
echo "replication support $replication"
echo "VxHS block device $vxhs"
echo "capstone $capstone"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5606,7 +5455,6 @@ echo "mandir=$mandir" >> $config_host_mak
echo "sysconfdir=$sysconfdir" >> $config_host_mak
echo "qemu_confdir=$qemu_confdir" >> $config_host_mak
echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
echo "qemu_firmwarepath=$firmwarepath" >> $config_host_mak
echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
echo "qemu_moddir=$qemu_moddir" >> $config_host_mak
if test "$mingw32" = "no" ; then
@@ -5618,9 +5466,6 @@ echo "extra_cxxflags=$EXTRA_CXXFLAGS" >> $config_host_mak
echo "extra_ldflags=$EXTRA_LDFLAGS" >> $config_host_mak
echo "qemu_localedir=$qemu_localedir" >> $config_host_mak
echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
echo "GIT=$git" >> $config_host_mak
echo "GIT_SUBMODULES=$git_submodules" >> $config_host_mak
echo "GIT_UPDATE=$git_update" >> $config_host_mak
echo "ARCH=$ARCH" >> $config_host_mak
@@ -5673,6 +5518,10 @@ if test "$darwin" = "yes" ; then
echo "CONFIG_DARWIN=y" >> $config_host_mak
fi
if test "$aix" = "yes" ; then
echo "CONFIG_AIX=y" >> $config_host_mak
fi
if test "$solaris" = "yes" ; then
echo "CONFIG_SOLARIS=y" >> $config_host_mak
fi
@@ -5691,7 +5540,6 @@ if test "$slirp" = "yes" ; then
fi
if test "$vde" = "yes" ; then
echo "CONFIG_VDE=y" >> $config_host_mak
echo "VDE_LIBS=$vde_libs" >> $config_host_mak
fi
if test "$netmap" = "yes" ; then
echo "CONFIG_NETMAP=y" >> $config_host_mak
@@ -5707,11 +5555,6 @@ for drv in $audio_drv_list; do
def=CONFIG_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]')
echo "$def=y" >> $config_host_mak
done
echo "ALSA_LIBS=$alsa_libs" >> $config_host_mak
echo "PULSE_LIBS=$pulse_libs" >> $config_host_mak
echo "COREAUDIO_LIBS=$coreaudio_libs" >> $config_host_mak
echo "DSOUND_LIBS=$dsound_libs" >> $config_host_mak
echo "OSS_LIBS=$oss_libs" >> $config_host_mak
if test "$audio_pt_int" = "yes" ; then
echo "CONFIG_AUDIO_PT_INT=y" >> $config_host_mak
fi
@@ -5732,10 +5575,6 @@ fi
if test "$vnc_png" = "yes" ; then
echo "CONFIG_VNC_PNG=y" >> $config_host_mak
fi
if test "$xkbcommon" = "yes" ; then
echo "XKBCOMMON_CFLAGS=$xkbcommon_cflags" >> $config_host_mak
echo "XKBCOMMON_LIBS=$xkbcommon_libs" >> $config_host_mak
fi
if test "$fnmatch" = "yes" ; then
echo "CONFIG_FNMATCH=y" >> $config_host_mak
fi
@@ -5760,7 +5599,6 @@ if test "$sdl" = "yes" ; then
echo "CONFIG_SDL=y" >> $config_host_mak
echo "CONFIG_SDLABI=$sdlabi" >> $config_host_mak
echo "SDL_CFLAGS=$sdl_cflags" >> $config_host_mak
echo "SDL_LIBS=$sdl_libs" >> $config_host_mak
fi
if test "$cocoa" = "yes" ; then
echo "CONFIG_COCOA=y" >> $config_host_mak
@@ -5837,9 +5675,6 @@ fi
if test "$inotify1" = "yes" ; then
echo "CONFIG_INOTIFY1=y" >> $config_host_mak
fi
if test "$sem_timedwait" = "yes" ; then
echo "CONFIG_SEM_TIMEDWAIT=y" >> $config_host_mak
fi
if test "$byteswap_h" = "yes" ; then
echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak
fi
@@ -5853,7 +5688,6 @@ if test "$curl" = "yes" ; then
fi
if test "$brlapi" = "yes" ; then
echo "CONFIG_BRLAPI=y" >> $config_host_mak
echo "BRLAPI_LIBS=$brlapi_libs" >> $config_host_mak
fi
if test "$bluez" = "yes" ; then
echo "CONFIG_BLUEZ=y" >> $config_host_mak
@@ -5939,9 +5773,6 @@ fi
if test "$virtfs" = "yes" ; then
echo "CONFIG_VIRTFS=y" >> $config_host_mak
fi
if test "$mpath" = "yes" ; then
echo "CONFIG_MPATH=y" >> $config_host_mak
fi
if test "$vhost_scsi" = "yes" ; then
echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
fi
@@ -5991,20 +5822,14 @@ fi
if test "$smartcard" = "yes" ; then
echo "CONFIG_SMARTCARD=y" >> $config_host_mak
echo "SMARTCARD_CFLAGS=$libcacard_cflags" >> $config_host_mak
echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak
fi
if test "$libusb" = "yes" ; then
echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak
echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak
echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak
fi
if test "$usb_redir" = "yes" ; then
echo "CONFIG_USB_REDIR=y" >> $config_host_mak
echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak
echo "USB_REDIR_LIBS=$usb_redir_libs" >> $config_host_mak
fi
if test "$opengl" = "yes" ; then
@@ -6045,8 +5870,6 @@ fi
if test "$seccomp" = "yes"; then
echo "CONFIG_SECCOMP=y" >> $config_host_mak
echo "SECCOMP_CFLAGS=$seccomp_cflags" >> $config_host_mak
echo "SECCOMP_LIBS=$seccomp_libs" >> $config_host_mak
fi
# XXX: suppress that
@@ -6153,16 +5976,19 @@ if test "$live_block_migration" = "yes" ; then
echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak
fi
# USB host support
if test "$libusb" = "yes"; then
echo "HOST_USB=libusb legacy" >> $config_host_mak
else
echo "HOST_USB=stub" >> $config_host_mak
fi
# TPM passthrough support?
if test "$tpm" = "yes"; then
echo 'CONFIG_TPM=$(CONFIG_SOFTMMU)' >> $config_host_mak
# TPM passthrough support?
if test "$tpm_passthrough" = "yes"; then
echo "CONFIG_TPM_PASSTHROUGH=y" >> $config_host_mak
fi
# TPM emulator support?
if test "$tpm_emulator" = "yes"; then
echo "CONFIG_TPM_EMULATOR=y" >> $config_host_mak
fi
fi
echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak
@@ -6204,7 +6030,6 @@ echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
if test "$rdma" = "yes" ; then
echo "CONFIG_RDMA=y" >> $config_host_mak
echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak
fi
if test "$have_rtnetlink" = "yes" ; then
@@ -6234,9 +6059,6 @@ fi
if test "$ivshmem" = "yes" ; then
echo "CONFIG_IVSHMEM=y" >> $config_host_mak
fi
if test "$capstone" != "no" ; then
echo "CONFIG_CAPSTONE=y" >> $config_host_mak
fi
# Hold two types of flag:
# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on
@@ -6292,7 +6114,6 @@ echo "CCAS=$ccas" >> $config_host_mak
echo "CPP=$cpp" >> $config_host_mak
echo "OBJCOPY=$objcopy" >> $config_host_mak
echo "LD=$ld" >> $config_host_mak
echo "RANLIB=$ranlib" >> $config_host_mak
echo "NM=$nm" >> $config_host_mak
echo "WINDRES=$windres" >> $config_host_mak
echo "CFLAGS=$CFLAGS" >> $config_host_mak
@@ -6633,6 +6454,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
i386|x86_64|x32)
disas_config "I386"
;;
ia64*)
disas_config "IA64"
;;
lm32)
disas_config "LM32"
;;
@@ -6698,34 +6522,18 @@ if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
ldflags="$ldflags $textseg_ldflags"
fi
# Newer kernels on s390 check for an S390_PGSTE program header and
# enable the pgste page table extensions in that case. This makes
# the vm.allocate_pgste sysctl unnecessary. We enable this program
# header if
# - we build on s390x
# - we build the system emulation for s390x (qemu-system-s390x)
# - KVM is enabled
# - the linker supports --s390-pgste
if test "$TARGET_ARCH" = "s390x" -a "$target_softmmu" = "yes" -a "$ARCH" = "s390x" -a "$kvm" = "yes"; then
if ld_has --s390-pgste ; then
ldflags="-Wl,--s390-pgste $ldflags"
fi
fi
echo "LDFLAGS+=$ldflags" >> $config_target_mak
echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
done # for target in $targets
if [ "$pixman" = "internal" ]; then
echo "config-host.h: subdir-pixman" >> $config_host_mak
fi
if [ "$dtc_internal" = "yes" ]; then
echo "config-host.h: subdir-dtc" >> $config_host_mak
fi
if [ "$capstone" = "git" -o "$capstone" = "internal" ]; then
echo "config-host.h: subdir-capstone" >> $config_host_mak
fi
if test -n "$LIBCAPSTONE"; then
echo "LIBCAPSTONE=$LIBCAPSTONE" >> $config_host_mak
fi
if test "$numa" = "yes"; then
echo "CONFIG_NUMA=y" >> $config_host_mak
@@ -6736,8 +6544,8 @@ if test "$ccache_cpp2" = "yes"; then
fi
# build tree in object directory in case the source is not in the current directory
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests tests/vm"
DIRS="$DIRS docs docs/interop fsdev scsi"
DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
DIRS="$DIRS docs docs/interop fsdev"
DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
DIRS="$DIRS roms/seabios roms/vgabios"
DIRS="$DIRS qapi-generated"
@@ -6787,7 +6595,6 @@ for rom in seabios vgabios ; do
echo "OBJCOPY=objcopy" >> $config_mak
echo "IASL=$iasl" >> $config_mak
echo "LD=$ld" >> $config_mak
echo "RANLIB=$ranlib" >> $config_mak
done
# set up tests data directory
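
Many of the configure hunks above work by compiling throwaway C probes. The memfd hunk, for instance, merely swaps the probe's header: newer glibc (2.27+) declares memfd_create() in <sys/mman.h>, where older trees used <sys/memfd.h>. A probe along these lines (an illustration, not the exact test configure embeds):

/* Feature probe in the configure style: if this compiles and links,
 * the host exposes memfd_create() via <sys/mman.h> (needs glibc
 * 2.27+ and _GNU_SOURCE). */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    int fd = memfd_create("probe", MFD_CLOEXEC);

    if (fd >= 0) {
        close(fd);
    }
    return 0;
}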

View File

@@ -1 +1 @@
libvhost-user-obj-y += libvhost-user.o libvhost-user-glib.o
libvhost-user-obj-y = libvhost-user.o

View File

@@ -1,154 +0,0 @@
/*
* Vhost User library
*
* Copyright (c) 2016 Nutanix Inc. All rights reserved.
* Copyright (c) 2017 Red Hat, Inc.
*
* Authors:
* Marc-André Lureau <mlureau@redhat.com>
* Felipe Franciosi <felipe@nutanix.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "libvhost-user-glib.h"
/* glib event loop integration for libvhost-user and misc callbacks */
G_STATIC_ASSERT((int)G_IO_IN == (int)VU_WATCH_IN);
G_STATIC_ASSERT((int)G_IO_OUT == (int)VU_WATCH_OUT);
G_STATIC_ASSERT((int)G_IO_PRI == (int)VU_WATCH_PRI);
G_STATIC_ASSERT((int)G_IO_ERR == (int)VU_WATCH_ERR);
G_STATIC_ASSERT((int)G_IO_HUP == (int)VU_WATCH_HUP);
typedef struct VugSrc {
GSource parent;
VuDev *dev;
GPollFD gfd;
} VugSrc;
static gboolean
vug_src_prepare(GSource *gsrc, gint *timeout)
{
g_assert(timeout);
*timeout = -1;
return FALSE;
}
static gboolean
vug_src_check(GSource *gsrc)
{
VugSrc *src = (VugSrc *)gsrc;
g_assert(src);
return src->gfd.revents & src->gfd.events;
}
static gboolean
vug_src_dispatch(GSource *gsrc, GSourceFunc cb, gpointer data)
{
VugSrc *src = (VugSrc *)gsrc;
g_assert(src);
((vu_watch_cb)cb)(src->dev, src->gfd.revents, data);
return G_SOURCE_CONTINUE;
}
static GSourceFuncs vug_src_funcs = {
vug_src_prepare,
vug_src_check,
vug_src_dispatch,
NULL
};
static GSource *
vug_source_new(VuDev *dev, int fd, GIOCondition cond,
vu_watch_cb vu_cb, gpointer data)
{
GSource *gsrc;
VugSrc *src;
guint id;
g_assert(dev);
g_assert(fd >= 0);
g_assert(vu_cb);
gsrc = g_source_new(&vug_src_funcs, sizeof(VugSrc));
g_source_set_callback(gsrc, (GSourceFunc)vu_cb, data, NULL);
src = (VugSrc *)gsrc;
src->dev = dev;
src->gfd.fd = fd;
src->gfd.events = cond;
g_source_add_poll(gsrc, &src->gfd);
id = g_source_attach(gsrc, NULL);
g_assert(id);
g_source_unref(gsrc);
return gsrc;
}
static void
set_watch(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb, void *pvt)
{
GSource *src;
VugDev *dev;
g_assert(vu_dev);
g_assert(fd >= 0);
g_assert(cb);
dev = container_of(vu_dev, VugDev, parent);
src = vug_source_new(vu_dev, fd, vu_evt, cb, pvt);
g_hash_table_replace(dev->fdmap, GINT_TO_POINTER(fd), src);
}
static void
remove_watch(VuDev *vu_dev, int fd)
{
VugDev *dev;
g_assert(vu_dev);
g_assert(fd >= 0);
dev = container_of(vu_dev, VugDev, parent);
g_hash_table_remove(dev->fdmap, GINT_TO_POINTER(fd));
}
static void vug_watch(VuDev *dev, int condition, void *data)
{
if (!vu_dispatch(dev)) {
dev->panic(dev, "Error processing vhost message");
}
}
void
vug_init(VugDev *dev, int socket,
vu_panic_cb panic, const VuDevIface *iface)
{
g_assert(dev);
g_assert(iface);
vu_init(&dev->parent, socket, panic, set_watch, remove_watch, iface);
dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL,
(GDestroyNotify) g_source_destroy);
dev->src = vug_source_new(&dev->parent, socket, G_IO_IN, vug_watch, NULL);
}
void
vug_deinit(VugDev *dev)
{
g_assert(dev);
g_hash_table_unref(dev->fdmap);
g_source_unref(dev->src);
}
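
The deleted libvhost-user-glib.c above exists only on the newer branch; its core is a custom GSource whose prepare/check/dispatch trio polls a single fd. The same pattern in a standalone program watching stdin (assumes glib >= 2.32; build with pkg-config --cflags --libs glib-2.0):

/* Standalone illustration of the custom-GSource pattern the deleted
 * file uses: wrap one fd in a GPollFD and let the main loop poll it. */
#include <glib.h>
#include <unistd.h>

typedef struct { GSource parent; GPollFD gfd; } FdSrc;

static gboolean fd_prepare(GSource *src, gint *timeout)
{
    *timeout = -1;            /* no timeout: poll() decides readiness */
    return FALSE;
}

static gboolean fd_check(GSource *src)
{
    FdSrc *s = (FdSrc *)src;

    return s->gfd.revents & s->gfd.events;
}

static gboolean fd_dispatch(GSource *src, GSourceFunc cb, gpointer data)
{
    return cb ? cb(data) : G_SOURCE_CONTINUE;
}

static GSourceFuncs fd_funcs = { fd_prepare, fd_check, fd_dispatch, NULL };

static gboolean on_ready(gpointer data)
{
    g_print("fd readable\n");
    return G_SOURCE_CONTINUE;
}

int main(void)
{
    GSource *src = g_source_new(&fd_funcs, sizeof(FdSrc));
    FdSrc *s = (FdSrc *)src;

    s->gfd.fd = STDIN_FILENO;
    s->gfd.events = G_IO_IN;
    g_source_add_poll(src, &s->gfd);
    g_source_set_callback(src, on_ready, NULL, NULL);
    g_source_attach(src, NULL);          /* NULL = default context */
    g_source_unref(src);                 /* loop now owns the source */

    g_main_loop_run(g_main_loop_new(NULL, FALSE));
    return 0;
}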

View File

@@ -1,32 +0,0 @@
/*
* Vhost User library
*
* Copyright (c) 2016 Nutanix Inc. All rights reserved.
* Copyright (c) 2017 Red Hat, Inc.
*
* Authors:
* Marc-André Lureau <mlureau@redhat.com>
* Felipe Franciosi <felipe@nutanix.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#ifndef LIBVHOST_USER_GLIB_H
#define LIBVHOST_USER_GLIB_H
#include <glib.h>
#include "libvhost-user.h"
typedef struct VugDev {
VuDev parent;
GHashTable *fdmap; /* fd -> gsource */
GSource *src;
} VugDev;
void vug_init(VugDev *dev, int socket,
vu_panic_cb panic, const VuDevIface *iface);
void vug_deinit(VugDev *dev);
#endif /* LIBVHOST_USER_GLIB_H */

View File

@@ -13,35 +13,14 @@
* later. See the COPYING file in the top-level directory.
*/
/* this code avoids GLib dependency */
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <qemu/osdep.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/vhost.h>
#include "qemu/compiler.h"
#include "qemu/atomic.h"
#include "libvhost-user.h"
/* usually provided by GLib */
#ifndef MIN
#define MIN(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#endif
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* The version of the protocol we support */
@@ -56,10 +35,13 @@
} while (0)
static const char *
vu_request_to_string(unsigned int req)
vu_request_to_string(int req)
{
#define REQ(req) [req] = #req
static const char *vu_request_str[] = {
REQ(VHOST_USER_NONE),
REQ(VHOST_USER_GET_FEATURES),
REQ(VHOST_USER_SET_FEATURES),
REQ(VHOST_USER_NONE),
REQ(VHOST_USER_GET_FEATURES),
REQ(VHOST_USER_SET_FEATURES),
@@ -80,10 +62,7 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_GET_QUEUE_NUM),
REQ(VHOST_USER_SET_VRING_ENABLE),
REQ(VHOST_USER_SEND_RARP),
REQ(VHOST_USER_NET_SET_MTU),
REQ(VHOST_USER_SET_SLAVE_REQ_FD),
REQ(VHOST_USER_IOTLB_MSG),
REQ(VHOST_USER_SET_VRING_ENDIAN),
REQ(VHOST_USER_INPUT_GET_CONFIG),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -102,9 +81,7 @@ vu_panic(VuDev *dev, const char *msg, ...)
va_list ap;
va_start(ap, msg);
if (vasprintf(&buf, msg, ap) < 0) {
buf = NULL;
}
buf = g_strdup_vprintf(msg, ap);
va_end(ap);
dev->broken = true;
@@ -726,8 +703,7 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ;
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
if (dev->iface->get_protocol_features) {
features |= dev->iface->get_protocol_features(dev);
@@ -780,23 +756,6 @@ vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
static bool
vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg)
{
if (vmsg->fd_num != 1) {
vu_panic(dev, "Invalid slave_req_fd message (%d fd's)", vmsg->fd_num);
return false;
}
if (dev->slave_fd != -1) {
close(dev->slave_fd);
}
dev->slave_fd = vmsg->fds[0];
DPRINT("Got slave_fd: %d\n", vmsg->fds[0]);
return false;
}
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -860,8 +819,6 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
return vu_get_queue_num_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ENABLE:
return vu_set_vring_enable_exec(dev, vmsg);
case VHOST_USER_SET_SLAVE_REQ_FD:
return vu_set_slave_req_fd(dev, vmsg);
case VHOST_USER_NONE:
break;
default:
@@ -896,7 +853,7 @@ vu_dispatch(VuDev *dev)
success = true;
end:
free(vmsg.data);
g_free(vmsg.data);
return success;
}
@@ -935,10 +892,6 @@ vu_deinit(VuDev *dev)
vu_close_log(dev);
if (dev->slave_fd != -1) {
close(dev->slave_fd);
dev->slave_fd = -1;
}
if (dev->sock != -1) {
close(dev->sock);
@@ -969,7 +922,6 @@ vu_init(VuDev *dev,
dev->remove_watch = remove_watch;
dev->iface = iface;
dev->log_call_fd = -1;
dev->slave_fd = -1;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
dev->vq[i] = (VuVirtq) {
.call_fd = -1, .kick_fd = -1, .err_fd = -1,
@@ -991,12 +943,6 @@ vu_queue_enabled(VuDev *dev, VuVirtq *vq)
return vq->enable;
}
bool
vu_queue_started(const VuDev *dev, const VuVirtq *vq)
{
return vq->started;
}
static inline uint16_t
vring_avail_flags(VuVirtq *vq)
{

View File

@@ -34,10 +34,6 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
VHOST_USER_PROTOCOL_F_NET_MTU = 4,
VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -65,10 +61,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SET_MTU = 20,
VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_SET_VRING_ENDIAN = 23,
VHOST_USER_INPUT_GET_CONFIG = 20,
VHOST_USER_MAX
} VhostUserRequest;
@@ -226,7 +219,6 @@ struct VuDev {
VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
int log_call_fd;
int slave_fd;
uint64_t log_size;
uint8_t *log_table;
uint64_t features;
@@ -342,15 +334,6 @@ void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
*/
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_started:
* @dev: a VuDev context
* @vq: a VuVirtq queue
*
* Returns: whether the queue is started.
*/
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);
/**
* vu_queue_empty:
* @dev: a VuDev context
@@ -375,8 +358,7 @@ void vu_queue_notify(VuDev *dev, VuVirtq *vq);
* @vq: a VuVirtq queue
* @sz: the size of struct to return (must be >= VuVirtqElement)
*
* Returns: a VuVirtqElement filled from the queue or NULL. The
* returned element must be free()-d by the caller.
* Returns: a VuVirtqElement filled from the queue or NULL.
*/
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
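
The protocol features removed from this header pair with the vu_get_protocol_features_exec() hunk earlier: each feature is one bit in a 64-bit mask that the newer branch advertises and 2.10 does not. A worked example of how the mask composes (enumerator values copied from the header above):

/* How the feature mask in the earlier hunk composes: bit 1 is the
 * 2.10 baseline, bit 5 is the SLAVE_REQ feature only master adds. */
#include <inttypes.h>
#include <stdio.h>

enum {
    F_LOG_SHMFD = 1,   /* VHOST_USER_PROTOCOL_F_LOG_SHMFD */
    F_SLAVE_REQ = 5,   /* VHOST_USER_PROTOCOL_F_SLAVE_REQ (master only) */
};

int main(void)
{
    uint64_t features = 1ULL << F_LOG_SHMFD;   /* 2.10 baseline */

    features |= 1ULL << F_SLAVE_REQ;           /* the bit master adds */
    printf("feature mask = 0x%" PRIx64 "\n", features);   /* 0x22 */
    return 0;
}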

View File

@@ -11,33 +11,263 @@
*/
#include "qemu/osdep.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "standard-headers/linux/virtio_scsi.h"
#include "contrib/libvhost-user/libvhost-user.h"
#include "hw/virtio/virtio-scsi.h"
#include "iscsi/iscsi.h"
#include "iscsi/scsi-lowlevel.h"
#include <glib.h>
/* Small compat shim from glib 2.32 */
#ifndef G_SOURCE_CONTINUE
#define G_SOURCE_CONTINUE TRUE
#endif
#ifndef G_SOURCE_REMOVE
#define G_SOURCE_REMOVE FALSE
#endif
/* #define VUS_DEBUG 1 */
/** Log helpers **/
#define PPRE \
struct timespec ts; \
char timebuf[64]; \
struct tm tm; \
(void)clock_gettime(CLOCK_REALTIME, &ts); \
(void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
#define PEXT(lvl, msg, ...) do { \
PPRE; \
fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n", \
timebuf, ts.tv_nsec / 1000, \
__FILE__, __func__, __LINE__, ## __VA_ARGS__); \
} while (0)
#define PNOR(lvl, msg, ...) do { \
PPRE; \
fprintf(stderr, "%s.%06ld " lvl ": " msg "\n", \
timebuf, ts.tv_nsec / 1000, ## __VA_ARGS__); \
} while (0)
#ifdef VUS_DEBUG
#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__)
#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__)
#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__)
#else
#define PDBG(msg, ...) { }
#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__)
#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__)
#endif
/** vhost-user-scsi specific definitions **/
/* Only 1 LUN and device supported today */
#define VUS_MAX_LUNS 1
#define VUS_MAX_DEVS 1
#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
typedef struct VusIscsiLun {
typedef struct iscsi_lun {
struct iscsi_context *iscsi_ctx;
int iscsi_lun;
} VusIscsiLun;
} iscsi_lun_t;
typedef struct VusDev {
VugDev parent;
VusIscsiLun lun;
typedef struct vhost_scsi_dev {
VuDev vu_dev;
int server_sock;
GMainLoop *loop;
} VusDev;
GTree *fdmap; /* fd -> gsource context id */
iscsi_lun_t luns[VUS_MAX_LUNS];
} vhost_scsi_dev_t;
static vhost_scsi_dev_t *vhost_scsi_devs[VUS_MAX_DEVS];
/** glib event loop integration for libvhost-user and misc callbacks **/
QEMU_BUILD_BUG_ON((int)G_IO_IN != (int)VU_WATCH_IN);
QEMU_BUILD_BUG_ON((int)G_IO_OUT != (int)VU_WATCH_OUT);
QEMU_BUILD_BUG_ON((int)G_IO_PRI != (int)VU_WATCH_PRI);
QEMU_BUILD_BUG_ON((int)G_IO_ERR != (int)VU_WATCH_ERR);
QEMU_BUILD_BUG_ON((int)G_IO_HUP != (int)VU_WATCH_HUP);
typedef struct vus_gsrc {
GSource parent;
vhost_scsi_dev_t *vdev_scsi;
GPollFD gfd;
vu_watch_cb vu_cb;
} vus_gsrc_t;
static gint vus_fdmap_compare(gconstpointer a, gconstpointer b)
{
return (b > a) - (b < a);
}
static gboolean vus_gsrc_prepare(GSource *src, gint *timeout)
{
assert(timeout);
*timeout = -1;
return FALSE;
}
static gboolean vus_gsrc_check(GSource *src)
{
vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
assert(vus_src);
return vus_src->gfd.revents & vus_src->gfd.events;
}
static gboolean vus_gsrc_dispatch(GSource *src, GSourceFunc cb, gpointer data)
{
vhost_scsi_dev_t *vdev_scsi;
vus_gsrc_t *vus_src = (vus_gsrc_t *)src;
assert(vus_src);
assert(!(vus_src->vu_cb && cb));
vdev_scsi = vus_src->vdev_scsi;
assert(vdev_scsi);
if (cb) {
return cb(data);
}
if (vus_src->vu_cb) {
vus_src->vu_cb(&vdev_scsi->vu_dev, vus_src->gfd.revents, data);
}
return G_SOURCE_CONTINUE;
}
static GSourceFuncs vus_gsrc_funcs = {
vus_gsrc_prepare,
vus_gsrc_check,
vus_gsrc_dispatch,
NULL
};
static int vus_gsrc_new(vhost_scsi_dev_t *vdev_scsi, int fd, GIOCondition cond,
vu_watch_cb vu_cb, GSourceFunc gsrc_cb, gpointer data)
{
GSource *vus_gsrc;
vus_gsrc_t *vus_src;
guint id;
assert(vdev_scsi);
assert(fd >= 0);
assert(vu_cb || gsrc_cb);
assert(!(vu_cb && gsrc_cb));
vus_gsrc = g_source_new(&vus_gsrc_funcs, sizeof(vus_gsrc_t));
if (!vus_gsrc) {
PERR("Error creating GSource for new watch");
return -1;
}
vus_src = (vus_gsrc_t *)vus_gsrc;
vus_src->vdev_scsi = vdev_scsi;
vus_src->gfd.fd = fd;
vus_src->gfd.events = cond;
vus_src->vu_cb = vu_cb;
g_source_add_poll(vus_gsrc, &vus_src->gfd);
g_source_set_callback(vus_gsrc, gsrc_cb, data, NULL);
id = g_source_attach(vus_gsrc, NULL);
assert(id);
g_source_unref(vus_gsrc);
g_tree_insert(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd,
(gpointer)(uintptr_t)id);
return 0;
}
/* from libiscsi's scsi-lowlevel.h **
*
* nb. We can't directly include scsi-lowlevel.h due to a namespace conflict:
* QEMU's scsi.h also defines "SCSI_XFER_NONE".
*/
#define SCSI_CDB_MAX_SIZE 16
struct scsi_iovector {
struct scsi_iovec *iov;
int niov;
int nalloc;
size_t offset;
int consumed;
};
struct scsi_allocated_memory {
struct scsi_allocated_memory *next;
char buf[0];
};
struct scsi_data {
int size;
unsigned char *data;
};
enum scsi_sense_key {
SCSI_SENSE_NO_SENSE = 0x00,
SCSI_SENSE_RECOVERED_ERROR = 0x01,
SCSI_SENSE_NOT_READY = 0x02,
SCSI_SENSE_MEDIUM_ERROR = 0x03,
SCSI_SENSE_HARDWARE_ERROR = 0x04,
SCSI_SENSE_ILLEGAL_REQUEST = 0x05,
SCSI_SENSE_UNIT_ATTENTION = 0x06,
SCSI_SENSE_DATA_PROTECTION = 0x07,
SCSI_SENSE_BLANK_CHECK = 0x08,
SCSI_SENSE_VENDOR_SPECIFIC = 0x09,
SCSI_SENSE_COPY_ABORTED = 0x0a,
SCSI_SENSE_COMMAND_ABORTED = 0x0b,
SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c,
SCSI_SENSE_OVERFLOW_COMMAND = 0x0d,
SCSI_SENSE_MISCOMPARE = 0x0e
};
struct scsi_sense {
unsigned char error_type;
enum scsi_sense_key key;
int ascq;
unsigned sense_specific:1;
unsigned ill_param_in_cdb:1;
unsigned bit_pointer_valid:1;
unsigned char bit_pointer;
uint16_t field_pointer;
};
enum scsi_residual {
SCSI_RESIDUAL_NO_RESIDUAL = 0,
SCSI_RESIDUAL_UNDERFLOW,
SCSI_RESIDUAL_OVERFLOW
};
struct scsi_task {
int status;
int cdb_size;
int xfer_dir;
int expxferlen;
unsigned char cdb[SCSI_CDB_MAX_SIZE];
enum scsi_residual residual_status;
size_t residual;
struct scsi_sense sense;
struct scsi_data datain;
struct scsi_allocated_memory *mem;
void *ptr;
uint32_t itt;
uint32_t cmdsn;
uint32_t lun;
struct scsi_iovector iovector_in;
struct scsi_iovector iovector_out;
};
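
The declarations above are copied into vhost-user-scsi.c because, as the comment notes, libiscsi's scsi-lowlevel.h and QEMU's scsi.h both define SCSI_XFER_NONE, so the two headers cannot meet in one translation unit. A minimal illustration of that kind of clash (stand-in enums, not the real headers):

/* Two headers that both define SCSI_XFER_NONE cannot land in one
 * translation unit; copying the needed declarations locally, as done
 * above, sidesteps the redeclaration. The enums below are stand-ins
 * for qemu's scsi.h and libiscsi's scsi-lowlevel.h. */
enum qemu_xfer  { SCSI_XFER_NONE, SCSI_XFER_FROM_DEV, SCSI_XFER_TO_DEV };
/* enum iscsi_xfer { SCSI_XFER_NONE, ... };  <-- redeclaration error */

int main(void)
{
    return SCSI_XFER_NONE;   /* only one definition may be visible */
}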
/** libiscsi integration **/
typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq;
typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp;
static int vus_iscsi_add_lun(VusIscsiLun *lun, char *iscsi_uri)
static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri)
{
struct iscsi_url *iscsi_url;
struct iscsi_context *iscsi_ctx;
@@ -45,32 +275,30 @@ static int vus_iscsi_add_lun(VusIscsiLun *lun, char *iscsi_uri)
assert(lun);
assert(iscsi_uri);
assert(!lun->iscsi_ctx);
iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR);
if (!iscsi_ctx) {
g_warning("Unable to create iSCSI context");
PERR("Unable to create iSCSI context");
return -1;
}
iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
if (!iscsi_url) {
g_warning("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
goto fail;
}
iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
g_warning("Unable to login to iSCSI portal: %s",
iscsi_get_error(iscsi_ctx));
PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx));
goto fail;
}
lun->iscsi_ctx = iscsi_ctx;
lun->iscsi_lun = iscsi_url->lun;
g_debug("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
out:
if (iscsi_url) {
@@ -85,14 +313,18 @@ fail:
}
static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
int xfer_len)
{
int xfer_len) {
struct scsi_task *task;
assert(cdb_len > 0);
assert(cdb);
task = g_new0(struct scsi_task, 1);
task = calloc(1, sizeof(struct scsi_task));
if (!task) {
PERR("Error allocating task: %s", strerror(errno));
return NULL;
}
memcpy(task->cdb, cdb, cdb_len);
task->cdb_size = cdb_len;
task->xfer_dir = dir;
@@ -112,7 +344,7 @@ static int get_cdb_len(uint8_t *cdb)
case 4: return 16;
case 5: return 12;
}
g_warning("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5);
PERR("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5);
return -1;
}
@@ -120,8 +352,7 @@ static int handle_cmd_sync(struct iscsi_context *ctx,
VirtIOSCSICmdReq *req,
struct iovec *out, unsigned int out_len,
VirtIOSCSICmdResp *rsp,
struct iovec *in, unsigned int in_len)
{
struct iovec *in, unsigned int in_len) {
struct scsi_task *task;
uint32_t dir;
uint32_t len;
@@ -134,7 +365,7 @@ static int handle_cmd_sync(struct iscsi_context *ctx,
if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) {
/* Ignore anything different than target=0, lun=0 */
g_debug("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
req->lun[1], req->lun[3]);
rsp->status = SCSI_STATUS_CHECK_CONDITION;
memset(rsp->sense, 0, sizeof(rsp->sense));
@@ -156,32 +387,36 @@ static int handle_cmd_sync(struct iscsi_context *ctx,
if (!out_len && !in_len) {
dir = SCSI_XFER_NONE;
} else if (out_len) {
dir = SCSI_XFER_WRITE;
dir = SCSI_XFER_TO_DEV;
for (i = 0; i < out_len; i++) {
len += out[i].iov_len;
}
} else {
dir = SCSI_XFER_READ;
dir = SCSI_XFER_FROM_DEV;
for (i = 0; i < in_len; i++) {
len += in[i].iov_len;
}
}
task = scsi_task_new(cdb_len, req->cdb, dir, len);
if (!task) {
PERR("Unable to create iscsi task");
return -1;
}
if (dir == SCSI_XFER_WRITE) {
if (dir == SCSI_XFER_TO_DEV) {
task->iovector_out.iov = (struct scsi_iovec *)out;
task->iovector_out.niov = out_len;
} else if (dir == SCSI_XFER_READ) {
} else if (dir == SCSI_XFER_FROM_DEV) {
task->iovector_in.iov = (struct scsi_iovec *)in;
task->iovector_in.niov = in_len;
}
g_debug("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
cdb_len, dir, task);
if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
g_warning("Error serving SCSI command");
g_free(task);
PERR("Error serving SCSI command");
free(task);
return -1;
}
@@ -196,9 +431,9 @@ static int handle_cmd_sync(struct iscsi_context *ctx,
memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
}
g_free(task);
free(task);
g_debug("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
rsp->status, rsp->resid, rsp->response, rsp->sense_len);
return 0;
@@ -206,46 +441,116 @@ static int handle_cmd_sync(struct iscsi_context *ctx,
/** libvhost-user callbacks **/
static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev);
static void vus_panic_cb(VuDev *vu_dev, const char *buf)
{
VugDev *gdev;
VusDev *vdev_scsi;
vhost_scsi_dev_t *vdev_scsi;
assert(vu_dev);
gdev = container_of(vu_dev, VugDev, parent);
vdev_scsi = container_of(gdev, VusDev, parent);
vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
if (buf) {
g_warning("vu_panic: %s", buf);
PERR("vu_panic: %s", buf);
}
g_main_loop_quit(vdev_scsi->loop);
if (vdev_scsi) {
assert(vdev_scsi->loop);
g_main_loop_quit(vdev_scsi->loop);
}
}
static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb,
void *pvt) {
vhost_scsi_dev_t *vdev_scsi;
guint id;
assert(vu_dev);
assert(fd >= 0);
assert(cb);
vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
if (!vdev_scsi) {
vus_panic_cb(vu_dev, NULL);
return;
}
id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
(gpointer)(uintptr_t)fd);
if (id) {
GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
assert(vus_src);
g_source_destroy(vus_src);
(void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
}
if (vus_gsrc_new(vdev_scsi, fd, vu_evt, cb, NULL, pvt)) {
vus_panic_cb(vu_dev, NULL);
}
}
static void vus_del_watch_cb(VuDev *vu_dev, int fd)
{
vhost_scsi_dev_t *vdev_scsi;
guint id;
assert(vu_dev);
assert(fd >= 0);
vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
if (!vdev_scsi) {
vus_panic_cb(vu_dev, NULL);
return;
}
id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap,
(gpointer)(uintptr_t)fd);
if (id) {
GSource *vus_src = g_main_context_find_source_by_id(NULL, id);
assert(vus_src);
g_source_destroy(vus_src);
(void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd);
}
}
static void vus_proc_ctl(VuDev *vu_dev, int idx)
{
/* Control VQ not implemented */
}
static void vus_proc_evt(VuDev *vu_dev, int idx)
{
/* Event VQ not implemented */
}
static void vus_proc_req(VuDev *vu_dev, int idx)
{
VugDev *gdev;
VusDev *vdev_scsi;
vhost_scsi_dev_t *vdev_scsi;
VuVirtq *vq;
assert(vu_dev);
gdev = container_of(vu_dev, VugDev, parent);
vdev_scsi = container_of(gdev, VusDev, parent);
if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) {
g_warning("VQ Index out of range: %d", idx);
vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
if (!vdev_scsi) {
vus_panic_cb(vu_dev, NULL);
return;
}
if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
PERR("VQ Index out of range: %d", idx);
vus_panic_cb(vu_dev, NULL);
return;
}
vq = vu_get_queue(vu_dev, idx);
if (!vq) {
g_warning("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
vus_panic_cb(vu_dev, NULL);
return;
}
g_debug("Got kicked on vq[%d]@%p", idx, vq);
PDBG("Got kicked on vq[%d]@%p", idx, vq);
while (1) {
VuVirtqElement *elem;
@@ -254,29 +559,29 @@ static void vus_proc_req(VuDev *vu_dev, int idx)
elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
if (!elem) {
g_debug("No more elements pending on vq[%d]@%p", idx, vq);
PDBG("No more elements pending on vq[%d]@%p", idx, vq);
break;
}
g_debug("Popped elem@%p", elem);
PDBG("Popped elem@%p", elem);
assert(!(elem->out_num > 1 && elem->in_num > 1));
assert(elem->out_num > 0 && elem->in_num > 0);
assert(!((elem->out_num > 1) && (elem->in_num > 1)));
assert((elem->out_num > 0) && (elem->in_num > 0));
if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
g_warning("Invalid virtio-scsi req header");
PERR("Invalid virtio-scsi req header");
vus_panic_cb(vu_dev, NULL);
break;
}
req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
g_warning("Invalid virtio-scsi rsp header");
PERR("Invalid virtio-scsi rsp header");
vus_panic_cb(vu_dev, NULL);
break;
}
rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
if (handle_cmd_sync(vdev_scsi->lun.iscsi_ctx,
if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx,
req, &elem->out_sg[1], elem->out_num - 1,
rsp, &elem->in_sg[1], elem->in_num - 1) != 0) {
vus_panic_cb(vu_dev, NULL);
@@ -296,17 +601,22 @@ static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started)
assert(vu_dev);
if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) {
g_warning("VQ Index out of range: %d", idx);
if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
PERR("VQ Index out of range: %d", idx);
vus_panic_cb(vu_dev, NULL);
return;
}
vq = vu_get_queue(vu_dev, idx);
if (idx == 0 || idx == 1) {
g_debug("queue %d unimplemented", idx);
} else {
switch (idx) {
case 0:
vu_set_queue_handler(vu_dev, vq, started ? vus_proc_ctl : NULL);
break;
case 1:
vu_set_queue_handler(vu_dev, vq, started ? vus_proc_evt : NULL);
break;
default:
vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL);
}
}
@@ -315,6 +625,21 @@ static const VuDevIface vus_iface = {
.queue_set_started = vus_queue_set_started,
};
static gboolean vus_vhost_cb(gpointer data)
{
VuDev *vu_dev = (VuDev *)data;
assert(vu_dev);
if (!vu_dispatch(vu_dev)) {
PERR("Error processing vhost message");
vus_panic_cb(vu_dev, NULL);
return G_SOURCE_REMOVE;
}
return G_SOURCE_CONTINUE;
}
/** misc helpers **/
static int unix_sock_new(char *unix_fn)
@@ -356,22 +681,159 @@ fail:
/** vhost-user-scsi **/
static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev)
{
int i;
assert(vu_dev);
for (i = 0; i < VUS_MAX_DEVS; i++) {
if (&vhost_scsi_devs[i]->vu_dev == vu_dev) {
return vhost_scsi_devs[i];
}
}
PERR("Unknown VuDev %p", vu_dev);
return NULL;
}
static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi)
{
if (!vdev_scsi) {
return;
}
if (vdev_scsi->server_sock >= 0) {
struct sockaddr_storage ss;
socklen_t sslen = sizeof(ss);
if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss,
&sslen) == 0) {
struct sockaddr_un *su = (struct sockaddr_un *)&ss;
(void)unlink(su->sun_path);
}
(void)close(vdev_scsi->server_sock);
vdev_scsi->server_sock = -1;
}
if (vdev_scsi->loop) {
g_main_loop_unref(vdev_scsi->loop);
vdev_scsi->loop = NULL;
}
}
static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn)
{
vhost_scsi_dev_t *vdev_scsi = NULL;
assert(unix_fn);
vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t));
if (!vdev_scsi) {
PERR("calloc: %s", strerror(errno));
return NULL;
}
vdev_scsi->server_sock = unix_sock_new(unix_fn);
if (vdev_scsi->server_sock < 0) {
goto err;
}
vdev_scsi->loop = g_main_loop_new(NULL, FALSE);
if (!vdev_scsi->loop) {
PERR("Error creating glib event loop");
goto err;
}
vdev_scsi->fdmap = g_tree_new(vus_fdmap_compare);
if (!vdev_scsi->fdmap) {
PERR("Error creating glib tree for fdmap");
goto err;
}
return vdev_scsi;
err:
vdev_scsi_deinit(vdev_scsi);
free(vdev_scsi);
return NULL;
}
static int vdev_scsi_add_iscsi_lun(vhost_scsi_dev_t *vdev_scsi,
char *iscsi_uri, uint32_t lun) {
assert(vdev_scsi);
assert(iscsi_uri);
assert(lun < VUS_MAX_LUNS);
if (vdev_scsi->luns[lun].iscsi_ctx) {
PERR("Lun %d already configured", lun);
return -1;
}
if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) {
return -1;
}
return 0;
}
static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi)
{
int cli_sock;
int ret = 0;
assert(vdev_scsi);
assert(vdev_scsi->server_sock >= 0);
assert(vdev_scsi->loop);
cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0);
if (cli_sock < 0) {
perror("accept");
return -1;
}
vu_init(&vdev_scsi->vu_dev,
cli_sock,
vus_panic_cb,
vus_add_watch_cb,
vus_del_watch_cb,
&vus_iface);
if (vus_gsrc_new(vdev_scsi, cli_sock, G_IO_IN, NULL, vus_vhost_cb,
&vdev_scsi->vu_dev)) {
goto fail;
}
g_main_loop_run(vdev_scsi->loop);
out:
vu_deinit(&vdev_scsi->vu_dev);
return ret;
fail:
ret = -1;
goto out;
}
int main(int argc, char **argv)
{
VusDev *vdev_scsi = NULL;
vhost_scsi_dev_t *vdev_scsi = NULL;
char *unix_fn = NULL;
char *iscsi_uri = NULL;
int lsock = -1, csock = -1, opt, err = EXIT_SUCCESS;
int opt, err = EXIT_SUCCESS;
while ((opt = getopt(argc, argv, "u:i:")) != -1) {
switch (opt) {
case 'h':
goto help;
case 'u':
unix_fn = g_strdup(optarg);
unix_fn = strdup(optarg);
break;
case 'i':
iscsi_uri = g_strdup(optarg);
iscsi_uri = strdup(optarg);
break;
default:
goto help;
@@ -381,44 +843,31 @@ int main(int argc, char **argv)
goto help;
}
lsock = unix_sock_new(unix_fn);
if (lsock < 0) {
vdev_scsi = vdev_scsi_new(unix_fn);
if (!vdev_scsi) {
goto err;
}
vhost_scsi_devs[0] = vdev_scsi;
if (vdev_scsi_add_iscsi_lun(vdev_scsi, iscsi_uri, 0) != 0) {
goto err;
}
csock = accept(lsock, NULL, NULL);
if (csock < 0) {
perror("accept");
if (vdev_scsi_run(vdev_scsi) != 0) {
goto err;
}
vdev_scsi = g_new0(VusDev, 1);
vdev_scsi->loop = g_main_loop_new(NULL, FALSE);
if (vus_iscsi_add_lun(&vdev_scsi->lun, iscsi_uri) != 0) {
goto err;
}
vug_init(&vdev_scsi->parent, csock, vus_panic_cb, &vus_iface);
g_main_loop_run(vdev_scsi->loop);
vug_deinit(&vdev_scsi->parent);
out:
if (vdev_scsi) {
g_main_loop_unref(vdev_scsi->loop);
g_free(vdev_scsi);
unlink(unix_fn);
vdev_scsi_deinit(vdev_scsi);
free(vdev_scsi);
}
if (csock >= 0) {
close(csock);
if (unix_fn) {
free(unix_fn);
}
if (lsock >= 0) {
close(lsock);
if (iscsi_uri) {
free(iscsi_uri);
}
g_free(unix_fn);
g_free(iscsi_uri);
return err;
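For orientation, the pieces above compose as follows. This is a minimal sketch, not part of the diff, assuming the vdev_scsi_* signatures shown in the hunks; run_vus_daemon_sketch itself and its parameters are illustrative only:

static int run_vus_daemon_sketch(char *socket_path, char *iscsi_uri)
{
    /* creates the listening socket, the glib main loop and the fd map */
    vhost_scsi_dev_t *dev = vdev_scsi_new(socket_path);
    if (!dev) {
        return -1;
    }
    /* register so vdev_scsi_find_by_vu() can map callbacks back to us */
    vhost_scsi_devs[0] = dev;
    if (vdev_scsi_add_iscsi_lun(dev, iscsi_uri, 0) != 0 ||
        vdev_scsi_run(dev) != 0) {   /* accept(), vu_init(), g_main_loop_run() */
        vdev_scsi_deinit(dev);
        free(dev);
        return -1;
    }
    vdev_scsi_deinit(dev);
    free(dev);
    return 0;
}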

cpus.c

@@ -843,19 +843,11 @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
return;
}
if (qemu_in_vcpu_thread()) {
/* A CPU is currently running; kick it back out to the
* tcg_cpu_exec() loop so it will recalculate its
* icount deadline immediately.
*/
qemu_cpu_kick(current_cpu);
} else if (first_cpu) {
if (!qemu_in_vcpu_thread() && first_cpu) {
/* qemu_cpu_kick is not enough to kick a halted CPU out of
* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
* causes cpu_thread_is_idle to return false. This way,
* handle_icount_deadline can run.
* If we have no CPUs at all for some reason, we don't
* need to do anything.
*/
async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
}
@@ -1315,7 +1307,6 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
CPUState *cpu = arg;
rcu_register_thread();
tcg_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
@@ -1463,7 +1454,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
g_assert(!use_icount);
rcu_register_thread();
tcg_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
@@ -1674,18 +1664,6 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
static int tcg_region_inited;
/*
* Initialize TCG regions--once. Now is a good time, because:
* (1) TCG's init context, prologue and target globals have been set up.
* (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
* -accel flag is processed, so the check doesn't work then).
*/
if (!tcg_region_inited) {
tcg_region_inited = 1;
tcg_region_init();
}
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
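The async_run_on_cpu() call in the qemu_timer_notify_cb() hunk above queues a deliberately empty work item; the helper it names is defined elsewhere in cpus.c, along the lines of this sketch:

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
    /* Intentionally empty: merely having a queued work item makes
     * cpu_thread_is_idle() return false, which is what lets
     * handle_icount_deadline() run. */
}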

crypto/block-luks.c

@@ -257,41 +257,47 @@ qcrypto_block_luks_cipher_alg_lookup(QCryptoCipherAlgorithm alg,
}
error_setg(errp, "Algorithm '%s' not supported",
QCryptoCipherAlgorithm_str(alg));
QCryptoCipherAlgorithm_lookup[alg]);
return NULL;
}
/* XXX replace with qapi_enum_parse() in future, when we can
* make that function emit a more friendly error message */
static int qcrypto_block_luks_name_lookup(const char *name,
const QEnumLookup *map,
const char *const *map,
size_t maplen,
const char *type,
Error **errp)
{
int ret = qapi_enum_parse(map, name, -1, NULL);
if (ret < 0) {
error_setg(errp, "%s %s not supported", type, name);
return 0;
size_t i;
for (i = 0; i < maplen; i++) {
if (g_str_equal(map[i], name)) {
return i;
}
}
return ret;
error_setg(errp, "%s %s not supported", type, name);
return 0;
}
#define qcrypto_block_luks_cipher_mode_lookup(name, errp) \
qcrypto_block_luks_name_lookup(name, \
&QCryptoCipherMode_lookup, \
QCryptoCipherMode_lookup, \
QCRYPTO_CIPHER_MODE__MAX, \
"Cipher mode", \
errp)
#define qcrypto_block_luks_hash_name_lookup(name, errp) \
qcrypto_block_luks_name_lookup(name, \
&QCryptoHashAlgorithm_lookup, \
QCryptoHashAlgorithm_lookup, \
QCRYPTO_HASH_ALG__MAX, \
"Hash algorithm", \
errp)
#define qcrypto_block_luks_ivgen_name_lookup(name, errp) \
qcrypto_block_luks_name_lookup(name, \
&QCryptoIVGenAlgorithm_lookup, \
QCryptoIVGenAlgorithm_lookup, \
QCRYPTO_IVGEN_ALG__MAX, \
"IV generator", \
errp)
@@ -392,7 +398,7 @@ qcrypto_block_luks_essiv_cipher(QCryptoCipherAlgorithm cipher,
break;
default:
error_setg(errp, "Cipher %s not supported with essiv",
QCryptoCipherAlgorithm_str(cipher));
QCryptoCipherAlgorithm_lookup[cipher]);
return 0;
}
}
@@ -846,9 +852,8 @@ qcrypto_block_luks_open(QCryptoBlock *block,
}
}
block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
block->payload_offset = luks->header.payload_offset *
block->sector_size;
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
luks->cipher_alg = cipheralg;
luks->cipher_mode = ciphermode;
@@ -963,16 +968,16 @@ qcrypto_block_luks_create(QCryptoBlock *block,
goto error;
}
cipher_mode = QCryptoCipherMode_str(luks_opts.cipher_mode);
ivgen_alg = QCryptoIVGenAlgorithm_str(luks_opts.ivgen_alg);
cipher_mode = QCryptoCipherMode_lookup[luks_opts.cipher_mode];
ivgen_alg = QCryptoIVGenAlgorithm_lookup[luks_opts.ivgen_alg];
if (luks_opts.has_ivgen_hash_alg) {
ivgen_hash_alg = QCryptoHashAlgorithm_str(luks_opts.ivgen_hash_alg);
ivgen_hash_alg = QCryptoHashAlgorithm_lookup[luks_opts.ivgen_hash_alg];
cipher_mode_spec = g_strdup_printf("%s-%s:%s", cipher_mode, ivgen_alg,
ivgen_hash_alg);
} else {
cipher_mode_spec = g_strdup_printf("%s-%s", cipher_mode, ivgen_alg);
}
hash_alg = QCryptoHashAlgorithm_str(luks_opts.hash_alg);
hash_alg = QCryptoHashAlgorithm_lookup[luks_opts.hash_alg];
if (strlen(cipher_alg) >= QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN) {
@@ -1241,9 +1246,8 @@ qcrypto_block_luks_create(QCryptoBlock *block,
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) *
QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
block->payload_offset = luks->header.payload_offset *
block->sector_size;
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
/* Reserve header space to match payload offset */
initfunc(block, block->payload_offset, opaque, &local_err);
@@ -1399,33 +1403,29 @@ static void qcrypto_block_luks_cleanup(QCryptoBlock *block)
static int
qcrypto_block_luks_decrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE));
return qcrypto_block_decrypt_helper(block->cipher,
block->niv, block->ivgen,
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
offset, buf, len, errp);
startsector, buf, len, errp);
}
static int
qcrypto_block_luks_encrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE));
return qcrypto_block_encrypt_helper(block->cipher,
block->niv, block->ivgen,
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
offset, buf, len, errp);
startsector, buf, len, errp);
}

crypto/block-qcow.c

@@ -80,7 +80,6 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
goto fail;
}
block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE;
block->payload_offset = 0;
return 0;
@@ -143,33 +142,29 @@ qcrypto_block_qcow_cleanup(QCryptoBlock *block)
static int
qcrypto_block_qcow_decrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE));
return qcrypto_block_decrypt_helper(block->cipher,
block->niv, block->ivgen,
QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
offset, buf, len, errp);
startsector, buf, len, errp);
}
static int
qcrypto_block_qcow_encrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE));
return qcrypto_block_encrypt_helper(block->cipher,
block->niv, block->ivgen,
QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
offset, buf, len, errp);
startsector, buf, len, errp);
}

crypto/block.c

@@ -61,7 +61,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_str(options->format));
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -92,7 +92,7 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) ||
!qcrypto_block_drivers[options->format]) {
error_setg(errp, "Unsupported block driver %s",
QCryptoBlockFormat_str(options->format));
QCryptoBlockFormat_lookup[options->format]);
g_free(block);
return NULL;
}
@@ -127,22 +127,22 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
int qcrypto_block_decrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
return block->driver->decrypt(block, offset, buf, len, errp);
return block->driver->decrypt(block, startsector, buf, len, errp);
}
int qcrypto_block_encrypt(QCryptoBlock *block,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
return block->driver->encrypt(block, offset, buf, len, errp);
return block->driver->encrypt(block, startsector, buf, len, errp);
}
@@ -170,12 +170,6 @@ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block)
}
uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block)
{
return block->sector_size;
}
void qcrypto_block_free(QCryptoBlock *block)
{
if (!block) {
@@ -194,17 +188,13 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
size_t niv,
QCryptoIVGen *ivgen,
int sectorsize,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
uint8_t *iv;
int ret = -1;
uint64_t startsector = offset / sectorsize;
assert(QEMU_IS_ALIGNED(offset, sectorsize));
assert(QEMU_IS_ALIGNED(len, sectorsize));
iv = niv ? g_new0(uint8_t, niv) : NULL;
@@ -247,17 +237,13 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
size_t niv,
QCryptoIVGen *ivgen,
int sectorsize,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp)
{
uint8_t *iv;
int ret = -1;
uint64_t startsector = offset / sectorsize;
assert(QEMU_IS_ALIGNED(offset, sectorsize));
assert(QEMU_IS_ALIGNED(len, sectorsize));
iv = niv ? g_new0(uint8_t, niv) : NULL;

crypto/blockpriv.h

@@ -36,7 +36,6 @@ struct QCryptoBlock {
QCryptoHashAlgorithm kdfhash;
size_t niv;
uint64_t payload_offset; /* In bytes */
uint64_t sector_size; /* In bytes */
};
struct QCryptoBlockDriver {
@@ -82,7 +81,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
size_t niv,
QCryptoIVGen *ivgen,
int sectorsize,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp);
@@ -91,7 +90,7 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
size_t niv,
QCryptoIVGen *ivgen,
int sectorsize,
uint64_t offset,
uint64_t startsector,
uint8_t *buf,
size_t len,
Error **errp);

crypto/cipher-afalg.c

@@ -52,7 +52,7 @@ qcrypto_afalg_cipher_format_name(QCryptoCipherAlgorithm alg,
return NULL;
}
mode_name = QCryptoCipherMode_str(mode);
mode_name = QCryptoCipherMode_lookup[mode];
name = g_strdup_printf("%s(%s)", mode_name, alg_name);
return name;
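The string built here follows the Linux AF_ALG naming scheme, in which the mode wraps the algorithm; for example (values illustrative):

/* mode_name = "xts", alg_name = "aes" */
name = g_strdup_printf("%s(%s)", mode_name, alg_name);   /* -> "xts(aes)" */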

crypto/cipher-builtin.c

@@ -247,7 +247,7 @@ qcrypto_cipher_init_aes(QCryptoCipherMode mode,
mode != QCRYPTO_CIPHER_MODE_ECB &&
mode != QCRYPTO_CIPHER_MODE_XTS) {
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(mode));
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -379,7 +379,7 @@ qcrypto_cipher_init_des_rfb(QCryptoCipherMode mode,
if (mode != QCRYPTO_CIPHER_MODE_ECB) {
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(mode));
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -440,7 +440,7 @@ static QCryptoCipherBuiltin *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(mode));
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -460,7 +460,7 @@ static QCryptoCipherBuiltin *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
default:
error_setg(errp,
"Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_str(alg));
QCryptoCipherAlgorithm_lookup[alg]);
return NULL;
}

crypto/cipher-gcrypt.c

@@ -105,7 +105,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(mode));
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -160,7 +160,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
default:
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_str(alg));
QCryptoCipherAlgorithm_lookup[alg]);
return NULL;
}

crypto/cipher-nettle.c

@@ -281,7 +281,7 @@ static QCryptoCipherNettle *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
break;
default:
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(mode));
QCryptoCipherMode_lookup[mode]);
return NULL;
}
@@ -420,7 +420,7 @@ static QCryptoCipherNettle *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
default:
error_setg(errp, "Unsupported cipher algorithm %s",
QCryptoCipherAlgorithm_str(alg));
QCryptoCipherAlgorithm_lookup[alg]);
goto error;
}
@@ -491,7 +491,7 @@ qcrypto_nettle_cipher_encrypt(QCryptoCipher *cipher,
default:
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(cipher->mode));
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;
@@ -537,7 +537,7 @@ qcrypto_nettle_cipher_decrypt(QCryptoCipher *cipher,
default:
error_setg(errp, "Unsupported cipher mode %s",
QCryptoCipherMode_str(cipher->mode));
QCryptoCipherMode_lookup[cipher->mode]);
return -1;
}
return 0;

Some files were not shown because too many files have changed in this diff.