console: fix console resize

Only skip surface reallocation in case the old surface was created using qemu_alloc_display (via qemu_create_displaysurface) too, otherwise we might end up with a DisplaySurface with the wrong backing storage. Cc: 1658634@bugs.launchpad.net Fixes: cd958edb1f Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Tested-by: Laszlo Ersek <lersek@redhat.com> Message-id: 1485256239-12219-1-git-send-email-kraxel@redhat.com
gtk: Hardcode LC_CTYPE as C.utf-8
2017-01-31 16:09:16 +01:00 · 2017-01-31 16:09:05 +01:00 · 2017-01-31 08:52:52 +01:00 · 2017-01-31 08:50:21 +01:00 · 2017-01-31 08:49:49 +01:00 · 2017-01-31 08:14:52 +01:00
1737 changed files with 90874 additions and 31720 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,7 @@
 /qmp-marshal.c
 /qemu-doc.html
 /qemu-doc.info
 /qemu-doc.txt
 /qemu-img
 /qemu-nbd
 /qemu-options.def
@@ -60,7 +61,6 @@
 *.a
 *.aux
 *.cp
 *.dvi
 *.exe
 *.msi
 *.dll
@@ -82,10 +82,6 @@
 *.d
 !/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
 *.lo
 *.la
 *.pc
 .libs
 .sdk
 *.gcda
 *.gcno
@@ -109,6 +105,15 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
 /docs/qemu-ga-ref.html
 /docs/qemu-ga-ref.txt
 /docs/qemu-qmp-ref.html
 /docs/qemu-qmp-ref.txt
 docs/qemu-ga-ref.info*
 docs/qemu-qmp-ref.info*
 /qemu-ga-qapi.texi
 /qemu-qapi.texi
 *.tps
 .stgit-*
 cscope.*
 tags
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,3 +31,6 @@
 [submodule "roms/u-boot"]
 	path = roms/u-boot
 	url = git://git.qemu-project.org/u-boot.git
 [submodule "roms/skiboot"]
 	path = roms/skiboot
 	url = git://git.qemu.org/skiboot.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,6 @@ python:
  - "2.4"
 compiler:
  - gcc
  - clang
 cache: ccache
 addons:
  apt:
@@ -68,6 +67,9 @@ script:
  - make -j3 && ${TEST_CMD}
 matrix:
  include:
    # Test with CLang for compile portability
    - env: CONFIG=""
      compiler: clang
    # gprof/gcov are GCC features
    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
      compiler: gcc
@@ -101,6 +103,26 @@ matrix:
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
    # Trusty build with latest stable clang
    - env: CONFIG=""
      sudo: required
      addons:
      dist: trusty
      language: generic
      compiler: none
      env:
        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
        - CONFIG="--cc=clang-3.9 --cxx=clang++-3.9"
      before_install:
        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
        - sudo apt-get update -qq
        - sudo apt-get install -qq -y clang-3.9
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
      before_script:
        - ./configure ${CONFIG} || cat config.log
    # Using newer GCC with sanitizers
    - addons:
        apt:
--- a/18
+++ b/18
@@ -1,10 +1,28 @@
 1. Preprocessor
 1.1. Variadic macros
 For variadic macros, stick with this C99-like syntax:
 #define DPRINTF(fmt, ...)                                       \
    do { printf("IRQ: " fmt, ## __VA_ARGS__); } while (0)
 1.2. Include directives
 Order include directives as follows:
 #include "qemu/osdep.h"  /* Always first... */
 #include <...>           /* then system headers... */
 #include "..."           /* and finally QEMU headers. */
 The "qemu/osdep.h" header contains preprocessor macros that affect the behavior
 of core system headers like <stdint.h>.  It must be the first include so that
 core system headers included by external libraries get the preprocessor macros
 that QEMU depends on.
 Do not include "qemu/osdep.h" from header files since the .c file will have
 already included it.
 2. C types
 It should be common sense to use the right type, but we have collected
--- a/180
+++ b/180
@@ -63,6 +63,17 @@ W: http://wiki.qemu.org/SecurityProcess
 M: Michael S. Tsirkin <mst@redhat.com>
 L: secalert@redhat.com
 Trivial patches
 ---------------
 Trivial patches
 M: Michael Tokarev <mjt@tls.msk.ru>
 M: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
 L: qemu-trivial@nongnu.org
 K: ^Subject:.*(?i)trivial
 T: git git://git.corpit.ru/qemu.git trivial-patches
 T: git git://github.com/vivier/qemu.git trivial-patches
 Guest CPU cores (TCG):
 ----------------------
 Overall
@@ -95,7 +106,7 @@ F: include/fpu/
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: target-alpha/
+F: target/alpha/
 F: hw/alpha/
 F: tests/tcg/alpha/
 F: disas/alpha.c
@@ -104,9 +115,10 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target-arm/
+F: target/arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
 F: include/hw/cpu/a*mpcore.h
 F: disas/arm.c
 F: disas/arm-a64.cc
 F: disas/libvixl/
@@ -114,16 +126,22 @@ F: disas/libvixl/
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target-cris/
+F: target/cris/
 F: hw/cris/
 F: include/hw/cris/
 F: tests/tcg/cris/
 F: disas/cris.c
 HPPA (PA-RISC)
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: target/hppa/
 F: disas/hppa.c
 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
-F: target-lm32/
+F: target/lm32/
 F: disas/lm32.c
 F: hw/lm32/
 F: hw/*/lm32_*
@@ -133,14 +151,15 @@ F: include/hw/lm32/
 F: tests/tcg/lm32/
 M68K
-S: Orphan
+M: Laurent Vivier <laurent@vivier.eu>
-F: target-m68k/
+S: Maintained
-F: hw/m68k/
+F: target/m68k/
 F: disas/m68k.c
 MicroBlaze
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target-microblaze/
+F: target/microblaze/
 F: hw/microblaze/
 F: disas/microblaze.c
@@ -148,7 +167,7 @@ MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
 M: Yongbok Kim <yongbok.kim@imgtec.com>
 S: Maintained
-F: target-mips/
+F: target/mips/
 F: hw/mips/
 F: hw/misc/mips_*
 F: hw/intc/mips_gic.c
@@ -163,15 +182,23 @@ F: disas/mips.c
 Moxie
 M: Anthony Green <green@moxielogic.com>
 S: Maintained
-F: target-moxie/
+F: target/moxie/
 F: disas/moxie.c
 F: hw/moxie/
 F: default-configs/moxie-softmmu.mak
 NiosII
 M: Chris Wulff <crwulff@gmail.com>
 M: Marek Vasut <marex@denx.de>
 S: Maintained
 F: target/nios2/
 F: hw/nios2/
 F: disas/nios2.c
 OpenRISC
 M: Jia Liu <proljc@gmail.com>
 S: Maintained
-F: target-openrisc/
+F: target/openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/
@@ -180,7 +207,7 @@ M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
-F: target-ppc/
+F: target/ppc/
 F: hw/ppc/
 F: include/hw/ppc/
 F: disas/ppc.c
@@ -189,14 +216,14 @@ S390
 M: Richard Henderson <rth@twiddle.net>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-s390x/
+F: target/s390x/
 F: hw/s390x/
 F: disas/s390.c
 SH4
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
-F: target-sh4/
+F: target/sh4/
 F: hw/sh4/
 F: disas/sh4.c
 F: include/hw/sh4/
@@ -205,7 +232,7 @@ SPARC
 M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 M: Artyom Tarasenko <atar4qemu@gmail.com>
 S: Maintained
-F: target-sparc/
+F: target/sparc/
 F: hw/sparc/
 F: hw/sparc64/
 F: disas/sparc.c
@@ -213,7 +240,7 @@ F: disas/sparc.c
 UniCore32
 M: Guan Xuetao <gxt@mprc.pku.edu.cn>
 S: Maintained
-F: target-unicore32/
+F: target/unicore32/
 F: hw/unicore32/
 F: include/hw/unicore32/
@@ -222,7 +249,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Richard Henderson <rth@twiddle.net>
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
-F: target-i386/
+F: target/i386/
 F: hw/i386/
 F: disas/i386.c
@@ -230,14 +257,14 @@ Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
 W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
-F: target-xtensa/
+F: target/xtensa/
 F: hw/xtensa/
 F: tests/tcg/xtensa/
 TriCore
 M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
 S: Maintained
-F: target-tricore/
+F: target/tricore/
 F: hw/tricore/
 F: include/hw/tricore/
@@ -256,26 +283,26 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target-arm/kvm.c
+F: target/arm/kvm.c
 MIPS
 M: James Hogan <james.hogan@imgtec.com>
 S: Maintained
-F: target-mips/kvm.c
+F: target/mips/kvm.c
 PPC
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-ppc/kvm.c
+F: target/ppc/kvm.c
 S390
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-s390x/kvm.c
+F: target/s390x/kvm.c
-F: target-s390x/ioinst.[ch]
+F: target/s390x/ioinst.[ch]
-F: target-s390x/machine.c
+F: target/s390x/machine.c
 F: hw/intc/s390_flic.c
 F: hw/intc/s390_flic_kvm.c
 F: include/hw/s390x/s390_flic.h
@@ -288,7 +315,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: target-i386/kvm.c
+F: target/i386/kvm.c
 Guest CPU Cores (Xen):
 ----------------------
@@ -408,6 +435,7 @@ M: Peter Chubb <peter.chubb@nicta.com.au>
 L: qemu-arm@nongnu.org
 S: Odd fixes
 F: hw/*/imx*
 F: include/hw/*/imx*
 F: hw/arm/kzm.c
 F: include/hw/arm/fsl-imx31.h
@@ -416,6 +444,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/integratorcp.c
 F: hw/misc/arm_integrator_debug.c
 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
@@ -440,6 +469,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/realview*
 F: hw/cpu/realview_mpcore.c
 F: hw/intc/realview_gic.c
 F: include/hw/intc/realview_gic.h
@@ -452,6 +482,7 @@ F: hw/arm/spitz.c
 F: hw/arm/tosa.c
 F: hw/arm/z2.c
 F: hw/*/pxa2xx*
 F: hw/misc/mst_fpga.c
 F: include/hw/arm/pxa.h
 Stellaris
@@ -473,7 +504,8 @@ L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/*/xilinx_*
 F: hw/*/cadence_*
-F: hw/misc/zynq_slcr.c
+F: hw/misc/zynq*
 F: include/hw/misc/zynq*
 X: hw/ssi/xilinx_*
 Xilinx ZynqMP
@@ -490,7 +522,6 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/virt-acpi-build.c
 F: include/hw/arm/virt-acpi-build.h
 STM32F205
 M: Alistair Francis <alistair@alistair23.me>
@@ -532,6 +563,7 @@ M68K Machines
 an5206
 S: Orphan
 F: hw/m68k/an5206.c
 F: hw/m68k/mcf5206.c
 dummy_m68k
 S: Orphan
@@ -540,6 +572,9 @@ F: hw/m68k/dummy_m68k.c
 mcf5208
 S: Orphan
 F: hw/m68k/mcf5208.c
 F: hw/m68k/mcf_intc.c
 F: hw/char/mcf_uart.c
 F: hw/net/mcf_fec.c
 MicroBlaze Machines
 -------------------
@@ -656,6 +691,7 @@ F: include/hw/*/xics*
 F: pc-bios/spapr-rtas/*
 F: pc-bios/spapr-rtas.bin
 F: pc-bios/slof.bin
 F: pc-bios/skiboot.lid
 F: docs/specs/ppc-spapr-hcalls.txt
 F: docs/specs/ppc-spapr-hotplug.txt
 F: tests/spapr*
@@ -691,6 +727,8 @@ S: Maintained
 F: hw/sparc/sun4m.c
 F: hw/dma/sparc32_dma.c
 F: hw/dma/sun4m_iommu.c
 F: hw/misc/eccmemctl.c
 F: hw/misc/slavio_misc.c
 F: include/hw/sparc/sparc32_dma.h
 F: include/hw/sparc/sun4m.h
 F: pc-bios/openbios-sparc32
@@ -701,6 +739,13 @@ S: Maintained
 F: hw/sparc64/sun4u.c
 F: pc-bios/openbios-sparc64
 Sun4v
 M: Artyom Tarasenko <atar4qemu@gmail.com>
 S: Maintained
 F: hw/sparc64/sun4v.c
 F: hw/timer/sun4v-rtc.c
 F: include/hw/timer/sun4v-rtc.h
 Leon3
 M: Fabien Chouteau <chouteau@adacore.com>
 S: Maintained
@@ -782,6 +827,7 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 M: Marcel Apfelbaum <marcel@redhat.com>
 S: Supported
 F: hw/core/machine.c
 F: hw/core/null-machine.c
 F: include/hw/boards.h
 Xtensa Machines
@@ -860,7 +906,6 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
 F: include/hw/arm/virt-acpi-build.h
 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -1003,6 +1048,13 @@ F: include/sysemu/rng*.h
 F: backends/rng*.c
 F: tests/virtio-rng-test.c
 virtio-crypto
 M: Gonglei <arei.gonglei@huawei.com>
 S: Supported
 F: hw/virtio/virtio-crypto.c
 F: hw/virtio/virtio-crypto-pci.c
 F: include/hw/virtio/virtio-crypto.h
 nvme
 M: Keith Busch <keith.busch@intel.com>
 L: qemu-block@nongnu.org
@@ -1061,6 +1113,13 @@ S: Maintained
 F: hw/core/generic-loader.c
 F: include/hw/core/generic-loader.h
 CHRP NVRAM
 M: Thomas Huth <thuth@redhat.com>
 S: Maintained
 F: hw/nvram/chrp_nvram.c
 F: include/hw/nvram/chrp_nvram.h
 F: tests/prom-env-test.c
 Subsystems
 ----------
 Audio
@@ -1119,6 +1178,20 @@ F: block/qapi.c
 F: qapi/block*.json
 T: git git://repo.or.cz/qemu/armbru.git block-next
 Dirty Bitmaps
 M: Fam Zheng <famz@redhat.com>
 M: John Snow <jsnow@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: util/hbitmap.c
 F: block/dirty-bitmap.c
 F: include/qemu/hbitmap.h
 F: include/block/dirty-bitmap.h
 F: tests/test-hbitmap.c
 F: docs/bitmaps.md
 T: git git://github.com/famz/qemu.git bitmaps
 T: git git://github.com/jnsnow/qemu.git bitmaps
 Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
@@ -1248,6 +1321,12 @@ S: Maintained
 F: backends/hostmem*.c
 F: include/sysemu/hostmem.h
 Cryptodev Backends
 M: Gonglei <arei.gonglei@huawei.com>
 S: Maintained
 F: include/sysemu/cryptodev*.h
 F: backends/cryptodev*.c
 QAPI
 M: Markus Armbruster <armbru@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1349,6 +1428,7 @@ F: scripts/checkpatch.pl
 Migration
 M: Juan Quintela <quintela@redhat.com>
 M: Amit Shah <amit.shah@redhat.com>
 M: Dr. David Alan Gilbert <dgilbert@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
@@ -1413,6 +1493,14 @@ F: util/uuid.c
 F: include/qemu/uuid.h
 F: tests/test-uuid.c
 COLO Framework
 M: zhanghailiang <zhang.zhanghailiang@huawei.com>
 S: Maintained
 F: migration/colo*
 F: include/migration/colo.h
 F: include/migration/failover.h
 F: docs/COLO-FT.txt
 COLO Proxy
 M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
 M: Li Zhijian <lizhijian@cn.fujitsu.com>
@@ -1429,6 +1517,7 @@ M: Riku Voipio <riku.voipio@iki.fi>
 S: Maintained
 F: thunk.c
 F: user-exec.c
 F: user-exec-stub.c
 BSD user
 S: Orphan
@@ -1483,8 +1572,8 @@ F: tcg/mips/
 F: disas/mips.c
 PPC
-M: Vassili Karpov (malc) <av1474@comtv.ru>
+M: Richard Henderson <rth@twiddle.net>
-S: Maintained
+S: Odd Fixes
 F: tcg/ppc/
 F: disas/ppc.c
@@ -1507,28 +1596,6 @@ F: tcg/tci/
 F: tci.c
 F: disas/tci.c
 Stable branches
 ---------------
 Stable 1.0
 L: qemu-stable@nongnu.org
 T: git git://git.qemu-project.org/qemu-stable-1.0.git
 S: Orphan
 Stable 0.15
 L: qemu-stable@nongnu.org
 T: git git://git.qemu-project.org/qemu-stable-0.15.git
 S: Orphan
 Stable 0.14
 L: qemu-stable@nongnu.org
 T: git git://git.qemu-project.org/qemu-stable-0.14.git
 S: Orphan
 Stable 0.10
 L: qemu-stable@nongnu.org
 T: git git://git.qemu-project.org/qemu-stable-0.10.git
 S: Orphan
 Block drivers
 -------------
 VMDK
@@ -1575,6 +1642,7 @@ M: Peter Lieven <pl@kamp.de>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/iscsi.c
 F: block/iscsi-opts.c
 NFS
 M: Jeff Cody <jcody@redhat.com>
@@ -1675,9 +1743,9 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/linux-aio.c
 F: include/block/raw-aio.h
-F: block/raw-posix.c
+F: block/raw-format.c
-F: block/raw-win32.c
+F: block/file-posix.c
-F: block/raw_bsd.c
+F: block/file-win32.c
 F: block/win32-aio.c
 qcow2
--- a/108
+++ b/108
@@ -80,8 +80,8 @@ GENERATED_HEADERS += module_block.h
 Makefile: ;
 configure: ;
-.PHONY: all clean cscope distclean dvi html info install install-doc \
+.PHONY: all clean cscope distclean html info install install-doc \
-	pdf recurse-all speed test dist msi FORCE
+	pdf txt recurse-all speed test dist msi FORCE
 $(call set-vpath, $(SRC_PATH))
@@ -90,7 +90,9 @@ LIBS+=-lz $(LIBS_TOOLS)
 HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
 ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
+DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
 DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7
 DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -149,6 +151,7 @@ dummy := $(call unnest-vars,, \
                qga-obj-y \
                ivshmem-client-obj-y \
                ivshmem-server-obj-y \
                libvhost-user-obj-y \
                qga-vss-dll-obj-y \
                block-obj-y \
                block-obj-m \
@@ -231,12 +234,10 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))
 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o")
 $(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo")
-Makefile: $(version-obj-y) $(version-lobj-y)
+Makefile: $(version-obj-y)
 ######################################################################
 # Build libraries
@@ -266,6 +267,7 @@ qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated
 gen-out-type = $(subst .,-,$(suffix $@))
 qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
 qapi-py += $(SRC_PATH)/scripts/qapi2texi.py
 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -358,10 +360,9 @@ clean:
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
 	rm -f *.msi
-	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
 	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
 	@# May not be present in GENERATED_HEADERS
@@ -389,12 +390,17 @@ distclean: clean
 	rm -f config-all-devices.mak config-all-disas.mak config.status
 	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
-	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
+	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
 	rm -f qemu-doc.log qemu-doc.pdf qemu-doc.pg qemu-doc.toc qemu-doc.tp
-	rm -f qemu-doc.vr
+	rm -f qemu-doc.vr qemu-doc.txt
 	rm -f config.log
 	rm -f linux-headers/asm
 	rm -f qemu-ga-qapi.texi qemu-qapi.texi
 	rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
 	rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
 	rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
 	rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
        done
@@ -421,7 +427,7 @@ qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
 s390-ccw.img \
-spapr-rtas.bin slof.bin \
+spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
 u-boot.e500
 else
@@ -431,10 +437,14 @@ endif
 install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
 	$(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
 ifneq ($(TOOLS),)
 	$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -442,6 +452,9 @@ ifneq ($(TOOLS),)
 endif
 ifneq (,$(findstring qemu-ga,$(TOOLS)))
 	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
 	$(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
 endif
 endif
 ifdef CONFIG_VIRTFS
@@ -530,20 +543,22 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-headers --no-split --number-sections
+MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
-TEXIFLAG=$(if $(V),,--quiet)
+TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
 %.dvi: %.texi
 	$(call quiet-command,texi2dvi $(TEXIFLAG) -I . $<,"GEN","$@")
 %.html: %.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --html $< -o $@, \
+	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
-	"GEN","$@")
+	--html $< -o $@,"GEN","$@")
 %.info: %.texi
-	$(call quiet-command,$(MAKEINFO) $< -o $@,"GEN","$@")
+	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
 %.txt: %.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
 	--plaintext $< -o $@,"GEN","$@")
 %.pdf: %.texi
-	$(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<,"GEN","$@")
+	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -557,47 +572,36 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
 qemu-qapi.texi: $(qapi-modules) $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
 	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
 	  "GEN","$@")
 qemu.1: qemu-option-trace.texi
 qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
 	  $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
 	  "GEN","$@")
 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
 	  $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
 	  "GEN","$@")
 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
 	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
 	  "GEN","$@")
 qemu-ga.8: qemu-ga.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
 	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
 	  "GEN","$@")
-dvi: qemu-doc.dvi
+html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
-html: qemu-doc.html
+info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info
-info: qemu-doc.info
+pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
-pdf: qemu-doc.pdf
+txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
-qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
+qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi
 docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
 docs/qemu-ga-ref.texi qemu-ga-qapi.texi
 docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
 docs/qemu-qmp-ref.texi qemu-qapi.texi
 ifdef CONFIG_WIN32
 INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
@@ -690,12 +694,12 @@ help:
 	@echo  '  docker          - Help about targets running tests inside Docker containers'
 	@echo  ''
 	@echo  'Documentation targets:'
-	@echo  '  dvi html info pdf'
+	@echo  '  html info pdf txt'
 	@echo  '                  - Build documentation in specified format'
 	@echo  ''
 ifdef CONFIG_WIN32
 	@echo  'Windows targets:'
-	@echo  '  installer       - Build NSIS-based installer for qemu-ga'
+	@echo  '  installer       - Build NSIS-based installer for QEMU'
 ifdef QEMU_GA_MSI_ENABLED
 	@echo  '  msi             - Build MSI-based installer for qemu-ga'
 endif
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -89,7 +89,7 @@ endif
 #######################################################################
 # Target-independent parts used in system and user emulation
-common-obj-y += tcg-runtime.o cpus-common.o
+common-obj-y += cpus-common.o
 common-obj-y += hw/
 common-obj-y += qom/
 common-obj-y += disas/
@@ -97,7 +97,6 @@ common-obj-y += disas/
 ######################################################################
 # Resource file for Windows executables
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
 version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 ######################################################################
 # tracing
@@ -116,7 +115,7 @@ qga-vss-dll-obj-y = qga/
 # contrib
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/
 ######################################################################
 trace-events-y = trace-events
@@ -155,9 +154,11 @@ trace-events-y += hw/alpha/trace-events
 trace-events-y += ui/trace-events
 trace-events-y += audio/trace-events
 trace-events-y += net/trace-events
-trace-events-y += target-i386/trace-events
+trace-events-y += target/arm/trace-events
-trace-events-y += target-sparc/trace-events
+trace-events-y += target/i386/trace-events
-trace-events-y += target-s390x/trace-events
+trace-events-y += target/sparc/trace-events
-trace-events-y += target-ppc/trace-events
+trace-events-y += target/s390x/trace-events
 trace-events-y += target/ppc/trace-events
 trace-events-y += qom/trace-events
 trace-events-y += linux-user/trace-events
 trace-events-y += qapi/trace-events
--- a/Makefile.target
+++ b/Makefile.target
@@ -11,7 +11,7 @@ $(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
-QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -DNEED_CPU_H
+QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target/$(TARGET_BASE_ARCH) -DNEED_CPU_H
 QEMU_CFLAGS+=-I$(SRC_PATH)/include
@@ -76,6 +76,7 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 else
 stap:
 endif
 .PHONY: stap
 all: $(PROGS) stap
@@ -92,9 +93,11 @@ obj-$(CONFIG_TCG_INTERPRETER) += tci.o
 obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
-obj-y += target-$(TARGET_BASE_ARCH)/
+obj-y += target/$(TARGET_BASE_ARCH)/
 obj-y += disas.o
 obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
@@ -113,7 +116,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
             -I$(SRC_PATH)/linux-user
 obj-y += linux-user/
-obj-y += gdbstub.o thunk.o user-exec.o
+obj-y += gdbstub.o thunk.o user-exec.o user-exec-stub.o
 endif #CONFIG_LINUX_USER
@@ -126,7 +129,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
 			 -I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)
 obj-y += bsd-user/
-obj-y += gdbstub.o user-exec.o
+obj-y += gdbstub.o user-exec.o user-exec-stub.o
 endif #CONFIG_BSD_USER
--- a/1
+++ b/1
@@ -45,6 +45,7 @@ of other UNIX targets. The simple steps to build QEMU are:
 Additional information can also be found online via the QEMU website:
  http://qemu-project.org/Hosts/Linux
  http://qemu-project.org/Hosts/Mac
  http://qemu-project.org/Hosts/W32
--- a/2
+++ b/2
@@ -1 +1 @@
-2.7.50
+2.8.50
--- a/accel.c
+++ b/accel.c
@@ -33,7 +33,6 @@
 #include "sysemu/qtest.h"
 #include "hw/xen/xen.h"
 #include "qom/object.h"
 #include "hw/boards.h"
 int tcg_tb_size;
 static bool tcg_allowed = true;
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -16,8 +16,10 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
-#include "qemu/queue.h"
+#include "qemu/rcu_queue.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
 #include "trace.h"
 #ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif
@@ -27,6 +29,9 @@ struct AioHandler
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    AioPollFn *io_poll;
    IOHandler *io_poll_begin;
    IOHandler *io_poll_end;
    int deleted;
    void *opaque;
    bool is_external;
@@ -61,7 +66,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
    AioHandler *node;
    struct epoll_event event;
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int r;
        if (node->deleted || !node->pfd.events) {
            continue;
@@ -81,29 +86,22 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
 {
    struct epoll_event event;
    int r;
    int ctl;
    if (!ctx->epoll_enabled) {
        return;
    }
    if (!node->pfd.events) {
-        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
+        ctl = EPOLL_CTL_DEL;
        if (r) {
            aio_epoll_disable(ctx);
        }
    } else {
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
-        if (is_new) {
+        ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
-            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+    }
-            if (r) {
+
-                aio_epoll_disable(ctx);
+    r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
-            }
+    if (r) {
-        } else {
+        aio_epoll_disable(ctx);
            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
            if (r) {
                aio_epoll_disable(ctx);
            }
        }
    }
 }
@@ -207,45 +205,61 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
 {
    AioHandler *node;
    bool is_new = false;
    bool deleted = false;
    qemu_lockcnt_lock(&ctx->list_lock);
    node = find_aio_handler(ctx, fd);
    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
+    if (!io_read && !io_write && !io_poll) {
-        if (node) {
+        if (node == NULL) {
-            g_source_remove_poll(&ctx->source, &node->pfd);
+            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
-            /* If the lock is held, just mark the node as deleted */
+        g_source_remove_poll(&ctx->source, &node->pfd);
-            if (ctx->walking_handlers) {
+
-                node->deleted = 1;
+        /* If the lock is held, just mark the node as deleted */
-                node->pfd.revents = 0;
+        if (qemu_lockcnt_count(&ctx->list_lock)) {
-            } else {
+            node->deleted = 1;
-                /* Otherwise, delete it for real.  We can't just mark it as
+            node->pfd.revents = 0;
-                 * deleted because deleted nodes are only cleaned up after
+        } else {
-                 * releasing the walking_handlers lock.
+            /* Otherwise, delete it for real.  We can't just mark it as
-                 */
+             * deleted because deleted nodes are only cleaned up while
-                QLIST_REMOVE(node, node);
+             * no one is walking the handlers list.
-                deleted = true;
+             */
-            }
+            QLIST_REMOVE(node, node);
            deleted = true;
        }
        if (!node->io_poll) {
            ctx->poll_disable_cnt--;
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
            g_source_add_poll(&ctx->source, &node->pfd);
            is_new = true;
            ctx->poll_disable_cnt += !io_poll;
        } else {
            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
        }
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->io_poll = io_poll;
        node->opaque = opaque;
        node->is_external = is_external;
@@ -254,71 +268,132 @@ void aio_set_fd_handler(AioContext *ctx,
    }
    aio_epoll_update(ctx, node, is_new);
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
    if (deleted) {
        g_free(node);
    }
 }
 /*
 * Record the poll begin/end callbacks on the handler that
 * find_aio_handler() returns for @fd.  When no handler is found the
 * call has no effect.
 */
 void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
 {
    AioHandler *handler = find_aio_handler(ctx, fd);
    if (handler) {
        handler->io_poll_begin = io_poll_begin;
        handler->io_poll_end = io_poll_end;
    }
 }
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
-                            EventNotifierHandler *io_read)
+                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
-                       is_external, (IOHandler *)io_read, NULL, notifier);
+                       (IOHandler *)io_read, NULL, io_poll, notifier);
 }
 /*
 * EventNotifier variant of aio_set_fd_poll(): looks up the notifier's file
 * descriptor and forwards the two callbacks, cast to IOHandler.
 */
 void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
 {
    IOHandler *begin_cb = (IOHandler *)io_poll_begin;
    IOHandler *end_cb = (IOHandler *)io_poll_end;
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier), begin_cb, end_cb);
 }
 /*
 * Record the new poll-mode state in ctx->poll_started.  If the state
 * really changes, call io_poll_begin (when @started) or io_poll_end
 * (otherwise) on every handler that is not marked deleted and has the
 * corresponding callback set.
 */
 static void poll_set_started(AioContext *ctx, bool started)
 {
    AioHandler *handler;
    if (ctx->poll_started == started) {
        /* Already in the requested state */
        return;
    }
    ctx->poll_started = started;
    /* The list_lock count stays raised for the whole walk */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        if (!handler->deleted) {
            IOHandler *callback = started ? handler->io_poll_begin
                                          : handler->io_poll_end;
            if (callback) {
                callback(handler->opaque);
            }
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
 }
 /* Leaves poll mode on @ctx and always returns false. */
 bool aio_prepare(AioContext *ctx)
 {
    const bool ready = false;
    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, false);
    return ready;
 }
 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-
+    bool result = false;
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;
        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            return true;
        }
    }
    return false;
 }
 bool aio_dispatch(AioContext *ctx)
 {
    AioHandler *node;
    bool progress = false;
    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        progress = true;
    }
    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    node = QLIST_FIRST(&ctx->aio_handlers);
+    qemu_lockcnt_inc(&ctx->list_lock);
-    while (node) {
+
-        AioHandler *tmp;
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;
-        ctx->walking_handlers++;
+        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
 }
 static bool aio_dispatch_handlers(AioContext *ctx)
 {
    AioHandler *node, *tmp;
    bool progress = false;
    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents;
        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;
@@ -342,17 +417,38 @@ bool aio_dispatch(AioContext *ctx)
            progress = true;
        }
-        tmp = node;
+        if (node->deleted) {
-        node = QLIST_NEXT(node, node);
+            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-
+                QLIST_REMOVE(node, node);
-        ctx->walking_handlers--;
+                g_free(node);
-
+                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-        if (!ctx->walking_handlers && tmp->deleted) {
+            }
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
    return progress;
 }
 /*
 * Note that dispatch_fds == false has the side-effect of postponing the
 * freeing of deleted handlers.
 */
 bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
 {
    bool progress;
    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    progress = aio_bh_poll(ctx);
    if (dispatch_fds) {
        progress |= aio_dispatch_handlers(ctx);
    }
    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);
@@ -405,12 +501,101 @@ static void add_pollfd(AioHandler *node)
    npfd++;
 }
 /*
 * Walk the handler list once and call io_poll on each handler that is not
 * deleted, has an io_poll callback and passes aio_node_check().
 *
 * Returns: true if at least one io_poll callback returned true
 */
 static bool run_poll_handlers_once(AioContext *ctx)
 {
    AioHandler *handler;
    bool made_progress = false;
    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        /* Caller handles freeing deleted nodes.  Don't do it here. */
        if (handler->deleted || !handler->io_poll) {
            continue;
        }
        if (!aio_node_check(ctx, handler->is_external)) {
            continue;
        }
        if (handler->io_poll(handler->opaque)) {
            made_progress = true;
        }
    }
    return made_progress;
 }
 /* run_poll_handlers:
 * @ctx: the AioContext
 * @max_ns: upper bound on the polling time, in nanoseconds
 *
 * Repeats run_poll_handlers_once() until it reports progress or the
 * QEMU_CLOCK_REALTIME deadline (now + @max_ns) has been reached.  At least
 * one pass is always made.
 *
 * Preconditions (asserted): ctx->notify_me is non-zero so that aio_notify()
 * can be detected, the caller has incremented ctx->list_lock, and
 * ctx->poll_disable_cnt is zero.
 *
 * Returns: true if progress was made, false otherwise
 */
 static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
 {
    bool progress;
    int64_t deadline;
    assert(ctx->notify_me);
    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
    assert(ctx->poll_disable_cnt == 0);
    trace_run_poll_handlers_begin(ctx, max_ns);
    deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
    for (;;) {
        progress = run_poll_handlers_once(ctx);
        if (progress) {
            break;
        }
        /* The clock is only consulted after an unproductive pass */
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) >= deadline) {
            break;
        }
    }
    trace_run_poll_handlers_end(ctx, progress);
    return progress;
 }
 /* try_poll_mode:
 * @ctx: the AioContext
 * @blocking: busy polling is attempted only when this is true
 *
 * Busy-polls when @blocking is set, ctx->poll_max_ns is non-zero and
 * ctx->poll_disable_cnt is zero, for at most the smaller of the computed
 * timeout and ctx->poll_ns.  If that does not make progress (or was not
 * attempted), poll mode is stopped and the handlers are polled one time.
 *
 * ctx->notify_me must be non-zero so this function can detect aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
 static bool try_poll_mode(AioContext *ctx, bool blocking)
 {
    bool may_busy_poll = blocking && ctx->poll_max_ns &&
                         ctx->poll_disable_cnt == 0;
    if (may_busy_poll) {
        /* See qemu_soonest_timeout() uint64_t hack */
        int64_t budget_ns = MIN((uint64_t)aio_compute_timeout(ctx),
                                (uint64_t)ctx->poll_ns);
        if (budget_ns != 0) {
            poll_set_started(ctx, true);
            if (run_poll_handlers(ctx, budget_ns)) {
                return true;
            }
        }
    }
    poll_set_started(ctx, false);
    /* Even if we don't run busy polling, try polling once in case it can make
     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
     */
    return run_poll_handlers_once(ctx);
 }
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    int i, ret;
+    int i;
    int ret = 0;
    bool progress;
    int64_t timeout;
    int64_t start = 0;
    aio_context_acquire(ctx);
    progress = false;
@@ -426,43 +611,93 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }
-    ctx->walking_handlers++;
+    qemu_lockcnt_inc(&ctx->list_lock);
-    assert(npfd == 0);
+    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
-    /* fill pollfds */
+    if (try_poll_mode(ctx, blocking)) {
        progress = true;
    } else {
        assert(npfd == 0);
-    if (!aio_epoll_enabled(ctx)) {
+        /* fill pollfds */
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+
-            if (!node->deleted && node->pfd.events
+        if (!aio_epoll_enabled(ctx)) {
-                && aio_node_check(ctx, node->is_external)) {
+            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-                add_pollfd(node);
+                if (!node->deleted && node->pfd.events
                    && aio_node_check(ctx, node->is_external)) {
                    add_pollfd(node);
                }
            }
        }
        timeout = blocking ? aio_compute_timeout(ctx) : 0;
        /* wait until next event */
        if (timeout) {
            aio_context_release(ctx);
        }
        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
            AioHandler epoll_handler;
            epoll_handler.pfd.fd = ctx->epollfd;
            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
            npfd = 0;
            add_pollfd(&epoll_handler);
            ret = aio_epoll(ctx, pollfds, npfd, timeout);
        } else  {
            ret = qemu_poll_ns(pollfds, npfd, timeout);
        }
        if (timeout) {
            aio_context_acquire(ctx);
        }
    }
    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    /* wait until next event */
    if (timeout) {
        aio_context_release(ctx);
    }
    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
        AioHandler epoll_handler;
        epoll_handler.pfd.fd = ctx->epollfd;
        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
        npfd = 0;
        add_pollfd(&epoll_handler);
        ret = aio_epoll(ctx, pollfds, npfd, timeout);
    } else  {
        ret = qemu_poll_ns(pollfds, npfd, timeout);
    }
    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }
-    if (timeout) {
+
-        aio_context_acquire(ctx);
+    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;
            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }
            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;
            if (grow == 0) {
                grow = 2;
            }
            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }
            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }
            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }
    aio_notify_accept(ctx);
@@ -475,10 +710,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }
    npfd = 0;
-    ctx->walking_handlers--;
+    qemu_lockcnt_dec(&ctx->list_lock);
    /* Run dispatch even if there were no readable fds to run timers */
-    if (aio_dispatch(ctx)) {
+    if (aio_dispatch(ctx, ret > 0)) {
        progress = true;
    }
@@ -489,6 +724,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
 void aio_context_setup(AioContext *ctx)
 {
    /* TODO remove this in final patch submission */
    if (getenv("QEMU_AIO_POLL_MAX_NS")) {
        fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
                "been replaced with -object iothread,poll-max-ns=NUM\n");
        exit(1);
    }
 #ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
@@ -500,3 +742,17 @@ void aio_context_setup(AioContext *ctx)
    }
 #endif
 }
 /*
 * Store new polling parameters on @ctx and then call aio_notify() on it.
 * @max_ns, @grow and @shrink are copied to poll_max_ns, poll_grow and
 * poll_shrink; poll_ns is reset to 0 on every call.  @errp is not used by
 * this implementation.
 */
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
 {
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;
    aio_notify(ctx);
 }
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -20,6 +20,8 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
 #include "qapi/error.h"
 #include "qemu/rcu_queue.h"
 struct AioHandler {
    EventNotifier *e;
@@ -38,11 +40,13 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
 {
    /* fd is a SOCKET in our case */
    AioHandler *node;
    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
@@ -52,14 +56,14 @@ void aio_set_fd_handler(AioContext *ctx,
    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
-            /* If the lock is held, just mark the node as deleted */
+            /* If aio_poll is in progress, just mark the node as deleted */
-            if (ctx->walking_handlers) {
+            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
+                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -72,7 +76,7 @@ void aio_set_fd_handler(AioContext *ctx,
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        }
        node->pfd.events = 0;
@@ -97,16 +101,26 @@ void aio_set_fd_handler(AioContext *ctx,
                       FD_CONNECT | FD_WRITE | FD_OOB);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }
 /*
 * Stub: this implementation ignores all of its arguments and does nothing.
 */
 void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
 {
    /* Not implemented */
 }
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
-                            EventNotifierHandler *io_notify)
+                            EventNotifierHandler *io_notify,
                            AioPollFn *io_poll)
 {
    AioHandler *node;
    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
@@ -118,14 +132,14 @@ void aio_set_event_notifier(AioContext *ctx,
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);
-            /* If the lock is held, just mark the node as deleted */
+            /* aio_poll is in progress, just mark the node as deleted */
-            if (ctx->walking_handlers) {
+            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
+                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -139,7 +153,7 @@ void aio_set_event_notifier(AioContext *ctx,
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
            g_source_add_poll(&ctx->source, &node->pfd);
        }
@@ -147,9 +161,18 @@ void aio_set_event_notifier(AioContext *ctx,
        node->io_notify = io_notify;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }
 /*
 * Stub: this implementation ignores all of its arguments and does nothing.
 */
 void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
 {
    /* Not implemented */
 }
 bool aio_prepare(AioContext *ctx)
 {
    static struct timeval tv0;
@@ -157,10 +180,16 @@ bool aio_prepare(AioContext *ctx)
    bool have_select_revents = false;
    fd_set rfds, wfds;
    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
@@ -170,7 +199,7 @@ bool aio_prepare(AioContext *ctx)
    }
    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
@@ -184,45 +213,55 @@ bool aio_prepare(AioContext *ctx)
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
 }
 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
    bool result = false;
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
-            return true;
+            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            return true;
+            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            return true;
+            result = true;
            break;
        }
    }
-    return false;
+    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
 }
 static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
 {
    AioHandler *node;
    bool progress = false;
    AioHandler *tmp;
    qemu_lockcnt_inc(&ctx->list_lock);
    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    node = QLIST_FIRST(&ctx->aio_handlers);
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
    while (node) {
        AioHandler *tmp;
        int revents = node->pfd.revents;
        ctx->walking_handlers++;
        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
@@ -257,26 +296,27 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }
-        tmp = node;
+        if (node->deleted) {
-        node = QLIST_NEXT(node, node);
+            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-
+                QLIST_REMOVE(node, node);
-        ctx->walking_handlers--;
+                g_free(node);
-
+                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-        if (!ctx->walking_handlers && tmp->deleted) {
+            }
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
    return progress;
 }
-bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
 {
    bool progress;
    progress = aio_bh_poll(ctx);
-    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    if (dispatch_fds) {
        progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    }
    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
 }
@@ -303,20 +343,19 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }
    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);
    ctx->walking_handlers++;
    /* fill fd sets */
    count = 0;
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }
-    ctx->walking_handlers--;
+    qemu_lockcnt_dec(&ctx->list_lock);
    first = true;
    /* ctx->notifier is always registered.  */
@@ -374,3 +413,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 /* Nothing to set up in this implementation; @ctx is left untouched. */
 void aio_context_setup(AioContext *ctx)
 {
 }
 /*
 * Always fails: sets an error through @errp and does not use @ctx, @max_ns,
 * @grow or @shrink.
 */
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
 {
    error_setg(errp, "AioContext polling is not implemented on Windows");
 }
--- a/arch_init.c
+++ b/arch_init.c
@@ -28,7 +28,6 @@
 #include "sysemu/arch_init.h"
 #include "hw/pci/pci.h"
 #include "hw/audio/audio.h"
 #include "hw/smbios/smbios.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
@@ -64,6 +63,8 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_MIPS
 #elif defined(TARGET_MOXIE)
 #define QEMU_ARCH QEMU_ARCH_MOXIE
 #elif defined(TARGET_NIOS2)
 #define QEMU_ARCH QEMU_ARCH_NIOS2
 #elif defined(TARGET_OPENRISC)
 #define QEMU_ARCH QEMU_ARCH_OPENRISC
 #elif defined(TARGET_PPC)
@@ -84,33 +85,6 @@ int graphic_depth = 32;
 const uint32_t arch_type = QEMU_ARCH;
 /* Table of default config files; the list ends at a NULL filename */
 static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
 } default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { NULL }, /* end of list */
 };
 /*
 * Read every entry of default_config_files in order.  Entries flagged as
 * user config are skipped when @userconfig is false.
 *
 * Returns: 0 on success, or the first negative result of
 * qemu_read_config_file() other than -ENOENT
 */
 int qemu_read_default_config_files(bool userconfig)
 {
    struct defconfig_file *entry = default_config_files;
    while (entry->filename) {
        if (userconfig || !entry->userconfig) {
            int rc = qemu_read_config_file(entry->filename);
            /* A missing file is not an error */
            if (rc < 0 && rc != -ENOENT) {
                return rc;
            }
        }
        entry++;
    }
    return 0;
 }
 struct soundhw {
    const char *name;
    const char *descr;
@@ -235,26 +209,6 @@ void audio_init(void)
    }
 }
 /*
 * On TARGET_I386 builds, pass @opts to acpi_table_add(); if that sets an
 * error, report it and exit with status 1.  Empty on other targets.
 */
 void do_acpitable_option(const QemuOpts *opts)
 {
 #ifdef TARGET_I386
    Error *local_err = NULL;
    acpi_table_add(opts, &local_err);
    if (!local_err) {
        return;
    }
    error_reportf_err(local_err, "Wrong acpi table provided: ");
    exit(1);
 #endif
 }
 /*
 * On TARGET_I386 builds, forward @opts to smbios_entry_add().  Empty on
 * other targets.
 */
 void do_smbios_option(QemuOpts *opts)
 {
 #ifdef TARGET_I386
    smbios_entry_add(opts);
 #endif
 }
 int kvm_available(void)
 {
 #ifdef CONFIG_KVM
--- a/async.c
+++ b/async.c
@@ -53,14 +53,15 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
@@ -72,12 +73,12 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);
    return bh;
 }
@@ -91,14 +92,13 @@ int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;
-    ctx->walking_bh++;
+    qemu_lockcnt_inc(&ctx->list_lock);
    ret = 0;
-    for (bh = ctx->first_bh; bh; bh = next) {
+    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
-        /* Make sure that fetching bh happens before accessing its members */
+        next = atomic_rcu_read(&bh->next);
        smp_read_barrier_depends();
        next = bh->next;
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
@@ -106,20 +106,25 @@ int aio_bh_poll(AioContext *ctx)
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
-            /* Idle BHs and the notify BH don't count as progress */
+            /* Idle BHs don't count as progress */
-            if (!bh->idle && bh != ctx->notify_dummy_bh) {
+            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        if (bh->deleted) {
            deleted = true;
        }
    }
    ctx->walking_bh--;
    /* remove deleted bhs */
-    if (!ctx->walking_bh) {
+    if (!deleted) {
-        qemu_mutex_lock(&ctx->bh_lock);
+        qemu_lockcnt_dec(&ctx->list_lock);
        return ret;
    }
    if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
@@ -130,9 +135,8 @@ int aio_bh_poll(AioContext *ctx)
                bhp = &bh->next;
            }
        }
-        qemu_mutex_unlock(&ctx->bh_lock);
+        qemu_lockcnt_unlock(&ctx->list_lock);
    }
    return ret;
 }
@@ -186,7 +190,8 @@ aio_compute_timeout(AioContext *ctx)
    int timeout = -1;
    QEMUBH *bh;
-    for (bh = ctx->first_bh; bh; bh = bh->next) {
+    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
         bh = atomic_rcu_read(&bh->next)) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
@@ -250,7 +255,7 @@ aio_ctx_dispatch(GSource     *source,
    AioContext *ctx = (AioContext *) source;
    assert(callback == NULL);
-    aio_dispatch(ctx);
+    aio_dispatch(ctx, true);
    return true;
 }
@@ -259,7 +264,6 @@ aio_ctx_finalize(GSource     *source)
 {
    AioContext *ctx = (AioContext *) source;
    qemu_bh_delete(ctx->notify_dummy_bh);
    thread_pool_free(ctx->thread_pool);
 #ifdef CONFIG_LINUX_AIO
@@ -270,7 +274,8 @@ aio_ctx_finalize(GSource     *source)
    }
 #endif
-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;
@@ -280,12 +285,12 @@ aio_ctx_finalize(GSource     *source)
        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);
-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
-    rfifolock_destroy(&ctx->lock);
+    qemu_rec_mutex_destroy(&ctx->lock);
-    qemu_mutex_destroy(&ctx->bh_lock);
+    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
 }
@@ -345,23 +350,19 @@ static void aio_timerlist_notify(void *opaque)
    aio_notify(opaque);
 }
 /* Callback whose @opaque is an AioContext: schedules its notify_dummy_bh. */
 static void aio_rfifolock_cb(void *opaque)
 {
    AioContext *owner = opaque;
    /* Kick owner thread in case they are blocked in aio_poll() */
    qemu_bh_schedule(owner->notify_dummy_bh);
 }
 /* Bottom-half callback with an empty body; @opaque is ignored. */
 static void notify_dummy_bh(void *opaque)
 {
    /* Do nothing, we were invoked just to force the event loop to iterate */
 }
 /* EventNotifier handler that intentionally does nothing; @e is ignored. */
 static void event_notifier_dummy_cb(EventNotifier *e)
 {
 }
 /*
 * Poll callback for the context's own notifier.  @opaque is the
 * EventNotifier embedded in an AioContext as its 'notifier' member.
 *
 * Returns true if aio_notify() was called (e.g. a BH was scheduled)
 */
 static bool event_notifier_poll(void *opaque)
 {
    EventNotifier *notifier = opaque;
    AioContext *owner = container_of(notifier, AioContext, notifier);
    return atomic_read(&owner->notified);
 }
 AioContext *aio_context_new(Error **errp)
 {
    int ret;
@@ -376,19 +377,23 @@ AioContext *aio_context_new(Error **errp)
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
-                           event_notifier_dummy_cb);
+                           event_notifier_dummy_cb,
                           event_notifier_poll);
 #ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
 #endif
    ctx->thread_pool = NULL;
-    qemu_mutex_init(&ctx->bh_lock);
+    qemu_rec_mutex_init(&ctx->lock);
    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
-    ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
+    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;
    return ctx;
 fail:
@@ -408,10 +413,10 @@ void aio_context_unref(AioContext *ctx)
 void aio_context_acquire(AioContext *ctx)
 {
-    rfifolock_lock(&ctx->lock);
+    qemu_rec_mutex_lock(&ctx->lock);
 }
 void aio_context_release(AioContext *ctx)
 {
-    rfifolock_unlock(&ctx->lock);
+    qemu_rec_mutex_unlock(&ctx->lock);
 }
--- a/atomic_template.h
+++ b/atomic_template.h
@@ -0,0 +1,215 @@
 /*
 * Atomic helper templates
 * Included from tcg-runtime.c and cputlb.c.
 *
 * Copyright (c) 2016 Red Hat, Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 /*
 * Per-width parameters for this template.  DATA_SIZE is the access width
 * in bytes; it is not defined here, so it is expected to come from the
 * including file (it is #undef'd again at the end of the template).
 *   SUFFIX    - one-letter tag for the width (b, w, l, q, o); presumably
 *               consumed by ATOMIC_NAME -- confirm in the including file
 *   DATA_TYPE - C type used to access the memory location
 *   BSWAP     - byte-swap routine for that width (empty for one byte)
 */
 #if DATA_SIZE == 16
 # define SUFFIX     o
 # define DATA_TYPE  Int128
 # define BSWAP      bswap128
 #elif DATA_SIZE == 8
 # define SUFFIX     q
 # define DATA_TYPE  uint64_t
 # define BSWAP      bswap64
 #elif DATA_SIZE == 4
 # define SUFFIX     l
 # define DATA_TYPE  uint32_t
 # define BSWAP      bswap32
 #elif DATA_SIZE == 2
 # define SUFFIX     w
 # define DATA_TYPE  uint16_t
 # define BSWAP      bswap16
 #elif DATA_SIZE == 1
 # define SUFFIX     b
 # define DATA_TYPE  uint8_t
 # define BSWAP
 #else
 # error unsupported data size
 #endif
 /* Helper arguments and return values use ABI_TYPE: DATA_TYPE itself for
   accesses of 4 bytes or more, uint32_t for the narrower ones.  */
 #if DATA_SIZE >= 4
 # define ABI_TYPE  DATA_TYPE
 #else
 # define ABI_TYPE  uint32_t
 #endif
 /* Define host-endian atomic operations.  Note that END is used within
   the ATOMIC_NAME macro, and redefined below.  END is left empty for
   single-byte accesses; otherwise it is _be or _le to match the host
   byte order selected by HOST_WORDS_BIGENDIAN.  */
 #if DATA_SIZE == 1
 # define END
 #elif defined(HOST_WORDS_BIGENDIAN)
 # define END  _be
 #else
 # define END  _le
 #endif
 /*
 * Host-endian compare-and-swap helper.  The host pointer comes from
 * ATOMIC_MMU_LOOKUP; the result is whatever atomic_cmpxchg__nocheck()
 * yields for (cmpv, newv), converted to ABI_TYPE.
 */
 ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    ABI_TYPE prev = atomic_cmpxchg__nocheck(host_ptr, cmpv, newv);
    return prev;
 }
 #if DATA_SIZE >= 16
 /*
 * Host-endian load helper (only built when DATA_SIZE >= 16): reads the
 * value with a relaxed __atomic_load and returns it unchanged.
 */
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    DATA_TYPE loaded;
    __atomic_load(host_ptr, &loaded, __ATOMIC_RELAXED);
    return loaded;
 }
 /*
 * Host-endian store helper (only built when DATA_SIZE >= 16): writes val
 * as-is with a relaxed __atomic_store.
 */
 void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
                     ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    __atomic_store(host_ptr, &val, __ATOMIC_RELAXED);
 }
 #else
 /*
 * Host-endian exchange helper: passes val straight to
 * atomic_xchg__nocheck() and returns its result as ABI_TYPE.
 */
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                           ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    ABI_TYPE prev = atomic_xchg__nocheck(host_ptr, val);
    return prev;
 }
 /*
 * Generate one host-endian read-modify-write helper per operation X by
 * forwarding to atomic_X(haddr, val).  The last line of the macro must NOT
 * end in a backslash: a trailing continuation would splice the following
 * GEN_ATOMIC_HELPER(fetch_add) invocation into the macro body, so that
 * helper would never be generated and every expansion would carry a stray
 * unexpanded GEN_ATOMIC_HELPER(fetch_add) token sequence.
 */
 #define GEN_ATOMIC_HELPER(X)                                        \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
                 ABI_TYPE val EXTRA_ARGS)                           \
 {                                                                   \
    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
    return atomic_##X(haddr, val);                                  \
 }
 GEN_ATOMIC_HELPER(fetch_add)
 GEN_ATOMIC_HELPER(fetch_and)
 GEN_ATOMIC_HELPER(fetch_or)
 GEN_ATOMIC_HELPER(fetch_xor)
 GEN_ATOMIC_HELPER(add_fetch)
 GEN_ATOMIC_HELPER(and_fetch)
 GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 #undef GEN_ATOMIC_HELPER
 #endif /* DATA SIZE >= 16 */
 #undef END
 #if DATA_SIZE > 1
 /* Define reverse-host-endian atomic operations.  Note that END is used
   within the ATOMIC_NAME macro.  */
 #ifdef HOST_WORDS_BIGENDIAN
 # define END  _le
 #else
 # define END  _be
 #endif
 /*
 * Reverse-host-endian compare-and-swap helper: both operands are passed
 * through BSWAP before reaching atomic_cmpxchg__nocheck(), and its result
 * is passed through BSWAP again before being returned.
 */
 ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    ABI_TYPE prev;
    prev = BSWAP(atomic_cmpxchg__nocheck(host_ptr, BSWAP(cmpv),
                                         BSWAP(newv)));
    return prev;
 }
 #if DATA_SIZE >= 16
 /*
 * Reverse-host-endian load helper (only built when DATA_SIZE >= 16):
 * relaxed __atomic_load followed by a byte swap of the loaded value.
 */
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    DATA_TYPE raw;
    ABI_TYPE swapped;
    __atomic_load(host_ptr, &raw, __ATOMIC_RELAXED);
    swapped = BSWAP(raw);
    return swapped;
 }
 /*
 * Reverse-host-endian store helper (only built when DATA_SIZE >= 16):
 * byte-swaps val, then writes it with a relaxed __atomic_store.
 */
 void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
                     ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    ABI_TYPE swapped = BSWAP(val);
    __atomic_store(host_ptr, &swapped, __ATOMIC_RELAXED);
 }
 #else
 /*
 * Reverse-host-endian exchange helper: val is byte-swapped on the way in
 * and the result of atomic_xchg__nocheck() is byte-swapped on the way out.
 */
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                           ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    ABI_TYPE prev = BSWAP(atomic_xchg__nocheck(host_ptr, BSWAP(val)));
    return prev;
 }
 /*
 * Generate one reverse-host-endian helper per operation X: the operand is
 * byte-swapped before atomic_X() and the result is byte-swapped back.
 * This is only instantiated for the bitwise operations (and/or/xor), which
 * act on each bit independently and so are unaffected by byte order.
 */
 #define GEN_ATOMIC_HELPER(X)                                        \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
                 ABI_TYPE val EXTRA_ARGS)                           \
 {                                                                   \
    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
    return BSWAP(atomic_##X(haddr, BSWAP(val)));                    \
 }
 GEN_ATOMIC_HELPER(fetch_and)
 GEN_ATOMIC_HELPER(fetch_or)
 GEN_ATOMIC_HELPER(fetch_xor)
 GEN_ATOMIC_HELPER(and_fetch)
 GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 #undef GEN_ATOMIC_HELPER
 /* Note that for addition, we need to use a separate cmpxchg loop instead
   of bswaps for the reverse-host-endian helpers.  */
 /*
 * Reverse-host-endian fetch-and-add built on a compare-and-swap loop.
 * Returns the value held before the addition, in byte-swapped (i.e.
 * reverse-host-endian interpreted) form.
 */
 ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
                         ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    DATA_TYPE expected, observed, before, to_store;
    observed = atomic_read__nocheck(host_ptr);
    do {
        /* Retry from the most recently observed raw memory contents */
        expected = observed;
        before = BSWAP(expected);
        to_store = BSWAP(before + val);
        observed = atomic_cmpxchg__nocheck(host_ptr, expected, to_store);
    } while (observed != expected);
    return before;
 }
 /*
 * Reverse-host-endian add-and-fetch built on a compare-and-swap loop.
 * Returns the sum that was stored, i.e. the value after the addition.
 */
 ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
                         ABI_TYPE val EXTRA_ARGS)
 {
    DATA_TYPE *host_ptr = ATOMIC_MMU_LOOKUP;
    DATA_TYPE expected, observed, sum, to_store;
    observed = atomic_read__nocheck(host_ptr);
    do {
        /* Retry from the most recently observed raw memory contents */
        expected = observed;
        sum = BSWAP(expected) + val;
        to_store = BSWAP(sum);
        observed = atomic_cmpxchg__nocheck(host_ptr, expected, to_store);
    } while (observed != expected);
    return sum;
 }
 #endif /* DATA_SIZE >= 16 */
 #undef END
 #endif /* DATA_SIZE > 1 */
 #undef BSWAP
 #undef ABI_TYPE
 #undef DATA_TYPE
 #undef SUFFIX
 #undef DATA_SIZE
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -9,3 +9,6 @@ common-obj-$(CONFIG_TPM) += tpm.o
 common-obj-y += hostmem.o hostmem-ram.o
 common-obj-$(CONFIG_LINUX) += hostmem-file.o
 common-obj-y += cryptodev.o
 common-obj-y += cryptodev-builtin.o
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -1,7 +1,7 @@
 /*
 * QEMU Baum Braille Device
 *
- * Copyright (c) 2008 Samuel Thibault
+ * Copyright (c) 2008, 2010-2011, 2016 Samuel Thibault
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -27,12 +27,10 @@
 #include "sysemu/char.h"
 #include "qemu/timer.h"
 #include "hw/usb.h"
 #include "ui/console.h"
 #include <brlapi.h>
 #include <brlapi_constants.h>
 #include <brlapi_keycodes.h>
 #ifdef CONFIG_SDL
 #include <SDL_syswm.h>
 #endif
 #if 0
 #define DPRINTF(fmt, ...) \
@@ -87,11 +85,12 @@
 #define BUF_SIZE 256
 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;
    brlapi_handle_t *brlapi;
    int brlapi_fd;
    unsigned int x, y;
    bool deferred_init;
    uint8_t in_buf[BUF_SIZE];
    uint8_t in_buf_used;
@@ -99,11 +98,17 @@ typedef struct {
    uint8_t out_buf_used, out_buf_ptr;
    QEMUTimer *cellCount_timer;
-} BaumDriverState;
+} BaumChardev;
 #define TYPE_CHARDEV_BRAILLE "chardev-braille"
 #define BAUM_CHARDEV(obj) OBJECT_CHECK(BaumChardev, (obj), TYPE_CHARDEV_BRAILLE)
 /* Let's assume NABCC by default */
-static const uint8_t nabcc_translation[256] = {
+enum way {
-    [0] = ' ',
+    DOTS2ASCII,
    ASCII2DOTS
 };
 static const uint8_t nabcc_translation[2][256] = {
 #ifndef BRLAPI_DOTS
 #define BRLAPI_DOTS(d1,d2,d3,d4,d5,d6,d7,d8) \
    ((d1?BRLAPI_DOT1:0)|\
@@ -115,111 +120,145 @@ static const uint8_t nabcc_translation[256] = {
     (d7?BRLAPI_DOT7:0)|\
     (d8?BRLAPI_DOT8:0))
 #endif
-    [BRLAPI_DOTS(1,0,0,0,0,0,0,0)] = 'a',
+#define DO(dots, ascii) \
-    [BRLAPI_DOTS(1,1,0,0,0,0,0,0)] = 'b',
+    [DOTS2ASCII][dots] = ascii, \
-    [BRLAPI_DOTS(1,0,0,1,0,0,0,0)] = 'c',
+    [ASCII2DOTS][ascii] = dots
-    [BRLAPI_DOTS(1,0,0,1,1,0,0,0)] = 'd',
+    DO(0, ' '),
-    [BRLAPI_DOTS(1,0,0,0,1,0,0,0)] = 'e',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 0, 0), 'a'),
-    [BRLAPI_DOTS(1,1,0,1,0,0,0,0)] = 'f',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 0, 0), 'b'),
-    [BRLAPI_DOTS(1,1,0,1,1,0,0,0)] = 'g',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 0, 0), 'c'),
-    [BRLAPI_DOTS(1,1,0,0,1,0,0,0)] = 'h',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 0, 0), 'd'),
-    [BRLAPI_DOTS(0,1,0,1,0,0,0,0)] = 'i',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 0, 0), 'e'),
-    [BRLAPI_DOTS(0,1,0,1,1,0,0,0)] = 'j',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 0, 0), 'f'),
-    [BRLAPI_DOTS(1,0,1,0,0,0,0,0)] = 'k',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 0, 0), 'g'),
-    [BRLAPI_DOTS(1,1,1,0,0,0,0,0)] = 'l',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 0, 0), 'h'),
-    [BRLAPI_DOTS(1,0,1,1,0,0,0,0)] = 'm',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 0, 0), 'i'),
-    [BRLAPI_DOTS(1,0,1,1,1,0,0,0)] = 'n',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 0, 0), 'j'),
-    [BRLAPI_DOTS(1,0,1,0,1,0,0,0)] = 'o',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 0, 0), 'k'),
-    [BRLAPI_DOTS(1,1,1,1,0,0,0,0)] = 'p',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 0, 0), 'l'),
-    [BRLAPI_DOTS(1,1,1,1,1,0,0,0)] = 'q',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 0, 0), 'm'),
-    [BRLAPI_DOTS(1,1,1,0,1,0,0,0)] = 'r',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 0, 0), 'n'),
-    [BRLAPI_DOTS(0,1,1,1,0,0,0,0)] = 's',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 0, 0), 'o'),
-    [BRLAPI_DOTS(0,1,1,1,1,0,0,0)] = 't',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 0, 0), 'p'),
-    [BRLAPI_DOTS(1,0,1,0,0,1,0,0)] = 'u',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 0, 0), 'q'),
-    [BRLAPI_DOTS(1,1,1,0,0,1,0,0)] = 'v',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 0, 0), 'r'),
-    [BRLAPI_DOTS(0,1,0,1,1,1,0,0)] = 'w',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 0, 0), 's'),
-    [BRLAPI_DOTS(1,0,1,1,0,1,0,0)] = 'x',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 0, 0), 't'),
-    [BRLAPI_DOTS(1,0,1,1,1,1,0,0)] = 'y',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 0, 0), 'u'),
-    [BRLAPI_DOTS(1,0,1,0,1,1,0,0)] = 'z',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 0, 0), 'v'),
    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 0, 0), 'w'),
    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 0, 0), 'x'),
    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 0, 0), 'y'),
    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 0, 0), 'z'),
-    [BRLAPI_DOTS(1,0,0,0,0,0,1,0)] = 'A',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 1, 0), 'A'),
-    [BRLAPI_DOTS(1,1,0,0,0,0,1,0)] = 'B',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 1, 0), 'B'),
-    [BRLAPI_DOTS(1,0,0,1,0,0,1,0)] = 'C',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 1, 0), 'C'),
-    [BRLAPI_DOTS(1,0,0,1,1,0,1,0)] = 'D',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 1, 0), 'D'),
-    [BRLAPI_DOTS(1,0,0,0,1,0,1,0)] = 'E',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 1, 0), 'E'),
-    [BRLAPI_DOTS(1,1,0,1,0,0,1,0)] = 'F',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 1, 0), 'F'),
-    [BRLAPI_DOTS(1,1,0,1,1,0,1,0)] = 'G',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 1, 0), 'G'),
-    [BRLAPI_DOTS(1,1,0,0,1,0,1,0)] = 'H',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 1, 0), 'H'),
-    [BRLAPI_DOTS(0,1,0,1,0,0,1,0)] = 'I',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 1, 0), 'I'),
-    [BRLAPI_DOTS(0,1,0,1,1,0,1,0)] = 'J',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 1, 0), 'J'),
-    [BRLAPI_DOTS(1,0,1,0,0,0,1,0)] = 'K',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 1, 0), 'K'),
-    [BRLAPI_DOTS(1,1,1,0,0,0,1,0)] = 'L',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 1, 0), 'L'),
-    [BRLAPI_DOTS(1,0,1,1,0,0,1,0)] = 'M',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 1, 0), 'M'),
-    [BRLAPI_DOTS(1,0,1,1,1,0,1,0)] = 'N',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 1, 0), 'N'),
-    [BRLAPI_DOTS(1,0,1,0,1,0,1,0)] = 'O',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 1, 0), 'O'),
-    [BRLAPI_DOTS(1,1,1,1,0,0,1,0)] = 'P',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 1, 0), 'P'),
-    [BRLAPI_DOTS(1,1,1,1,1,0,1,0)] = 'Q',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 1, 0), 'Q'),
-    [BRLAPI_DOTS(1,1,1,0,1,0,1,0)] = 'R',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 1, 0), 'R'),
-    [BRLAPI_DOTS(0,1,1,1,0,0,1,0)] = 'S',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 1, 0), 'S'),
-    [BRLAPI_DOTS(0,1,1,1,1,0,1,0)] = 'T',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 1, 0), 'T'),
-    [BRLAPI_DOTS(1,0,1,0,0,1,1,0)] = 'U',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 1, 0), 'U'),
-    [BRLAPI_DOTS(1,1,1,0,0,1,1,0)] = 'V',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 1, 0), 'V'),
-    [BRLAPI_DOTS(0,1,0,1,1,1,1,0)] = 'W',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 1, 0), 'W'),
-    [BRLAPI_DOTS(1,0,1,1,0,1,1,0)] = 'X',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 1, 0), 'X'),
-    [BRLAPI_DOTS(1,0,1,1,1,1,1,0)] = 'Y',
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 1, 0), 'Y'),
-    [BRLAPI_DOTS(1,0,1,0,1,1,1,0)] = 'Z',
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 1, 0), 'Z'),
-    [BRLAPI_DOTS(0,0,1,0,1,1,0,0)] = '0',
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 1, 0, 0), '0'),
-    [BRLAPI_DOTS(0,1,0,0,0,0,0,0)] = '1',
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 0, 0, 0), '1'),
-    [BRLAPI_DOTS(0,1,1,0,0,0,0,0)] = '2',
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 0, 0, 0), '2'),
-    [BRLAPI_DOTS(0,1,0,0,1,0,0,0)] = '3',
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 0, 0, 0), '3'),
-    [BRLAPI_DOTS(0,1,0,0,1,1,0,0)] = '4',
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 1, 0, 0), '4'),
-    [BRLAPI_DOTS(0,1,0,0,0,1,0,0)] = '5',
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 1, 0, 0), '5'),
-    [BRLAPI_DOTS(0,1,1,0,1,0,0,0)] = '6',
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 0, 0, 0), '6'),
-    [BRLAPI_DOTS(0,1,1,0,1,1,0,0)] = '7',
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 1, 0, 0), '7'),
-    [BRLAPI_DOTS(0,1,1,0,0,1,0,0)] = '8',
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 1, 0, 0), '8'),
-    [BRLAPI_DOTS(0,0,1,0,1,0,0,0)] = '9',
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 0, 0, 0), '9'),
-    [BRLAPI_DOTS(0,0,0,1,0,1,0,0)] = '.',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 1, 0, 0), '.'),
-    [BRLAPI_DOTS(0,0,1,1,0,1,0,0)] = '+',
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 1, 0, 0), '+'),
-    [BRLAPI_DOTS(0,0,1,0,0,1,0,0)] = '-',
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 1, 0, 0), '-'),
-    [BRLAPI_DOTS(1,0,0,0,0,1,0,0)] = '*',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 1, 0, 0), '*'),
-    [BRLAPI_DOTS(0,0,1,1,0,0,0,0)] = '/',
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 0, 0, 0), '/'),
-    [BRLAPI_DOTS(1,1,1,0,1,1,0,0)] = '(',
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 1, 0, 0), '('),
-    [BRLAPI_DOTS(0,1,1,1,1,1,0,0)] = ')',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 1, 0, 0), ')'),
-    [BRLAPI_DOTS(1,1,1,1,0,1,0,0)] = '&',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 1, 0, 0), '&'),
-    [BRLAPI_DOTS(0,0,1,1,1,1,0,0)] = '#',
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 1, 0, 0), '#'),
-    [BRLAPI_DOTS(0,0,0,0,0,1,0,0)] = ',',
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 0, 1, 0, 0), ','),
-    [BRLAPI_DOTS(0,0,0,0,1,1,0,0)] = ';',
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 1, 0, 0), ';'),
-    [BRLAPI_DOTS(1,0,0,0,1,1,0,0)] = ':',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 1, 0, 0), ':'),
-    [BRLAPI_DOTS(0,1,1,1,0,1,0,0)] = '!',
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 1, 0, 0), '!'),
-    [BRLAPI_DOTS(1,0,0,1,1,1,0,0)] = '?',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 1, 0, 0), '?'),
-    [BRLAPI_DOTS(0,0,0,0,1,0,0,0)] = '"',
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 0, 0, 0), '"'),
-    [BRLAPI_DOTS(0,0,1,0,0,0,0,0)] ='\'',
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 0, 0, 0), '\''),
-    [BRLAPI_DOTS(0,0,0,1,0,0,0,0)] = '`',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 0, 0), '`'),
-    [BRLAPI_DOTS(0,0,0,1,1,0,1,0)] = '^',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 1, 0), '^'),
-    [BRLAPI_DOTS(0,0,0,1,1,0,0,0)] = '~',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 0, 0), '~'),
-    [BRLAPI_DOTS(0,1,0,1,0,1,1,0)] = '[',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 1, 0), '['),
-    [BRLAPI_DOTS(1,1,0,1,1,1,1,0)] = ']',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 1, 0), ']'),
-    [BRLAPI_DOTS(0,1,0,1,0,1,0,0)] = '{',
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 0, 0), '{'),
-    [BRLAPI_DOTS(1,1,0,1,1,1,0,0)] = '}',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 0, 0), '}'),
-    [BRLAPI_DOTS(1,1,1,1,1,1,0,0)] = '=',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 1, 0, 0), '='),
-    [BRLAPI_DOTS(1,1,0,0,0,1,0,0)] = '<',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 1, 0, 0), '<'),
-    [BRLAPI_DOTS(0,0,1,1,1,0,0,0)] = '>',
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 0, 0, 0), '>'),
-    [BRLAPI_DOTS(1,1,0,1,0,1,0,0)] = '$',
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 1, 0, 0), '$'),
-    [BRLAPI_DOTS(1,0,0,1,0,1,0,0)] = '%',
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 1, 0, 0), '%'),
-    [BRLAPI_DOTS(0,0,0,1,0,0,1,0)] = '@',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 1, 0), '@'),
-    [BRLAPI_DOTS(1,1,0,0,1,1,0,0)] = '|',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 0, 0), '|'),
-    [BRLAPI_DOTS(1,1,0,0,1,1,1,0)] ='\\',
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 1, 0), '\\'),
-    [BRLAPI_DOTS(0,0,0,1,1,1,0,0)] = '_',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 1, 0, 0), '_'),
 };
-/* The serial port can receive more of our data */
+/* The guest OS has started discussing with us, finish initializing BrlAPI */
-static void baum_accept_input(struct CharDriverState *chr)
+static int baum_deferred_init(BaumChardev *baum)
 {
-    BaumDriverState *baum = chr->opaque;
+    int tty = BRLAPI_TTY_DEFAULT;
    QemuConsole *con;
    if (baum->deferred_init) {
        return 1;
    }
    if (brlapi__getDisplaySize(baum->brlapi, &baum->x, &baum->y) == -1) {
        brlapi_perror("baum: brlapi__getDisplaySize");
        return 0;
    }
    con = qemu_console_lookup_by_index(0);
    if (con && qemu_console_is_graphic(con)) {
        tty = qemu_console_get_window_id(con);
        if (tty == -1)
            tty = BRLAPI_TTY_DEFAULT;
    }
    if (brlapi__enterTtyMode(baum->brlapi, tty, NULL) == -1) {
        brlapi_perror("baum: brlapi__enterTtyMode");
        return 0;
    }
    baum->deferred_init = 1;
    return 1;
 }
 /* The serial port can receive more of our data */
 static void baum_chr_accept_input(struct Chardev *chr)
 {
    BaumChardev *baum = BAUM_CHARDEV(chr);
    int room, first;
    if (!baum->out_buf_used)
@@ -243,24 +282,25 @@ static void baum_accept_input(struct CharDriverState *chr)
 }
 /* We want to send a packet */
-static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len)
+static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
 {
    Chardev *chr = CHARDEV(baum);
    uint8_t io_buf[1 + 2 * len], *cur = io_buf;
    int room;
    *cur++ = ESC;
    while (len--)
        if ((*cur++ = *buf++) == ESC)
            *cur++ = ESC;
-    room = qemu_chr_be_can_write(baum->chr);
+    room = qemu_chr_be_can_write(chr);
    len = cur - io_buf;
    if (len <= room) {
        /* Fits */
-        qemu_chr_be_write(baum->chr, io_buf, len);
+        qemu_chr_be_write(chr, io_buf, len);
    } else {
        int first;
        uint8_t out;
        /* Can't fit all, send what can be, and store the rest. */
-        qemu_chr_be_write(baum->chr, io_buf, room);
+        qemu_chr_be_write(chr, io_buf, room);
        len -= room;
        cur = io_buf + room;
        if (len > BUF_SIZE - baum->out_buf_used) {
@@ -285,14 +325,14 @@ static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len
 /* Called when the other end seems to have a wrong idea of our display size */
 static void baum_cellCount_timer_cb(void *opaque)
 {
-    BaumDriverState *baum = opaque;
+    BaumChardev *baum = BAUM_CHARDEV(opaque);
    uint8_t cell_count[] = { BAUM_RSP_CellCount, baum->x * baum->y };
    DPRINTF("Timeout waiting for DisplayData, sending cell count\n");
    baum_write_packet(baum, cell_count, sizeof(cell_count));
 }
 /* Try to interpret a whole incoming packet */
-static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
+static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
 {
    const uint8_t *cur = buf;
    uint8_t req = 0;
@@ -346,8 +386,10 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
                cursor = i + 1;
                c &= ~(BRLAPI_DOT7|BRLAPI_DOT8);
            }
-            if (!(c = nabcc_translation[c]))
+            c = nabcc_translation[DOTS2ASCII][c];
            if (!c) {
                c = '?';
            }
            text[i] = c;
        }
        timer_del(baum->cellCount_timer);
@@ -431,15 +473,17 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 }
 /* The other end is writing some data.  Store it and try to interpret */
-static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
+static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
-    BaumDriverState *baum = chr->opaque;
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    int tocopy, cur, eaten, orig_len = len;
    if (!len)
        return 0;
    if (!baum->brlapi)
        return len;
    if (!baum_deferred_init(baum))
        return len;
    while (len) {
        /* Complete our buffer as much as possible */
@@ -470,20 +514,31 @@ static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
 }
 /* Send the key code to the other end */
-static void baum_send_key(BaumDriverState *baum, uint8_t type, uint8_t value) {
+static void baum_send_key(BaumChardev *baum, uint8_t type, uint8_t value)
 {
    uint8_t packet[] = { type, value };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
 }
 /*
 * Build a three-byte packet { type, value, value2 } and hand it to
 * baum_write_packet().
 */
 static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
                           uint8_t value2)
 {
    uint8_t pkt[3];
    pkt[0] = type;
    pkt[1] = value;
    pkt[2] = value2;
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, pkt, sizeof(pkt));
 }
 /* We got some data on the BrlAPI socket */
 static void baum_chr_read(void *opaque)
 {
-    BaumDriverState *baum = opaque;
+    BaumChardev *baum = BAUM_CHARDEV(opaque);
    brlapi_keyCode_t code;
    int ret;
    if (!baum->brlapi)
        return;
    if (!baum_deferred_init(baum))
        return;
    while ((ret = brlapi__readKey(baum->brlapi, 0, &code)) == 1) {
        DPRINTF("got key %"BRLAPI_PRIxKEYCODE"\n", code);
        /* Emulate */
@@ -540,7 +595,17 @@ static void baum_chr_read(void *opaque)
            }
            break;
        case BRLAPI_KEY_TYPE_SYM:
-            break;
+            {
                brlapi_keyCode_t keysym = code & BRLAPI_KEY_CODE_MASK;
                if (keysym < 0x100) {
                    uint8_t dots = nabcc_translation[ASCII2DOTS][keysym];
                    if (dots) {
                        baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, dots);
                        baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, 0);
                    }
                }
                break;
            }
        }
    }
    if (ret == -1 && (brlapi_errno != BRLAPI_ERROR_LIBCERR || errno != EINTR)) {
@@ -551,45 +616,24 @@ static void baum_chr_read(void *opaque)
    }
 }
-static void baum_close(struct CharDriverState *chr)
+static void baum_chr_free(Chardev *chr)
 {
-    BaumDriverState *baum = chr->opaque;
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    timer_free(baum->cellCount_timer);
    if (baum->brlapi) {
        brlapi__closeConnection(baum->brlapi);
        g_free(baum->brlapi);
    }
    g_free(baum);
 }
-static CharDriverState *chr_baum_init(const char *id,
+static void baum_chr_open(Chardev *chr,
-                                      ChardevBackend *backend,
+                          ChardevBackend *backend,
-                                      ChardevReturn *ret,
+                          bool *be_opened,
-                                      Error **errp)
+                          Error **errp)
 {
-    ChardevCommon *common = backend->u.braille.data;
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    BaumDriverState *baum;
    CharDriverState *chr;
    brlapi_handle_t *handle;
 #if defined(CONFIG_SDL)
 #if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    SDL_SysWMinfo info;
 #endif
 #endif
    int tty;
    chr = qemu_chr_alloc(common, errp);
    if (!chr) {
        return NULL;
    }
    baum = g_malloc0(sizeof(BaumDriverState));
    baum->chr = chr;
    chr->opaque = baum;
    chr->chr_write = baum_write;
    chr->chr_accept_input = baum_accept_input;
    chr->chr_close = baum_close;
    handle = g_malloc0(brlapi_getHandleSize());
    baum->brlapi = handle;
@@ -598,52 +642,41 @@ static CharDriverState *chr_baum_init(const char *id,
    if (baum->brlapi_fd == -1) {
        error_setg(errp, "brlapi__openConnection: %s",
                   brlapi_strerror(brlapi_error_location()));
-        goto fail_handle;
+        g_free(handle);
        return;
    }
    baum->deferred_init = 0;
    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);
    if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
        error_setg(errp, "brlapi__getDisplaySize: %s",
                   brlapi_strerror(brlapi_error_location()));
        goto fail;
    }
 #if defined(CONFIG_SDL)
 #if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
    memset(&info, 0, sizeof(info));
    SDL_VERSION(&info.version);
    if (SDL_GetWMInfo(&info))
        tty = info.info.x11.wmwindow;
    else
 #endif
 #endif
        tty = BRLAPI_TTY_DEFAULT;
    if (brlapi__enterTtyMode(handle, tty, NULL) == -1) {
        error_setg(errp, "brlapi__enterTtyMode: %s",
                   brlapi_strerror(brlapi_error_location()));
        goto fail;
    }
    qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum);
    return chr;
 fail:
    timer_free(baum->cellCount_timer);
    brlapi__closeConnection(handle);
 fail_handle:
    g_free(handle);
    g_free(chr);
    g_free(baum);
    return NULL;
 }
 /*
 * Class initializer for the braille chardev type: installs the Baum
 * open/write/accept_input/free callbacks into the ChardevClass.
 * The data argument is not used.
 */
 static void char_braille_class_init(ObjectClass *oc, void *data)
 {
    ChardevClass *klass = CHARDEV_CLASS(oc);
    klass->chr_free = baum_chr_free;
    klass->chr_accept_input = baum_chr_accept_input;
    klass->chr_write = baum_chr_write;
    klass->open = baum_chr_open;
 }
 /*
 * Type description for TYPE_CHARDEV_BRAILLE: a TYPE_CHARDEV subtype whose
 * instances are BaumChardev-sized and whose class is set up by
 * char_braille_class_init().
 */
 static const TypeInfo char_braille_type_info = {
    .parent = TYPE_CHARDEV,
    .name = TYPE_CHARDEV_BRAILLE,
    .class_init = char_braille_class_init,
    .instance_size = sizeof(BaumChardev),
 };
 static void register_types(void)
 {
-    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
+    static const CharDriver driver = {
-                         chr_baum_init);
+        .kind = CHARDEV_BACKEND_KIND_BRAILLE,
    };
    register_char_driver(&driver);
    type_register_static(&char_braille_type_info);
 }
 type_init(register_types);
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -0,0 +1,400 @@
 /*
 * QEMU Cryptodev backend for QEMU cipher APIs
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * Authors:
 *    Gonglei <arei.gonglei@huawei.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 */
 #include "qemu/osdep.h"
 #include "sysemu/cryptodev.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "standard-headers/linux/virtio_crypto.h"
 #include "crypto/cipher.h"
 /**
 * @TYPE_CRYPTODEV_BACKEND_BUILTIN:
 * name of backend that uses QEMU cipher API
 */
 #define TYPE_CRYPTODEV_BACKEND_BUILTIN "cryptodev-backend-builtin"
 #define CRYPTODEV_BACKEND_BUILTIN(obj) \
    OBJECT_CHECK(CryptoDevBackendBuiltin, \
                 (obj), TYPE_CRYPTODEV_BACKEND_BUILTIN)
 typedef struct CryptoDevBackendBuiltin
                         CryptoDevBackendBuiltin;
 /*
  * State kept for one symmetric session of the builtin backend.
  * Instances are stored in CryptoDevBackendBuiltin.sessions[], indexed by
  * session id.
  */
 typedef struct CryptoDevBackendBuiltinSession {
    /* Cipher context created with qcrypto_cipher_new() for this session */
    QCryptoCipher *cipher;
    uint8_t direction; /* encryption or decryption */
    uint8_t type; /* cipher? hash? aead? */
    /* List linkage; NOTE(review): not referenced by the code in this file */
    QTAILQ_ENTRY(CryptoDevBackendBuiltinSession) next;
 } CryptoDevBackendBuiltinSession;
 /* Max number of symmetric sessions */
 #define MAX_NUM_SESSIONS 256
 #define CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN    512
 #define CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN  64
 /*
  * Instance structure of the builtin cryptodev backend.
  */
 struct CryptoDevBackendBuiltin {
    /* Parent object; CRYPTODEV_BACKEND_BUILTIN() casts from it */
    CryptoDevBackend parent_obj;
    /* Session table indexed by session id; a NULL entry is a free slot */
    CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS];
 };
 /*
  * Backend init hook: validate the configuration and publish capabilities.
  *
  * Sets an error through @errp and returns early unless exactly one queue is
  * configured.  Otherwise a single client is created for queue 0, the
  * advertised services, algorithms and size limits are stored in
  * backend->conf, and the backend is marked ready.
  */
 static void cryptodev_builtin_init(
             CryptoDevBackend *backend, Error **errp)
 {
    /* Only support one queue */
    int queues = backend->conf.peers.queues;
    CryptoDevBackendClient *cc;
    if (queues != 1) {
        error_setg(errp,
                  "Only support one queue in cryptodev-builtin backend");
        return;
    }
    cc = cryptodev_backend_new_client(
              "cryptodev-builtin", NULL);
    /* Constant label, so a plain copy is enough (no format expansion) */
    cc->info_str = g_strdup("cryptodev-builtin0");
    cc->queue_index = 0;
    backend->conf.peers.ccs[0] = cc;
    backend->conf.crypto_services =
                         1u << VIRTIO_CRYPTO_SERVICE_CIPHER |
                         1u << VIRTIO_CRYPTO_SERVICE_HASH |
                         1u << VIRTIO_CRYPTO_SERVICE_MAC;
    backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC;
    backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1;
    /*
     * Maximum length of a crypto request.  The value is chosen so that
     * adding sizeof(CryptoDevBackendSymOpInfo) to a request length cannot
     * overflow when memory is allocated for the request.
     */
    backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo);
    backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
    backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
    cryptodev_backend_set_ready(backend, true);
 }
 /*
  * Find the lowest-numbered free slot in builtin->sessions.
  *
  * Returns the slot index, or -1 when all MAX_NUM_SESSIONS slots are taken.
  */
 static int
 cryptodev_builtin_get_unused_session_index(
                 CryptoDevBackendBuiltin *builtin)
 {
    size_t slot = 0;
    while (slot < MAX_NUM_SESSIONS) {
        if (!builtin->sessions[slot]) {
            return slot;
        }
        slot++;
    }
    return -1;
 }
 #define AES_KEYSIZE_128 16
 #define AES_KEYSIZE_192 24
 #define AES_KEYSIZE_256 32
 #define AES_KEYSIZE_128_XTS AES_KEYSIZE_256
 #define AES_KEYSIZE_256_XTS 64
 /*
  * Map an AES key length (in bytes) and cipher mode to a
  * QCRYPTO_CIPHER_ALG_AES_* value.
  *
  * In XTS mode the 32- and 64-byte lengths select AES-128 and AES-256
  * respectively; a 64-byte key is rejected in every other mode.
  *
  * Returns the algorithm, or -1 with @errp set for an unsupported length.
  */
 static int
 cryptodev_builtin_get_aes_algo(uint32_t key_len, int mode, Error **errp)
 {
    const int is_xts = (mode == QCRYPTO_CIPHER_MODE_XTS);
    switch (key_len) {
    case AES_KEYSIZE_128:
        return QCRYPTO_CIPHER_ALG_AES_128;
    case AES_KEYSIZE_192:
        return QCRYPTO_CIPHER_ALG_AES_192;
    case AES_KEYSIZE_256: /* equals AES_KEYSIZE_128_XTS */
        return is_xts ? QCRYPTO_CIPHER_ALG_AES_128
                      : QCRYPTO_CIPHER_ALG_AES_256;
    case AES_KEYSIZE_256_XTS:
        if (is_xts) {
            return QCRYPTO_CIPHER_ALG_AES_256;
        }
        break;
    default:
        break;
    }
    error_setg(errp, "Unsupported key length :%u", key_len);
    return -1;
 }
 /*
  * Create a cipher session from @sess_info and store it in the first free
  * slot of builtin->sessions.
  *
  * Only VIRTIO_CRYPTO_SYM_OP_CIPHER requests are accepted.  The virtio cipher
  * algorithm is translated to a QEMU cipher mode plus algorithm: AES variants
  * derive the algorithm from the key length, 3DES variants use
  * QCRYPTO_CIPHER_ALG_3DES.
  *
  * Returns the slot index (the session id) on success, -1 with @errp set on
  * failure.
  */
 static int cryptodev_builtin_create_cipher_session(
                    CryptoDevBackendBuiltin *builtin,
                    CryptoDevBackendSymSessionInfo *sess_info,
                    Error **errp)
 {
    CryptoDevBackendBuiltinSession *session;
    QCryptoCipher *cipher;
    bool is_aes = false;
    int slot;
    int mode;
    int algo;
    if (sess_info->op_type != VIRTIO_CRYPTO_SYM_OP_CIPHER) {
        error_setg(errp, "Unsupported optype :%u", sess_info->op_type);
        return -1;
    }
    slot = cryptodev_builtin_get_unused_session_index(builtin);
    if (slot < 0) {
        error_setg(errp, "Total number of sessions created exceeds %u",
                  MAX_NUM_SESSIONS);
        return -1;
    }
    /* Pick the cipher mode; AES arms fall into the 3DES arm of same mode */
    switch (sess_info->cipher_alg) {
    case VIRTIO_CRYPTO_CIPHER_AES_ECB:
        is_aes = true;
        /* fall through */
    case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
        mode = QCRYPTO_CIPHER_MODE_ECB;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CBC:
        is_aes = true;
        /* fall through */
    case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
        mode = QCRYPTO_CIPHER_MODE_CBC;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_CTR:
        is_aes = true;
        /* fall through */
    case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
        mode = QCRYPTO_CIPHER_MODE_CTR;
        break;
    case VIRTIO_CRYPTO_CIPHER_AES_XTS:
        is_aes = true;
        mode = QCRYPTO_CIPHER_MODE_XTS;
        break;
    default:
        error_setg(errp, "Unsupported cipher alg :%u",
                   sess_info->cipher_alg);
        return -1;
    }
    if (is_aes) {
        /* The AES algorithm depends on the key length (and on XTS mode) */
        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
                                              mode, errp);
        if (algo < 0) {
            return -1;
        }
    } else {
        algo = QCRYPTO_CIPHER_ALG_3DES;
    }
    cipher = qcrypto_cipher_new(algo, mode,
                                sess_info->cipher_key,
                                sess_info->key_len,
                                errp);
    if (cipher == NULL) {
        return -1;
    }
    session = g_new0(CryptoDevBackendBuiltinSession, 1);
    session->cipher = cipher;
    session->direction = sess_info->direction;
    session->type = sess_info->op_type;
    builtin->sessions[slot] = session;
    return slot;
 }
 /*
  * create_session hook of the builtin backend.
  *
  * Only VIRTIO_CRYPTO_CIPHER_CREATE_SESSION is handled; every other opcode
  * (including the HASH and MAC session opcodes) is rejected.
  *
  * Returns the new session id, or a negative value with @errp set.
  */
 static int64_t cryptodev_builtin_sym_create_session(
           CryptoDevBackend *backend,
           CryptoDevBackendSymSessionInfo *sess_info,
           uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendBuiltin *builtin =
                      CRYPTODEV_BACKEND_BUILTIN(backend);
    if (sess_info->op_code == VIRTIO_CRYPTO_CIPHER_CREATE_SESSION) {
        /* Slot index on success, negative on failure; passed through as is */
        return cryptodev_builtin_create_cipher_session(
                           builtin, sess_info, errp);
    }
    error_setg(errp, "Unsupported opcode :%" PRIu32 "",
               sess_info->op_code);
    return -1;
 }
 /*
  * close_session hook of the builtin backend: free the cipher and the
  * session object stored under @session_id and mark the slot free again.
  *
  * Returns 0 on success, -1 with @errp set when @session_id is out of range
  * or names an empty slot.
  */
 static int cryptodev_builtin_sym_close_session(
           CryptoDevBackend *backend,
           uint64_t session_id,
           uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendBuiltin *builtin =
                      CRYPTODEV_BACKEND_BUILTIN(backend);
    CryptoDevBackendBuiltinSession *session = NULL;
    /* Only index the table once the id is known to be in range */
    if (session_id < MAX_NUM_SESSIONS) {
        session = builtin->sessions[session_id];
    }
    if (session == NULL) {
        error_setg(errp, "Cannot find a valid session id: %" PRIu64 "",
                      session_id);
        return -1;
    }
    qcrypto_cipher_free(session->cipher);
    g_free(session);
    builtin->sessions[session_id] = NULL;
    return 0;
 }
 /*
  * do_sym_op hook of the builtin backend: run one cipher request on the
  * session named by op_info->session_id.
  *
  * The IV is programmed only when the request carries one (iv_len > 0), so
  * requests without an IV skip that step.  The data in op_info->src
  * (src_len bytes) is then encrypted or decrypted into op_info->dst
  * according to the direction stored in the session.
  *
  * Returns VIRTIO_CRYPTO_OK on success, or -VIRTIO_CRYPTO_INVSESS (unknown
  * session), -VIRTIO_CRYPTO_NOTSUPP (algorithm chaining) or
  * -VIRTIO_CRYPTO_ERR (cipher failure) with @errp set.
  */
 static int cryptodev_builtin_sym_operation(
                 CryptoDevBackend *backend,
                 CryptoDevBackendSymOpInfo *op_info,
                 uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendBuiltin *builtin =
                      CRYPTODEV_BACKEND_BUILTIN(backend);
    CryptoDevBackendBuiltinSession *sess;
    int ret;
    if (op_info->session_id >= MAX_NUM_SESSIONS ||
              builtin->sessions[op_info->session_id] == NULL) {
        error_setg(errp, "Cannot find a valid session id: %" PRIu64 "",
                   op_info->session_id);
        return -VIRTIO_CRYPTO_INVSESS;
    }
    if (op_info->op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
        error_setg(errp,
               "Algorithm chain is unsupported for cryptodev-builtin");
        return -VIRTIO_CRYPTO_NOTSUPP;
    }
    sess = builtin->sessions[op_info->session_id];
    /* Requests without an IV have nothing to program */
    if (op_info->iv_len > 0) {
        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
                                   op_info->iv_len, errp);
        if (ret < 0) {
            return -VIRTIO_CRYPTO_ERR;
        }
    }
    if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
        ret = qcrypto_cipher_encrypt(sess->cipher, op_info->src,
                                     op_info->dst, op_info->src_len, errp);
    } else {
        ret = qcrypto_cipher_decrypt(sess->cipher, op_info->src,
                                     op_info->dst, op_info->src_len, errp);
    }
    if (ret < 0) {
        return -VIRTIO_CRYPTO_ERR;
    }
    return VIRTIO_CRYPTO_OK;
 }
 /*
  * cleanup hook of the builtin backend: close every open session, release
  * the queue-0 client and mark the backend not ready.
  *
  * This hook is reached from cryptodev_backend_finalize() for every
  * instance, including one whose init was rejected because "queues" was not
  * 1.  It therefore must not assert on the queue count; it only releases
  * what cryptodev_builtin_init() can have created, i.e. the client stored in
  * ccs[0].
  */
 static void cryptodev_builtin_cleanup(
             CryptoDevBackend *backend,
             Error **errp)
 {
    CryptoDevBackendBuiltin *builtin =
                      CRYPTODEV_BACKEND_BUILTIN(backend);
    CryptoDevBackendClient *cc;
    size_t i;
    for (i = 0; i < MAX_NUM_SESSIONS; i++) {
        if (builtin->sessions[i] != NULL) {
            cryptodev_builtin_sym_close_session(
                    backend, i, 0, errp);
        }
    }
    /* Single-queue backend: queue 0 is the only client slot ever filled */
    cc = backend->conf.peers.ccs[0];
    if (cc) {
        cryptodev_backend_free_client(cc);
        backend->conf.peers.ccs[0] = NULL;
    }
    cryptodev_backend_set_ready(backend, false);
 }
 /*
  * QOM class initializer: plugs the builtin implementations into the
  * CryptoDevBackendClass hooks (init, cleanup, session create/close and
  * symmetric operation).
  */
 static void
 cryptodev_builtin_class_init(ObjectClass *oc, void *data)
 {
    CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_CLASS(oc);
    bc->init = cryptodev_builtin_init;
    bc->cleanup = cryptodev_builtin_cleanup;
    bc->create_session = cryptodev_builtin_sym_create_session;
    bc->close_session = cryptodev_builtin_sym_close_session;
    bc->do_sym_op = cryptodev_builtin_sym_operation;
 }
 /*
  * QOM type description of the builtin backend: a TYPE_CRYPTODEV_BACKEND
  * subclass whose instances are CryptoDevBackendBuiltin structures.
  */
 static const TypeInfo cryptodev_builtin_info = {
    .name = TYPE_CRYPTODEV_BACKEND_BUILTIN,
    .parent = TYPE_CRYPTODEV_BACKEND,
    .class_init = cryptodev_builtin_class_init,
    .instance_size = sizeof(CryptoDevBackendBuiltin),
 };
 /* Register the builtin backend type; hooked in through type_init() below. */
 static void
 cryptodev_builtin_register_types(void)
 {
    type_register_static(&cryptodev_builtin_info);
 }
 type_init(cryptodev_builtin_register_types);
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -0,0 +1,271 @@
 /*
 * QEMU Crypto Device Implementation
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * Authors:
 *    Gonglei <arei.gonglei@huawei.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 */
 #include "qemu/osdep.h"
 #include "sysemu/cryptodev.h"
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "qapi-types.h"
 #include "qapi-visit.h"
 #include "qemu/config-file.h"
 #include "qom/object_interfaces.h"
 #include "hw/virtio/virtio-crypto.h"
 /*
  * List of all clients created by cryptodev_backend_new_client() and not yet
  * released by cryptodev_backend_free_client(); initialised in
  * cryptodev_backend_class_init().
  */
 static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients;
 /*
  * Allocate a zero-initialised client, give it private copies of @model and
  * (when non-NULL) @name, and append it to the crypto_clients list.
  *
  * Returns the new client; release it with cryptodev_backend_free_client().
  */
 CryptoDevBackendClient *
 cryptodev_backend_new_client(const char *model,
                                    const char *name)
 {
    CryptoDevBackendClient *client = g_malloc0(sizeof(CryptoDevBackendClient));
    client->model = g_strdup(model);
    /* Without a name the field keeps the NULL left by the zeroed allocation */
    if (name != NULL) {
        client->name = g_strdup(name);
    }
    QTAILQ_INSERT_TAIL(&crypto_clients, client, next);
    return client;
 }
 /*
  * Unlink @cc from the crypto_clients list and free it together with its
  * name, model and info_str strings.
  */
 void cryptodev_backend_free_client(
                  CryptoDevBackendClient *cc)
 {
    QTAILQ_REMOVE(&crypto_clients, cc, next);
    g_free(cc->name);
    g_free(cc->model);
    g_free(cc->info_str);
    g_free(cc);
 }
 /*
  * Invoke the backend class's cleanup hook, if it provides one.
  * Errors raised by the hook are reported through @errp.
  */
 void cryptodev_backend_cleanup(
             CryptoDevBackend *backend,
             Error **errp)
 {
    CryptoDevBackendClass *klass =
                  CRYPTODEV_BACKEND_GET_CLASS(backend);
    if (!klass->cleanup) {
        return;
    }
    klass->cleanup(backend, errp);
 }
 /*
  * Forward a session-creation request to the backend class.
  *
  * Returns whatever the class's create_session hook returns, or -1 when the
  * class does not implement the hook.
  */
 int64_t cryptodev_backend_sym_create_session(
           CryptoDevBackend *backend,
           CryptoDevBackendSymSessionInfo *sess_info,
           uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendClass *klass =
                      CRYPTODEV_BACKEND_GET_CLASS(backend);
    if (!klass->create_session) {
        return -1;
    }
    return klass->create_session(backend, sess_info, queue_index, errp);
 }
 /*
  * Forward a session-close request to the backend class.
  *
  * Returns whatever the class's close_session hook returns, or -1 when the
  * class does not implement the hook.
  */
 int cryptodev_backend_sym_close_session(
           CryptoDevBackend *backend,
           uint64_t session_id,
           uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendClass *klass =
                      CRYPTODEV_BACKEND_GET_CLASS(backend);
    if (!klass->close_session) {
        return -1;
    }
    return klass->close_session(backend, session_id, queue_index, errp);
 }
 /*
  * Forward a symmetric crypto request to the backend class.
  *
  * Returns whatever the class's do_sym_op hook returns, or
  * -VIRTIO_CRYPTO_ERR when the class does not implement the hook.
  */
 static int cryptodev_backend_sym_operation(
                 CryptoDevBackend *backend,
                 CryptoDevBackendSymOpInfo *op_info,
                 uint32_t queue_index, Error **errp)
 {
    CryptoDevBackendClass *klass =
                      CRYPTODEV_BACKEND_GET_CLASS(backend);
    if (!klass->do_sym_op) {
        return -VIRTIO_CRYPTO_ERR;
    }
    return klass->do_sym_op(backend, op_info, queue_index, errp);
 }
 /*
  * Entry point for executing a crypto request.
  *
  * @opaque is treated as a VirtIOCryptoReq.  Only requests flagged
  * CRYPTODEV_BACKEND_ALG_SYM are supported; they are handed to
  * cryptodev_backend_sym_operation() and its result is returned.  Any other
  * flag value sets @errp and returns -VIRTIO_CRYPTO_NOTSUPP.
  */
 int cryptodev_backend_crypto_operation(
                 CryptoDevBackend *backend,
                 void *opaque,
                 uint32_t queue_index, Error **errp)
 {
    VirtIOCryptoReq *req = opaque;
    if (req->flags != CRYPTODEV_BACKEND_ALG_SYM) {
        error_setg(errp, "Unsupported cryptodev alg type: %" PRIu32 "",
                   req->flags);
        return -VIRTIO_CRYPTO_NOTSUPP;
    }
    return cryptodev_backend_sym_operation(backend, req->u.sym_op_info,
                                           queue_index, errp);
 }
 /*
  * Getter for the "queues" property: visits the backend's configured queue
  * count as a uint32.
  */
 static void
 cryptodev_backend_get_queues(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
 {
    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
    /* Visit a local copy rather than the configuration field itself */
    uint32_t value = backend->conf.peers.queues;
    visit_type_uint32(v, name, &value, errp);
 }
 /*
  * Setter for the "queues" property.
  *
  * Reads a uint32 through the visitor and stores it as the backend's queue
  * count.  A visitor failure or a value of 0 leaves the configuration
  * untouched and is reported through @errp.
  */
 static void
 cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
 {
    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
    Error *err = NULL;
    uint32_t queues;
    visit_type_uint32(v, name, &queues, &err);
    if (err == NULL) {
        if (queues != 0) {
            backend->conf.peers.queues = queues;
        } else {
            error_setg(&err, "Property '%s.%s' doesn't take value '%"
                       PRIu32 "'", object_get_typename(obj), name, queues);
        }
    }
    /* Runs on every path, exactly like the original's shared exit label */
    error_propagate(errp, err);
 }
 /*
  * UserCreatable "complete" hook: run the backend class's init hook, if
  * any, and hand an error it raised on to @errp.
  */
 static void
 cryptodev_backend_complete(UserCreatable *uc, Error **errp)
 {
    CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
    CryptoDevBackendClass *klass = CRYPTODEV_BACKEND_GET_CLASS(uc);
    Error *err = NULL;
    if (!klass->init) {
        return;
    }
    klass->init(backend, &err);
    if (err != NULL) {
        error_propagate(errp, err);
    }
 }
 /* Record whether the backend is currently in use (stored in is_used). */
 void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
 {
    backend->is_used = used;
 }
 /* Return the flag last stored by cryptodev_backend_set_used(). */
 bool cryptodev_backend_is_used(CryptoDevBackend *backend)
 {
    return backend->is_used;
 }
 /* Record whether the backend is ready (stored in the ready field). */
 void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready)
 {
    backend->ready = ready;
 }
 /* Return the flag last stored by cryptodev_backend_set_ready(). */
 bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
 {
    return backend->ready;
 }
 /*
  * UserCreatable "can_be_deleted" hook: deletion is allowed only while the
  * backend is not marked as used.  @errp is not set by this function.
  */
 static bool
 cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
 {
    return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
 }
 /*
  * Instance initializer: adds the "queues" property, backed by
  * cryptodev_backend_get_queues()/cryptodev_backend_set_queues(), and gives
  * it a default value of 1.
  */
 static void cryptodev_backend_instance_init(Object *obj)
 {
    object_property_add(obj, "queues", "int",
                          cryptodev_backend_get_queues,
                          cryptodev_backend_set_queues,
                          NULL, NULL, NULL);
    /* Initialize devices' queues property to 1 */
    object_property_set_int(obj, 1, "queues", NULL);
 }
 /*
  * Instance finalizer: runs the backend's cleanup hook unconditionally.
  * Errors from the hook are discarded (NULL is passed as errp).
  */
 static void cryptodev_backend_finalize(Object *obj)
 {
    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
    cryptodev_backend_cleanup(backend, NULL);
 }
 /*
  * Class initializer of the abstract backend: installs the UserCreatable
  * hooks and initialises the file-level crypto_clients list.
  */
 static void
 cryptodev_backend_class_init(ObjectClass *oc, void *data)
 {
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
    ucc->complete = cryptodev_backend_complete;
    ucc->can_be_deleted = cryptodev_backend_can_be_deleted;
    QTAILQ_INIT(&crypto_clients);
 }
 /*
  * QOM type description of the cryptodev backend base type: a TYPE_OBJECT
  * subclass implementing the TYPE_USER_CREATABLE interface.
  */
 static const TypeInfo cryptodev_backend_info = {
    .name = TYPE_CRYPTODEV_BACKEND,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(CryptoDevBackend),
    .instance_init = cryptodev_backend_instance_init,
    .instance_finalize = cryptodev_backend_finalize,
    .class_size = sizeof(CryptoDevBackendClass),
    .class_init = cryptodev_backend_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
 };
 /* Register the backend base type; hooked in through type_init() below. */
 static void
 cryptodev_backend_register_types(void)
 {
    type_register_static(&cryptodev_backend_info);
 }
 type_init(cryptodev_backend_register_types);
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -64,14 +64,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #endif
 }
 static void
 file_backend_class_init(ObjectClass *oc, void *data)
 {
    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
    bc->alloc = file_backend_memory_alloc;
 }
 static char *get_mem_path(Object *o, Error **errp)
 {
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
@@ -112,13 +104,18 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
 }
 static void
-file_backend_instance_init(Object *o)
+file_backend_class_init(ObjectClass *oc, void *data)
 {
-    object_property_add_bool(o, "share",
+    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
-                        file_memory_backend_get_share,
+
-                        file_memory_backend_set_share, NULL);
+    bc->alloc = file_backend_memory_alloc;
-    object_property_add_str(o, "mem-path", get_mem_path,
+
-                            set_mem_path, NULL);
+    object_class_property_add_bool(oc, "share",
        file_memory_backend_get_share, file_memory_backend_set_share,
        &error_abort);
    object_class_property_add_str(oc, "mem-path",
        get_mem_path, set_mem_path,
        &error_abort);
 }
 static void file_backend_instance_finalize(Object *o)
@@ -132,7 +129,6 @@ static const TypeInfo file_backend_info = {
    .name = TYPE_MEMORY_BACKEND_FILE,
    .parent = TYPE_MEMORY_BACKEND,
    .class_init = file_backend_class_init,
    .instance_init = file_backend_instance_init,
    .instance_finalize = file_backend_instance_finalize,
    .instance_size = sizeof(HostMemoryBackendFile),
 };
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -241,26 +241,6 @@ static void host_memory_backend_init(Object *obj)
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->prealloc = mem_prealloc;
    object_property_add_bool(obj, "merge",
                        host_memory_backend_get_merge,
                        host_memory_backend_set_merge, NULL);
    object_property_add_bool(obj, "dump",
                        host_memory_backend_get_dump,
                        host_memory_backend_set_dump, NULL);
    object_property_add_bool(obj, "prealloc",
                        host_memory_backend_get_prealloc,
                        host_memory_backend_set_prealloc, NULL);
    object_property_add(obj, "size", "int",
                        host_memory_backend_get_size,
                        host_memory_backend_set_size, NULL, NULL, NULL);
    object_property_add(obj, "host-nodes", "int",
                        host_memory_backend_get_host_nodes,
                        host_memory_backend_set_host_nodes, NULL, NULL, NULL);
    object_property_add_enum(obj, "policy", "HostMemPolicy",
                             HostMemPolicy_lookup,
                             host_memory_backend_get_policy,
                             host_memory_backend_set_policy, NULL);
 }
 MemoryRegion *
@@ -368,6 +348,24 @@ host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
    }
 }
 /*
  * Getter for the "id" property: returns a newly allocated copy of
  * backend->id (g_strdup).  @errp is not set by this function.
  */
 static char *get_id(Object *o, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    return g_strdup(backend->id);
 }
 /*
  * Setter for the "id" property.  The id can be set only once: if
  * backend->id is already set the call fails through @errp; otherwise a
  * copy of @str is stored.
  */
 static void set_id(Object *o, const char *str, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    if (backend->id) {
        error_setg(errp, "cannot change property value");
        return;
    }
    backend->id = g_strdup(str);
 }
 static void
 host_memory_backend_class_init(ObjectClass *oc, void *data)
 {
@@ -375,6 +373,35 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge, &error_abort);
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump, &error_abort);
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc, &error_abort);
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL, &error_abort);
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL, &error_abort);
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
 }
 /* Instance finalizer: releases the id string stored by set_id(). */
 static void host_memory_backend_finalize(Object *o)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    g_free(backend->id);
 }
 static const TypeInfo host_memory_backend_info = {
@@ -385,6 +412,7 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -31,18 +31,23 @@
 #define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)
 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;
    QemuInputHandlerState *hs;
    int axis[INPUT_AXIS__MAX];
    bool btns[INPUT_BUTTON__MAX];
    bool btnc[INPUT_BUTTON__MAX];
    uint8_t outbuf[32];
    int outlen;
-} MouseState;
+} MouseChardev;
-static void msmouse_chr_accept_input(CharDriverState *chr)
+#define TYPE_CHARDEV_MSMOUSE "chardev-msmouse"
 #define MOUSE_CHARDEV(obj)                                      \
    OBJECT_CHECK(MouseChardev, (obj), TYPE_CHARDEV_MSMOUSE)
 static void msmouse_chr_accept_input(Chardev *chr)
 {
-    MouseState *mouse = chr->opaque;
+    MouseChardev *mouse = MOUSE_CHARDEV(chr);
    int len;
    len = qemu_chr_be_can_write(chr);
@@ -60,7 +65,7 @@ static void msmouse_chr_accept_input(CharDriverState *chr)
    }
 }
-static void msmouse_queue_event(MouseState *mouse)
+static void msmouse_queue_event(MouseChardev *mouse)
 {
    unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
    int dx, dy, count = 3;
@@ -97,7 +102,7 @@ static void msmouse_queue_event(MouseState *mouse)
 static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
                                InputEvent *evt)
 {
-    MouseState *mouse = (MouseState *)dev;
+    MouseChardev *mouse = MOUSE_CHARDEV(dev);
    InputMoveEvent *move;
    InputBtnEvent *btn;
@@ -121,24 +126,24 @@ static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
 static void msmouse_input_sync(DeviceState *dev)
 {
-    MouseState *mouse = (MouseState *)dev;
+    MouseChardev *mouse = MOUSE_CHARDEV(dev);
    Chardev *chr = CHARDEV(dev);
    msmouse_queue_event(mouse);
-    msmouse_chr_accept_input(mouse->chr);
+    msmouse_chr_accept_input(chr);
 }
-static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
+static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
 {
    /* Ignore writes to mouse port */
    return len;
 }
-static void msmouse_chr_close (struct CharDriverState *chr)
+static void msmouse_chr_free(struct Chardev *chr)
 {
-    MouseState *mouse = chr->opaque;
+    MouseChardev *mouse = MOUSE_CHARDEV(chr);
    qemu_input_handler_unregister(mouse->hs);
    g_free(mouse);
 }
 static QemuInputHandler msmouse_handler = {
@@ -148,38 +153,43 @@ static QemuInputHandler msmouse_handler = {
    .sync  = msmouse_input_sync,
 };
-static CharDriverState *qemu_chr_open_msmouse(const char *id,
+static void msmouse_chr_open(Chardev *chr,
-                                              ChardevBackend *backend,
+                             ChardevBackend *backend,
-                                              ChardevReturn *ret,
+                             bool *be_opened,
-                                              Error **errp)
+                             Error **errp)
 {
-    ChardevCommon *common = backend->u.msmouse.data;
+    MouseChardev *mouse = MOUSE_CHARDEV(chr);
    MouseState *mouse;
    CharDriverState *chr;
-    chr = qemu_chr_alloc(common, errp);
+    *be_opened = false;
    if (!chr) {
        return NULL;
    }
    chr->chr_write = msmouse_chr_write;
    chr->chr_close = msmouse_chr_close;
    chr->chr_accept_input = msmouse_chr_accept_input;
    chr->explicit_be_open = true;
    mouse = g_new0(MouseState, 1);
    mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
                                            &msmouse_handler);
    mouse->chr = chr;
    chr->opaque = mouse;
    return chr;
 }
 /*
  * QOM class initializer for the msmouse chardev type: installs the
  * msmouse_* callbacks (open, write, accept-input, free) on the
  * ChardevClass.
  */
 static void char_msmouse_class_init(ObjectClass *oc, void *data)
 {
    ChardevClass *cc = CHARDEV_CLASS(oc);
    cc->open = msmouse_chr_open;
    cc->chr_write = msmouse_chr_write;
    cc->chr_accept_input = msmouse_chr_accept_input;
    cc->chr_free = msmouse_chr_free;
 }
 /*
  * QOM type description for the msmouse chardev: a TYPE_CHARDEV subclass
  * whose instances are MouseChardev structures.
  */
 static const TypeInfo char_msmouse_type_info = {
    .name = TYPE_CHARDEV_MSMOUSE,
    .parent = TYPE_CHARDEV,
    .instance_size = sizeof(MouseChardev),
    .class_init = char_msmouse_class_init,
 };
 static void register_types(void)
 {
-    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
+    static const CharDriver driver = {
-                         qemu_chr_open_msmouse);
+        .kind = CHARDEV_BACKEND_KIND_MSMOUSE,
    };
    register_char_driver(&driver);
    type_register_static(&char_msmouse_type_info);
 }
 type_init(register_types);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -15,7 +15,6 @@
 #include "sysemu/char.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "hw/qdev.h" /* just for DEFINE_PROP_CHR */
 #define TYPE_RNG_EGD "rng-egd"
 #define RNG_EGD(obj) OBJECT_CHECK(RngEgd, (obj), TYPE_RNG_EGD)
@@ -24,7 +23,7 @@ typedef struct RngEgd
 {
    RngBackend parent;
-    CharDriverState *chr;
+    CharBackend chr;
    char *chr_name;
 } RngEgd;
@@ -43,7 +42,7 @@ static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
        /* XXX this blocks entire thread. Rewrite to use
         * qemu_chr_fe_write and background I/O callbacks */
-        qemu_chr_fe_write_all(s->chr, header, sizeof(header));
+        qemu_chr_fe_write_all(&s->chr, header, sizeof(header));
        size -= len;
    }
@@ -87,6 +86,7 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);
    Chardev *chr;
    if (s->chr_name == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
@@ -94,21 +94,19 @@ static void rng_egd_opened(RngBackend *b, Error **errp)
        return;
    }
-    s->chr = qemu_chr_find(s->chr_name);
+    chr = qemu_chr_find(s->chr_name);
-    if (s->chr == NULL) {
+    if (chr == NULL) {
        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
                  "Device '%s' not found", s->chr_name);
        return;
    }
-
+    if (!qemu_chr_fe_init(&s->chr, chr, errp)) {
    if (qemu_chr_fe_claim(s->chr) != 0) {
        error_setg(errp, QERR_DEVICE_IN_USE, s->chr_name);
        return;
    }
    /* FIXME we should resubmit pending requests when the CDS reconnects. */
-    qemu_chr_add_handlers(s->chr, rng_egd_chr_can_read, rng_egd_chr_read,
+    qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
-                          NULL, s);
+                             rng_egd_chr_read, NULL, s, NULL, true);
 }
 static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
@@ -127,9 +125,10 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
 static char *rng_egd_get_chardev(Object *obj, Error **errp)
 {
    RngEgd *s = RNG_EGD(obj);
    Chardev *chr = qemu_chr_fe_get_driver(&s->chr);
-    if (s->chr && s->chr->label) {
+    if (chr && chr->label) {
-        return g_strdup(s->chr->label);
+        return g_strdup(chr->label);
    }
    return NULL;
@@ -146,11 +145,7 @@ static void rng_egd_finalize(Object *obj)
 {
    RngEgd *s = RNG_EGD(obj);
-    if (s->chr) {
+    qemu_chr_fe_deinit(&s->chr);
        qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL);
        qemu_chr_fe_release(s->chr);
    }
    g_free(s->chr_name);
 }
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -30,13 +30,18 @@
 #define BUF_SIZE 32
 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;
    uint8_t in_buf[32];
    int in_buf_used;
-} TestdevCharState;
+} TestdevChardev;
 #define TYPE_CHARDEV_TESTDEV "chardev-testdev"
 #define TESTDEV_CHARDEV(obj)                                    \
    OBJECT_CHECK(TestdevChardev, (obj), TYPE_CHARDEV_TESTDEV)
 /* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevCharState *testdev)
+static int testdev_eat_packet(TestdevChardev *testdev)
 {
    const uint8_t *cur = testdev->in_buf;
    int len = testdev->in_buf_used;
@@ -77,9 +82,9 @@ static int testdev_eat_packet(TestdevCharState *testdev)
 }
 /* The other end is writing some data.  Store it and try to interpret */
-static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
+static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
-    TestdevCharState *testdev = chr->opaque;
+    TestdevChardev *testdev = TESTDEV_CHARDEV(chr);
    int tocopy, eaten, orig_len = len;
    while (len) {
@@ -102,35 +107,28 @@ static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
    return orig_len;
 }
-static void testdev_close(struct CharDriverState *chr)
+static void char_testdev_class_init(ObjectClass *oc, void *data)
 {
-    TestdevCharState *testdev = chr->opaque;
+    ChardevClass *cc = CHARDEV_CLASS(oc);
-    g_free(testdev);
+    cc->chr_write = testdev_chr_write;
 }
-static CharDriverState *chr_testdev_init(const char *id,
+static const TypeInfo char_testdev_type_info = {
-                                         ChardevBackend *backend,
+    .name = TYPE_CHARDEV_TESTDEV,
-                                         ChardevReturn *ret,
+    .parent = TYPE_CHARDEV,
-                                         Error **errp)
+    .instance_size = sizeof(TestdevChardev),
-{
+    .class_init = char_testdev_class_init,
-    TestdevCharState *testdev;
+};
    CharDriverState *chr;
    testdev = g_new0(TestdevCharState, 1);
    testdev->chr = chr = g_new0(CharDriverState, 1);
    chr->opaque = testdev;
    chr->chr_write = testdev_write;
    chr->chr_close = testdev_close;
    return chr;
 }
 static void register_types(void)
 {
-    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
+    static const CharDriver driver = {
-                         chr_testdev_init);
+        .kind = CHARDEV_BACKEND_KIND_TESTDEV,
    };
    register_char_driver(&driver);
    type_register_static(&char_testdev_type_info);
 }
 type_init(register_types);
--- a/block.c
+++ b/block.c
@@ -1428,9 +1428,11 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
            backing_hd->drv ? backing_hd->drv->format_name : "");
    bdrv_op_block_all(backing_hd, bs->backing_blocker);
-    /* Otherwise we won't be able to commit due to check in bdrv_commit */
+    /* Otherwise we won't be able to commit or stream */
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
                    bs->backing_blocker);
    /*
     * We do backup in 3 ways:
     * 1. drive backup
@@ -1849,7 +1851,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
    bdrv_refresh_filename(bs);
    /* Check if any unknown options were used */
-    if (options && (qdict_size(options) != 0)) {
+    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
@@ -2082,7 +2084,7 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
 * to all devices.
 *
 */
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
 {
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
@@ -2090,7 +2092,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
    assert(bs_queue != NULL);
-    bdrv_drain_all();
+    aio_context_release(ctx);
    bdrv_drain_all_begin();
    aio_context_acquire(ctx);
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
@@ -2120,6 +2124,9 @@ cleanup:
        g_free(bs_entry);
    }
    g_free(bs_queue);
    bdrv_drain_all_end();
    return ret;
 }
@@ -2131,7 +2138,7 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
-    ret = bdrv_reopen_multiple(queue, &local_err);
+    ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
@@ -2789,7 +2796,7 @@ const char *bdrv_get_format_name(BlockDriverState *bs)
 static int qsort_strcmp(const void *a, const void *b)
 {
-    return strcmp(a, b);
+    return strcmp(*(char *const *)a, *(char *const *)b);
 }
 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
@@ -2815,6 +2822,24 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
        }
    }
    for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
        const char *format_name = block_driver_modules[i].format_name;
        if (format_name) {
            bool found = false;
            int j = count;
            while (formats && j && !found) {
                found = !strcmp(formats[--j], format_name);
            }
            if (!found) {
                formats = g_renew(const char *, formats, count + 1);
                formats[count++] = format_name;
            }
        }
    }
    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
    for (i = 0; i < count; i++) {
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -6,14 +6,15 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
+block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_POSIX) += file-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
--- a/block/backup.c
+++ b/block/backup.c
@@ -16,7 +16,7 @@
 #include "trace.h"
 #include "block/block.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_backup.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
@@ -242,6 +242,14 @@ static void backup_abort(BlockJob *job)
    }
 }
 static void backup_clean(BlockJob *job)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
    assert(s->target);
    blk_unref(s->target);
    s->target = NULL;
 }
 static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
@@ -300,14 +308,20 @@ void backup_cow_request_end(CowRequest *req)
    cow_request_end(req);
 }
-static const BlockJobDriver backup_job_driver = {
+static void backup_drain(BlockJob *job)
-    .instance_size          = sizeof(BackupBlockJob),
+{
-    .job_type               = BLOCK_JOB_TYPE_BACKUP,
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    .set_speed              = backup_set_speed,
+
-    .commit                 = backup_commit,
+    /* Need to keep a reference in case blk_drain triggers execution
-    .abort                  = backup_abort,
+     * of backup_complete...
-    .attached_aio_context   = backup_attached_aio_context,
+     */
-};
+    if (s->target) {
        BlockBackend *target = s->target;
        blk_ref(target);
        blk_drain(target);
        blk_unref(target);
    }
 }
 static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
@@ -327,11 +341,8 @@ typedef struct {
 static void backup_complete(BlockJob *job, void *opaque)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
    BackupCompleteData *data = opaque;
    blk_unref(s->target);
    block_job_completed(job, data->ret);
    g_free(data);
 }
@@ -372,14 +383,14 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    int64_t end;
    int64_t last_cluster = -1;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    HBitmapIter hbi;
+    BdrvDirtyBitmapIter *dbi;
    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
+    dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
    /* Find the next dirty sector(s) */
-    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+    while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
        cluster = sector / sectors_per_cluster;
        /* Fake progress updates for any clusters we skipped */
@@ -391,7 +402,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
            do {
                if (yield_and_check(job)) {
-                    return ret;
+                    goto out;
                }
                ret = backup_do_cow(job, cluster * sectors_per_cluster,
                                    sectors_per_cluster, &error_is_read,
@@ -399,7 +410,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
-                    return ret;
+                    goto out;
                }
            } while (ret < 0);
        }
@@ -407,7 +418,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
        /* If the bitmap granularity is smaller than the backup granularity,
         * we need to advance the iterator pointer to the next cluster. */
        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
+            bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster);
        }
        last_cluster = cluster - 1;
@@ -419,6 +430,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
    }
 out:
    bdrv_dirty_iter_free(dbi);
    return ret;
 }
@@ -427,7 +440,6 @@ static void coroutine_fn backup_run(void *opaque)
    BackupBlockJob *job = opaque;
    BackupCompleteData *data;
    BlockDriverState *bs = blk_bs(job->common.blk);
    BlockBackend *target = job->target;
    int64_t start, end;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;
@@ -514,19 +526,30 @@ static void coroutine_fn backup_run(void *opaque)
    qemu_co_rwlock_unlock(&job->flush_rwlock);
    g_free(job->done_bitmap);
    bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&job->common, backup_complete, data);
 }
-void backup_start(const char *job_id, BlockDriverState *bs,
+static const BlockJobDriver backup_job_driver = {
    .instance_size          = sizeof(BackupBlockJob),
    .job_type               = BLOCK_JOB_TYPE_BACKUP,
    .start                  = backup_run,
    .set_speed              = backup_set_speed,
    .commit                 = backup_commit,
    .abort                  = backup_abort,
    .clean                  = backup_clean,
    .attached_aio_context   = backup_attached_aio_context,
    .drain                  = backup_drain,
 };
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
                  bool compress,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  int creation_flags,
                  BlockCompletionFunc *cb, void *opaque,
                  BlockJobTxn *txn, Error **errp)
 {
@@ -540,52 +563,52 @@ void backup_start(const char *job_id, BlockDriverState *bs,
    if (bs == target) {
        error_setg(errp, "Source and target cannot be the same");
-        return;
+        return NULL;
    }
    if (!bdrv_is_inserted(bs)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(bs));
-        return;
+        return NULL;
    }
    if (!bdrv_is_inserted(target)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(target));
-        return;
+        return NULL;
    }
    if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
        error_setg(errp, "Compression is not supported for this drive %s",
                   bdrv_get_device_name(target));
-        return;
+        return NULL;
    }
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return;
+        return NULL;
    }
    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
-        return;
+        return NULL;
    }
    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        if (!sync_bitmap) {
            error_setg(errp, "must provide a valid bitmap name for "
                             "\"incremental\" sync mode");
-            return;
+            return NULL;
        }
        /* Create a new bitmap, and freeze/disable this one. */
        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
-            return;
+            return NULL;
        }
    } else if (sync_bitmap) {
        error_setg(errp,
                   "a sync_bitmap was provided to backup_run, "
                   "but received an incompatible sync_mode (%s)",
                   MirrorSyncMode_lookup[sync_mode]);
-        return;
+        return NULL;
    }
    len = bdrv_getlength(bs);
@@ -596,7 +619,7 @@ void backup_start(const char *job_id, BlockDriverState *bs,
    }
    job = block_job_create(job_id, &backup_job_driver, bs, speed,
-                           cb, opaque, errp);
+                           creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }
@@ -629,19 +652,20 @@ void backup_start(const char *job_id, BlockDriverState *bs,
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }
-    bdrv_op_block_all(target, job->common.blocker);
+    block_job_add_bdrv(&job->common, target);
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run, job);
    block_job_txn_add_job(txn, &job->common);
-    qemu_coroutine_enter(job->common.co);
+
-    return;
+    return &job->common;
 error:
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
    }
    if (job) {
-        blk_unref(job->target);
+        backup_clean(&job->common);
        block_job_unref(&job->common);
    }
    return NULL;
 }
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 static const AIOCBInfo blkdebug_aiocb_info = {
    .aiocb_size    = sizeof(BlkdebugAIOCB),
 };
 enum {
    ACTION_INJECT_ERROR,
    ACTION_SET_STATE,
@@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
            int error;
            int immediately;
            int once;
-            int64_t sector;
+            int64_t offset;
        } inject;
        struct {
            int new_state;
@@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
    const char* event_name;
    BlkdebugEvent event;
    struct BlkdebugRule *rule;
    int64_t sector;
    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
@@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
        rule->options.inject.immediately =
            qemu_opt_get_bool(opts, "immediately", 0);
-        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+        sector = qemu_opt_get_number(opts, "sector", -1);
        rule->options.inject.offset =
            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
        break;
    case ACTION_SET_STATE:
@@ -408,17 +407,14 @@ out:
 static void error_callback_bh(void *opaque)
 {
-    struct BlkdebugAIOCB *acb = opaque;
+    Coroutine *co = opaque;
-    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_coroutine_enter(co);
    qemu_aio_unref(acb);
 }
-static BlockAIOCB *inject_error(BlockDriverState *bs,
+static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
    struct BlkdebugAIOCB *acb;
    bool immediately = rule->options.inject.immediately;
    if (rule->options.inject.once) {
@@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
        remove_rule(rule);
    }
-    if (immediately) {
+    if (!immediately) {
-        return NULL;
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
                                qemu_coroutine_self());
        qemu_coroutine_yield();
    }
-    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
+    return -error;
    acb->ret = -error;
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
    return &acb->common;
 }
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+static int coroutine_fn
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-    BlockCompletionFunc *cb, void *opaque)
+                   QEMUIOVector *qiov, int flags)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
+        uint64_t inject_offset = rule->options.inject.offset;
-            (rule->options.inject.sector >= sector_num &&
+
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        if (inject_offset == -1 ||
            (inject_offset >= offset && inject_offset < offset + bytes))
        {
            break;
        }
    }
    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }
-    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
                          cb, opaque);
 }
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+static int coroutine_fn
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-    BlockCompletionFunc *cb, void *opaque)
+                    QEMUIOVector *qiov, int flags)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
+        uint64_t inject_offset = rule->options.inject.offset;
-            (rule->options.inject.sector >= sector_num &&
+
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        if (inject_offset == -1 ||
            (inject_offset >= offset && inject_offset < offset + bytes))
        {
            break;
        }
    }
    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }
-    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
                           cb, opaque);
 }
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+static int blkdebug_co_flush(BlockDriverState *bs)
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
+        if (rule->options.inject.offset == -1) {
            break;
        }
    }
    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }
-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
+    return bdrv_co_flush(bs->file->bs);
 }
@@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
    .bdrv_refresh_limits    = blkdebug_refresh_limits,
-    .bdrv_aio_readv         = blkdebug_aio_readv,
+    .bdrv_co_preadv         = blkdebug_co_preadv,
-    .bdrv_aio_writev        = blkdebug_aio_writev,
+    .bdrv_co_pwritev        = blkdebug_co_pwritev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,
+    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,38 +19,36 @@ typedef struct {
    BdrvChild *test_file;
 } BDRVBlkverifyState;
-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+typedef struct BlkverifyRequest {
-struct BlkverifyAIOCB {
+    Coroutine *co;
-    BlockAIOCB common;
+    BlockDriverState *bs;
    /* Request metadata */
    bool is_write;
-    int64_t sector_num;
+    uint64_t offset;
-    int nb_sectors;
+    uint64_t bytes;
    int flags;
    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
                      BdrvRequestFlags);
    int ret;                    /* test image result */
    int raw_ret;                /* raw image result */
    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */
    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
+    QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
-    void *buf;                  /* buffer for raw file I/O */
+} BlkverifyRequest;
-    void (*verify)(BlkverifyAIOCB *acb);
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
 };
 static const AIOCBInfo blkverify_aiocb_info = {
    .aiocb_size         = sizeof(BlkverifyAIOCB),
 };
 static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
                                             const char *fmt, ...)
 {
    va_list ap;
    va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
+    fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
-            acb->is_write ? "write" : "read", acb->sector_num,
+            r->is_write ? "write" : "read", r->offset, r->bytes);
            acb->nb_sectors);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file->bs);
 }
-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+static void coroutine_fn blkverify_do_test_req(void *opaque)
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BlockCompletionFunc *cb,
                                         void *opaque)
 {
-    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
+    BlkverifyRequest *r = opaque;
    BDRVBlkverifyState *s = r->bs->opaque;
-    acb->is_write = is_write;
+    r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
-    acb->sector_num = sector_num;
+                           r->flags);
-    acb->nb_sectors = nb_sectors;
+    r->done++;
-    acb->ret = -EINPROGRESS;
+    qemu_coroutine_enter_if_inactive(r->co);
    acb->done = 0;
    acb->qiov = qiov;
    acb->buf = NULL;
    acb->verify = NULL;
    return acb;
 }
-static void blkverify_aio_bh(void *opaque)
+static void coroutine_fn blkverify_do_raw_req(void *opaque)
 {
-    BlkverifyAIOCB *acb = opaque;
+    BlkverifyRequest *r = opaque;
-    if (acb->buf) {
+    r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
-        qemu_iovec_destroy(&acb->raw_qiov);
+                               r->flags);
-        qemu_vfree(acb->buf);
+    r->done++;
    qemu_coroutine_enter_if_inactive(r->co);
 }
 static int coroutine_fn
 blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
                  uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
                  int flags, bool is_write)
 {
    Coroutine *co_a, *co_b;
    *r = (BlkverifyRequest) {
        .co         = qemu_coroutine_self(),
        .bs         = bs,
        .offset     = offset,
        .bytes      = bytes,
        .qiov       = qiov,
        .raw_qiov   = raw_qiov,
        .flags      = flags,
        .is_write   = is_write,
        .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
    };
    co_a = qemu_coroutine_create(blkverify_do_test_req, r);
    co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
    qemu_coroutine_enter(co_a);
    qemu_coroutine_enter(co_b);
    while (r->done < 2) {
        qemu_coroutine_yield();
    }
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
 }
-static void blkverify_aio_cb(void *opaque, int ret)
+    if (r->ret != r->raw_ret) {
-{
+        blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
    BlkverifyAIOCB *acb = opaque;
    switch (++acb->done) {
    case 1:
        acb->ret = ret;
        break;
    case 2:
        if (acb->ret != ret) {
            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
        }
        if (acb->verify) {
            acb->verify(acb);
        }
        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
                                blkverify_aio_bh, acb);
        break;
    }
    return r->ret;
 }
-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+static int coroutine_fn
 blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
 {
-    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
+    BlkverifyRequest r;
-    if (offset != -1) {
+    QEMUIOVector raw_qiov;
-        blkverify_err(acb, "contents mismatch in sector %" PRId64,
+    void *buf;
-                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+    ssize_t cmp_offset;
    int ret;
    buf = qemu_blockalign(bs->file->bs, qiov->size);
    qemu_iovec_init(&raw_qiov, qiov->niov);
    qemu_iovec_clone(&raw_qiov, qiov, buf);
    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
                            false);
    cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
    if (cmp_offset != -1) {
        blkverify_err(&r, "contents mismatch at offset %" PRId64,
                      offset + cmp_offset);
    }
    qemu_iovec_destroy(&raw_qiov);
    qemu_vfree(buf);
    return ret;
 }
-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+static int coroutine_fn
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-        BlockCompletionFunc *cb, void *opaque)
+                     QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkverifyState *s = bs->opaque;
+    BlkverifyRequest r;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
                                            nb_sectors, cb, opaque);
    acb->verify = blkverify_verify_readv;
    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    return &acb->common;
 }
-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+static int blkverify_co_flush(BlockDriverState *bs)
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
                                            nb_sectors, cb, opaque);
    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
    return &acb->common;
 }
 static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
                                       BlockCompletionFunc *cb,
                                       void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
+    return bdrv_co_flush(s->test_file->bs);
 }
 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_getlength                   = blkverify_getlength,
    .bdrv_refresh_filename            = blkverify_refresh_filename,
-    .bdrv_aio_readv                   = blkverify_aio_readv,
+    .bdrv_co_preadv                   = blkverify_co_preadv,
-    .bdrv_aio_writev                  = blkverify_aio_writev,
+    .bdrv_co_pwritev                  = blkverify_co_pwritev,
-    .bdrv_aio_flush                   = blkverify_aio_flush,
+    .bdrv_co_flush                    = blkverify_co_flush,
    .is_filter                        = true,
    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -799,20 +799,25 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               BdrvRequestFlags flags)
 {
    int ret;
    BlockDriverState *bs = blk_bs(blk);
-    trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+    trace_blk_co_preadv(blk, bs, offset, bytes, flags);
    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }
    bdrv_inc_in_flight(bs);
    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }
-    return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
 }
 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
@@ -820,14 +825,17 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                BdrvRequestFlags flags)
 {
    int ret;
    BlockDriverState *bs = blk_bs(blk);
-    trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }
    bdrv_inc_in_flight(bs);
    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
@@ -837,7 +845,9 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
        flags |= BDRV_REQ_FUA;
    }
-    return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
 }
 typedef struct BlkRwCo {
@@ -868,7 +878,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
 {
    AioContext *aio_context;
    QEMUIOVector qiov;
    struct iovec iov;
    Coroutine *co;
@@ -890,11 +899,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
    co = qemu_coroutine_create(co_entry, &rwco);
    qemu_coroutine_enter(co);
-
+    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
    aio_context = blk_get_aio_context(blk);
    while (rwco.ret == NOT_DONE) {
        aio_poll(aio_context, true);
    }
    return rwco.ret;
 }
@@ -930,6 +935,8 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
 static void error_callback_bh(void *opaque)
 {
    struct BlockBackendAIOCB *acb = opaque;
    bdrv_dec_in_flight(acb->common.bs);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
 }
@@ -940,6 +947,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
 {
    struct BlockBackendAIOCB *acb;
    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;
@@ -962,6 +970,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
 {
    if (acb->has_returned) {
        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
@@ -983,6 +992,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
@@ -1099,26 +1109,36 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                        blk_aio_write_entry, flags, cb, opaque);
 }
 static void blk_aio_flush_entry(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_flush(rwco->blk);
    blk_aio_complete(acb);
 }
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
 {
-    if (!blk_is_available(blk)) {
+    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+}
    }
-    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
+static void blk_aio_pdiscard_entry(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
    blk_aio_complete(acb);
 }
 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
 {
-    int ret = blk_check_byte_request(blk, offset, count);
+    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
-    if (ret < 0) {
+                        cb, opaque);
        return blk_abort_aio_request(blk, cb, opaque, ret);
    }
    return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
 }
 void blk_aio_cancel(BlockAIOCB *acb)
@@ -1131,23 +1151,50 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
    bdrv_aio_cancel_async(acb);
 }
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }
-    return bdrv_ioctl(blk_bs(blk), req, buf);
+    return bdrv_co_ioctl(blk_bs(blk), req, buf);
 }
 static void blk_ioctl_entry(void *opaque)
 {
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
 }
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
 }
 static void blk_aio_ioctl_entry(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
    blk_aio_complete(acb);
 }
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
 {
-    if (!blk_is_available(blk)) {
+    QEMUIOVector qiov;
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+    struct iovec iov;
    }
-    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
+    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = 0,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);
    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
@@ -1169,13 +1216,15 @@ int blk_co_flush(BlockBackend *blk)
    return bdrv_co_flush(blk_bs(blk));
 }
 static void blk_flush_entry(void *opaque)
 {
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_flush(rwco->blk);
 }
 int blk_flush(BlockBackend *blk)
 {
-    if (!blk_is_available(blk)) {
+    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
        return -ENOMEDIUM;
    }
    return bdrv_flush(blk_bs(blk));
 }
 void blk_drain(BlockBackend *blk)
@@ -1344,13 +1393,14 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
    if (bs) {
        bdrv_eject(bs, eject_flag);
        id = blk_get_attached_dev_id(blk);
        qapi_event_send_device_tray_moved(blk_name(blk), id,
                                          eject_flag, &error_abort);
        g_free(id);
    }
    /* Whether or not we ejected on the backend,
     * the frontend experienced a tray event. */
    id = blk_get_attached_dev_id(blk);
    qapi_event_send_device_tray_moved(blk_name(blk), id,
                                      eject_flag, &error_abort);
    g_free(id);
 }
 int blk_get_flags(BlockBackend *blk)
@@ -1555,14 +1605,15 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
    return bdrv_truncate(blk_bs(blk), offset);
 }
 static void blk_pdiscard_entry(void *opaque)
 {
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
 }
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    int ret = blk_check_byte_request(blk, offset, count);
+    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
    if (ret < 0) {
        return ret;
    }
    return bdrv_pdiscard(blk_bs(blk), offset, count);
 }
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
--- a/block/commit.c
+++ b/block/commit.c
@@ -15,7 +15,7 @@
 #include "qemu/osdep.h"
 #include "trace.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -205,17 +205,19 @@ static const BlockJobDriver commit_job_driver = {
    .instance_size = sizeof(CommitBlockJob),
    .job_type      = BLOCK_JOB_TYPE_COMMIT,
    .set_speed     = commit_set_speed,
    .start         = commit_run,
 };
 void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockCompletionFunc *cb,
+                  BlockdevOnError on_error, const char *backing_file_str,
-                  void *opaque, const char *backing_file_str, Error **errp)
+                  Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
    int orig_overlay_flags;
    int orig_base_flags;
    BlockDriverState *iter;
    BlockDriverState *overlay_bs;
    Error *local_err = NULL;
@@ -233,7 +235,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }
    s = block_job_create(job_id, &commit_job_driver, bs, speed,
-                         cb, opaque, errp);
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -251,7 +253,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
                                         orig_overlay_flags | BDRV_O_RDWR);
    }
    if (reopen_queue) {
-        bdrv_reopen_multiple(reopen_queue, &local_err);
+        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
        if (local_err != NULL) {
            error_propagate(errp, local_err);
            block_job_unref(&s->common);
@@ -260,6 +262,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }
    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
        block_job_add_bdrv(&s->common, iter);
    }
    /* overlay_bs must be blocked because it needs to be modified to
     * update the backing image string, but if it's the root node then
     * don't block it again */
    if (bs != overlay_bs) {
        block_job_add_bdrv(&s->common, overlay_bs);
    }
    s->base = blk_new();
    blk_insert_bs(s->base, base);
@@ -274,10 +289,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    s->backing_file_str = g_strdup(backing_file_str);
    s->on_error = on_error;
    s->common.co = qemu_coroutine_create(commit_run, s);
-    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    trace_commit_start(bs, base, top, s);
-    qemu_coroutine_enter(s->common.co);
+    block_job_start(&s->common);
 }
--- a/block/curl.c
+++ b/block/curl.c
@@ -68,12 +68,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #endif
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
-                   CURLPROTO_FTP | CURLPROTO_FTPS | \
+                   CURLPROTO_FTP | CURLPROTO_FTPS)
                   CURLPROTO_TFTP)
 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000
@@ -105,12 +103,17 @@ typedef struct CURLAIOCB {
    size_t end;
 } CURLAIOCB;
 typedef struct CURLSocket {
    int fd;
    QLIST_ENTRY(CURLSocket) next;
 } CURLSocket;
 typedef struct CURLState
 {
    struct BDRVCURLState *s;
    CURLAIOCB *acb[CURL_NUM_ACB];
    CURL *curl;
-    curl_socket_t sock_fd;
+    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
    size_t buf_start;
    size_t buf_off;
@@ -164,27 +167,44 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
 {
    BDRVCURLState *s;
    CURLState *state = NULL;
    CURLSocket *socket;
    curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
    state->sock_fd = fd;
    s = state->s;
    QLIST_FOREACH(socket, &state->sockets, next) {
        if (socket->fd == fd) {
            if (action == CURL_POLL_REMOVE) {
                QLIST_REMOVE(socket, next);
                g_free(socket);
            }
            break;
        }
    }
    if (!socket) {
        socket = g_new0(CURLSocket, 1);
        socket->fd = fd;
        QLIST_INSERT_HEAD(&state->sockets, socket, next);
    }
    socket = NULL;
    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, state);
+                               curl_multi_read, NULL, NULL, state);
            break;
        case CURL_POLL_OUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, state);
+                               NULL, curl_multi_do, NULL, state);
            break;
        case CURL_POLL_INOUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, state);
+                               curl_multi_read, curl_multi_do, NULL, state);
            break;
        case CURL_POLL_REMOVE:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL);
+                               NULL, NULL, NULL, NULL);
            break;
    }
@@ -213,12 +233,13 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    DPRINTF("CURL: Just reading %zd bytes\n", realsize);
-    if (!s || !s->orig_buf)
+    if (!s || !s->orig_buf) {
-        return 0;
+        goto read_end;
    }
    if (s->buf_off >= s->buf_len) {
        /* buffer full, read nothing */
-        return 0;
+        goto read_end;
    }
    realsize = MIN(realsize, s->buf_len - s->buf_off);
    memcpy(s->orig_buf + s->buf_off, ptr, realsize);
@@ -231,15 +252,26 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;
        if ((s->buf_off >= acb->end)) {
            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;
            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
            if (acb->end - acb->start < request_length) {
                size_t offset = acb->end - acb->start;
                qemu_iovec_memset(acb->qiov, offset, 0,
                                  request_length - offset);
            }
            acb->common.cb(acb->common.opaque, 0);
            qemu_aio_unref(acb);
            s->acb[i] = NULL;
        }
    }
-    return realsize;
+read_end:
    /* curl will error out if we do not return this value */
    return size * nmemb;
 }
 static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
@@ -247,6 +279,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
 {
    int i;
    size_t end = start + len;
    size_t clamped_end = MIN(end, s->len);
    size_t clamped_len = clamped_end - start;
    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
@@ -261,12 +295,15 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        // Does the existing buffer cover our section?
        if ((start >= state->buf_start) &&
            (start <= buf_end) &&
-            (end >= state->buf_start) &&
+            (clamped_end >= state->buf_start) &&
-            (end <= buf_end))
+            (clamped_end <= buf_end))
        {
            char *buf = state->orig_buf + (start - state->buf_start);
-            qemu_iovec_from_buf(acb->qiov, 0, buf, len);
+            qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len);
            if (clamped_len < len) {
                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
            }
            acb->common.cb(acb->common.opaque, 0);
            return FIND_RET_OK;
@@ -276,13 +313,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        if (state->in_use &&
            (start >= state->buf_start) &&
            (start <= buf_fend) &&
-            (end >= state->buf_start) &&
+            (clamped_end >= state->buf_start) &&
-            (end <= buf_fend))
+            (clamped_end <= buf_fend))
        {
            int j;
            acb->start = start - state->buf_start;
-            acb->end = acb->start + len;
+            acb->end = acb->start + clamped_len;
            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
@@ -352,6 +389,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 static void curl_multi_do(void *arg)
 {
    CURLState *s = (CURLState *)arg;
    CURLSocket *socket, *next_socket;
    int running;
    int r;
@@ -359,10 +397,13 @@ static void curl_multi_do(void *arg)
        return;
    }
-    do {
+    /* Need to use _SAFE because curl_multi_socket_action() may trigger
-        r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
+     * curl_sock_cb() which might modify this list */
-    } while(r == CURLM_CALL_MULTI_PERFORM);
+    QLIST_FOREACH_SAFE(socket, &s->sockets, next, next_socket) {
-
+        do {
            r = curl_multi_socket_action(s->s->multi, socket->fd, 0, &running);
        } while (r == CURLM_CALL_MULTI_PERFORM);
    }
 }
 static void curl_multi_read(void *arg)
@@ -466,6 +507,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 #endif
    }
    QLIST_INIT(&state->sockets);
    state->s = s;
    return state;
@@ -475,6 +517,14 @@ static void curl_clean_state(CURLState *s)
 {
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);
    while (!QLIST_EMPTY(&s->sockets)) {
        CURLSocket *socket = QLIST_FIRST(&s->sockets);
        QLIST_REMOVE(socket, next);
        g_free(socket);
    }
    s->in_use = 0;
 }
@@ -738,12 +788,12 @@ static void curl_readv_bh_cb(void *p)
    CURLAIOCB *acb = p;
    BDRVCURLState *s = acb->common.bs->opaque;
-    size_t start = acb->sector_num * SECTOR_SIZE;
+    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
    size_t end;
    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
+    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
            qemu_aio_unref(acb);
            // fall through
@@ -762,13 +812,13 @@ static void curl_readv_bh_cb(void *p)
    }
    acb->start = 0;
-    acb->end = (acb->nb_sectors * SECTOR_SIZE);
+    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);
    state->buf_off = 0;
    g_free(state->orig_buf);
    state->buf_start = start;
-    state->buf_len = acb->end + s->readahead_size;
+    state->buf_len = MIN(acb->end + s->readahead_size, s->len - start);
-    end = MIN(start + state->buf_len, s->len) - 1;
+    end = start + state->buf_len - 1;
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
@@ -779,8 +829,8 @@ static void curl_readv_bh_cb(void *p)
    state->acb[0] = acb;
    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n",
+    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
-            (acb->nb_sectors * SECTOR_SIZE), start, state->range);
+            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
    curl_multi_add_handle(s->multi, state->curl);
@@ -886,29 +936,12 @@ static BlockDriver bdrv_ftps = {
    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 static BlockDriver bdrv_tftp = {
    .format_name                = "tftp",
    .protocol_name              = "tftp",
    .instance_size              = sizeof(BDRVCURLState),
    .bdrv_parse_filename        = curl_parse_filename,
    .bdrv_file_open             = curl_open,
    .bdrv_close                 = curl_close,
    .bdrv_getlength             = curl_getlength,
    .bdrv_aio_readv             = curl_aio_readv,
    .bdrv_detach_aio_context    = curl_detach_aio_context,
    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };
 static void curl_block_init(void)
 {
    bdrv_register(&bdrv_http);
    bdrv_register(&bdrv_https);
    bdrv_register(&bdrv_ftp);
    bdrv_register(&bdrv_ftps);
    bdrv_register(&bdrv_tftp);
 }
 block_init(curl_block_init);
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -38,13 +38,20 @@
 */
 struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    HBitmap *meta;              /* Meta dirty bitmap */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    int active_iterators;       /* How many iterators are active */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 struct BdrvDirtyBitmapIter {
    HBitmapIter hbi;
    BdrvDirtyBitmap *bitmap;
 };
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
    BdrvDirtyBitmap *bm;
@@ -97,6 +104,66 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
    return bitmap;
 }
 /* bdrv_create_meta_dirty_bitmap
 *
 * Create a meta dirty bitmap that tracks the changes of bits in @bitmap. I.e.
 * when a dirty status bit in @bitmap is changed (either from reset to set or
 * the other way around), its respective meta dirty bitmap bit will be marked
 * dirty as well.
 *
 * @bitmap: the block dirty bitmap for which to create a meta dirty bitmap.
 * @chunk_size: how many bytes of bitmap data does each bit in the meta bitmap
 * track.
 */
 void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                                   int chunk_size)
 {
    assert(!bitmap->meta);
    bitmap->meta = hbitmap_create_meta(bitmap->bitmap,
                                       chunk_size * BITS_PER_BYTE);
 }
 void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
    assert(bitmap->meta);
    hbitmap_free_meta(bitmap->bitmap);
    bitmap->meta = NULL;
 }
 int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
                               BdrvDirtyBitmap *bitmap, int64_t sector,
                               int nb_sectors)
 {
    uint64_t i;
    int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
    /* To optimize: we can make hbitmap to internally check the range in a
     * coarse level, or at least do it word by word. */
    for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) {
        if (hbitmap_get(bitmap->meta, i)) {
            return true;
        }
    }
    return false;
 }
 void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
                                  BdrvDirtyBitmap *bitmap, int64_t sector,
                                  int nb_sectors)
 {
    hbitmap_reset(bitmap->meta, sector, nb_sectors);
 }
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
 {
    return bitmap->size;
 }
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
 {
    return bitmap->name;
 }
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
    return bitmap->successor;
@@ -212,6 +279,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        assert(!bdrv_dirty_bitmap_frozen(bitmap));
        assert(!bitmap->active_iterators);
        hbitmap_truncate(bitmap->bitmap, size);
        bitmap->size = size;
    }
@@ -224,7 +292,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
            assert(!bm->active_iterators);
            assert(!bdrv_dirty_bitmap_frozen(bm));
            assert(!bm->meta);
            QLIST_REMOVE(bm, list);
            hbitmap_free(bm->bitmap);
            g_free(bm->name);
@@ -235,6 +305,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
            }
        }
    }
    if (bitmap) {
        abort();
    }
 }
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
@@ -320,9 +393,43 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap)
 {
-    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta);
 }
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
                                         uint64_t first_sector)
 {
    BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
    hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector);
    iter->bitmap = bitmap;
    bitmap->active_iterators++;
    return iter;
 }
 BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap)
 {
    BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
    hbitmap_iter_init(&iter->hbi, bitmap->meta, 0);
    iter->bitmap = bitmap;
    bitmap->active_iterators++;
    return iter;
 }
 void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
 {
    if (!iter) {
        return;
    }
    assert(iter->bitmap->active_iterators > 0);
    iter->bitmap->active_iterators--;
    g_free(iter);
 }
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 {
    return hbitmap_iter_next(&iter->hbi);
 }
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -360,6 +467,43 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
    hbitmap_free(tmp);
 }
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
                                              uint64_t start, uint64_t count)
 {
    return hbitmap_serialization_size(bitmap->bitmap, start, count);
 }
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_serialization_granularity(bitmap->bitmap);
 }
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                      uint8_t *buf, uint64_t start,
                                      uint64_t count)
 {
    hbitmap_serialize_part(bitmap->bitmap, buf, start, count);
 }
 void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
                                        uint8_t *buf, uint64_t start,
                                        uint64_t count, bool finish)
 {
    hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish);
 }
 void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
                                          uint64_t start, uint64_t count,
                                          bool finish)
 {
    hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
    hbitmap_deserialize_finish(bitmap->bitmap);
 }
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int64_t nr_sectors)
 {
@@ -373,15 +517,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
 }
 /**
- * Advance an HBitmapIter to an arbitrary offset.
+ * Advance a BdrvDirtyBitmapIter to an arbitrary offset.
 */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num)
 {
-    assert(hbi->hb);
+    hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num);
    hbitmap_iter_init(hbi, hbi->hb, offset);
 }
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_count(bitmap->bitmap);
 }
 int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_count(bitmap->meta);
 }
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -443,6 +443,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    fd = qemu_open(filename, s->open_flags, 0644);
    if (fd < 0) {
        ret = -errno;
        error_setg_errno(errp, errno, "Could not open '%s'", filename);
        if (ret == -EROFS) {
            ret = -EACCES;
        }
@@ -541,7 +542,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
 {
    BDRVRawState *s;
-    BDRVRawReopenState *raw_s;
+    BDRVRawReopenState *rs;
    int ret = 0;
    Error *local_err = NULL;
@@ -551,15 +552,15 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    s = state->bs->opaque;
    state->opaque = g_new0(BDRVRawReopenState, 1);
-    raw_s = state->opaque;
+    rs = state->opaque;
    if (s->type == FTYPE_CD) {
-        raw_s->open_flags |= O_NONBLOCK;
+        rs->open_flags |= O_NONBLOCK;
    }
-    raw_parse_flags(state->flags, &raw_s->open_flags);
+    raw_parse_flags(state->flags, &rs->open_flags);
-    raw_s->fd = -1;
+    rs->fd = -1;
    int fcntl_flags = O_APPEND | O_NONBLOCK;
 #ifdef O_NOATIME
@@ -568,35 +569,35 @@ static int raw_reopen_prepare(BDRVReopenState *state,
 #ifdef O_ASYNC
    /* Not all operating systems have O_ASYNC, and those that don't
-     * will not let us track the state into raw_s->open_flags (typically
+     * will not let us track the state into rs->open_flags (typically
     * you achieve the same effect with an ioctl, for example I_SETSIG
     * on Solaris). But we do not use O_ASYNC, so that's fine.
     */
    assert((s->open_flags & O_ASYNC) == 0);
 #endif
-    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+    if ((rs->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
        /* dup the original fd */
-        raw_s->fd = qemu_dup(s->fd);
+        rs->fd = qemu_dup(s->fd);
-        if (raw_s->fd >= 0) {
+        if (rs->fd >= 0) {
-            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+            ret = fcntl_setfl(rs->fd, rs->open_flags);
            if (ret) {
-                qemu_close(raw_s->fd);
+                qemu_close(rs->fd);
-                raw_s->fd = -1;
+                rs->fd = -1;
            }
        }
    }
    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
-    if (raw_s->fd == -1) {
+    if (rs->fd == -1) {
        const char *normalized_filename = state->bs->filename;
        ret = raw_normalize_devicepath(&normalized_filename);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not normalize device path");
        } else {
-            assert(!(raw_s->open_flags & O_CREAT));
+            assert(!(rs->open_flags & O_CREAT));
-            raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
+            rs->fd = qemu_open(normalized_filename, rs->open_flags);
-            if (raw_s->fd == -1) {
+            if (rs->fd == -1) {
                error_setg_errno(errp, errno, "Could not reopen file");
                ret = -1;
            }
@@ -605,11 +606,11 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
     * alignment with the new fd. */
-    if (raw_s->fd != -1) {
+    if (rs->fd != -1) {
-        raw_probe_alignment(state->bs, raw_s->fd, &local_err);
+        raw_probe_alignment(state->bs, rs->fd, &local_err);
        if (local_err) {
-            qemu_close(raw_s->fd);
+            qemu_close(rs->fd);
-            raw_s->fd = -1;
+            rs->fd = -1;
            error_propagate(errp, local_err);
            ret = -EINVAL;
        }
@@ -620,13 +621,13 @@ static int raw_reopen_prepare(BDRVReopenState *state,
 static void raw_reopen_commit(BDRVReopenState *state)
 {
-    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawReopenState *rs = state->opaque;
    BDRVRawState *s = state->bs->opaque;
-    s->open_flags = raw_s->open_flags;
+    s->open_flags = rs->open_flags;
    qemu_close(s->fd);
-    s->fd = raw_s->fd;
+    s->fd = rs->fd;
    g_free(state->opaque);
    state->opaque = NULL;
@@ -635,27 +636,30 @@ static void raw_reopen_commit(BDRVReopenState *state)
 static void raw_reopen_abort(BDRVReopenState *state)
 {
-    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawReopenState *rs = state->opaque;
     /* nothing to do if NULL, we didn't get far enough */
-    if (raw_s == NULL) {
+    if (rs == NULL) {
        return;
    }
-    if (raw_s->fd >= 0) {
+    if (rs->fd >= 0) {
-        qemu_close(raw_s->fd);
+        qemu_close(rs->fd);
-        raw_s->fd = -1;
+        rs->fd = -1;
    }
    g_free(state->opaque);
    state->opaque = NULL;
 }
-static int hdev_get_max_transfer_length(int fd)
+static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
 {
 #ifdef BLKSECTGET
-    int max_sectors = 0;
+    int max_bytes = 0;
-    if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+    short max_sectors = 0;
-        return max_sectors;
+    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
        return max_bytes;
    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
        return max_sectors << BDRV_SECTOR_BITS;
    } else {
        return -errno;
    }
@@ -670,10 +674,10 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    struct stat st;
    if (!fstat(s->fd, &st)) {
-        if (S_ISBLK(st.st_mode)) {
+        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
-            int ret = hdev_get_max_transfer_length(s->fd);
+            int ret = hdev_get_max_transfer_length(bs, s->fd);
-            if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-                bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
+                bs->bl.max_transfer = pow2floor(ret);
            }
        }
    }
@@ -2068,13 +2072,23 @@ static bool hdev_is_sg(BlockDriverState *bs)
 #if defined(__linux__)
    BDRVRawState *s = bs->opaque;
    struct stat st;
    struct sg_scsi_id scsiid;
    int sg_version;
    int ret;
-    if (stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) &&
+    if (stat(bs->filename, &st) < 0 || !S_ISCHR(st.st_mode)) {
-        !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) &&
+        return false;
-        !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) {
+    }
    ret = ioctl(s->fd, SG_GET_VERSION_NUM, &sg_version);
    if (ret < 0) {
        return false;
    }
    ret = ioctl(s->fd, SG_GET_SCSI_ID, &scsiid);
    if (ret >= 0) {
        DPRINTF("SG device found: type=%d, version=%d\n",
            scsiid.scsi_type, sg_version);
        return true;
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -373,6 +373,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    if (s->hfile == INVALID_HANDLE_VALUE) {
        int err = GetLastError();
        error_setg_win32(errp, err, "Could not open '%s'", filename);
        if (err == ERROR_ACCESS_DENIED) {
            ret = -EACCES;
        } else {
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -14,6 +14,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #define GLUSTER_OPT_FILENAME        "filename"
 #define GLUSTER_OPT_VOLUME          "volume"
@@ -47,7 +48,7 @@ typedef struct BDRVGlusterState {
    struct glfs_fd *fd;
    char *logfile;
    bool supports_seek_data;
-    int debug_level;
+    int debug;
 } BDRVGlusterState;
 typedef struct BDRVGlusterReopenState {
@@ -56,6 +57,19 @@ typedef struct BDRVGlusterReopenState {
 } BDRVGlusterReopenState;
 /* Bookkeeping record for one gluster connection kept in glfs_list. */
 typedef struct GlfsPreopened {
    char *volume;   /* g_strdup()ed volume name; compared with strcmp() on lookup */
    glfs_t *fs;     /* the gluster handle registered for that volume */
    int ref;        /* user count: starts at 1, handle is glfs_fini()ed at 0 */
 } GlfsPreopened;
 /* List node wrapping one GlfsPreopened record. */
 typedef struct ListElement {
    QLIST_ENTRY(ListElement) list;  /* linkage used by the QLIST_* macros */
    GlfsPreopened saved;            /* the cached connection itself */
 } ListElement;
 /* File-local list of all registered gluster connections. */
 static QLIST_HEAD(glfs_list, ListElement) glfs_list;
 static QemuOptsList qemu_gluster_create_opts = {
    .name = "qemu-gluster-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
@@ -172,7 +186,7 @@ static QemuOptsList runtime_tcp_opts = {
        },
        {
            .name = GLUSTER_OPT_PORT,
-            .type = QEMU_OPT_NUMBER,
+            .type = QEMU_OPT_STRING,
            .help = "port number on which glusterd is listening (default 24007)",
        },
        {
@@ -194,6 +208,58 @@ static QemuOptsList runtime_tcp_opts = {
    },
 };
 /*
  * Register a gluster connection in glfs_list.
  *
  * Allocates a new list node holding a private copy of @volume, the handle
  * @fs and a reference count of 1, and inserts it at the head of the list.
  */
 static void glfs_set_preopened(const char *volume, glfs_t *fs)
 {
    ListElement *node = g_new(ListElement, 1);

    node->saved.ref = 1;
    node->saved.fs = fs;
    node->saved.volume = g_strdup(volume);

    QLIST_INSERT_HEAD(&glfs_list, node, list);
 }
 /*
  * Look up a registered connection by volume name.
  *
  * On a match the entry's reference count is bumped and its handle is
  * returned.  Returns NULL when no entry in glfs_list carries @volume.
  */
 static glfs_t *glfs_find_preopened(const char *volume)
 {
    ListElement *node;

    QLIST_FOREACH(node, &glfs_list, list) {
        if (strcmp(node->saved.volume, volume) != 0) {
            continue;
        }
        /* Hit: the caller becomes one more user of this connection. */
        node->saved.ref += 1;
        return node->saved.fs;
    }

    return NULL;
 }
 /*
  * Drop one reference on the connection @fs.
  *
  * A NULL @fs is ignored.  Otherwise glfs_list is searched for entries whose
  * handle equals @fs; the matching entry's reference count is decremented,
  * and if other users remain the function returns immediately.  When the
  * count reaches zero the entry is unlinked, the handle is shut down with
  * glfs_fini() and the entry's memory is released.
  */
 static void glfs_clear_preopened(glfs_t *fs)
 {
    ListElement *node, *following;

    if (!fs) {
        return;
    }

    /* _SAFE variant: the current node may be freed inside the loop body. */
    QLIST_FOREACH_SAFE(node, &glfs_list, list, following) {
        if (node->saved.fs != fs) {
            continue;
        }

        node->saved.ref -= 1;
        if (node->saved.ref != 0) {
            return;
        }

        QLIST_REMOVE(node, list);
        glfs_fini(node->saved.fs);
        g_free(node->saved.volume);
        g_free(node);
    }
 }
 static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 {
    char *p, *q;
@@ -330,22 +396,37 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    int ret;
    int old_errno;
    GlusterServerList *server;
    unsigned long long port;
    glfs = glfs_find_preopened(gconf->volume);
    if (glfs) {
        return glfs;
    }
    glfs = glfs_new(gconf->volume);
    if (!glfs) {
        goto out;
    }
    glfs_set_preopened(gconf->volume, glfs);
    for (server = gconf->server; server; server = server->next) {
        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
            ret = glfs_set_volfile_server(glfs,
                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.q_unix.path, 0);
        } else {
            if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
                           server->value->u.tcp.port);
                errno = EINVAL;
                goto out;
            }
            ret = glfs_set_volfile_server(glfs,
                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.tcp.host,
-                                   atoi(server->value->u.tcp.port));
+                                   (int)port);
        }
        if (ret < 0) {
@@ -353,7 +434,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        }
    }
-    ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug_level);
+    ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug);
    if (ret < 0) {
        goto out;
    }
@@ -387,7 +468,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
 out:
    if (glfs) {
        old_errno = errno;
-        glfs_fini(glfs);
+        glfs_clear_preopened(glfs);
        errno = old_errno;
    }
    return NULL;
@@ -668,7 +749,10 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
 */
 static bool qemu_gluster_test_seek(struct glfs_fd *fd)
 {
-    off_t ret, eof;
+    off_t ret = 0;
 #if defined SEEK_HOLE && defined SEEK_DATA
    off_t eof;
    eof = glfs_lseek(fd, 0, SEEK_END);
    if (eof < 0) {
@@ -678,6 +762,8 @@ static bool qemu_gluster_test_seek(struct glfs_fd *fd)
    /* this should always fail with ENXIO if SEEK_DATA is supported */
    ret = glfs_lseek(fd, eof, SEEK_DATA);
 #endif
    return (ret < 0) && (errno == ENXIO);
 }
@@ -702,17 +788,17 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
    filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);
-    s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
+    s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
-                                         GLUSTER_DEBUG_DEFAULT);
+                                   GLUSTER_DEBUG_DEFAULT);
-    if (s->debug_level < 0) {
+    if (s->debug < 0) {
-        s->debug_level = 0;
+        s->debug = 0;
-    } else if (s->debug_level > GLUSTER_DEBUG_MAX) {
+    } else if (s->debug > GLUSTER_DEBUG_MAX) {
-        s->debug_level = GLUSTER_DEBUG_MAX;
+        s->debug = GLUSTER_DEBUG_MAX;
    }
    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = s->debug_level;
+    gconf->debug = s->debug;
-    gconf->has_debug_level = true;
+    gconf->has_debug = true;
    logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE);
    s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT);
@@ -762,9 +848,9 @@ out:
    if (s->fd) {
        glfs_close(s->fd);
    }
-    if (s->glfs) {
+
-        glfs_fini(s->glfs);
+    glfs_clear_preopened(s->glfs);
-    }
+
    return ret;
 }
@@ -788,8 +874,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
    qemu_gluster_parse_flags(state->flags, &open_flags);
    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = s->debug_level;
+    gconf->debug = s->debug;
-    gconf->has_debug_level = true;
+    gconf->has_debug = true;
    gconf->logfile = g_strdup(s->logfile);
    gconf->has_logfile = true;
    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
@@ -831,9 +917,8 @@ static void qemu_gluster_reopen_commit(BDRVReopenState *state)
    if (s->fd) {
        glfs_close(s->fd);
    }
-    if (s->glfs) {
+
-        glfs_fini(s->glfs);
+    glfs_clear_preopened(s->glfs);
    }
    /* use the newly opened image / connection */
    s->fd         = reop_s->fd;
@@ -858,9 +943,7 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
        glfs_close(reop_s->fd);
    }
-    if (reop_s->glfs) {
+    glfs_clear_preopened(reop_s->glfs);
        glfs_fini(reop_s->glfs);
    }
    g_free(state->opaque);
    state->opaque = NULL;
@@ -928,14 +1011,14 @@ static int qemu_gluster_create(const char *filename,
    char *tmp = NULL;
    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
+    gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
-                                                 GLUSTER_DEBUG_DEFAULT);
+                                           GLUSTER_DEBUG_DEFAULT);
-    if (gconf->debug_level < 0) {
+    if (gconf->debug < 0) {
-        gconf->debug_level = 0;
+        gconf->debug = 0;
-    } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
+    } else if (gconf->debug > GLUSTER_DEBUG_MAX) {
-        gconf->debug_level = GLUSTER_DEBUG_MAX;
+        gconf->debug = GLUSTER_DEBUG_MAX;
    }
-    gconf->has_debug_level = true;
+    gconf->has_debug = true;
    gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE);
    if (!gconf->logfile) {
@@ -984,9 +1067,7 @@ static int qemu_gluster_create(const char *filename,
 out:
    g_free(tmp);
    qapi_free_BlockdevOptionsGluster(gconf);
-    if (glfs) {
+    glfs_clear_preopened(glfs);
        glfs_fini(glfs);
    }
    return ret;
 }
@@ -1059,7 +1140,7 @@ static void qemu_gluster_close(BlockDriverState *bs)
        glfs_close(s->fd);
        s->fd = NULL;
    }
-    glfs_fini(s->glfs);
+    glfs_clear_preopened(s->glfs);
 }
 static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1172,18 +1253,20 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
 * If we can't find out, return a negative errno other than -ENXIO.
 *
- * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from file-posix.c, only miniscule adaptions.)
 */
 static int find_allocation(BlockDriverState *bs, off_t start,
                           off_t *data, off_t *hole)
 {
    BDRVGlusterState *s = bs->opaque;
    off_t offs;
    if (!s->supports_seek_data) {
-        return -ENOTSUP;
+        goto exit;
    }
 #if defined SEEK_HOLE && defined SEEK_DATA
    off_t offs;
    /*
     * SEEK_DATA cases:
     * D1. offs == start: start is in data
@@ -1247,6 +1330,10 @@ static int find_allocation(BlockDriverState *bs, off_t start,
    /* D1 and H1 */
    return -EBUSY;
 #endif
 exit:
    return -ENOTSUP;
 }
 /*
@@ -1262,7 +1349,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
- * (Based on raw_co_get_block_status() from raw-posix.c.)
+ * (Based on raw_co_get_block_status() from file-posix.c.)
 */
 static int64_t coroutine_fn qemu_gluster_co_get_block_status(
        BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
--- a/block/io.c
+++ b/block/io.c
@@ -143,7 +143,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 {
    BdrvChild *child;
-    if (!QLIST_EMPTY(&bs->tracked_requests)) {
+    if (atomic_read(&bs->in_flight)) {
        return true;
    }
@@ -156,16 +156,22 @@ bool bdrv_requests_pending(BlockDriverState *bs)
    return false;
 }
-static void bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
    BdrvChild *child;
    bool waited;
    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
    if (bs->drv && bs->drv->bdrv_drain) {
        bs->drv->bdrv_drain(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_drain_recurse(child->bs);
+        waited |= bdrv_drain_recurse(child->bs);
    }
    return waited;
 }
 typedef struct {
@@ -174,23 +180,14 @@ typedef struct {
    bool done;
 } BdrvCoDrainData;
 /*
  * Poll @bs's AioContext until nothing is left to do.
  *
  * Each round first asks bdrv_requests_pending() whether requests are
  * outstanding and passes that answer to aio_poll() as its second argument.
  * The loop ends once neither call reports activity.
  */
 static void bdrv_drain_poll(BlockDriverState *bs)
 {
    bool pending;
    bool progress;

    do {
        pending = bdrv_requests_pending(bs);
        progress = aio_poll(bdrv_get_aio_context(bs), pending);
    } while (pending || progress);
 }
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
    BlockDriverState *bs = data->bs;
-    bdrv_drain_poll(data->bs);
+    bdrv_dec_in_flight(bs);
    bdrv_drained_begin(bs);
    data->done = true;
    qemu_coroutine_enter(co);
 }
@@ -209,6 +206,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
        .bs = bs,
        .done = false,
    };
    bdrv_inc_in_flight(bs);
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                            bdrv_co_drain_bh_cb, &data);
@@ -220,19 +218,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
 void bdrv_drained_begin(BlockDriverState *bs)
 {
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs);
        return;
    }
    if (!bs->quiesce_counter++) {
        aio_disable_external(bdrv_get_aio_context(bs));
        bdrv_parent_drained_begin(bs);
    }
    bdrv_io_unplugged_begin(bs);
    bdrv_drain_recurse(bs);
    if (qemu_in_coroutine()) {
        bdrv_co_yield_to_drain(bs);
    } else {
        bdrv_drain_poll(bs);
    }
    bdrv_io_unplugged_end(bs);
 }
 void bdrv_drained_end(BlockDriverState *bs)
@@ -275,11 +271,17 @@ void bdrv_drain(BlockDriverState *bs)
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * This pauses all block jobs and disables external clients. It must
 * be paired with bdrv_drain_all_end().
 *
 * NOTE: no new block jobs or BlockDriverStates can be created between
 * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
-void bdrv_drain_all(void)
+void bdrv_drain_all_begin(void)
 {
    /* Always run first iteration so any pending completion BHs run */
-    bool busy = true;
+    bool waited = true;
    BlockDriverState *bs;
    BdrvNextIterator it;
    BlockJob *job = NULL;
@@ -298,8 +300,7 @@ void bdrv_drain_all(void)
        aio_context_acquire(aio_context);
        bdrv_parent_drained_begin(bs);
-        bdrv_io_unplugged_begin(bs);
+        aio_disable_external(aio_context);
        bdrv_drain_recurse(bs);
        aio_context_release(aio_context);
        if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -313,8 +314,8 @@ void bdrv_drain_all(void)
     * request completion.  Therefore we must keep looping until there was no
     * more activity rather than simply draining each device independently.
     */
-    while (busy) {
+    while (waited) {
-        busy = false;
+        waited = false;
        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
            AioContext *aio_context = ctx->data;
@@ -322,28 +323,31 @@ void bdrv_drain_all(void)
            aio_context_acquire(aio_context);
            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                if (aio_context == bdrv_get_aio_context(bs)) {
-                    if (bdrv_requests_pending(bs)) {
+                    waited |= bdrv_drain_recurse(bs);
                        busy = true;
                        aio_poll(aio_context, busy);
                    }
                }
            }
            busy |= aio_poll(aio_context, false);
            aio_context_release(aio_context);
        }
    }
    g_slist_free(aio_ctxs);
 }
 void bdrv_drain_all_end(void)
 {
    BlockDriverState *bs;
    BdrvNextIterator it;
    BlockJob *job = NULL;
    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);
-        bdrv_io_unplugged_end(bs);
+        aio_enable_external(aio_context);
        bdrv_parent_drained_end(bs);
        aio_context_release(aio_context);
    }
    g_slist_free(aio_ctxs);
    job = NULL;
    while ((job = block_job_next(job))) {
        AioContext *aio_context = blk_get_aio_context(job->blk);
@@ -353,6 +357,12 @@ void bdrv_drain_all(void)
    }
 }
 /*
  * Convenience wrapper: runs bdrv_drain_all_begin() immediately followed by
  * bdrv_drain_all_end().
  */
 void bdrv_drain_all(void)
 {
    bdrv_drain_all_begin();
    bdrv_drain_all_end();
 }
 /**
 * Remove an active request from the tracked requests list
 *
@@ -476,6 +486,28 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
    return true;
 }
 /*
  * Atomically increment @bs's in_flight counter.  Callers in this file pair
  * it with bdrv_dec_in_flight() around a request.
  */
 void bdrv_inc_in_flight(BlockDriverState *bs)
 {
    atomic_inc(&bs->in_flight);
 }
 /*
  * Intentionally empty bottom-half callback; bdrv_wakeup() schedules it.
  * NOTE(review): presumably only the act of scheduling matters (to make the
  * main AioContext's loop iterate) -- confirm.
  */
 static void dummy_bh_cb(void *opaque)
 {
 }
 /*
  * If @bs has its wakeup flag set, schedule a one-shot no-op bottom half
  * (dummy_bh_cb) on the AioContext returned by qemu_get_aio_context().
  * Does nothing when the flag is clear.
  */
 void bdrv_wakeup(BlockDriverState *bs)
 {
    if (!bs->wakeup) {
        return;
    }

    aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
 }
 /*
  * Atomically decrement @bs's in_flight counter, then call bdrv_wakeup() so
  * that a waiter (if bs->wakeup is set) gets a chance to re-check it.
  */
 void bdrv_dec_in_flight(BlockDriverState *bs)
 {
    atomic_dec(&bs->in_flight);
    bdrv_wakeup(bs);
 }
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 {
    BlockDriverState *bs = self->bs;
@@ -583,13 +615,9 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(child->bs);
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
        qemu_coroutine_enter(co);
-        while (rwco.ret == NOT_DONE) {
+        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
 }
@@ -1097,6 +1125,8 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
        return ret;
    }
    bdrv_inc_in_flight(bs);
    /* Don't do copy-on-read if we read data before write operation */
    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
@@ -1132,6 +1162,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
@@ -1179,6 +1210,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                    MAX_WRITE_ZEROES_BOUNCE_BUFFER);
    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
@@ -1194,9 +1227,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
         * boundaries.
         */
        if (head) {
-            /* Make a small request up to the first aligned sector.  */
+            /* Make a small request up to the first aligned sector. For
-            num = MIN(count, alignment - head);
+             * convenience, limit this request to max_transfer even if
-            head = 0;
+             * we don't need to fall back to writes.  */
            num = MIN(MIN(count, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
@@ -1222,8 +1258,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
            if ((flags & BDRV_REQ_FUA) &&
@@ -1480,6 +1514,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        return ret;
    }
    bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
@@ -1581,6 +1616,7 @@ fail:
    qemu_vfree(tail_buf);
 out:
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -1705,17 +1741,19 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    }
    *file = NULL;
    bdrv_inc_in_flight(bs);
    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
                                            file);
    if (ret < 0) {
        *pnum = 0;
-        return ret;
+        goto out;
    }
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+        ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
-                                     *pnum, pnum, file);
+                                    *pnum, pnum, file);
        goto out;
    }
    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
@@ -1757,6 +1795,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
        }
    }
 out:
    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -1822,14 +1862,10 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_above_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
                                   &data);
        qemu_coroutine_enter(co);
-        while (!data.done) {
+        BDRV_POLL_WHILE(bs, !data.done);
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
 }
@@ -2102,6 +2138,7 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = {
 static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
 {
    if (!acb->need_bh) {
        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->req.error);
        qemu_aio_unref(acb);
    }
@@ -2152,6 +2189,9 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
    Coroutine *co;
    BlockAIOCBCoroutine *acb;
    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
    bdrv_inc_in_flight(child->bs);
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
    acb->child = child;
    acb->need_bh = true;
@@ -2185,6 +2225,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    Coroutine *co;
    BlockAIOCBCoroutine *acb;
    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
    bdrv_inc_in_flight(bs);
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;
@@ -2196,35 +2239,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    return &acb->common;
 }
 /*
  * Coroutine body for bdrv_aio_pdiscard(): performs the discard described by
  * the AIOCB passed in @opaque, stores the result in its req.error field and
  * then hands the AIOCB to bdrv_co_complete().
  */
 static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
 {
    BlockAIOCBCoroutine *request = opaque;

    request->req.error = bdrv_co_pdiscard(request->common.bs,
                                          request->req.offset,
                                          request->req.bytes);
    bdrv_co_complete(request);
 }
 /*
  * Start an asynchronous discard of @count bytes at @offset on @bs.
  *
  * An AIOCB is obtained from qemu_aio_get() with @cb and @opaque, primed
  * with the request parameters and -EINPROGRESS as its error value, and a
  * coroutine running bdrv_aio_pdiscard_co_entry() is created and entered
  * right away.
  *
  * Returns the common part of the new AIOCB.
  */
 BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
                              BlockCompletionFunc *cb, void *opaque)
 {
    Coroutine *co;
    BlockAIOCBCoroutine *acb;
    trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    /* While need_bh is set, bdrv_co_complete() does not invoke the callback. */
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;
    acb->req.offset = offset;
    acb->req.bytes = count;
    co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
    qemu_coroutine_enter(co);
    /* NOTE(review): presumably defers completion to a BH if the coroutine
     * already finished above -- confirm in bdrv_co_maybe_schedule_bh(). */
    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
 }
 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque)
 {
@@ -2273,23 +2287,22 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
    int ret;
    BdrvTrackedRequest req;
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        return 0;
    }
-    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+    bdrv_inc_in_flight(bs);
    int current_gen = bs->write_gen;
    /* Wait until any previous flushes are completed */
-    while (bs->active_flush_req != NULL) {
+    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue);
    }
-    bs->active_flush_req = &req;
+    bs->active_flush_req = true;
    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
@@ -2358,12 +2371,14 @@ flush_parent:
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 out:
    /* Notify any pending flushes that we have completed */
-    bs->flushed_gen = current_gen;
+    if (ret == 0) {
-    bs->active_flush_req = NULL;
+        bs->flushed_gen = current_gen;
    }
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
-    tracked_request_end(&req);
+    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -2379,13 +2394,9 @@ int bdrv_flush(BlockDriverState *bs)
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&flush_co);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
        qemu_coroutine_enter(co);
-        while (flush_co.ret == NOT_DONE) {
+        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
            aio_poll(aio_context, true);
        }
    }
    return flush_co.ret;
@@ -2409,7 +2420,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
 {
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
-    int head, align;
+    int head, tail, align;
    if (!bs->drv) {
        return -ENOMEDIUM;
@@ -2432,20 +2443,17 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
        return 0;
    }
-    /* Discard is advisory, so ignore any unaligned head or tail */
+    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
     * unaligned requests (by returning -ENOTSUP), so we must fragment
     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
-    if (head) {
+    tail = (offset + count) % align;
        head = MIN(count, align - head);
        count -= head;
        offset += head;
    }
    count = QEMU_ALIGN_DOWN(count, align);
    if (!count) {
        return 0;
    }
    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
@@ -2455,11 +2463,34 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
-    assert(max_pdiscard);
+    assert(max_pdiscard >= bs->bl.request_alignment);
    while (count > 0) {
        int ret;
-        int num = MIN(count, max_pdiscard);
+        int num = count;
        if (head) {
            /* Make small requests to get to alignment boundaries. */
            num = MIN(count, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
            head = (head + num) % align;
            assert(num < max_pdiscard);
        } else if (tail) {
            if (num > align) {
                /* Shorten the request to the last aligned cluster.  */
                num -= tail;
            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
                       tail > bs->bl.request_alignment) {
                tail %= bs->bl.request_alignment;
                num -= tail;
            }
        }
        /* limit request size */
        if (num > max_pdiscard) {
            num = max_pdiscard;
        }
        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
@@ -2492,6 +2523,7 @@ out:
    bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                   req.bytes >> BDRV_SECTOR_BITS);
    tracked_request_end(&req);
    bdrv_dec_in_flight(bs);
    return ret;
 }
@@ -2509,106 +2541,41 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
        /* Fast-path if already in coroutine context */
        bdrv_pdiscard_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
        qemu_coroutine_enter(co);
-        while (rwco.ret == NOT_DONE) {
+        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
 }
-static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
+int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
 {
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest tracked_req;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;
-    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
+    bdrv_inc_in_flight(bs);
-    if (!drv || !drv->bdrv_aio_ioctl) {
+    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }
-    acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
+    if (drv->bdrv_co_ioctl) {
-    if (!acb) {
+        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
        co.ret = -ENOTSUP;
        goto out;
    }
    qemu_coroutine_yield();
 out:
    tracked_request_end(&tracked_req);
    return co.ret;
 }
 /* Argument/result bundle handed to bdrv_co_ioctl_entry() as its opaque
 * pointer.  bdrv_ioctl() fills in bs/req/buf and seeds ret with -EINPROGRESS.
 */
 typedef struct {
    BlockDriverState *bs;   /* node the ioctl is issued on */
    int req;                /* ioctl request number */
    void *buf;              /* request argument buffer, passed through as-is */
    int ret;                /* result slot written by bdrv_co_ioctl_entry() */
 } BdrvIoctlCoData;
 /* Coroutine entry point: @opaque is a BdrvIoctlCoData.  Runs
 * bdrv_co_do_ioctl() with the bundled arguments and stores its return value
 * in data->ret.
 */
 static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
 {
    BdrvIoctlCoData *data = opaque;
    data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf);
 }
 /* needed for generic scsi interface */
 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
    BdrvIoctlCoData data = {
        .bs = bs,
        .req = req,
        .buf = buf,
        .ret = -EINPROGRESS,
    };
    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_co_ioctl_entry(&data);
    } else {
-        Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data);
+        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
-
+        if (!acb) {
-        qemu_coroutine_enter(co);
+            co.ret = -ENOTSUP;
-        while (data.ret == -EINPROGRESS) {
+            goto out;
            aio_poll(bdrv_get_aio_context(bs), true);
        }
        qemu_coroutine_yield();
    }
-    return data.ret;
+out:
-}
+    bdrv_dec_in_flight(bs);
-
+    return co.ret;
 /* Coroutine entry point used by bdrv_aio_ioctl(): @opaque is the
 * BlockAIOCBCoroutine describing the request.  The ioctl result is stored in
 * acb->req.error before bdrv_co_complete() is called on the request.
 */
 static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
 {
    BlockAIOCBCoroutine *acb = opaque;
    acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
                                      acb->req.req, acb->req.buf);
    bdrv_co_complete(acb);
 }
 /* Issue an ioctl on @bs through the AIOCB interface.
 *
 * A request is taken from bdrv_em_co_aiocb_info, filled in with @req and
 * @buf, and a coroutine running bdrv_co_aio_ioctl_entry() is created and
 * entered with it.  The stored error starts out as -EINPROGRESS and is
 * overwritten by the coroutine.  need_bh is set before the coroutine is
 * entered, and bdrv_co_maybe_schedule_bh() is called once it hands control
 * back.
 *
 * @cb/@opaque: completion callback and its argument, passed to qemu_aio_get()
 *
 * Returns the BlockAIOCB embedded in the new request.
 */
 BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockCompletionFunc *cb, void *opaque)
 {
    BlockAIOCBCoroutine *request;

    request = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    request->need_bh = true;
    request->req.error = -EINPROGRESS;
    request->req.req = req;
    request->req.buf = buf;

    qemu_coroutine_enter(qemu_coroutine_create(bdrv_co_aio_ioctl_entry,
                                               request));
    bdrv_co_maybe_schedule_bh(request);

    return &request->common;
 }
 void *qemu_blockalign(BlockDriverState *bs, size_t size)
@@ -2679,7 +2646,7 @@ void bdrv_io_plug(BlockDriverState *bs)
        bdrv_io_plug(child->bs);
    }
-    if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
+    if (bs->io_plugged++ == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
@@ -2692,7 +2659,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
    BdrvChild *child;
    assert(bs->io_plugged);
-    if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+    if (--bs->io_plugged == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
@@ -2703,36 +2670,3 @@ void bdrv_io_unplug(BlockDriverState *bs)
        bdrv_io_unplug(child->bs);
    }
 }
 /* Enter a section in which I/O plugging is disabled for @bs and,
 * recursively, for every node on its children list.
 *
 * io_plug_disabled is a nesting counter and is always incremented.  Only the
 * outermost call (counter was 0) invokes the driver's bdrv_io_unplug
 * callback, and only while the node is plugged (io_plugged > 0) and the
 * driver provides that callback.
 */
 void bdrv_io_unplugged_begin(BlockDriverState *bs)
 {
    BdrvChild *c;

    if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0 &&
        bs->drv && bs->drv->bdrv_io_unplug) {
        bs->drv->bdrv_io_unplug(bs);
    }

    QLIST_FOREACH(c, &bs->children, next) {
        bdrv_io_unplugged_begin(c->bs);
    }
 }
 /* Leave a section opened by bdrv_io_unplugged_begin().
 *
 * The children are processed first, then the nesting counter of @bs is
 * dropped.  When the counter reaches zero while the node is still plugged
 * (io_plugged > 0), the driver's bdrv_io_plug callback is invoked, provided
 * the driver has one.  Asserts that the counter is non-zero on entry.
 */
 void bdrv_io_unplugged_end(BlockDriverState *bs)
 {
    BdrvChild *c;
    BlockDriver *driver;

    assert(bs->io_plug_disabled);

    QLIST_FOREACH(c, &bs->children, next) {
        bdrv_io_unplugged_end(c->bs);
    }

    if (--bs->io_plug_disabled != 0 || bs->io_plugged <= 0) {
        return;
    }

    driver = bs->drv;
    if (driver && driver->bdrv_io_plug) {
        driver->bdrv_io_plug(bs);
    }
 }
--- a/block/iscsi-opts.c
+++ b/block/iscsi-opts.c
@@ -0,0 +1,69 @@
 /*
 * QEMU Block driver for iSCSI images (static options)
 *
 * Copyright (c) 2017 Peter Lieven <pl@kamp.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/config-file.h"
 /* Option list for the "iscsi" group.  Every entry is a string option except
 * "timeout", which is numeric.  The list is handed to qemu_add_opts() by
 * iscsi_block_opts_init() below.
 */
 static QemuOptsList qemu_iscsi_opts = {
    .name = "iscsi",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
    .desc = {
        {
            .name = "user",
            .type = QEMU_OPT_STRING,
            .help = "username for CHAP authentication to target",
        },{
            .name = "password",
            .type = QEMU_OPT_STRING,
            .help = "password for CHAP authentication to target",
        },{
            .name = "password-secret",
            .type = QEMU_OPT_STRING,
            .help = "ID of the secret providing password for CHAP "
                    "authentication to target",
        },{
            .name = "header-digest",
            .type = QEMU_OPT_STRING,
            .help = "HeaderDigest setting. "
                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
        },{
            .name = "initiator-name",
            .type = QEMU_OPT_STRING,
            .help = "Initiator iqn name to use when connecting",
        },{
            .name = "timeout",
            .type = QEMU_OPT_NUMBER,
            .help = "Request timeout in seconds (default 0 = no timeout)",
        },
        { /* end of list */ }
    },
 };
 /* Hand the "iscsi" option list (qemu_iscsi_opts) to qemu_add_opts(). */
 static void iscsi_block_opts_init(void)
 {
    qemu_add_opts(&qemu_iscsi_opts);
 }
 /* Hook the function above in through block_init(). */
 block_init(iscsi_block_opts_init);
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -202,6 +202,10 @@ static inline unsigned exp_random(double mean)
 #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
 #endif
 #ifndef LIBISCSI_API_VERSION
 #define LIBISCSI_API_VERSION 20130701
 #endif
 static int iscsi_translate_sense(struct scsi_sense *sense)
 {
    int ret;
@@ -358,6 +362,7 @@ iscsi_set_events(IscsiLun *iscsilun)
                           false,
                           (ev & POLLIN) ? iscsi_process_read : NULL,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
                           NULL,
                           iscsilun);
        iscsilun->events = ev;
    }
@@ -494,14 +499,18 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
    if (allocated) {
        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
    } else {
-        bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
+        if (nb_cls_shrunk > 0) {
            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
        }
    }
    if (iscsilun->allocmap_valid == NULL) {
        return;
    }
    if (valid) {
-        bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+        if (nb_cls_shrunk > 0) {
            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
        }
    } else {
        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
                     nb_cls_expanded);
@@ -592,6 +601,20 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            NULL, num_sectors * iscsilun->block_size,
                                            iscsilun->block_size, 0, 0, fua, 0, 0,
                                            iscsi_co_generic_cb, &iTask,
                                            (struct scsi_iovec *)iov->iov, iov->niov);
    }
 #else
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
@@ -602,11 +625,14 @@ retry:
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
 #endif
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
 #endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
@@ -789,6 +815,21 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
        iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size, 0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    } else {
        iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
                                           num_sectors * iscsilun->block_size,
                                           iscsilun->block_size,
                                           0, 0, 0, 0, 0,
                                           iscsi_co_generic_cb, &iTask,
                                           (struct scsi_iovec *)iov->iov, iov->niov);
    }
 #else
        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size, 0, 0, 0, 0, 0,
@@ -800,11 +841,13 @@ retry:
                                       0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
    }
 #endif
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
-
+#endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
@@ -1045,7 +1088,9 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    struct IscsiTask iTask;
    struct unmap_list list;
-    assert(is_byte_request_lun_aligned(offset, count, iscsilun));
+    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
        return -ENOTSUP;
    }
    if (!iscsilun->lbp.lbpu) {
        /* UNMAP is not supported by the target */
@@ -1486,7 +1531,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;
    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    iscsilun->events = 0;
    if (iscsilun->nop_timer) {
@@ -1606,7 +1651,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -ENOMEM;
        goto out;
    }
-
+#if LIBISCSI_API_VERSION >= (20160603)
    if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
        error_setg(errp, ("Error initializing transport."));
        ret = -EINVAL;
        goto out;
    }
 #endif
    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
        error_setg(errp, "iSCSI: Failed to set target name.");
        ret = -EINVAL;
@@ -1649,7 +1700,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    /* timeout handling is broken in libiscsi before 1.15.0 */
    timeout = parse_timeout(iscsi_url->target);
-#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
+#if LIBISCSI_API_VERSION >= 20150621
    iscsi_set_timeout(iscsi, timeout);
 #else
    if (timeout) {
@@ -2010,9 +2061,48 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_attach_aio_context = iscsi_attach_aio_context,
 };
 #if LIBISCSI_API_VERSION >= (20160603)
 /* Block driver registered under the format and protocol name "iser".  Every
 * callback is one of this file's iscsi_* functions and the per-instance
 * state is an IscsiLun.  The surrounding #if compiles this definition only
 * when LIBISCSI_API_VERSION >= 20160603.
 */
 static BlockDriver bdrv_iser = {
    .format_name     = "iser",
    .protocol_name   = "iser",
    .instance_size   = sizeof(IscsiLun),
    .bdrv_needs_filename = true,
    .bdrv_file_open  = iscsi_open,
    .bdrv_close      = iscsi_close,
    .bdrv_create     = iscsi_create,
    .create_opts     = &iscsi_create_opts,
    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
    .bdrv_reopen_commit    = iscsi_reopen_commit,
    .bdrv_invalidate_cache = iscsi_invalidate_cache,
    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
    .bdrv_truncate   = iscsi_truncate,
    .bdrv_refresh_limits = iscsi_refresh_limits,
    .bdrv_co_get_block_status = iscsi_co_get_block_status,
    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
    .bdrv_co_flush_to_disk = iscsi_co_flush,
 #ifdef __linux__
    /* The ioctl callback is only provided on Linux builds. */
    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
 #endif
    .bdrv_detach_aio_context = iscsi_detach_aio_context,
    .bdrv_attach_aio_context = iscsi_attach_aio_context,
 };
 #endif
 /* Register the block drivers defined in this file: bdrv_iscsi always, and
 * bdrv_iser only when LIBISCSI_API_VERSION >= 20160603 (the same condition
 * that guards the definition of bdrv_iser).
 */
 static void iscsi_block_init(void)
 {
    bdrv_register(&bdrv_iscsi);
 #if LIBISCSI_API_VERSION >= (20160603)
    bdrv_register(&bdrv_iser);
 #endif
 }
 /* Hook the function above in through block_init(). */
 block_init(iscsi_block_init);
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -255,6 +255,20 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }
 static bool qemu_laio_poll_cb(void *opaque)
 {
    EventNotifier *e = opaque;
    LinuxAioState *s = container_of(e, LinuxAioState, e);
    struct io_event *events;
    if (!io_getevents_peek(s->ctx, &events)) {
        return false;
    }
    qemu_laio_process_completions_and_submit(s);
    return true;
 }
 static void laio_cancel(BlockAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -439,7 +453,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
    qemu_bh_delete(s->completion_bh);
 }
@@ -448,7 +462,8 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb);
+                           qemu_laio_completion_cb,
                           qemu_laio_poll_cb);
 }
 LinuxAioState *laio_init(void)
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -13,7 +13,7 @@
 #include "qemu/osdep.h"
 #include "trace.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
@@ -55,7 +55,7 @@ typedef struct MirrorBlockJob {
    int64_t bdev_length;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
-    HBitmapIter hbi;
+    BdrvDirtyBitmapIter *dbi;
    uint8_t *buf;
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;
@@ -330,10 +330,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
                             MAX_IO_SECTORS);
-    sector_num = hbitmap_iter_next(&s->hbi);
+    sector_num = bdrv_dirty_iter_next(s->dbi);
    if (sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        bdrv_set_dirty_iter(s->dbi, 0);
-        sector_num = hbitmap_iter_next(&s->hbi);
+        sector_num = bdrv_dirty_iter_next(s->dbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(sector_num >= 0);
    }
@@ -349,7 +349,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    /* Find the number of consecutive dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
-        int64_t hbitmap_next;
+        int64_t next_dirty;
        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
        int64_t next_chunk = next_sector / sectors_per_chunk;
        if (next_sector >= end ||
@@ -360,13 +360,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
            break;
        }
-        hbitmap_next = hbitmap_iter_next(&s->hbi);
+        next_dirty = bdrv_dirty_iter_next(s->dbi);
-        if (hbitmap_next > next_sector || hbitmap_next < 0) {
+        if (next_dirty > next_sector || next_dirty < 0) {
            /* The bitmap iterator's cache is stale, refresh it */
-            bdrv_set_dirty_iter(&s->hbi, next_sector);
+            bdrv_set_dirty_iter(s->dbi, next_sector);
-            hbitmap_next = hbitmap_iter_next(&s->hbi);
+            next_dirty = bdrv_dirty_iter_next(s->dbi);
        }
-        assert(hbitmap_next == next_sector);
+        assert(next_dirty == next_sector);
        nb_chunks++;
    }
@@ -469,7 +469,11 @@ static void mirror_free_init(MirrorBlockJob *s)
    }
 }
-static void mirror_drain(MirrorBlockJob *s)
+/* This is also used for the .pause callback. There is no matching
 * mirror_resume() because mirror_run() will begin iterating again
 * when the job is resumed.
 */
 static void mirror_wait_for_all_io(MirrorBlockJob *s)
 {
    while (s->in_flight > 0) {
        mirror_wait_for_io(s);
@@ -526,8 +530,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
    bdrv_op_unblock_all(target_bs, s->common.blocker);
    blk_unref(s->target);
    s->target = NULL;
    block_job_completed(&s->common, data->ret);
    g_free(data);
    bdrv_drained_end(src);
@@ -582,7 +586,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            sector_num += nb_sectors;
        }
-        mirror_drain(s);
+        mirror_wait_for_all_io(s);
    }
    /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -611,12 +615,27 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
    return 0;
 }
 /* Flush the mirror target.  Used when leaving the streaming phase, to get
 * the bulk of the data onto the medium, and again just before completing.
 *
 * A failed flush is passed to mirror_error_action() as a write-side error;
 * if the resulting action is BLOCK_ERROR_ACTION_REPORT, the error code is
 * recorded in s->ret.
 *
 * Returns the value blk_flush() returned.
 */
 static int mirror_flush(MirrorBlockJob *s)
 {
    int rc = blk_flush(s->target);

    if (rc < 0 &&
        mirror_error_action(s, false, -rc) == BLOCK_ERROR_ACTION_REPORT) {
        s->ret = rc;
    }

    return rc;
 }
 static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
    bool need_drain = true;
    int64_t length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
@@ -679,7 +698,8 @@ static void coroutine_fn mirror_run(void *opaque)
        }
    }
-    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+    assert(!s->dbi);
    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0);
    for (;;) {
        uint64_t delay_ns = 0;
        int64_t cnt, delta;
@@ -721,27 +741,23 @@ static void coroutine_fn mirror_run(void *opaque)
        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
-            ret = blk_flush(s->target);
+            if (!s->synced) {
-            if (ret < 0) {
+                if (mirror_flush(s) < 0) {
-                if (mirror_error_action(s, false, -ret) ==
+                    /* Go check s->ret.  */
-                    BLOCK_ERROR_ACTION_REPORT) {
+                    continue;
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
-                if (!s->synced) {
+                block_job_event_ready(&s->common);
-                    block_job_event_ready(&s->common);
+                s->synced = true;
                    s->synced = true;
                }
                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            }
            should_complete = s->should_complete ||
                block_job_is_cancelled(&s->common);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }
        if (cnt == 0 && should_complete) {
@@ -751,11 +767,26 @@ static void coroutine_fn mirror_run(void *opaque)
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
-             * mirror_populate runs.
+             * mirror_populate runs, so pause it now.  Before deciding
             * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_co_drain(bs);
+
            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            if (cnt > 0 || mirror_flush(s) < 0) {
                bdrv_drained_end(bs);
                continue;
            }
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            need_drain = false;
            break;
        }
        ret = 0;
@@ -768,13 +799,6 @@ static void coroutine_fn mirror_run(void *opaque)
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
@@ -786,20 +810,23 @@ immediate_exit:
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
-        mirror_drain(s);
+        assert(need_drain);
        mirror_wait_for_all_io(s);
    }
    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_dirty_iter_free(s->dbi);
    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    /* Before we switch to target in mirror_exit, make sure data doesn't
+
-     * change. */
+    if (need_drain) {
-    bdrv_drained_begin(bs);
+        bdrv_drained_begin(bs);
    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
 }
@@ -870,14 +897,11 @@ static void mirror_complete(BlockJob *job, Error **errp)
    block_job_enter(&s->common);
 }
-/* There is no matching mirror_resume() because mirror_run() will begin
+static void mirror_pause(BlockJob *job)
 * iterating again when the job is resumed.
 */
 static void coroutine_fn mirror_pause(BlockJob *job)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    mirror_drain(s);
+    mirror_wait_for_all_io(s);
 }
 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
@@ -887,28 +911,47 @@ static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
    blk_set_aio_context(s->target, new_context);
 }
 /* .drain callback of the mirror job drivers: drain the job's target
 * BlockBackend, if the job still has one.
 */
 static void mirror_drain(BlockJob *job)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    BlockBackend *target = s->target;

    if (!target) {
        return;
    }

    /* Hold our own reference across the drain, in case blk_drain()
     * triggers execution of mirror_complete...
     */
    blk_ref(target);
    blk_drain(target);
    blk_unref(target);
 }
 /* Job driver for BLOCK_JOB_TYPE_MIRROR jobs; mirror_start() passes it to
 * mirror_start_job().  Job state is a MirrorBlockJob and the job body is
 * mirror_run().
 */
 static const BlockJobDriver mirror_job_driver = {
    .instance_size          = sizeof(MirrorBlockJob),
    .job_type               = BLOCK_JOB_TYPE_MIRROR,
    .set_speed              = mirror_set_speed,
    .start                  = mirror_run,
    .complete               = mirror_complete,
    .pause                  = mirror_pause,
    .attached_aio_context   = mirror_attached_aio_context,
    .drain                  = mirror_drain,
 };
 /* Job driver that commit_active_start() passes to mirror_start_job().  It
 * uses the same callbacks as mirror_job_driver; only the reported job type
 * (BLOCK_JOB_TYPE_COMMIT) differs.
 */
 static const BlockJobDriver commit_active_job_driver = {
    .instance_size          = sizeof(MirrorBlockJob),
    .job_type               = BLOCK_JOB_TYPE_COMMIT,
    .set_speed              = mirror_set_speed,
    .start                  = mirror_run,
    .complete               = mirror_complete,
    .pause                  = mirror_pause,
    .attached_aio_context   = mirror_attached_aio_context,
    .drain                  = mirror_drain,
 };
 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
-                             BlockDriverState *target, const char *replaces,
+                             int creation_flags, BlockDriverState *target,
-                             int64_t speed, uint32_t granularity,
+                             const char *replaces, int64_t speed,
-                             int64_t buf_size,
+                             uint32_t granularity, int64_t buf_size,
                             BlockMirrorBackingMode backing_mode,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
@@ -936,7 +979,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }
-    s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
+    s = block_job_create(job_id, driver, bs, speed, creation_flags,
                         cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -965,11 +1009,18 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        return;
    }
-    bdrv_op_block_all(target, s->common.blocker);
+    block_job_add_bdrv(&s->common, target);
    /* In commit_active_start() all intermediate nodes disappear, so
     * any jobs in them must be blocked */
    if (bdrv_chain_contains(bs, target)) {
        BlockDriverState *iter;
        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
            block_job_add_bdrv(&s->common, iter);
        }
    }
-    s->common.co = qemu_coroutine_create(mirror_run, s);
+    trace_mirror_start(bs, s, opaque);
-    trace_mirror_start(bs, s, s->common.co, opaque);
+    block_job_start(&s->common);
    qemu_coroutine_enter(s->common.co);
 }
 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -978,9 +1029,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  bool unmap,
+                  bool unmap, Error **errp)
                  BlockCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    bool is_none_mode;
    BlockDriverState *base;
@@ -991,17 +1040,16 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
-    mirror_start_job(job_id, bs, target, replaces,
+    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, cb, opaque, errp,
+                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
                     &mirror_job_driver, is_none_mode, base, false);
 }
 void commit_active_start(const char *job_id, BlockDriverState *bs,
-                         BlockDriverState *base, int64_t speed,
+                         BlockDriverState *base, int creation_flags,
-                         BlockdevOnError on_error,
+                         int64_t speed, BlockdevOnError on_error,
-                         BlockCompletionFunc *cb,
+                         BlockCompletionFunc *cb, void *opaque, Error **errp,
                         void *opaque, Error **errp,
                         bool auto_complete)
 {
    int64_t length, base_length;
@@ -1040,9 +1088,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
        }
    }
-    mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
+    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, false, cb, opaque, &local_err,
+                     on_error, on_error, true, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base, auto_complete);
    if (local_err) {
        error_propagate(errp, local_err);
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,6 +1,7 @@
 /*
 * QEMU Block driver for  NBD
 *
 * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
@@ -32,7 +33,7 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
    int i;
@@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
 static void nbd_teardown_connection(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
    if (!client->ioc) { /* Already closed */
        return;
@@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 static void nbd_reply_ready(void *opaque)
 {
    BlockDriverState *bs = opaque;
-    NbdClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = nbd_get_client_session(bs);
    uint64_t i;
    int ret;
@@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque)
 }
 static int nbd_co_send_request(BlockDriverState *bs,
-                               struct nbd_request *request,
+                               NBDRequest *request,
                               QEMUIOVector *qiov)
 {
-    NbdClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
    int rc, ret, i;
@@ -144,7 +145,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
    aio_context = bdrv_get_aio_context(bs);
    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, bs);
+                       nbd_reply_ready, nbd_restart_write, NULL, bs);
    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
@@ -160,15 +161,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
        rc = nbd_send_request(s->ioc, request);
    }
    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, bs);
+                       nbd_reply_ready, NULL, NULL, bs);
    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
 }
-static void nbd_co_receive_reply(NbdClientSession *s,
+static void nbd_co_receive_reply(NBDClientSession *s,
-                                 struct nbd_request *request,
+                                 NBDRequest *request,
-                                 struct nbd_reply *reply,
+                                 NBDReply *reply,
                                 QEMUIOVector *qiov)
 {
    int ret;
@@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
    }
 }
-static void nbd_coroutine_start(NbdClientSession *s,
+static void nbd_coroutine_start(NBDClientSession *s,
-   struct nbd_request *request)
+                                NBDRequest *request)
 {
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
-    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
+    if (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_lock(&s->free_sema);
+        qemu_co_queue_wait(&s->free_sema);
        assert(s->in_flight < MAX_NBD_REQUESTS);
    }
    s->in_flight++;
@@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s,
    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
 }
-static void nbd_coroutine_end(NbdClientSession *s,
+static void nbd_coroutine_end(NBDClientSession *s,
-    struct nbd_request *request)
+                              NBDRequest *request)
 {
    /* Map the request handle back to its recv_coroutine[] slot and free it. */
    int i = HANDLE_TO_INDEX(s, request->handle);
    s->recv_coroutine[i] = NULL;
    /* Post-decrement: the comparison sees the count from before this request
     * was removed, so free_sema is signalled only when in_flight had reached
     * MAX_NBD_REQUESTS. */
    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_unlock(&s->free_sema);
+        qemu_co_queue_next(&s->free_sema);
    }
 }
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;
    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDRequest request = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;
    if (flags & BDRV_REQ_FUA) {
        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
-        request.type |= NBD_CMD_FLAG_FUA;
+        request.flags |= NBD_CMD_FLAG_FUA;
    }
    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    return -reply.error;
 }
 /*
  * Send an NBD_CMD_WRITE_ZEROES request for @count bytes starting at @offset.
  *
  * Returns -ENOTSUP when the session flags lack NBD_FLAG_SEND_WRITE_ZEROES.
  * Otherwise returns the negated error field of the reply; when sending the
  * request fails, that field is filled from the send error instead.
  */
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int count, BdrvRequestFlags flags)
 {
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDReply reply;
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
        .len = count,
    };
    ssize_t rc;
    if ((session->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) == 0) {
        return -ENOTSUP;
    }
    /* Translate block-layer request flags into NBD command flags. */
    if (flags & BDRV_REQ_FUA) {
        assert(session->nbdflags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if ((flags & BDRV_REQ_MAY_UNMAP) == 0) {
        request.flags |= NBD_CMD_FLAG_NO_HOLE;
    }
    nbd_coroutine_start(session, &request);
    rc = nbd_co_send_request(bs, &request, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &request, &reply, NULL);
    } else {
        /* Nothing was sent, so report the send error through the reply. */
        reply.error = -rc;
    }
    nbd_coroutine_end(session, &request);
    return -reply.error;
 }
 int nbd_client_co_flush(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_FLUSH };
+    NBDRequest request = { .type = NBD_CMD_FLUSH };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;
    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
@@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs)
 int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = count,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;
    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
@@ -330,24 +366,20 @@ void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
    aio_set_fd_handler(bdrv_get_aio_context(bs),
                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
 }
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
    /* Install nbd_reply_ready on the session socket's fd in @new_context,
     * passing @bs as the handler's opaque argument; every other handler
     * slot is NULL. */
    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, bs);
+                       false, nbd_reply_ready, NULL, NULL, bs);
 }
 void nbd_client_close(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDRequest request = { .type = NBD_CMD_DISC };
        .type = NBD_CMD_DISC,
        .from = 0,
        .len = 0
    };
    if (client->ioc == NULL) {
        return;
@@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs,
                    const char *hostname,
                    Error **errp)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
    int ret;
    /* NBD handshake */
@@ -383,10 +415,14 @@ int nbd_client_init(BlockDriverState *bs,
    }
    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
    if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }
    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_mutex_init(&client->free_sema);
+    qemu_co_queue_init(&client->free_sema);
    client->sioc = sioc;
    object_ref(OBJECT(client->sioc));
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -17,24 +17,24 @@
 #define MAX_NBD_REQUESTS    16
-typedef struct NbdClientSession {
+typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
    /* NBD_FLAG_* bits; tested before optional commands are sent */
    uint16_t nbdflags;
    off_t size;
    CoMutex send_mutex;
    /* Waited on by nbd_coroutine_start() when in_flight is at the limit */
-    CoMutex free_sema;
+    CoQueue free_sema;
    Coroutine *send_coroutine;
    /* Requests between nbd_coroutine_start() and nbd_coroutine_end() */
    int in_flight;
    /* Indexed by HANDLE_TO_INDEX(); cleared in nbd_coroutine_end() */
    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-    struct nbd_reply reply;
+    NBDReply reply;
    /* Set by nbd_config(): server address is SOCKET_ADDRESS_KIND_UNIX */
    bool is_unix;
-} NbdClientSession;
+} NBDClientSession;
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
 int nbd_client_init(BlockDriverState *bs,
                    QIOChannelSocket *sock,
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags);
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -32,6 +32,9 @@
 #include "qemu/uri.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi-visit.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qint.h"
@@ -41,10 +44,11 @@
 #define EN_OPTSTR ":exportname="
 typedef struct BDRVNBDState {
-    NbdClientSession client;
+    NBDClientSession client;
    /* For nbd_refresh_filename() */
-    char *path, *host, *port, *export, *tlscredsid;
+    SocketAddress *saddr;
    /* export and tlscredsid are g_strdup()ed in nbd_open() and g_free()d in
     * nbd_close() */
    char *export, *tlscredsid;
 } BDRVNBDState;
 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -90,9 +94,13 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put(options, "path", qstring_from_str(qp->p[0].value));
+        qdict_put(options, "server.type", qstring_from_str("unix"));
        qdict_put(options, "server.data.path",
                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
        char *port_str;
        /* nbd[+tcp]://host[:port]/export */
        if (!uri->server) {
            ret = -EINVAL;
@@ -107,12 +115,12 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }
-        qdict_put(options, "host", host);
+        qdict_put(options, "server.type", qstring_from_str("inet"));
-        if (uri->port) {
+        qdict_put(options, "server.data.host", host);
-            char* port_str = g_strdup_printf("%d", uri->port);
+
-            qdict_put(options, "port", qstring_from_str(port_str));
+        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-            g_free(port_str);
+        qdict_put(options, "server.data.port", qstring_from_str(port_str));
-        }
+        g_free(port_str);
    }
 out:
@@ -123,6 +131,26 @@ out:
    return ret;
 }
 /*
  * Check whether @options already holds a key that a file name would also
  * set: "host", "port", "path", "export", or any key starting with "server.".
  *
  * Returns true and sets @errp for the first such key; false when none exists.
  */
 static bool nbd_has_filename_options_conflict(QDict *options, Error **errp)
 {
    const QDictEntry *entry = qdict_first(options);
    while (entry) {
        const char *key = entry->key;
        bool clash = strcmp(key, "host") == 0 ||
                     strcmp(key, "port") == 0 ||
                     strcmp(key, "path") == 0 ||
                     strcmp(key, "export") == 0 ||
                     strstart(key, "server.", NULL);
        if (clash) {
            error_setg(errp, "Option '%s' cannot be used with a file name",
                       key);
            return true;
        }
        entry = qdict_next(options, entry);
    }
    return false;
 }
 static void nbd_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
@@ -131,12 +159,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
    const char *host_spec;
    const char *unixpath;
-    if (qdict_haskey(options, "host")
+    if (nbd_has_filename_options_conflict(options, errp)) {
        || qdict_haskey(options, "port")
        || qdict_haskey(options, "path"))
    {
        error_setg(errp, "host/port/path and a file name may not be specified "
                         "at the same time");
        return;
    }
@@ -173,7 +196,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,
    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put(options, "path", qstring_from_str(unixpath));
+        qdict_put(options, "server.type", qstring_from_str("unix"));
        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
    } else {
        InetSocketAddress *addr = NULL;
@@ -182,8 +206,9 @@ static void nbd_parse_filename(const char *filename, QDict *options,
            goto out;
        }
-        qdict_put(options, "host", qstring_from_str(addr->host));
+        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "port", qstring_from_str(addr->port));
+        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }
@@ -191,51 +216,85 @@ out:
    g_free(file);
 }
-static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
+static bool nbd_process_legacy_socket_options(QDict *output_options,
                                              QemuOpts *legacy_opts,
                                              Error **errp)
 {
    /* NOTE(review): this hunk interleaves the removed body of the old
     * nbd_config() with the added body of
     * nbd_process_legacy_socket_options().  The +/- markers on individual
     * lines are not reliable here: lines that use 's', 'opts' or 'saddr'
     * belong to the old function even where they carry no '-'.
     *
     * New function, as far as visible here: reads the legacy "path", "host"
     * and "port" options from @legacy_opts and, when one is given, stores the
     * matching "server.type" / "server.data.*" entries in @output_options.
     * Returns true on success (also when none of the three is set).  Returns
     * false with @errp set when @output_options already has a "server." key,
     * when path and host are both given, or when port is given with path. */
-    SocketAddress *saddr;
+    const char *path = qemu_opt_get(legacy_opts, "path");
    const char *host = qemu_opt_get(legacy_opts, "host");
    const char *port = qemu_opt_get(legacy_opts, "port");
    const QDictEntry *e;
-    s->path = g_strdup(qemu_opt_get(opts, "path"));
+    if (!path && !host && !port) {
-    s->host = g_strdup(qemu_opt_get(opts, "host"));
+        return true;
    if (!s->path == !s->host) {
        if (s->path) {
            error_setg(errp, "path and host may not be used at the same time.");
        } else {
            error_setg(errp, "one of path and host must be specified.");
        }
        return NULL;
    }
-    saddr = g_new0(SocketAddress, 1);
+    for (e = qdict_first(output_options); e; e = qdict_next(output_options, e))
-
+    {
-    if (s->path) {
+        if (strstart(e->key, "server.", NULL)) {
-        UnixSocketAddress *q_unix;
+            error_setg(errp, "Cannot use 'server' and path/host/port at the "
-        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
+                       "same time");
-        q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+            return false;
        q_unix->path = g_strdup(s->path);
    } else {
        InetSocketAddress *inet;
        s->port = g_strdup(qemu_opt_get(opts, "port"));
        saddr->type = SOCKET_ADDRESS_KIND_INET;
        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
        inet->host = g_strdup(s->host);
        inet->port = g_strdup(s->port);
        if (!inet->port) {
            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
        }
    }
    if (path && host) {
        error_setg(errp, "path and host may not be used at the same time");
        return false;
    } else if (path) {
        if (port) {
            error_setg(errp, "port may not be used without host");
            return false;
        }
        qdict_put(output_options, "server.type", qstring_from_str("unix"));
        qdict_put(output_options, "server.data.path", qstring_from_str(path));
    } else if (host) {
        qdict_put(output_options, "server.type", qstring_from_str("inet"));
        qdict_put(output_options, "server.data.host", qstring_from_str(host));
        /* An omitted port falls back to the stringified NBD_DEFAULT_PORT. */
        qdict_put(output_options, "server.data.port",
                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
    }
    return true;
 }
 /*
  * Build the SocketAddress described by the "server." entries of @options.
  *
  * The "server." entries are extracted from @options into a temporary QDict,
  * crumpled into a nested QObject and run through an input visitor.  On
  * success s->client.is_unix records whether the address is a UNIX socket.
  *
  * Returns the visited address, or NULL with @errp set when no "server."
  * entry exists or crumpling fails.  On a visitor error @errp is set and
  * saddr is returned as the visitor left it (presumably NULL -- TODO confirm).
  */
 static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
 {
    SocketAddress *saddr = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
    Error *local_err = NULL;
    qdict_extract_subqdict(options, &addr, "server.");
    if (!qdict_size(addr)) {
        error_setg(errp, "NBD server address missing");
        goto done;
    }
    crumpled_addr = qdict_crumple(addr, errp);
    if (!crumpled_addr) {
        goto done;
    }
    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
    }
    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
-    s->export = g_strdup(qemu_opt_get(opts, "export"));
+done:
-
+    QDECREF(addr);
    /* Common exit: the temporaries are released on success and on error. */
    qobject_decref(crumpled_addr);
    visit_free(iv);
    return saddr;
 }
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;
    return &s->client;
@@ -248,6 +307,7 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
    Error *local_err = NULL;
    sioc = qio_channel_socket_new();
    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
    qio_channel_socket_connect_sync(sioc,
                                    saddr,
@@ -332,7 +392,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
    QIOChannelSocket *sioc = NULL;
    SocketAddress *saddr = NULL;
    QCryptoTLSCreds *tlscreds = NULL;
    const char *hostname = NULL;
    int ret = -EINVAL;
@@ -344,12 +403,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto error;
    }
-    /* Pop the config into our state object. Exit if invalid. */
+    /* Translate @host, @port, and @path to a SocketAddress */
-    saddr = nbd_config(s, opts, errp);
+    if (!nbd_process_legacy_socket_options(options, opts, errp)) {
    if (!saddr) {
        goto error;
    }
    /* Pop the config into our state object. Exit if invalid. */
    s->saddr = nbd_config(s, options, errp);
    if (!s->saddr) {
        goto error;
    }
    s->export = g_strdup(qemu_opt_get(opts, "export"));
    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
    if (s->tlscredsid) {
        tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
@@ -357,17 +423,17 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }
-        if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = saddr->u.inet.data->host;
+        hostname = s->saddr->u.inet.data->host;
    }
    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    sioc = nbd_establish_connection(saddr, errp);
+    sioc = nbd_establish_connection(s->saddr, errp);
    if (!sioc) {
        ret = -ECONNREFUSED;
        goto error;
@@ -384,13 +450,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        object_unref(OBJECT(tlscreds));
    }
    if (ret < 0) {
-        g_free(s->path);
+        qapi_free_SocketAddress(s->saddr);
        g_free(s->host);
        g_free(s->port);
        g_free(s->export);
        g_free(s->tlscredsid);
    }
    qapi_free_SocketAddress(saddr);
    qemu_opts_del(opts);
    return ret;
 }
@@ -403,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
 /*
  * Cap the transfer, write-zeroes and discard limits of @bs at
  * NBD_MAX_BUFFER_SIZE.  @errp is not used.
  */
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
 }
@@ -412,9 +476,7 @@ static void nbd_close(BlockDriverState *bs)
    nbd_client_close(bs);
-    g_free(s->path);
+    qapi_free_SocketAddress(s->saddr);
    g_free(s->host);
    g_free(s->port);
    g_free(s->export);
    g_free(s->tlscredsid);
 }
@@ -441,45 +503,52 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
 {
    BDRVNBDState *s = bs->opaque;
    QDict *opts = qdict_new();
    QObject *saddr_qdict;
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
+    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
-
+        const InetSocketAddress *inet = s->saddr->u.inet.data;
-    if (s->path && s->export) {
+        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+            host = inet->host;
-                 "nbd+unix:///%s?socket=%s", s->export, s->path);
+            port = inet->port;
-    } else if (s->path && !s->export) {
+        }
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
-                 "nbd+unix://?socket=%s", s->path);
+        path = s->saddr->u.q_unix.data->path;
    } else if (!s->path && s->export && s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s:%s/%s", s->host, s->port, s->export);
    } else if (!s->path && s->export && !s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s/%s", s->host, s->export);
    } else if (!s->path && !s->export && s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s:%s", s->host, s->port);
    } else if (!s->path && !s->export && !s->port) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s", s->host);
    }
-    if (s->path) {
+    qdict_put(opts, "driver", qstring_from_str("nbd"));
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
+
-    } else if (s->port) {
+    if (path && s->export) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
+                 "nbd+unix:///%s?socket=%s", s->export, path);
-    } else {
+    } else if (path && !s->export) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd+unix://?socket=%s", path);
    } else if (host && s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s:%s/%s", host, port, s->export);
    } else if (host && !s->export) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nbd://%s:%s", host, port);
    }
    ov = qobject_output_visitor_new(&saddr_qdict);
    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
    assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
    qdict_put_obj(opts, "server", saddr_qdict);
    if (s->export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
+        qdict_put(opts, "export", qstring_from_str(s->export));
    }
    if (s->tlscredsid) {
-        qdict_put_obj(opts, "tls-creds",
+        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
                      QOBJECT(qstring_from_str(s->tlscredsid)));
    }
    qdict_flatten(opts);
    bs->full_open_options = opts;
 }
@@ -491,6 +560,7 @@ static BlockDriver bdrv_nbd = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -509,6 +579,7 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -527,6 +598,7 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -35,8 +35,15 @@
 #include "qemu/uri.h"
 #include "qemu/cutils.h"
 #include "sysemu/sysemu.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "qapi-visit.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 #include <nfsc/libnfs.h>
 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
@@ -49,9 +56,13 @@ typedef struct NFSClient {
    AioContext *aio_context;
    blkcnt_t st_blocks;
    bool cache_used;
    NFSServer *server;
    char *path;
    int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
 } NFSClient;
 typedef struct NFSRPC {
    BlockDriverState *bs;
    int ret;
    int complete;
    QEMUIOVector *iov;
@@ -60,6 +71,122 @@ typedef struct NFSRPC {
    NFSClient *client;
 } NFSRPC;
 /*
  * Translate an "nfs://server/path[?param=value...]" URI into block options.
  *
  * On success these keys are stored in @options: "server.host", "server.type"
  * (always "inet"), "path", and one key per query parameter:
  *   uid -> "user", gid -> "group", tcp-syncnt -> "tcp-syn-count",
  *   readahead -> "readahead-size", pagecache -> "page-cache-size",
  *   debug -> "debug".
  * Every query parameter needs a value for which parse_uint_full() returns 0.
  *
  * Returns 0 on success, or -EINVAL with @errp set on failure.  Entries stored
  * in @options before the failure are left in place.
  */
 static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
 {
    URI *uri = NULL;
    QueryParams *qp = NULL;
    int ret = -EINVAL, i;
    uri = uri_parse(filename);
    if (!uri) {
        error_setg(errp, "Invalid URI specified");
        goto out;
    }
    /* uri->scheme is NULL for a scheme-less URI; strcmp() on a NULL pointer
     * is undefined, so reject that case before comparing. */
    if (!uri->scheme || strcmp(uri->scheme, "nfs") != 0) {
        error_setg(errp, "URI scheme must be 'nfs'");
        goto out;
    }
    if (!uri->server) {
        error_setg(errp, "missing hostname in URI");
        goto out;
    }
    if (!uri->path) {
        error_setg(errp, "missing file path in URI");
        goto out;
    }
    qp = query_params_parse(uri->query);
    if (!qp) {
        error_setg(errp, "could not parse query parameters");
        goto out;
    }
    qdict_put(options, "server.host", qstring_from_str(uri->server));
    qdict_put(options, "server.type", qstring_from_str("inet"));
    qdict_put(options, "path", qstring_from_str(uri->path));
    for (i = 0; i < qp->n; i++) {
        if (!qp->p[i].value) {
            error_setg(errp, "Value for NFS parameter expected: %s",
                       qp->p[i].name);
            goto out;
        }
        /* The value is validated before the name is looked up, so an unknown
         * parameter with a bad value reports "Illegal value". */
        if (parse_uint_full(qp->p[i].value, NULL, 0)) {
            error_setg(errp, "Illegal value for NFS parameter: %s",
                       qp->p[i].name);
            goto out;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
            qdict_put(options, "user",
                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "gid")) {
            qdict_put(options, "group",
                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
            qdict_put(options, "tcp-syn-count",
                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "readahead")) {
            qdict_put(options, "readahead-size",
                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
            qdict_put(options, "page-cache-size",
                      qstring_from_str(qp->p[i].value));
        } else if (!strcmp(qp->p[i].name, "debug")) {
            qdict_put(options, "debug",
                      qstring_from_str(qp->p[i].value));
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
            goto out;
        }
    }
    ret = 0;
 out:
    if (qp) {
        query_params_free(qp);
    }
    if (uri) {
        uri_free(uri);
    }
    return ret;
 }
 /*
  * Check whether @options already holds a key that nfs_parse_uri() would
  * set from a file name, or any key starting with "server.".
  *
  * Returns true and sets @errp for the first such key; false when none exists.
  */
 static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
 {
    /* NULL-terminated list of exact key names that clash with a file name. */
    static const char *const reserved[] = {
        "host", "path", "user", "group", "tcp-syn-count",
        "readahead-size", "page-cache-size", "debug", NULL
    };
    const QDictEntry *entry;
    for (entry = qdict_first(options); entry;
         entry = qdict_next(options, entry)) {
        bool clash = strstart(entry->key, "server.", NULL);
        int k;
        for (k = 0; !clash && reserved[k]; k++) {
            clash = strcmp(entry->key, reserved[k]) == 0;
        }
        if (clash) {
            error_setg(errp, "Option %s cannot be used with a filename",
                       entry->key);
            return true;
        }
    }
    return false;
 }
 /*
  * Fill @options from the NFS URI in @filename, unless @options already
  * holds a conflicting key.  Errors are reported through @errp; the return
  * value of nfs_parse_uri() is not used.
  */
 static void nfs_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
    if (!nfs_has_filename_options_conflict(options, errp)) {
        nfs_parse_uri(filename, options, errp);
    }
 }
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);
@@ -70,7 +197,8 @@ static void nfs_set_events(NFSClient *client)
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
                           false,
                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL,
                           NULL, client);
    }
    client->events = ev;
@@ -90,11 +218,12 @@ static void nfs_process_write(void *arg)
    nfs_set_events(client);
 }
-static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
+static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 {
    /* Overwrite *task with a compound literal: fields not named below
     * (e.g. ret, complete, iov) are zero-initialised, and the calling
     * coroutine is recorded in .co. */
    *task = (NFSRPC) {
        .co             = qemu_coroutine_self(),
-        .client         = client,
+        .bs             = bs,
        .client         = bs->opaque,
    };
 }
@@ -111,6 +240,7 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
 {
    NFSRPC *task = private_data;
    task->ret = ret;
    assert(!task->st);
    if (task->ret > 0 && task->iov) {
        if (task->ret <= task->iov->size) {
            qemu_iovec_from_buf(task->iov, 0, data, task->ret);
@@ -118,18 +248,11 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
            task->ret = -EIO;
        }
    }
    if (task->ret == 0 && task->st) {
        memcpy(task->st, data, sizeof(struct stat));
    }
    if (task->ret < 0) {
        error_report("NFS Error: %s", nfs_get_error(nfs));
    }
-    if (task->co) {
+    aio_bh_schedule_oneshot(task->client->aio_context,
-        aio_bh_schedule_oneshot(task->client->aio_context,
+                            nfs_co_generic_bh_cb, task);
                                nfs_co_generic_bh_cb, task);
    } else {
        task->complete = 1;
    }
 }
 static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
@@ -139,7 +262,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
    NFSClient *client = bs->opaque;
    NFSRPC task;
-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);
    task.iov = iov;
    if (nfs_pread_async(client->context, client->fh,
@@ -149,8 +272,8 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
        return -ENOMEM;
    }
    nfs_set_events(client);
    while (!task.complete) {
        nfs_set_events(client);
        qemu_coroutine_yield();
    }
@@ -174,7 +297,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
    NFSRPC task;
    char *buf = NULL;
-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);
    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
    if (nb_sectors && buf == NULL) {
@@ -191,8 +314,8 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
        return -ENOMEM;
    }
    nfs_set_events(client);
    while (!task.complete) {
        nfs_set_events(client);
        qemu_coroutine_yield();
    }
@@ -210,30 +333,59 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
    NFSClient *client = bs->opaque;
    NFSRPC task;
-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);
    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
        return -ENOMEM;
    }
    nfs_set_events(client);
    while (!task.complete) {
        nfs_set_events(client);
        qemu_coroutine_yield();
    }
    return task.ret;
 }
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
    .name = "nfs",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
-            .name = "filename",
+            .name = "path",
            .type = QEMU_OPT_STRING,
-            .help = "URL to the NFS file",
+            .help = "Path of the image on the host",
        },
        {
            .name = "uid",
            .type = QEMU_OPT_NUMBER,
            .help = "UID value to use when talking to the server",
        },
        {
            .name = "gid",
            .type = QEMU_OPT_NUMBER,
            .help = "GID value to use when talking to the server",
        },
        {
            .name = "tcp-syncnt",
            .type = QEMU_OPT_NUMBER,
            .help = "Number of SYNs to send during the session establish",
        },
        {
            .name = "readahead",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the readahead size in bytes",
        },
        {
            .name = "pagecache",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the pagecache size in bytes",
        },
        {
            .name = "debug",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the NFS debug level (max 2)",
        },
        { /* end of list */ }
    },
@@ -244,7 +396,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
    NFSClient *client = bs->opaque;
    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    client->events = 0;
 }
@@ -264,7 +416,7 @@ static void nfs_client_close(NFSClient *client)
            nfs_close(client->context, client->fh);
        }
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL);
+                           false, NULL, NULL, NULL, NULL);
        nfs_destroy_context(client->context);
    }
    memset(client, 0, sizeof(NFSClient));
@@ -276,25 +428,65 @@ static void nfs_file_close(BlockDriverState *bs)
    nfs_client_close(client);
 }
-static int64_t nfs_client_open(NFSClient *client, const char *filename,
+static NFSServer *nfs_config(QDict *options, Error **errp)
 {
    NFSServer *server = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
    Error *local_error = NULL;
    qdict_extract_subqdict(options, &addr, "server.");
    if (!qdict_size(addr)) {
        error_setg(errp, "NFS server address missing");
        goto out;
    }
    crumpled_addr = qdict_crumple(addr, errp);
    if (!crumpled_addr) {
        goto out;
    }
    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_NFSServer(iv, NULL, &server, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
        goto out;
    }
 out:
    QDECREF(addr);
    qobject_decref(crumpled_addr);
    visit_free(iv);
    return server;
 }
 static int64_t nfs_client_open(NFSClient *client, QDict *options,
                               int flags, Error **errp, int open_flags)
 {
-    int ret = -EINVAL, i;
+    int ret = -EINVAL;
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
    struct stat st;
    URI *uri;
    QueryParams *qp = NULL;
    char *file = NULL, *strp = NULL;
-    uri = uri_parse(filename);
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    if (!uri) {
+    qemu_opts_absorb_qdict(opts, options, &local_err);
-        error_setg(errp, "Invalid URL specified");
+    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }
-    if (!uri->server) {
+
-        error_setg(errp, "Invalid URL specified");
+    client->path = g_strdup(qemu_opt_get(opts, "path"));
    if (!client->path) {
        ret = -EINVAL;
        error_setg(errp, "No path was specified");
        goto fail;
    }
-    strp = strrchr(uri->path, '/');
+
    strp = strrchr(client->path, '/');
    if (strp == NULL) {
        error_setg(errp, "Invalid URL specified");
        goto fail;
@@ -302,85 +494,89 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    file = g_strdup(strp);
    *strp = 0;
    /* Pop the config into our state object, Exit if invalid */
    client->server = nfs_config(options, errp);
    if (!client->server) {
        ret = -EINVAL;
        goto fail;
    }
    client->context = nfs_init_context();
    if (client->context == NULL) {
        error_setg(errp, "Failed to init NFS context");
        goto fail;
    }
-    qp = query_params_parse(uri->query);
+    if (qemu_opt_get(opts, "uid")) {
-    for (i = 0; i < qp->n; i++) {
+        client->uid = qemu_opt_get_number(opts, "uid", 0);
-        unsigned long long val;
+        nfs_set_uid(client->context, client->uid);
        if (!qp->p[i].value) {
            error_setg(errp, "Value for NFS parameter expected: %s",
                       qp->p[i].name);
            goto fail;
        }
        if (parse_uint_full(qp->p[i].value, &val, 0)) {
            error_setg(errp, "Illegal value for NFS parameter: %s",
                       qp->p[i].name);
            goto fail;
        }
        if (!strcmp(qp->p[i].name, "uid")) {
            nfs_set_uid(client->context, val);
        } else if (!strcmp(qp->p[i].name, "gid")) {
            nfs_set_gid(client->context, val);
        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
            nfs_set_tcp_syncnt(client->context, val);
 #ifdef LIBNFS_FEATURE_READAHEAD
        } else if (!strcmp(qp->p[i].name, "readahead")) {
            if (open_flags & BDRV_O_NOCACHE) {
                error_setg(errp, "Cannot enable NFS readahead "
                                 "if cache.direct = on");
                goto fail;
            }
            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
                error_report("NFS Warning: Truncating NFS readahead"
                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
                val = QEMU_NFS_MAX_READAHEAD_SIZE;
            }
            nfs_set_readahead(client->context, val);
 #ifdef LIBNFS_FEATURE_PAGECACHE
            nfs_set_pagecache_ttl(client->context, 0);
 #endif
            client->cache_used = true;
 #endif
 #ifdef LIBNFS_FEATURE_PAGECACHE
            nfs_set_pagecache_ttl(client->context, 0);
        } else if (!strcmp(qp->p[i].name, "pagecache")) {
            if (open_flags & BDRV_O_NOCACHE) {
                error_setg(errp, "Cannot enable NFS pagecache "
                                 "if cache.direct = on");
                goto fail;
            }
            if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) {
                error_report("NFS Warning: Truncating NFS pagecache"
                             " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
                val = QEMU_NFS_MAX_PAGECACHE_SIZE;
            }
            nfs_set_pagecache(client->context, val);
            nfs_set_pagecache_ttl(client->context, 0);
            client->cache_used = true;
 #endif
 #ifdef LIBNFS_FEATURE_DEBUG
        } else if (!strcmp(qp->p[i].name, "debug")) {
            /* limit the maximum debug level to avoid potential flooding
             * of our log files. */
            if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
                error_report("NFS Warning: Limiting NFS debug level"
                             " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
                val = QEMU_NFS_MAX_DEBUG_LEVEL;
            }
            nfs_set_debug(client->context, val);
 #endif
        } else {
            error_setg(errp, "Unknown NFS parameter name: %s",
                       qp->p[i].name);
            goto fail;
        }
    }
-    ret = nfs_mount(client->context, uri->server, uri->path);
+    if (qemu_opt_get(opts, "gid")) {
        client->gid = qemu_opt_get_number(opts, "gid", 0);
        nfs_set_gid(client->context, client->gid);
    }
    if (qemu_opt_get(opts, "tcp-syncnt")) {
        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
        nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
    }
 #ifdef LIBNFS_FEATURE_READAHEAD
    if (qemu_opt_get(opts, "readahead")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS readahead "
                             "if cache.direct = on");
            goto fail;
        }
        client->readahead = qemu_opt_get_number(opts, "readahead", 0);
        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
            error_report("NFS Warning: Truncating NFS readahead "
                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
            client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
        }
        nfs_set_readahead(client->context, client->readahead);
 #ifdef LIBNFS_FEATURE_PAGECACHE
        nfs_set_pagecache_ttl(client->context, 0);
 #endif
        client->cache_used = true;
    }
 #endif
 #ifdef LIBNFS_FEATURE_PAGECACHE
    if (qemu_opt_get(opts, "pagecache")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS pagecache "
                             "if cache.direct = on");
            goto fail;
        }
        client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
            error_report("NFS Warning: Truncating NFS pagecache "
                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
            client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
        }
        nfs_set_pagecache(client->context, client->pagecache);
        nfs_set_pagecache_ttl(client->context, 0);
        client->cache_used = true;
    }
 #endif
 #ifdef LIBNFS_FEATURE_DEBUG
    if (qemu_opt_get(opts, "debug")) {
        client->debug = qemu_opt_get_number(opts, "debug", 0);
        /* limit the maximum debug level to avoid potential flooding
         * of our log files. */
        if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
            error_report("NFS Warning: Limiting NFS debug level "
                         "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
            client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
        }
        nfs_set_debug(client->context, client->debug);
    }
 #endif
    ret = nfs_mount(client->context, client->server->host, client->path);
    if (ret < 0) {
        error_setg(errp, "Failed to mount nfs share: %s",
                   nfs_get_error(client->context));
@@ -413,14 +609,13 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
    client->st_blocks = st.st_blocks;
    client->has_zero_init = S_ISREG(st.st_mode);
    *strp = '/';
    goto out;
 fail:
    nfs_client_close(client);
 out:
-    if (qp) {
+    qemu_opts_del(opts);
        query_params_free(qp);
    }
    uri_free(uri);
    g_free(file);
    return ret;
 }
@@ -429,28 +624,17 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp) {
    NFSClient *client = bs->opaque;
    int64_t ret;
    QemuOpts *opts;
    Error *local_err = NULL;
    client->aio_context = bdrv_get_aio_context(bs);
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    ret = nfs_client_open(client, options,
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto out;
    }
    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
                          errp, bs->open_flags);
    if (ret < 0) {
-        goto out;
+        return ret;
    }
    bs->total_sectors = ret;
    ret = 0;
 out:
    qemu_opts_del(opts);
    return ret;
 }
@@ -472,6 +656,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
    int ret = 0;
    int64_t total_size = 0;
    NFSClient *client = g_new0(NFSClient, 1);
    QDict *options = NULL;
    client->aio_context = qemu_get_aio_context();
@@ -479,13 +664,20 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);
-    ret = nfs_client_open(client, url, O_CREAT, errp, 0);
+    options = qdict_new();
    ret = nfs_parse_uri(url, options, errp);
    if (ret < 0) {
        goto out;
    }
    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
    if (ret < 0) {
        goto out;
    }
    ret = nfs_ftruncate(client->context, client->fh, total_size);
    nfs_client_close(client);
 out:
    QDECREF(options);
    g_free(client);
    return ret;
 }
@@ -496,6 +688,22 @@ static int nfs_has_zero_init(BlockDriverState *bs)
    return client->has_zero_init;
 }
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
                               void *private_data)
 {
    NFSRPC *task = private_data;
    task->ret = ret;
    if (task->ret == 0) {
        memcpy(task->st, data, sizeof(struct stat));
    }
    if (task->ret < 0) {
        error_report("NFS Error: %s", nfs_get_error(nfs));
    }
    task->complete = 1;
    bdrv_wakeup(task->bs);
 }
 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
 {
    NFSClient *client = bs->opaque;
@@ -507,16 +715,15 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
        return client->st_blocks * 512;
    }
    task.bs = bs;
    task.st = &st;
-    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+    if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
                        &task) != 0) {
        return -ENOMEM;
    }
-    while (!task.complete) {
+    nfs_set_events(client);
-        nfs_set_events(client);
+    BDRV_POLL_WHILE(bs, !task.complete);
        aio_poll(client->aio_context, true);
    }
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }
@@ -561,6 +768,67 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
    return 0;
 }
 static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
 {
    NFSClient *client = bs->opaque;
    QDict *opts = qdict_new();
    QObject *server_qdict;
    Visitor *ov;
    qdict_put(opts, "driver", qstring_from_str("nfs"));
    if (client->uid && !client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nfs://%s%s?uid=%" PRId64, client->server->host, client->path,
                 client->uid);
    } else if (!client->uid && client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nfs://%s%s?gid=%" PRId64, client->server->host, client->path,
                 client->gid);
    } else if (client->uid && client->gid) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
                 client->server->host, client->path, client->uid, client->gid);
    } else {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "nfs://%s%s", client->server->host, client->path);
    }
    ov = qobject_output_visitor_new(&server_qdict);
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
    assert(qobject_type(server_qdict) == QTYPE_QDICT);
    qdict_put_obj(opts, "server", server_qdict);
    qdict_put(opts, "path", qstring_from_str(client->path));
    if (client->uid) {
        qdict_put(opts, "uid", qint_from_int(client->uid));
    }
    if (client->gid) {
        qdict_put(opts, "gid", qint_from_int(client->gid));
    }
    if (client->tcp_syncnt) {
        qdict_put(opts, "tcp-syncnt",
                      qint_from_int(client->tcp_syncnt));
    }
    if (client->readahead) {
        qdict_put(opts, "readahead",
                      qint_from_int(client->readahead));
    }
    if (client->pagecache) {
        qdict_put(opts, "pagecache",
                      qint_from_int(client->pagecache));
    }
    if (client->debug) {
        qdict_put(opts, "debug", qint_from_int(client->debug));
    }
    visit_free(ov);
    qdict_flatten(opts);
    bs->full_open_options = opts;
 }
 #ifdef LIBNFS_FEATURE_PAGECACHE
 static void nfs_invalidate_cache(BlockDriverState *bs,
                                 Error **errp)
@@ -575,7 +843,7 @@ static BlockDriver bdrv_nfs = {
    .protocol_name                  = "nfs",
    .instance_size                  = sizeof(NFSClient),
-    .bdrv_needs_filename            = true,
+    .bdrv_parse_filename            = nfs_parse_filename,
    .create_opts                    = &nfs_create_opts,
    .bdrv_has_zero_init             = nfs_has_zero_init,
@@ -593,6 +861,7 @@ static BlockDriver bdrv_nfs = {
    .bdrv_detach_aio_context        = nfs_detach_aio_context,
    .bdrv_attach_aio_context        = nfs_attach_aio_context,
    .bdrv_refresh_filename          = nfs_refresh_filename,
 #ifdef LIBNFS_FEATURE_PAGECACHE
    .bdrv_invalidate_cache          = nfs_invalidate_cache,
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -29,7 +29,7 @@
 #include "block/write-threshold.h"
 #include "qmp-commands.h"
 #include "qapi-visit.h"
-#include "qapi/qmp-output-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/types.h"
 #include "sysemu/block-backend.h"
 #include "qemu/cutils.h"
@@ -691,13 +691,14 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
                                   ImageInfoSpecific *info_spec)
 {
    QObject *obj, *data;
-    Visitor *v = qmp_output_visitor_new(&obj);
+    Visitor *v = qobject_output_visitor_new(&obj);
    visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
    visit_complete(v, &obj);
    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
    qobject_decref(obj);
    visit_free(v);
 }
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -104,6 +104,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    unsigned int len, i, shift;
    int ret;
    QCowHeader header;
    Error *local_err = NULL;
    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
@@ -153,7 +154,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -EINVAL;
        goto fail;
    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
+    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
                                 QCRYPTO_CIPHER_MODE_CBC)) {
        error_setg(errp, "AES cipher not available");
        ret = -EINVAL;
        goto fail;
@@ -251,7 +253,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_free(s->migration_blocker);
        goto fail;
    }
    qemu_co_mutex_init(&s->lock);
    return 0;
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -22,7 +22,6 @@
 * THE SOFTWARE.
 */
 /* Needed for CONFIG_MADVISE */
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu-common.h"
@@ -66,7 +65,8 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
                                      int i, int num_tables)
 {
-#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
    BDRVQcow2State *s = bs->opaque;
    void *t = qcow2_cache_get_table_addr(bs, c, i);
    int align = getpagesize();
@@ -74,7 +74,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
    if (length > 0) {
-        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+        madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
    }
 #endif
 }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1558,7 +1558,7 @@ fail:
 * clusters.
 */
 static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
-                          uint64_t nb_clusters)
+                          uint64_t nb_clusters, int flags)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_table;
@@ -1582,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
        /* Update L2 entries */
        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (old_offset & QCOW_OFLAG_COMPRESSED) {
+        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
        } else {
@@ -1595,7 +1595,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
    return nb_clusters;
 }
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
                        int flags)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t nb_clusters;
@@ -1612,7 +1613,7 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
    s->cache_discards = true;
    while (nb_clusters > 0) {
-        ret = zero_single_l2(bs, offset, nb_clusters);
+        ret = zero_single_l2(bs, offset, nb_clusters, flags);
        if (ret < 0) {
            goto fail;
        }
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -668,6 +668,14 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            s->cache_clean_interval);
 #ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
 #endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
@@ -959,7 +967,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -EINVAL;
        goto fail;
    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
+    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
                                 QCRYPTO_CIPHER_MODE_CBC)) {
        error_setg(errp, "AES cipher not available");
        ret = -EINVAL;
        goto fail;
@@ -1154,6 +1163,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    /* Initialise locks */
    qemu_co_mutex_init(&s->lock);
    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
    /* Repair image if dirty */
    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
@@ -1204,6 +1214,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
    }
    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
    bs->bl.pdiscard_alignment = s->cluster_size;
 }
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -2476,7 +2487,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
    /* Whatever is left can use real zero clusters */
-    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS);
+    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
@@ -2488,6 +2499,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
    int ret;
    BDRVQcow2State *s = bs->opaque;
    if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) {
        assert(count < s->cluster_size);
        return -ENOTSUP;
    }
    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
                                 QCOW2_DISCARD_REQUEST, false);
@@ -2792,7 +2808,8 @@ static int qcow2_make_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t start_sector;
-    int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
+    int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) /
                       BDRV_SECTOR_SIZE);
    int l1_clusters, ret = 0;
    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -473,8 +473,6 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
    return r1 > r2 ? r1 - r2 : r2 - r1;
 }
 // FIXME Need qcow2_ prefix to global functions
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors);
@@ -547,7 +545,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
                        int flags);
 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -174,9 +174,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
    qed_read_table(s, s->header.l1_table_offset,
                   s->l1_table, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
        aio_poll(bdrv_get_aio_context(s->bs), true);
    }
    return ret;
 }
@@ -195,9 +193,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
    int ret = -EINPROGRESS;
    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
        aio_poll(bdrv_get_aio_context(s->bs), true);
    }
    return ret;
 }
@@ -268,9 +264,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
    int ret = -EINPROGRESS;
    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
        aio_poll(bdrv_get_aio_context(s->bs), true);
    }
    return ret;
 }
@@ -290,9 +284,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
    int ret = -EINPROGRESS;
    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
        aio_poll(bdrv_get_aio_context(s->bs), true);
    }
    return ret;
 }
--- a/block/qed.c
+++ b/block/qed.c
@@ -336,7 +336,7 @@ static void qed_need_check_timer_cb(void *opaque)
    qed_plug_allocating_write_reqs(s);
    /* Ensure writes are on disk before clearing flag */
-    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+    bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
 }
 static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -378,6 +378,19 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
    }
 }
 static void bdrv_qed_drain(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;
    /* Fire the timer immediately in order to start doing I/O as soon as the
     * header is flushed.
     */
    if (s->need_check_timer && timer_pending(s->need_check_timer)) {
        qed_cancel_need_check_timer(s);
        qed_need_check_timer_cb(s);
    }
 }
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
@@ -1668,6 +1681,7 @@ static BlockDriver bdrv_qed = {
    .bdrv_check               = bdrv_qed_check,
    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
    .bdrv_drain               = bdrv_qed_drain,
 };
 static void bdrv_qed_init(void)
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockAIOCB *aiocb;
+    BlockDriverState *bs;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockAIOCB common;
+    BlockDriverState *bs;
    Coroutine *co;
    /* Request metadata */
-    uint64_t sector_num;
+    uint64_t offset;
-    int nb_sectors;
+    uint64_t bytes;
    QEMUIOVector *qiov;         /* calling IOV */
@@ -130,50 +131,18 @@ struct QuorumAIOCB {
    bool is_read;
    int vote_ret;
-    int child_iter;             /* which child to read in fifo pattern */
+    int children_read;          /* how many children have been read from */
 };
-static bool quorum_vote(QuorumAIOCB *acb);
+typedef struct QuorumCo {
-
+    QuorumAIOCB *acb;
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
+    int idx;
-{
+} QuorumCo;
    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
    BDRVQuorumState *s = acb->common.bs->opaque;
    int i;
    /* cancel all callbacks */
    for (i = 0; i < s->num_children; i++) {
        if (acb->qcrs[i].aiocb) {
            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
        }
    }
 }
 static AIOCBInfo quorum_aiocb_info = {
    .aiocb_size         = sizeof(QuorumAIOCB),
    .cancel_async       = quorum_aio_cancel,
 };
 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
    int i, ret = 0;
    if (acb->vote_ret) {
        ret = acb->vote_ret;
    }
    acb->common.cb(acb->common.opaque, ret);
    if (acb->is_read) {
        /* on the quorum case acb->child_iter == s->num_children - 1 */
        for (i = 0; i <= acb->child_iter; i++) {
            qemu_vfree(acb->qcrs[i].buf);
            qemu_iovec_destroy(&acb->qcrs[i].qiov);
        }
    }
    g_free(acb->qcrs);
-    qemu_aio_unref(acb);
+    g_free(acb);
 }
 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -186,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
    return a->l == b->l;
 }
-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
                                   BlockDriverState *bs,
                                   QEMUIOVector *qiov,
-                                   uint64_t sector_num,
+                                   uint64_t offset,
-                                   int nb_sectors,
+                                   uint64_t bytes)
                                   BlockCompletionFunc *cb,
                                   void *opaque)
 {
-    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
    int i;
-    acb->common.bs->opaque = s;
+    *acb = (QuorumAIOCB) {
-    acb->sector_num = sector_num;
+        .co                 = qemu_coroutine_self(),
-    acb->nb_sectors = nb_sectors;
+        .bs                 = bs,
-    acb->qiov = qiov;
+        .offset             = offset,
-    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
+        .bytes              = bytes,
-    acb->count = 0;
+        .qiov               = qiov,
-    acb->success_count = 0;
+        .votes.compare      = quorum_sha256_compare,
-    acb->rewrite_count = 0;
+        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
-    acb->votes.compare = quorum_sha256_compare;
+    };
    QLIST_INIT(&acb->votes.vote_list);
    acb->is_read = false;
    acb->vote_ret = 0;
    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
    for (i = 0; i < s->num_children; i++) {
        acb->qcrs[i].buf = NULL;
        acb->qcrs[i].ret = 0;
@@ -219,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
    return acb;
 }
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
-                              int nb_sectors, char *node_name, int ret)
+                              uint64_t bytes, char *node_name, int ret)
 {
    const char *msg = NULL;
    int64_t start_sector = offset / BDRV_SECTOR_SIZE;
    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
    if (ret < 0) {
        msg = strerror(-ret);
    }
-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
-                                      sector_num, nb_sectors, &error_abort);
+                                      end_sector - start_sector, &error_abort);
 }
 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
+    const char *reference = bdrv_get_device_or_node_name(acb->bs);
-    qapi_event_send_quorum_failure(reference, acb->sector_num,
+    int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
-                                   acb->nb_sectors, &error_abort);
+    int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
                                      BDRV_SECTOR_SIZE);
    qapi_event_send_quorum_failure(reference, start_sector,
                                   end_sector - start_sector, &error_abort);
 }
 static int quorum_vote_error(QuorumAIOCB *acb);
 static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    if (acb->success_count < s->threshold) {
        acb->vote_ret = quorum_vote_error(acb);
@@ -253,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
    return false;
 }
-static void quorum_rewrite_aio_cb(void *opaque, int ret)
+static int read_fifo_child(QuorumAIOCB *acb);
 {
    QuorumAIOCB *acb = opaque;
    /* one less rewrite to do */
    acb->rewrite_count--;
    /* wait until all rewrite callbacks have completed */
    if (acb->rewrite_count) {
        return;
    }
    quorum_aio_finalize(acb);
 }
 static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -283,57 +240,11 @@ static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
    }
 }
-static void quorum_aio_cb(void *opaque, int ret)
+static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
 {
    QuorumChildRequest *sacb = opaque;
    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-    bool rewrite = false;
+    quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
    if (ret == 0) {
        acb->success_count++;
    } else {
        QuorumOpType type;
        type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
        quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
                          sacb->aiocb->bs->node_name, ret);
    }
    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
        /* We try to read next child in FIFO order if we fail to read */
        if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
            acb->child_iter++;
            read_fifo_child(acb);
            return;
        }
        if (ret == 0) {
            quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
        }
        acb->vote_ret = ret;
        quorum_aio_finalize(acb);
        return;
    }
    sacb->ret = ret;
    acb->count++;
    assert(acb->count <= s->num_children);
    assert(acb->success_count <= s->num_children);
    if (acb->count < s->num_children) {
        return;
    }
    /* Do the vote on read */
    if (acb->is_read) {
        rewrite = quorum_vote(acb);
    } else {
        quorum_has_too_much_io_failed(acb);
    }
    /* if no rewrite is done the code will finish right away */
    if (!rewrite) {
        quorum_aio_finalize(acb);
    }
 }
 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -348,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
                              acb->nb_sectors,
                              s->children[item->index]->bs->node_name, 0);
        }
    }
 }
-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+static void quorum_rewrite_entry(void *opaque)
 {
    QuorumCo *co = opaque;
    QuorumAIOCB *acb = co->acb;
    BDRVQuorumState *s = acb->bs->opaque;
    /* Ignore any errors, it's just a correction attempt for already
     * corrupted data. */
    bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
                    acb->qiov, 0);
    /* Wake up the caller after the last rewrite */
    acb->rewrite_count--;
    if (!acb->rewrite_count) {
        qemu_coroutine_enter_if_inactive(acb->co);
    }
 }
 static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
                                        QuorumVoteValue *value)
 {
    QuorumVoteVersion *version;
@@ -374,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
        }
    }
-    /* quorum_rewrite_aio_cb will count down this to zero */
+    /* quorum_rewrite_entry will count down this to zero */
    acb->rewrite_count = count;
    /* now fire the correcting rewrites */
@@ -383,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->children[item->index], acb->sector_num,
+            Coroutine *co;
-                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+            QuorumCo data = {
-                            acb);
+                .acb = acb,
                .idx = item->index,
            };
            co = qemu_coroutine_create(quorum_rewrite_entry, &data);
            qemu_coroutine_enter(co);
        }
    }
@@ -505,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
    va_list ap;
    va_start(ap, fmt);
-    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+    fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
-            acb->sector_num, acb->nb_sectors);
+            acb->offset, acb->bytes);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -517,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
                           QEMUIOVector *a,
                           QEMUIOVector *b)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    ssize_t offset;
    /* This driver will replace blkverify in this particular case */
    if (s->is_blkverify) {
        offset = qemu_iovec_compare(a, b);
        if (offset != -1) {
-            quorum_err(acb, "contents mismatch in sector %" PRId64,
+            quorum_err(acb, "contents mismatch at offset %" PRIu64,
-                       acb->sector_num +
+                       acb->offset + offset);
                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
        }
        return true;
    }
@@ -537,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
 /* Do a vote to get the error code */
 static int quorum_vote_error(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    QuorumVoteVersion *winner = NULL;
    QuorumVotes error_votes;
    QuorumVoteValue result_value;
@@ -566,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
    return ret;
 }
-static bool quorum_vote(QuorumAIOCB *acb)
+static void quorum_vote(QuorumAIOCB *acb)
 {
    bool quorum = true;
    bool rewrite = false;
    int i, j, ret;
    QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    QuorumVoteVersion *winner;
    if (quorum_has_too_much_io_failed(acb)) {
-        return false;
+        return;
    }
    /* get the index of the first successful read */
@@ -604,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
    /* Every successful read agrees */
    if (quorum) {
        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return false;
+        return;
    }
    /* compute hashes for each successful read, also store indexes */
@@ -639,20 +570,48 @@ static bool quorum_vote(QuorumAIOCB *acb)
    /* corruption correction is enabled */
    if (s->rewrite_corrupted) {
-        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+        quorum_rewrite_bad_versions(acb, &winner->value);
    }
 free_exit:
    /* free lists */
    quorum_free_vote_list(&acb->votes);
    return rewrite;
 }
-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static void read_quorum_children_entry(void *opaque)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumCo *co = opaque;
-    int i;
+    QuorumAIOCB *acb = co->acb;
    BDRVQuorumState *s = acb->bs->opaque;
    int i = co->idx;
    QuorumChildRequest *sacb = &acb->qcrs[i];
    sacb->bs = s->children[i]->bs;
    sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
                               &acb->qcrs[i].qiov, 0);
    if (sacb->ret == 0) {
        acb->success_count++;
    } else {
        quorum_report_bad_acb(sacb, sacb->ret);
    }
    acb->count++;
    assert(acb->count <= s->num_children);
    assert(acb->success_count <= s->num_children);
    /* Wake up the caller after the last read */
    if (acb->count == s->num_children) {
        qemu_coroutine_enter_if_inactive(acb->co);
    }
 }
 static int read_quorum_children(QuorumAIOCB *acb)
 {
    BDRVQuorumState *s = acb->bs->opaque;
    int i, ret;
    acb->children_read = s->num_children;
    for (i = 0; i < s->num_children; i++) {
        acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
@@ -660,71 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
    }
    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
+        Coroutine *co;
-                                            &acb->qcrs[i].qiov, acb->nb_sectors,
+        QuorumCo data = {
-                                            quorum_aio_cb, &acb->qcrs[i]);
+            .acb = acb,
            .idx = i,
        };
        co = qemu_coroutine_create(read_quorum_children_entry, &data);
        qemu_coroutine_enter(co);
    }
-    return &acb->common;
+    while (acb->count < s->num_children) {
        qemu_coroutine_yield();
    }
    /* Do the vote on read */
    quorum_vote(acb);
    for (i = 0; i < s->num_children; i++) {
        qemu_vfree(acb->qcrs[i].buf);
        qemu_iovec_destroy(&acb->qcrs[i].qiov);
    }
    while (acb->rewrite_count) {
        qemu_coroutine_yield();
    }
    ret = acb->vote_ret;
    return ret;
 }
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static int read_fifo_child(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    int n, ret;
-    acb->qcrs[acb->child_iter].buf =
+    /* We try to read the next child in FIFO order if we failed to read */
-        qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
+    do {
-    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
+        n = acb->children_read++;
-    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
+        acb->qcrs[n].bs = s->children[n]->bs;
-                     acb->qcrs[acb->child_iter].buf);
+        ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
-    acb->qcrs[acb->child_iter].aiocb =
+                             acb->qiov, 0);
-        bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num,
+        if (ret < 0) {
-                       &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+            quorum_report_bad_acb(&acb->qcrs[n], ret);
-                       quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+        }
    } while (ret < 0 && acb->children_read < s->num_children);
-    return &acb->common;
+    /* FIXME: rewrite failed children if acb->children_read > 1? */
    return ret;
 }
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                    int64_t sector_num,
+                            uint64_t bytes, QEMUIOVector *qiov, int flags)
                                    QEMUIOVector *qiov,
                                    int nb_sectors,
                                    BlockCompletionFunc *cb,
                                    void *opaque)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-                                      nb_sectors, cb, opaque);
+    int ret;
    acb->is_read = true;
    acb->children_read = 0;
    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        acb->child_iter = s->num_children - 1;
+        ret = read_quorum_children(acb);
-        return read_quorum_children(acb);
+    } else {
        ret = read_fifo_child(acb);
    }
    quorum_aio_finalize(acb);
-    acb->child_iter = 0;
+    return ret;
    return read_fifo_child(acb);
 }
-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
+static void write_quorum_entry(void *opaque)
-                                     int64_t sector_num,
+{
-                                     QEMUIOVector *qiov,
+    QuorumCo *co = opaque;
-                                     int nb_sectors,
+    QuorumAIOCB *acb = co->acb;
-                                     BlockCompletionFunc *cb,
+    BDRVQuorumState *s = acb->bs->opaque;
-                                     void *opaque)
+    int i = co->idx;
    QuorumChildRequest *sacb = &acb->qcrs[i];
    sacb->bs = s->children[i]->bs;
    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
                                acb->qiov, 0);
    if (sacb->ret == 0) {
        acb->success_count++;
    } else {
        quorum_report_bad_acb(sacb, sacb->ret);
    }
    acb->count++;
    assert(acb->count <= s->num_children);
    assert(acb->success_count <= s->num_children);
    /* Wake up the caller after the last write */
    if (acb->count == s->num_children) {
        qemu_coroutine_enter_if_inactive(acb->co);
    }
 }
 static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
                             uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
-                                      cb, opaque);
+    int i, ret;
    int i;
    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
+        Coroutine *co;
-                                             qiov, nb_sectors, &quorum_aio_cb,
+        QuorumCo data = {
-                                             &acb->qcrs[i]);
+            .acb = acb,
            .idx = i,
        };
        co = qemu_coroutine_create(write_quorum_entry, &data);
        qemu_coroutine_enter(co);
    }
-    return &acb->common;
+    while (acb->count < s->num_children) {
        qemu_coroutine_yield();
    }
    quorum_has_too_much_io_failed(acb);
    ret = acb->vote_ret;
    quorum_aio_finalize(acb);
    return ret;
 }
 static int64_t quorum_getlength(BlockDriverState *bs)
@@ -768,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
        result = bdrv_co_flush(s->children[i]->bs);
        if (result) {
            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
-                              bdrv_nb_sectors(s->children[i]->bs),
+                              bdrv_getlength(s->children[i]->bs),
                              s->children[i]->bs->node_name, result);
            result_value.l = result;
            quorum_count_vote(&error_votes, &result_value, i);
@@ -1101,8 +1120,8 @@ static BlockDriver bdrv_quorum = {
    .bdrv_getlength                     = quorum_getlength,
-    .bdrv_aio_readv                     = quorum_aio_readv,
+    .bdrv_co_preadv                     = quorum_co_preadv,
-    .bdrv_aio_writev                    = quorum_aio_writev,
+    .bdrv_co_pwritev                    = quorum_co_pwritev,
    .bdrv_add_child                     = quorum_add_child,
    .bdrv_del_child                     = quorum_del_child,
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw"
+/* BlockDriver implementation for "raw" format driver
 *
 * Copyright (C) 2010-2016 Red Hat, Inc.
 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
@@ -31,6 +31,30 @@
 #include "qapi/error.h"
 #include "qemu/option.h"
 /* Per-node state of the "raw" format driver (stored in bs->opaque). */
 typedef struct BDRVRawState {
    /* Value of the "offset" option; added to request offsets before they
     * are forwarded to bs->file. */
    uint64_t offset;
    /* Length exposed by raw_getlength(); either the "size" option or the
     * file length minus offset. */
    uint64_t size;
    /* True when the "size" option was given explicitly. */
    bool has_size;
 } BDRVRawState;
 /*
  * Runtime options of the raw driver, parsed in raw_read_options().
  * Both "offset" and "size" are size-typed options.
  */
 static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
    .desc = {
        {
            .name = "offset",
            .type = QEMU_OPT_SIZE,
            .help = "offset in the disk where the image starts",
        },
        {
            .name = "size",
            .type = QEMU_OPT_SIZE,
            .help = "virtual disk size",
        },
        { /* end of list */ }
    },
 };
 static QemuOptsList raw_create_opts = {
    .name = "raw-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
@@ -44,16 +68,116 @@ static QemuOptsList raw_create_opts = {
    }
 };
 /*
  * Parse the "offset" and "size" runtime options from @options into @s and
  * validate them against the current length of the underlying file
  * (bs->file).
  *
  * Returns 0 on success.  On failure an error is stored in @errp and a
  * negative value is returned: the bdrv_getlength() result when the length
  * query fails, -EINVAL for every option problem.
  */
 static int raw_read_options(QDict *options, BlockDriverState *bs,
    BDRVRawState *s, Error **errp)
 {
    Error *err = NULL;
    QemuOpts *opts;
    int64_t file_len;
    /* Every failure past the length query reports -EINVAL. */
    int ret = -EINVAL;
    file_len = bdrv_getlength(bs->file->bs);
    if (file_len < 0) {
        error_setg_errno(errp, -file_len, "Could not get image size");
        return file_len;
    }
    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &err);
    if (err) {
        error_propagate(errp, err);
        goto out;
    }
    /* The window must start inside the file. */
    s->offset = qemu_opt_get_size(opts, "offset", 0);
    if (s->offset > file_len) {
        error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
            "size of the containing file (%" PRId64 ")",
            s->offset, file_len);
        goto out;
    }
    /* Without an explicit size, expose everything after the offset. */
    s->has_size = qemu_opt_find(opts, "size") != NULL;
    if (s->has_size) {
        s->size = qemu_opt_get_size(opts, "size", 0);
    } else {
        s->size = file_len - s->offset;
    }
    /* The window must also end inside the file. */
    if ((file_len - s->offset) < s->size) {
        error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
            "(%" PRIu64 ") has to be smaller or equal to the "
            " actual size of the containing file (%" PRId64 ")",
            s->offset, s->size, file_len);
        goto out;
    }
    /* An explicit size has to be a multiple of BDRV_SECTOR_SIZE so that
     * rounding up cannot leak out of the specified area. */
    if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Specified size is not multiple of %llu",
            BDRV_SECTOR_SIZE);
        goto out;
    }
    ret = 0;
 out:
    qemu_opts_del(opts);
    return ret;
 }
 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
                              BlockReopenQueue *queue, Error **errp)
 {
-    return 0;
+    assert(reopen_state != NULL);
    assert(reopen_state->bs != NULL);
    reopen_state->opaque = g_new0(BDRVRawState, 1);
    return raw_read_options(
        reopen_state->options,
        reopen_state->bs,
        reopen_state->opaque,
        errp);
 }
 /*
  * Commit a reopen: copy the BDRVRawState held in state->opaque over the
  * node's live state, then free the temporary copy.
  */
 static void raw_reopen_commit(BDRVReopenState *state)
 {
    BDRVRawState *staged = state->opaque;
    BDRVRawState *live = state->bs->opaque;
    memcpy(live, staged, sizeof(*live));
    g_free(staged);
    state->opaque = NULL;
 }
 /* Abort a reopen: free whatever state->opaque holds and clear the pointer. */
 static void raw_reopen_abort(BDRVReopenState *state)
 {
    void *staged = state->opaque;
    state->opaque = NULL;
    g_free(staged);
 }
 /*
  * Read from the underlying file, shifting the request by the configured
  * offset.  Returns -EINVAL when the shifted offset would not fit in 64 bits,
  * otherwise the result of bdrv_co_preadv() on bs->file.
  */
 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
                                      uint64_t bytes, QEMUIOVector *qiov,
                                      int flags)
 {
    const BDRVRawState *raw = bs->opaque;
    const uint64_t shift = raw->offset;
    /* Refuse requests whose translated offset would wrap around. */
    if (UINT64_MAX - shift < offset) {
        return -EINVAL;
    }
    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    return bdrv_co_preadv(bs->file, offset + shift, bytes, qiov, flags);
 }
@@ -62,11 +186,23 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                       uint64_t bytes, QEMUIOVector *qiov,
                                       int flags)
 {
    BDRVRawState *s = bs->opaque;
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;
    if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
        /* There's not enough space for the data. Don't write anything and just
         * fail to prevent leaking out of the size specified in options. */
        return -ENOSPC;
    }
    if (offset > UINT64_MAX - s->offset) {
        ret = -EINVAL;
        goto fail;
    }
    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
        /* Handling partial writes would be a pain - so we just
         * require that guests have 512-byte request alignment if
@@ -101,6 +237,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
        qiov = &local_qiov;
    }
    offset += s->offset;
    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -117,8 +255,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                            int nb_sectors, int *pnum,
                                            BlockDriverState **file)
 {
    BDRVRawState *s = bs->opaque;
    *pnum = nb_sectors;
    *file = bs->file->bs;
    sector_num += s->offset / BDRV_SECTOR_SIZE;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
           (sector_num << BDRV_SECTOR_BITS);
 }
@@ -127,18 +267,49 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int count,
                                             BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;
    if (offset > UINT64_MAX - s->offset) {
        return -EINVAL;
    }
    offset += s->offset;
    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
 }
 /*
  * Discard @count bytes of the underlying file starting at @offset shifted by
  * the configured offset.  Returns -EINVAL when the shifted offset would
  * exceed UINT64_MAX, otherwise the result of bdrv_co_pdiscard().
  */
 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
                                        int64_t offset, int count)
 {
    BDRVRawState *raw = bs->opaque;
    int64_t shifted;
    /* The comparison is carried out in uint64_t, as is the addition below. */
    if ((uint64_t)offset > UINT64_MAX - raw->offset) {
        return -EINVAL;
    }
    shifted = offset + raw->offset;
    return bdrv_co_pdiscard(bs->file->bs, shifted, count);
 }
 static int64_t raw_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    int64_t len;
    BDRVRawState *s = bs->opaque;
    /* Update size. It should not change unless the file was externally
     * modified. */
    len = bdrv_getlength(bs->file->bs);
    if (len < 0) {
        return len;
    }
    if (len < s->offset) {
        s->size = 0;
    } else {
        if (s->has_size) {
            /* Try to honour the size */
            s->size = MIN(s->size, len - s->offset);
        } else {
            s->size = len - s->offset;
        }
    }
    return s->size;
 }
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -158,6 +329,18 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 /*
  * Resize the image to @offset bytes by truncating the underlying file to
  * @offset plus the configured offset.
  *
  * Returns -ENOTSUP when an explicit size was configured, -EINVAL when the
  * resulting file length would exceed INT64_MAX, otherwise the result of
  * bdrv_truncate().
  */
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
    BDRVRawState *raw = bs->opaque;
    int64_t file_len;
    /* A fixed-size window cannot be resized. */
    if (raw->has_size) {
        return -ENOTSUP;
    }
    if (raw->offset > INT64_MAX - offset) {
        return -EINVAL;
    }
    raw->size = offset;
    file_len = offset + raw->offset;
    return bdrv_truncate(bs->file->bs, file_len);
 }
@@ -176,12 +359,13 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked)
    bdrv_lock_medium(bs->file->bs, locked);
 }
-static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
                                 unsigned long int req, void *buf,
                                 BlockCompletionFunc *cb,
                                 void *opaque)
 {
-    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
+    BDRVRawState *s = bs->opaque;
    if (s->offset || s->has_size) {
        return -ENOTSUP;
    }
    return bdrv_co_ioctl(bs->file->bs, req, buf);
 }
 static int raw_has_zero_init(BlockDriverState *bs)
@@ -197,6 +381,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    int ret;
    bs->sg = bs->file->bs->sg;
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
@@ -214,6 +401,16 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                bs->file->bs->filename);
    }
    ret = raw_read_options(options, bs, s, errp);
    if (ret < 0) {
        return ret;
    }
    if (bs->sg && (s->offset || s->has_size)) {
        error_setg(errp, "Cannot use offset/size with SCSI generic devices");
        return -EINVAL;
    }
    return 0;
 }
@@ -231,18 +428,37 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
-    return bdrv_probe_blocksizes(bs->file->bs, bsz);
+    BDRVRawState *s = bs->opaque;
    int ret;
    ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
    if (ret < 0) {
        return ret;
    }
    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
        return -ENOTSUP;
    }
    return 0;
 }
 /*
  * Forward the geometry probe to the underlying file.  Returns -ENOTSUP
  * without probing when an offset or an explicit size is configured.
  */
 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
    const BDRVRawState *raw = bs->opaque;
    const bool windowed = raw->offset || raw->has_size;
    return windowed ? -ENOTSUP : bdrv_probe_geometry(bs->file->bs, geo);
 }
 BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .instance_size        = sizeof(BDRVRawState),
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
    .bdrv_reopen_commit   = &raw_reopen_commit,
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
    .bdrv_create          = &raw_create,
@@ -261,7 +477,7 @@ BlockDriver bdrv_raw = {
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
-    .bdrv_aio_ioctl       = &raw_aio_ioctl,
+    .bdrv_co_ioctl        = &raw_co_ioctl,
    .create_opts          = &raw_create_opts,
    .bdrv_has_zero_init   = &raw_has_zero_init
 };
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -365,45 +365,44 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        rados_conf_read_file(cluster, NULL);
    } else if (conf[0] != '\0' &&
               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
        rados_shutdown(cluster);
        error_propagate(errp, local_err);
-        return -EIO;
+        ret = -EIO;
        goto shutdown;
    }
    if (conf[0] != '\0' &&
        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
        rados_shutdown(cluster);
        error_propagate(errp, local_err);
-        return -EIO;
+        ret = -EIO;
        goto shutdown;
    }
    if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
-        rados_shutdown(cluster);
+        ret = -EIO;
-        return -EIO;
+        goto shutdown;
    }
    ret = rados_connect(cluster);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error connecting");
-        rados_shutdown(cluster);
+        goto shutdown;
        return ret;
    }
    ret = rados_ioctx_create(cluster, pool, &io_ctx);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error opening pool %s", pool);
-        rados_shutdown(cluster);
+        goto shutdown;
        return ret;
    }
    ret = rbd_create(io_ctx, name, bytes, &obj_order);
    rados_ioctx_destroy(io_ctx);
    rados_shutdown(cluster);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
        return ret;
    }
    rados_ioctx_destroy(io_ctx);
 shutdown:
    rados_shutdown(cluster);
    return ret;
 }
@@ -733,7 +732,7 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
                                      void *opaque)
 {
    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
-                         nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
+                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_READ);
 }
@@ -745,7 +744,7 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                                       void *opaque)
 {
    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
-                         nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
+                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
                         RBD_AIO_WRITE);
 }
--- a/block/replication.c
+++ b/block/replication.c
@@ -101,6 +101,11 @@ static int replication_open(BlockDriverState *bs, QDict *options,
    if (!strcmp(mode, "primary")) {
        s->mode = REPLICATION_MODE_PRIMARY;
        top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
        if (top_id) {
            error_setg(&local_err, "The primary side does not support option top-id");
            goto fail;
        }
    } else if (!strcmp(mode, "secondary")) {
        s->mode = REPLICATION_MODE_SECONDARY;
        top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
@@ -133,6 +138,9 @@ static void replication_close(BlockDriverState *bs)
    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }
    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
        block_job_cancel_sync(s->active_disk->bs->job);
    }
    if (s->mode == REPLICATION_MODE_SECONDARY) {
        g_free(s->top_id);
@@ -314,9 +322,10 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
    }
 }
-static void reopen_backing_file(BDRVReplicationState *s, bool writable,
+static void reopen_backing_file(BlockDriverState *bs, bool writable,
                                Error **errp)
 {
    BDRVReplicationState *s = bs->opaque;
    BlockReopenQueue *reopen_queue = NULL;
    int orig_hidden_flags, orig_secondary_flags;
    int new_hidden_flags, new_secondary_flags;
@@ -351,13 +360,15 @@ static void reopen_backing_file(BDRVReplicationState *s, bool writable,
    }
    if (reopen_queue) {
-        bdrv_reopen_multiple(reopen_queue, &local_err);
+        bdrv_reopen_multiple(bdrv_get_aio_context(bs),
                             reopen_queue, &local_err);
        error_propagate(errp, local_err);
    }
 }
-static void backup_job_cleanup(BDRVReplicationState *s)
+static void backup_job_cleanup(BlockDriverState *bs)
 {
    BDRVReplicationState *s = bs->opaque;
    BlockDriverState *top_bs;
    top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
@@ -366,19 +377,20 @@ static void backup_job_cleanup(BDRVReplicationState *s)
    }
    bdrv_op_unblock_all(top_bs, s->blocker);
    error_free(s->blocker);
-    reopen_backing_file(s, false, NULL);
+    reopen_backing_file(bs, false, NULL);
 }
 static void backup_job_completed(void *opaque, int ret)
 {
-    BDRVReplicationState *s = opaque;
+    BlockDriverState *bs = opaque;
    BDRVReplicationState *s = bs->opaque;
    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
        /* The backup job is cancelled unexpectedly */
        s->error = -EIO;
    }
-    backup_job_cleanup(s);
+    backup_job_cleanup(bs);
 }
 static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
@@ -409,6 +421,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
    int64_t active_length, hidden_length, disk_length;
    AioContext *aio_context;
    Error *local_err = NULL;
    BlockJob *job;
    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
@@ -474,7 +487,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        }
        /* reopen the backing file in r/w mode */
-        reopen_backing_file(s, true, &local_err);
+        reopen_backing_file(bs, true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            aio_context_release(aio_context);
@@ -489,23 +502,25 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        if (!top_bs || !bdrv_is_root_node(top_bs) ||
            !check_top_bs(top_bs, bs)) {
            error_setg(errp, "No top_bs or it is invalid");
-            reopen_backing_file(s, false, NULL);
+            reopen_backing_file(bs, false, NULL);
            aio_context_release(aio_context);
            return;
        }
        bdrv_op_block_all(top_bs, s->blocker);
        bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
-        backup_start("replication-backup", s->secondary_disk->bs,
+        job = backup_job_create(NULL, s->secondary_disk->bs, s->hidden_disk->bs,
-                     s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL, false,
+                                0, MIRROR_SYNC_MODE_NONE, NULL, false,
-                     BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+                                BLOCKDEV_ON_ERROR_REPORT,
-                     backup_job_completed, s, NULL, &local_err);
+                                BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL,
                                backup_job_completed, bs, NULL, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
-            backup_job_cleanup(s);
+            backup_job_cleanup(bs);
            aio_context_release(aio_context);
            return;
        }
        block_job_start(job);
        break;
    default:
        aio_context_release(aio_context);
@@ -621,10 +636,9 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
        }
        s->replication_state = BLOCK_REPLICATION_FAILOVER;
-        commit_active_start("replication-commit", s->active_disk->bs,
+        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
-                            s->secondary_disk->bs, 0, BLOCKDEV_ON_ERROR_REPORT,
+                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            replication_done,
+                            replication_done, bs, errp, true);
                            bs, errp, true);
        break;
    default:
        aio_context_release(aio_context);
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -641,6 +641,7 @@ static void restart_co_req(void *opaque)
 typedef struct SheepdogReqCo {
    int sockfd;
    BlockDriverState *bs;
    AioContext *aio_context;
    SheepdogReq *hdr;
    void *data;
@@ -663,7 +664,7 @@ static coroutine_fn void do_co_req(void *opaque)
    co = qemu_coroutine_self();
    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, restart_co_req, co);
+                       NULL, restart_co_req, NULL, co);
    ret = send_co_req(sockfd, hdr, data, wlen);
    if (ret < 0) {
@@ -671,7 +672,7 @@ static coroutine_fn void do_co_req(void *opaque)
    }
    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       restart_co_req, NULL, co);
+                       restart_co_req, NULL, NULL, co);
    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
    if (ret != sizeof(*hdr)) {
@@ -697,10 +698,13 @@ out:
    /* there is at most one request for this sockfd, so it is safe to
     * set each handler to NULL. */
    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL);
    srco->ret = ret;
    srco->finished = true;
    if (srco->bs) {
        bdrv_wakeup(srco->bs);
    }
 }
 /*
@@ -708,13 +712,14 @@ out:
 *
 * Return 0 on success, -errno in case of error.
 */
-static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr,
                  void *data, unsigned int *wlen, unsigned int *rlen)
 {
    Coroutine *co;
    SheepdogReqCo srco = {
        .sockfd = sockfd,
-        .aio_context = aio_context,
+        .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(),
        .bs = bs,
        .hdr = hdr,
        .data = data,
        .wlen = wlen,
@@ -727,9 +732,14 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
        do_co_req(&srco);
    } else {
        co = qemu_coroutine_create(do_co_req, &srco);
-        qemu_coroutine_enter(co);
+        if (bs) {
-        while (!srco.finished) {
+            qemu_coroutine_enter(co);
-            aio_poll(aio_context, true);
+            BDRV_POLL_WHILE(bs, !srco.finished);
        } else {
            qemu_coroutine_enter(co);
            while (!srco.finished) {
                aio_poll(qemu_get_aio_context(), true);
            }
        }
    }
@@ -750,7 +760,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
    AIOReq *aio_req, *next;
    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL);
+                       NULL, NULL, NULL);
    close(s->fd);
    s->fd = -1;
@@ -954,7 +964,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
    }
    aio_set_fd_handler(s->aio_context, fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
    return fd;
 }
@@ -1125,7 +1135,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
    hdr.snapid = snapid;
    hdr.flags = SD_FLAG_CMD_WRITE;
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
    if (ret) {
        error_setg_errno(errp, -ret, "cannot get vdi info");
        goto out;
@@ -1216,7 +1226,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    qemu_co_mutex_lock(&s->lock);
    s->co_send = qemu_coroutine_self();
    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, co_write_request, s);
+                       co_read_response, co_write_request, NULL, s);
    socket_set_cork(s->fd, 1);
    /* send a header */
@@ -1235,12 +1245,12 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 out:
    socket_set_cork(s->fd, 0);
    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
    s->co_send = NULL;
    qemu_co_mutex_unlock(&s->lock);
 }
-static int read_write_object(int fd, AioContext *aio_context, char *buf,
+static int read_write_object(int fd, BlockDriverState *bs, char *buf,
                             uint64_t oid, uint8_t copies,
                             unsigned int datalen, uint64_t offset,
                             bool write, bool create, uint32_t cache_flags)
@@ -1274,7 +1284,7 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf,
    hdr.offset = offset;
    hdr.copies = copies;
-    ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
    if (ret) {
        error_report("failed to send a request to the sheep");
        return ret;
@@ -1289,22 +1299,22 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf,
    }
 }
-static int read_object(int fd, AioContext *aio_context, char *buf,
+static int read_object(int fd, BlockDriverState *bs, char *buf,
                       uint64_t oid, uint8_t copies,
                       unsigned int datalen, uint64_t offset,
                       uint32_t cache_flags)
 {
-    return read_write_object(fd, aio_context, buf, oid, copies,
+    return read_write_object(fd, bs, buf, oid, copies,
                             datalen, offset, false,
                             false, cache_flags);
 }
-static int write_object(int fd, AioContext *aio_context, char *buf,
+static int write_object(int fd, BlockDriverState *bs, char *buf,
                        uint64_t oid, uint8_t copies,
                        unsigned int datalen, uint64_t offset, bool create,
                        uint32_t cache_flags)
 {
-    return read_write_object(fd, aio_context, buf, oid, copies,
+    return read_write_object(fd, bs, buf, oid, copies,
                             datalen, offset, true,
                             create, cache_flags);
 }
@@ -1331,7 +1341,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
        goto out;
    }
-    ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid),
                      s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
                      s->cache_flags);
    if (ret < 0) {
@@ -1386,7 +1396,7 @@ static void sd_detach_aio_context(BlockDriverState *bs)
    BDRVSheepdogState *s = bs->opaque;
    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL);
+                       NULL, NULL, NULL);
 }
 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1396,7 +1406,7 @@ static void sd_attach_aio_context(BlockDriverState *bs,
    s->aio_context = new_context;
    aio_set_fd_handler(new_context, s->fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
 }
 /* TODO Convert to fine grained options */
@@ -1489,7 +1499,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    }
    buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
                      0, SD_INODE_SIZE, 0, s->cache_flags);
    closesocket(fd);
@@ -1510,7 +1520,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    return 0;
 out:
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    if (s->fd >= 0) {
        closesocket(s->fd);
    }
@@ -1549,7 +1559,7 @@ static void sd_reopen_commit(BDRVReopenState *state)
    if (s->fd) {
        aio_set_fd_handler(s->aio_context, s->fd, false,
-                           NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL);
        closesocket(s->fd);
    }
@@ -1573,7 +1583,7 @@ static void sd_reopen_abort(BDRVReopenState *state)
    if (re_s->fd) {
        aio_set_fd_handler(s->aio_context, re_s->fd, false,
-                           NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL);
        closesocket(re_s->fd);
    }
@@ -1618,7 +1628,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
    hdr.copies = s->inode.nr_copies;
    hdr.block_size_shift = s->inode.block_size_shift;
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
    closesocket(fd);
@@ -1886,7 +1896,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
        hdr.proto_ver = SD_PROTO_VER;
-        ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+        ret = do_req(fd, NULL, (SheepdogReq *)&hdr,
                     NULL, &wlen, &rlen);
        closesocket(fd);
        if (ret) {
@@ -1951,7 +1961,7 @@ static void sd_close(BlockDriverState *bs)
    hdr.data_length = wlen;
    hdr.flags = SD_FLAG_CMD_WRITE;
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 s->name, &wlen, &rlen);
    closesocket(fd);
@@ -1962,7 +1972,7 @@ static void sd_close(BlockDriverState *bs)
    }
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    closesocket(s->fd);
    g_free(s->host_spec);
 }
@@ -2000,7 +2010,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
    /* we don't need to update entire object */
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
    s->inode.vdi_size = offset;
-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+    ret = write_object(fd, s->bs, (char *)&s->inode,
                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
                       datalen, 0, false, s->cache_flags);
    close(fd);
@@ -2070,7 +2080,7 @@ static bool sd_delete(BDRVSheepdogState *s)
        return false;
    }
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 s->name, &wlen, &rlen);
    closesocket(fd);
    if (ret) {
@@ -2126,7 +2136,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
        goto out;
    }
-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
    closesocket(fd);
@@ -2411,7 +2421,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }
-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+    ret = write_object(fd, s->bs, (char *)&s->inode,
                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
                       datalen, 0, false, s->cache_flags);
    if (ret < 0) {
@@ -2426,7 +2436,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }
-    ret = read_object(fd, s->aio_context, (char *)inode,
+    ret = read_object(fd, s->bs, (char *)inode,
                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
                      s->cache_flags);
@@ -2528,7 +2538,7 @@ static bool remove_objects(BDRVSheepdogState *s)
            i++;
        }
-        ret = write_object(fd, s->aio_context,
+        ret = write_object(fd, s->bs,
                           (char *)&inode->data_vdi_id[start_idx],
                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
                           (i - start_idx) * sizeof(uint32_t),
@@ -2600,7 +2610,7 @@ static int sd_snapshot_delete(BlockDriverState *bs,
        return -1;
    }
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 buf, &wlen, &rlen);
    closesocket(fd);
    if (ret) {
@@ -2652,8 +2662,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    req.opcode = SD_OP_READ_VDIS;
    req.data_length = max;
-    ret = do_req(fd, s->aio_context, &req,
+    ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen);
                 vdi_inuse, &wlen, &rlen);
    closesocket(fd);
    if (ret) {
@@ -2679,7 +2688,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        }
        /* we don't need to read entire object */
-        ret = read_object(fd, s->aio_context, (char *)&inode,
+        ret = read_object(fd, s->bs, (char *)&inode,
                          vid_to_vdi_oid(vid),
                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
                          s->cache_flags);
@@ -2745,11 +2754,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
        create = (offset == 0);
        if (load) {
-            ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
+            ret = read_object(fd, s->bs, (char *)data, vmstate_oid,
                              s->inode.nr_copies, data_len, offset,
                              s->cache_flags);
        } else {
-            ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
+            ret = write_object(fd, s->bs, (char *)data, vmstate_oid,
                               s->inode.nr_copies, data_len, offset, create,
                               s->cache_flags);
        }
@@ -2820,8 +2829,9 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
    iov.iov_len = sizeof(zero);
    discard_iov.iov = &iov;
    discard_iov.niov = 1;
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
-    assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
+        return -ENOTSUP;
    }
    acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
                       count >> BDRV_SECTOR_BITS);
    acb->aiocb_type = AIOCB_DISCARD_OBJ;
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -30,10 +30,14 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #include "qemu/sockets.h"
 #include "qemu/uri.h"
 #include "qapi-visit.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 /* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
 * this block driver code.
@@ -74,8 +78,9 @@ typedef struct BDRVSSHState {
     */
    LIBSSH2_SFTP_ATTRIBUTES attrs;
    InetSocketAddress *inet;
    /* Used to warn if 'flush' is not supported. */
    char *hostport;
    bool unsafe_flush_warning;
 } BDRVSSHState;
@@ -89,7 +94,6 @@ static void ssh_state_init(BDRVSSHState *s)
 static void ssh_state_free(BDRVSSHState *s)
 {
    g_free(s->hostport);
    if (s->sftp_handle) {
        libssh2_sftp_close(s->sftp_handle);
    }
@@ -193,6 +197,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
 {
    URI *uri = NULL;
    QueryParams *qp;
    char *port_str;
    int i;
    uri = uri_parse(filename);
@@ -225,11 +230,11 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
        qdict_put(options, "user", qstring_from_str(uri->user));
    }
-    qdict_put(options, "host", qstring_from_str(uri->server));
+    qdict_put(options, "server.host", qstring_from_str(uri->server));
-    if (uri->port) {
+    port_str = g_strdup_printf("%d", uri->port ?: 22);
-        qdict_put(options, "port", qint_from_int(uri->port));
+    qdict_put(options, "server.port", qstring_from_str(port_str));
-    }
+    g_free(port_str);
    qdict_put(options, "path", qstring_from_str(uri->path));
@@ -254,15 +259,31 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    return -EINVAL;
 }
 static bool ssh_has_filename_options_conflict(QDict *options, Error **errp)
 {
    const QDictEntry *qe;
    for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
        if (!strcmp(qe->key, "host") ||
            !strcmp(qe->key, "port") ||
            !strcmp(qe->key, "path") ||
            !strcmp(qe->key, "user") ||
            !strcmp(qe->key, "host_key_check") ||
            strstart(qe->key, "server.", NULL))
        {
            error_setg(errp, "Option '%s' cannot be used with a file name",
                       qe->key);
            return true;
        }
    }
    return false;
 }
 static void ssh_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    if (qdict_haskey(options, "user") ||
+    if (ssh_has_filename_options_conflict(options, errp)) {
        qdict_haskey(options, "host") ||
        qdict_haskey(options, "port") ||
        qdict_haskey(options, "path") ||
        qdict_haskey(options, "host_key_check")) {
        error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option");
        return;
    }
@@ -540,14 +561,68 @@ static QemuOptsList ssh_runtime_opts = {
    },
 };
 static bool ssh_process_legacy_socket_options(QDict *output_opts,
                                              QemuOpts *legacy_opts,
                                              Error **errp)
 {
    const char *host = qemu_opt_get(legacy_opts, "host");
    const char *port = qemu_opt_get(legacy_opts, "port");
    if (!host && port) {
        error_setg(errp, "port may not be used without host");
        return false;
    }
    if (host) {
        qdict_put(output_opts, "server.host", qstring_from_str(host));
        qdict_put(output_opts, "server.port",
                  qstring_from_str(port ?: stringify(22)));
    }
    return true;
 }
 static InetSocketAddress *ssh_config(QDict *options, Error **errp)
 {
    InetSocketAddress *inet = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
    Error *local_error = NULL;
    qdict_extract_subqdict(options, &addr, "server.");
    if (!qdict_size(addr)) {
        error_setg(errp, "SSH server address missing");
        goto out;
    }
    crumpled_addr = qdict_crumple(addr, errp);
    if (!crumpled_addr) {
        goto out;
    }
    iv = qobject_input_visitor_new(crumpled_addr, true);
    visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
        goto out;
    }
 out:
    QDECREF(addr);
    qobject_decref(crumpled_addr);
    visit_free(iv);
    return inet;
 }
 static int connect_to_ssh(BDRVSSHState *s, QDict *options,
                          int ssh_flags, int creat_mode, Error **errp)
 {
    int r, ret;
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
-    const char *host, *user, *path, *host_key_check;
+    const char *user, *path, *host_key_check;
-    int port;
+    long port = 0;
    opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -557,15 +632,11 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        goto err;
    }
-    host = qemu_opt_get(opts, "host");
+    if (!ssh_process_legacy_socket_options(options, opts, errp)) {
    if (!host) {
        ret = -EINVAL;
        error_setg(errp, "No hostname was specified");
        goto err;
    }
    port = qemu_opt_get_number(opts, "port", 22);
    path = qemu_opt_get(opts, "path");
    if (!path) {
        ret = -EINVAL;
@@ -588,12 +659,21 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        host_key_check = "yes";
    }
-    /* Construct the host:port name for inet_connect. */
+    /* Pop the config into our state object, Exit if invalid */
-    g_free(s->hostport);
+    s->inet = ssh_config(options, errp);
-    s->hostport = g_strdup_printf("%s:%d", host, port);
+    if (!s->inet) {
        ret = -EINVAL;
        goto err;
    }
    if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
        error_setg(errp, "Use only numeric port value");
        ret = -EINVAL;
        goto err;
    }
    /* Open the socket and connect. */
-    s->sock = inet_connect(s->hostport, errp);
+    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
    if (s->sock < 0) {
        ret = -EIO;
        goto err;
@@ -619,7 +699,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }
    /* Check the remote host's key against known_hosts. */
-    ret = check_host_key(s, host, port, host_key_check, errp);
+    ret = check_host_key(s, s->inet->host, port, host_key_check,
                         errp);
    if (ret < 0) {
        goto err;
    }
@@ -830,7 +911,7 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
            rd_handler, wr_handler);
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, co);
+                       false, rd_handler, wr_handler, NULL, co);
 }
 static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
@@ -838,7 +919,7 @@ static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
 {
    DPRINTF("s->sock=%d", s->sock);
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
 }
 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -1040,7 +1121,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 {
    if (!s->unsafe_flush_warning) {
        error_report("warning: ssh server %s does not support fsync",
-                     s->hostport);
+                     s->inet->host);
        if (what) {
            error_report("to support fsync, you need %s", what);
        }
--- a/block/stream.c
+++ b/block/stream.c
@@ -14,7 +14,7 @@
 #include "qemu/osdep.h"
 #include "trace.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -37,6 +37,7 @@ typedef struct StreamBlockJob {
    BlockDriverState *base;
    BlockdevOnError on_error;
    char *backing_file_str;
    int bs_flags;
 } StreamBlockJob;
 static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -81,6 +82,11 @@ static void stream_complete(BlockJob *job, void *opaque)
        bdrv_set_backing_hd(bs, base);
    }
    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_flags != bdrv_get_flags(bs)) {
        bdrv_reopen(bs, s->bs_flags, NULL);
    }
    g_free(s->backing_file_str);
    block_job_completed(&s->common, data->ret);
    g_free(data);
@@ -212,26 +218,43 @@ static const BlockJobDriver stream_job_driver = {
    .instance_size = sizeof(StreamBlockJob),
    .job_type      = BLOCK_JOB_TYPE_STREAM,
    .set_speed     = stream_set_speed,
    .start         = stream_run,
 };
 void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
-                  int64_t speed, BlockdevOnError on_error,
+                  int64_t speed, BlockdevOnError on_error, Error **errp)
                  BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    StreamBlockJob *s;
    BlockDriverState *iter;
    int orig_bs_flags;
    s = block_job_create(job_id, &stream_job_driver, bs, speed,
-                         cb, opaque, errp);
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
    /* Make sure that the image is opened in read-write mode */
    orig_bs_flags = bdrv_get_flags(bs);
    if (!(orig_bs_flags & BDRV_O_RDWR)) {
        if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
            block_job_unref(&s->common);
            return;
        }
    }
    /* Block all intermediate nodes between bs and base, because they
     * will disappear from the chain after this operation */
    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
        block_job_add_bdrv(&s->common, iter);
    }
    s->base = base;
    s->backing_file_str = g_strdup(backing_file_str);
    s->bs_flags = orig_bs_flags;
    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(stream_run, s);
+    trace_stream_start(bs, base, s);
-    trace_stream_start(bs, base, s, s->common.co, opaque);
+    block_job_start(&s->common);
    qemu_coroutine_enter(s->common.co);
 }
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -168,6 +168,22 @@ static BlockBackend *throttle_group_next_blk(BlockBackend *blk)
    return blk_by_public(next);
 }
 /*
 * Return whether a BlockBackend has pending requests.
 *
 * This assumes that tg->lock is held.
 *
 * @blk: the BlockBackend
 * @is_write:  the type of operation (read/write)
 * @ret:       whether the BlockBackend has pending requests.
 */
 static inline bool blk_has_pending_reqs(BlockBackend *blk,
                                        bool is_write)
 {
    const BlockBackendPublic *blkp = blk_get_public(blk);
    return blkp->pending_reqs[is_write];
 }
 /* Return the next BlockBackend in the round-robin sequence with pending I/O
 * requests.
 *
@@ -188,7 +204,7 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
    /* get next bs round in round robin style */
    token = throttle_group_next_blk(token);
-    while (token != start && !blkp->pending_reqs[is_write]) {
+    while (token != start && !blk_has_pending_reqs(token, is_write)) {
        token = throttle_group_next_blk(token);
    }
@@ -196,10 +212,13 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
     * then decide the token is the current bs because chances are
     * the current bs get the current request queued.
     */
-    if (token == start && !blkp->pending_reqs[is_write]) {
+    if (token == start && !blk_has_pending_reqs(token, is_write)) {
        token = blk;
    }
    /* Either we return the original BB, or one with pending requests */
    assert(token == blk || blk_has_pending_reqs(token, is_write));
    return token;
 }
@@ -257,7 +276,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
    /* Check if there's any pending request to schedule next */
    token = next_throttle_token(blk, is_write);
-    if (!blkp->pending_reqs[is_write]) {
+    if (!blk_has_pending_reqs(token, is_write)) {
        return;
    }
@@ -271,7 +290,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
            qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
            token = blk;
        } else {
-            ThrottleTimers *tt = &blkp->throttle_timers;
+            ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
            int64_t now = qemu_clock_get_ns(tt->clock_type);
            timer_mod(tt->timers[is_write], now + 1);
            tg->any_timer_armed[is_write] = true;
--- a/block/trace-events
+++ b/block/trace-events
@@ -9,7 +9,6 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
 # block/io.c
 bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
@@ -20,14 +19,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t c
 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
-stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+stream_start(void *bs, void *base, void *s) "bs %p base %p s %p"
 # block/commit.c
 commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
-commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
+commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p"
 # block/mirror.c
-mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_start(void *bs, void *s, void *opaque) "bs %p s %p opaque %p"
 mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
@@ -52,11 +51,10 @@ qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
 block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
-# block/raw-win32.c
+# block/file-win32.c
-# block/raw-posix.c
+# block/file-posix.c
 paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
 paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -361,6 +361,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    VdiHeader header;
    size_t bmap_size;
    int ret;
    Error *local_err = NULL;
    logout("\n");
@@ -471,7 +472,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_free(s->migration_blocker);
        goto fail_free_bmap;
    }
    qemu_co_mutex_init(&s->write_lock);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -991,6 +991,17 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }
    /* Disable migration when VHDX images are used */
    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
    ret = migrate_add_blocker(s->migration_blocker, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_free(s->migration_blocker);
        goto fail;
    }
    if (flags & BDRV_O_RDWR) {
        ret = vhdx_update_headers(bs, s, false, NULL);
        if (ret < 0) {
@@ -1000,12 +1011,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    /* TODO: differencing files */
    /* Disable migration when VHDX images are used */
    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
    migrate_add_blocker(s->migration_blocker);
    return 0;
 fail:
    vhdx_close(bs);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -941,6 +941,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
    Error *local_err = NULL;
    buf = vmdk_read_desc(bs->file, 0, errp);
    if (!buf) {
@@ -976,7 +977,13 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_free(s->migration_blocker);
        goto fail;
    }
    g_free(buf);
    return 0;
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -422,13 +422,18 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 #endif
    }
    qemu_co_mutex_init(&s->lock);
    /* Disable migration when VHD images are used */
    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_free(s->migration_blocker);
        goto fail;
    }
    qemu_co_mutex_init(&s->lock);
    return 0;
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1185,22 +1185,26 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;
    if (s->first_sectors_number == 0x40) {
        init_mbr(s, cyls, heads, secs);
    }
    //    assert(is_consistent(s));
    qemu_co_mutex_init(&s->lock);
    /* Disable migration when vvfat is used rw */
    if (s->qcow) {
        error_setg(&s->migration_blocker,
                   "The vvfat (rw) format used by node '%s' "
                   "does not support live migration",
                   bdrv_get_device_or_node_name(bs));
-        migrate_add_blocker(s->migration_blocker);
+        ret = migrate_add_blocker(s->migration_blocker, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            error_free(s->migration_blocker);
            goto fail;
        }
    }
    if (s->first_sectors_number == 0x40) {
        init_mbr(s, cyls, heads, secs);
    }
    qemu_co_mutex_init(&s->lock);
    ret = 0;
 fail:
    qemu_opts_del(opts);
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -175,7 +175,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, false, NULL);
+    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
    aio->is_aio_context_attached = false;
 }
@@ -184,7 +184,7 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
 {
    aio->is_aio_context_attached = true;
    aio_set_event_notifier(new_context, &aio->e, false,
-                           win32_aio_completion_cb);
+                           win32_aio_completion_cb, NULL);
 }
 QEMUWin32AIOState *win32_aio_init(void)
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -44,6 +44,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
        return TRUE;
    }
    qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
    nbd_client_new(NULL, cioc,
                   nbd_server->tlscreds, NULL,
                   nbd_client_put);
@@ -111,6 +112,8 @@ void qmp_nbd_server_start(SocketAddress *addr,
    nbd_server = g_new0(NBDServerData, 1);
    nbd_server->watch = -1;
    nbd_server->listen_ioc = qio_channel_socket_new();
    qio_channel_set_name(QIO_CHANNEL(nbd_server->listen_ioc),
                         "nbd-listener");
    if (qio_channel_socket_listen_sync(
            nbd_server->listen_ioc, addr, errp) < 0) {
        goto error;
--- a/blockdev.c
+++ b/blockdev.c
@@ -43,7 +43,7 @@
 #include "qapi/qmp/types.h"
 #include "qapi-visit.h"
 #include "qapi/qmp/qerror.h"
-#include "qapi/qmp-output-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qapi/util.h"
 #include "sysemu/sysemu.h"
 #include "block/block_int.h"
@@ -1811,7 +1811,7 @@ typedef struct DriveBackupState {
    BlockJob *job;
 } DriveBackupState;
-static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
                            Error **errp);
 static void drive_backup_prepare(BlkActionState *common, Error **errp)
@@ -1835,23 +1835,26 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
    bdrv_drained_begin(bs);
    state->bs = bs;
-    do_drive_backup(backup, common->block_job_txn, &local_err);
+    state->job = do_drive_backup(backup, common->block_job_txn, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
 }
-    state->job = state->bs->job;
+static void drive_backup_commit(BlkActionState *common)
 {
    /* Commit step of the drive-backup action: start the job that the
     * prepare step stored in state->job (it is returned there by
     * do_drive_backup()).
     */
    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
    /* A commit without a previously created job is a programming error */
    assert(state->job);
    block_job_start(state->job);
 }
 static void drive_backup_abort(BlkActionState *common)
 {
    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
    BlockDriverState *bs = state->bs;
-    /* Only cancel if it's the job we started */
+    if (state->job) {
-    if (bs && bs->job && bs->job == state->job) {
+        block_job_cancel_sync(state->job);
        block_job_cancel_sync(bs->job);
    }
 }
@@ -1872,8 +1875,8 @@ typedef struct BlockdevBackupState {
    AioContext *aio_context;
 } BlockdevBackupState;
-static void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
-                               Error **errp);
+                                    Error **errp);
 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 {
@@ -1906,23 +1909,26 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
    state->bs = bs;
    bdrv_drained_begin(state->bs);
-    do_blockdev_backup(backup, common->block_job_txn, &local_err);
+    state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
 }
-    state->job = state->bs->job;
+static void blockdev_backup_commit(BlkActionState *common)
 {
    /* Commit step of the blockdev-backup action: start the job that the
     * prepare step stored in state->job (it is returned there by
     * do_blockdev_backup()).
     */
    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
    /* A commit without a previously created job is a programming error */
    assert(state->job);
    block_job_start(state->job);
 }
 static void blockdev_backup_abort(BlkActionState *common)
 {
    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
    BlockDriverState *bs = state->bs;
-    /* Only cancel if it's the job we started */
+    if (state->job) {
-    if (bs && bs->job && bs->job == state->job) {
+        block_job_cancel_sync(state->job);
        block_job_cancel_sync(bs->job);
    }
 }
@@ -2072,12 +2078,14 @@ static const BlkActionOps actions[] = {
    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
        .instance_size = sizeof(DriveBackupState),
        .prepare = drive_backup_prepare,
        .commit = drive_backup_commit,
        .abort = drive_backup_abort,
        .clean = drive_backup_clean,
    },
    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
        .instance_size = sizeof(BlockdevBackupState),
        .prepare = blockdev_backup_prepare,
        .commit = blockdev_backup_commit,
        .abort = blockdev_backup_abort,
        .clean = blockdev_backup_clean,
    },
@@ -2905,39 +2913,15 @@ out:
    aio_context_release(aio_context);
 }
 /*
 * Block job completion callback.
 *
 * @opaque: the BlockDriverState the job belongs to; its ->job field must
 *          be set (asserted below)
 * @ret:    the job's result; a negative value is treated as a negated
 *          errno and turned into a message with strerror()
 *
 * Emits the "cancelled" event if the job was cancelled, otherwise the
 * "completed" event, passing the error message (or NULL when @ret >= 0).
 */
 static void block_job_cb(void *opaque, int ret)
 {
    /* Note that this function may be executed from another AioContext besides
     * the QEMU main loop.  If you need to access anything that assumes the
     * QEMU global mutex, use a BH or introduce a mutex.
     */
    BlockDriverState *bs = opaque;
    const char *msg = NULL;
    /* NOTE(review): bs->job is read for tracing before the assert below */
    trace_block_job_cb(bs, bs->job, ret);
    assert(bs->job);
    if (ret < 0) {
        msg = strerror(-ret);
    }
    /* Exactly one of the two events is emitted per invocation */
    if (block_job_is_cancelled(bs->job)) {
        block_job_event_cancelled(bs->job);
    } else {
        block_job_event_completed(bs->job, msg);
    }
 }
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
                      bool has_base, const char *base,
                      bool has_base_node, const char *base_node,
                      bool has_backing_file, const char *backing_file,
                      bool has_speed, int64_t speed,
                      bool has_on_error, BlockdevOnError on_error,
                      Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs, *iter;
    BlockDriverState *base_bs = NULL;
    AioContext *aio_context;
    Error *local_err = NULL;
@@ -2947,7 +2931,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
        on_error = BLOCKDEV_ON_ERROR_REPORT;
    }
-    bs = qmp_get_root_bs(device, errp);
+    bs = bdrv_lookup_bs(device, device, errp);
    if (!bs) {
        return;
    }
@@ -2955,7 +2939,9 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
+    if (has_base && has_base_node) {
        error_setg(errp, "'base' and 'base-node' cannot be specified "
                   "at the same time");
        goto out;
    }
@@ -2969,6 +2955,27 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
        base_name = base;
    }
    if (has_base_node) {
        base_bs = bdrv_lookup_bs(NULL, base_node, errp);
        if (!base_bs) {
            goto out;
        }
        if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
            error_setg(errp, "Node '%s' is not a backing image of '%s'",
                       base_node, device);
            goto out;
        }
        assert(bdrv_get_aio_context(base_bs) == aio_context);
        base_name = base_bs->filename;
    }
    /* Check for op blockers in the whole chain between bs and base */
    for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
            goto out;
        }
    }
    /* if we are streaming the entire chain, the result will have no backing
     * file, and specifying one is therefore an error */
    if (base_bs == NULL && has_backing_file) {
@@ -2981,7 +2988,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
    base_name = has_backing_file ? backing_file : base_name;
    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
-                 has_speed ? speed : 0, on_error, block_job_cb, bs, &local_err);
+                 has_speed ? speed : 0, on_error, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
@@ -3001,6 +3008,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                      Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *iter;
    BlockDriverState *base_bs, *top_bs;
    AioContext *aio_context;
    Error *local_err = NULL;
@@ -3067,8 +3075,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
    assert(bdrv_get_aio_context(base_bs) == aio_context);
-    if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+    for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) {
-        goto out;
+        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
            goto out;
        }
    }
    /* Do not allow attempts to commit an image into itself */
@@ -3083,12 +3093,17 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                             " but 'top' is the active layer");
            goto out;
        }
-        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
+        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-                            on_error, block_job_cb, bs, &local_err, false);
+                            BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
                            &local_err, false);
    } else {
        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
            goto out;
        }
        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
-                     on_error, block_job_cb, bs,
+                     on_error, has_backing_file ? backing_file : NULL,
-                     has_backing_file ? backing_file : NULL, &local_err);
+                     &local_err);
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -3099,11 +3114,13 @@ out:
    aio_context_release(aio_context);
 }
-static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
+static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
                                 Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *target_bs;
    BlockDriverState *source = NULL;
    BlockJob *job = NULL;
    BdrvDirtyBitmap *bmap = NULL;
    AioContext *aio_context;
    QDict *options = NULL;
@@ -3132,7 +3149,7 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
    bs = qmp_get_root_bs(backup->device, errp);
    if (!bs) {
-        return;
+        return NULL;
    }
    aio_context = bdrv_get_aio_context(bs);
@@ -3206,9 +3223,10 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
        }
    }
-    backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
+    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
-                 bmap, backup->compress, backup->on_source_error,
+                            backup->sync, bmap, backup->compress,
-                 backup->on_target_error, block_job_cb, bs, txn, &local_err);
+                            backup->on_source_error, backup->on_target_error,
                            BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
    bdrv_unref(target_bs);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -3217,11 +3235,17 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
 out:
    aio_context_release(aio_context);
    return job;
 }
 void qmp_drive_backup(DriveBackup *arg, Error **errp)
 {
-    return do_drive_backup(arg, NULL, errp);
+
    BlockJob *job;
    job = do_drive_backup(arg, NULL, errp);
    if (job) {
        block_job_start(job);
    }
 }
 BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
@@ -3229,12 +3253,14 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
    return bdrv_named_nodes_list(errp);
 }
-void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
+BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
                             Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *target_bs;
    Error *local_err = NULL;
    AioContext *aio_context;
    BlockJob *job = NULL;
    if (!backup->has_speed) {
        backup->speed = 0;
@@ -3254,7 +3280,7 @@ void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
    bs = qmp_get_root_bs(backup->device, errp);
    if (!bs) {
-        return;
+        return NULL;
    }
    aio_context = bdrv_get_aio_context(bs);
@@ -3276,19 +3302,25 @@ void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
            goto out;
        }
    }
-    backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
+    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
-                 NULL, backup->compress, backup->on_source_error,
+                            backup->sync, NULL, backup->compress,
-                 backup->on_target_error, block_job_cb, bs, txn, &local_err);
+                            backup->on_source_error, backup->on_target_error,
                            BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
 out:
    aio_context_release(aio_context);
    return job;
 }
 void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp)
 {
-    do_blockdev_backup(arg, NULL, errp);
+    BlockJob *job;
    job = do_blockdev_backup(arg, NULL, errp);
    if (job) {
        block_job_start(job);
    }
 }
 /* Parameter check and block job starting for drive mirroring.
@@ -3357,8 +3389,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
    mirror_start(job_id, bs, target,
                 has_replaces ? replaces : NULL,
                 speed, granularity, buf_size, sync, backing_mode,
-                 on_source_error, on_target_error, unmap,
+                 on_source_error, on_target_error, unmap, errp);
                 block_job_cb, bs, errp);
 }
 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3602,7 +3633,7 @@ void qmp_block_job_cancel(const char *device,
        force = false;
    }
-    if (job->user_paused && !force) {
+    if (block_job_user_paused(job) && !force) {
        error_setg(errp, "The block job for device '%s' is currently paused",
                   device);
        goto out;
@@ -3619,13 +3650,12 @@ void qmp_block_job_pause(const char *device, Error **errp)
    AioContext *aio_context;
    BlockJob *job = find_block_job(device, &aio_context, errp);
-    if (!job || job->user_paused) {
+    if (!job || block_job_user_paused(job)) {
        return;
    }
    job->user_paused = true;
    trace_qmp_block_job_pause(job);
-    block_job_pause(job);
+    block_job_user_pause(job);
    aio_context_release(aio_context);
 }
@@ -3634,14 +3664,13 @@ void qmp_block_job_resume(const char *device, Error **errp)
    AioContext *aio_context;
    BlockJob *job = find_block_job(device, &aio_context, errp);
-    if (!job || !job->user_paused) {
+    if (!job || !block_job_user_paused(job)) {
        return;
    }
    job->user_paused = false;
    trace_qmp_block_job_resume(job);
    block_job_iostatus_reset(job);
-    block_job_resume(job);
+    block_job_user_resume(job);
    aio_context_release(aio_context);
 }
@@ -3776,7 +3805,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
    BlockDriverState *bs;
    QObject *obj;
-    Visitor *v = qmp_output_visitor_new(&obj);
+    Visitor *v = qobject_output_visitor_new(&obj);
    QDict *qdict;
    Error *local_err = NULL;
@@ -3915,13 +3944,22 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
    BlockJob *job;
    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        BlockJobInfoList *elem;
-        AioContext *aio_context = blk_get_aio_context(job->blk);
+        AioContext *aio_context;
        if (block_job_is_internal(job)) {
            continue;
        }
        elem = g_new0(BlockJobInfoList, 1);
        aio_context = blk_get_aio_context(job->blk);
        aio_context_acquire(aio_context);
-        elem->value = block_job_query(job);
+        elem->value = block_job_query(job, errp);
        aio_context_release(aio_context);
-
+        if (!elem->value) {
            g_free(elem);
            qapi_free_BlockJobInfoList(head);
            return NULL;
        }
        *p_next = elem;
        p_next = &elem->next;
    }
--- a/blockjob.c
+++ b/blockjob.c
@@ -27,7 +27,7 @@
 #include "qemu-common.h"
 #include "trace.h"
 #include "block/block.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
@@ -38,6 +38,9 @@
 #include "qemu/timer.h"
 #include "qapi-event.h"
 static void block_job_event_cancelled(BlockJob *job);
 static void block_job_event_completed(BlockJob *job, const char *msg);
 /* Transactional group of block jobs */
 struct BlockJobTxn {
@@ -66,7 +69,7 @@ BlockJob *block_job_get(const char *id)
    BlockJob *job;
    QLIST_FOREACH(job, &block_jobs, job_list) {
-        if (!strcmp(id, job->id)) {
+        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }
@@ -74,17 +77,6 @@ BlockJob *block_job_get(const char *id)
    return NULL;
 }
 /* Normally the job runs in its BlockBackend's AioContext.  The exception is
 * block_job_defer_to_main_loop() where it runs in the QEMU main loop.  Code
 * that supports both cases uses this helper function.
 *
 * Returns qemu_get_aio_context() when job->deferred_to_main_loop is set,
 * otherwise blk_get_aio_context(job->blk).
 */
 static AioContext *block_job_get_aio_context(BlockJob *job)
 {
    return job->deferred_to_main_loop ?
           qemu_get_aio_context() :
           blk_get_aio_context(job->blk);
 }
 static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
 {
@@ -97,6 +89,17 @@ static void block_job_attached_aio_context(AioContext *new_context,
    block_job_resume(job);
 }
 /*
 * Push @job forward once: enter the job, drain its BlockBackend, and then
 * call the driver's drain callback if the driver provides one.
 *
 * Callers that need to wait for a state change loop over this function
 * (see the while loop in block_job_detach_aio_context()).
 */
 static void block_job_drain(BlockJob *job)
 {
    /* If job is !job->busy this kicks it into the next pause point. */
    block_job_enter(job);
    blk_drain(job->blk);
    /* The drain hook is optional; drivers may leave it NULL */
    if (job->driver->drain) {
        job->driver->drain(job);
    }
 }
 static void block_job_detach_aio_context(void *opaque)
 {
    BlockJob *job = opaque;
@@ -106,31 +109,33 @@ static void block_job_detach_aio_context(void *opaque)
    block_job_pause(job);
    if (!job->paused) {
        /* If job is !job->busy this kicks it into the next pause point. */
        block_job_enter(job);
    }
    while (!job->paused && !job->completed) {
-        aio_poll(block_job_get_aio_context(job), true);
+        block_job_drain(job);
    }
    block_job_unref(job);
 }
 /*
 * Register @bs as a node used by @job.
 *
 * The node is prepended to job->nodes, a reference is taken on it, and all
 * operations on it are blocked using job->blocker.  The matching
 * unblock/unref for every listed node is done in block_job_unref() when the
 * last job reference is dropped.
 */
 void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
 {
    job->nodes = g_slist_prepend(job->nodes, bs);
    bdrv_ref(bs);
    bdrv_op_block_all(bs, job->blocker);
 }
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockDriverState *bs, int64_t speed,
+                       BlockDriverState *bs, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    BlockBackend *blk;
    BlockJob *job;
    assert(cb);
    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }
-    if (job_id == NULL) {
+    if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
        if (!*job_id) {
            error_setg(errp, "An explicit job ID is required for this node");
@@ -138,14 +143,21 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
        }
    }
-    if (!id_wellformed(job_id)) {
+    if (job_id) {
-        error_setg(errp, "Invalid job ID '%s'", job_id);
+        if (flags & BLOCK_JOB_INTERNAL) {
-        return NULL;
+            error_setg(errp, "Cannot specify job ID for internal block job");
-    }
+            return NULL;
        }
-    if (block_job_get(job_id)) {
+        if (!id_wellformed(job_id)) {
-        error_setg(errp, "Job ID '%s' already in use", job_id);
+            error_setg(errp, "Invalid job ID '%s'", job_id);
-        return NULL;
+            return NULL;
        }
        if (block_job_get(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    }
    blk = blk_new();
@@ -154,7 +166,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    job = g_malloc0(driver->instance_size);
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
-    bdrv_op_block_all(bs, job->blocker);
+    block_job_add_bdrv(job, bs);
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
    job->driver        = driver;
@@ -162,7 +174,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    job->blk           = blk;
    job->cb            = cb;
    job->opaque        = opaque;
-    job->busy          = true;
+    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->refcnt        = 1;
    bs->job = job;
@@ -185,6 +199,28 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    return job;
 }
 /*
 * Return true if @job is an internal job, i.e. one that has no ID
 * (job->id is NULL).  @job must not be NULL.
 */
 bool block_job_is_internal(BlockJob *job)
 {
    return (job->id == NULL);
 }
 /*
 * Return true once @job has a coroutine, i.e. job->co is non-NULL.
 * block_job_start() is what assigns job->co.
 */
 static bool block_job_started(BlockJob *job)
 {
    return job->co;
 }
 /*
 * Start a job that was created but has not run yet.
 *
 * Preconditions (asserted): @job is non-NULL, has not been started, is
 * marked paused and not busy, and its driver provides a start function.
 *
 * The coroutine is always created.  It is only entered if dropping one
 * pause reference brings pause_count to zero; otherwise the job stays
 * paused with its coroutine created but not yet entered.
 */
 void block_job_start(BlockJob *job)
 {
    assert(job && !block_job_started(job) && job->paused &&
           !job->busy && job->driver->start);
    job->co = qemu_coroutine_create(job->driver->start, job);
    /* Drop one pause reference; run only when no other pause is pending */
    if (--job->pause_count == 0) {
        job->paused = false;
        job->busy = true;
        qemu_coroutine_enter(job->co);
    }
 }
 void block_job_ref(BlockJob *job)
 {
    ++job->refcnt;
@@ -193,9 +229,15 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
    if (--job->refcnt == 0) {
        GSList *l;
        BlockDriverState *bs = blk_bs(job->blk);
        bs->job = NULL;
-        bdrv_op_unblock_all(bs, job->blocker);
+        for (l = job->nodes; l; l = l->next) {
            bs = l->data;
            bdrv_op_unblock_all(bs, job->blocker);
            bdrv_unref(bs);
        }
        g_slist_free(job->nodes);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
@@ -218,8 +260,29 @@ static void block_job_completed_single(BlockJob *job)
            job->driver->abort(job);
        }
    }
-    job->cb(job->opaque, job->ret);
+    if (job->driver->clean) {
        job->driver->clean(job);
    }
    if (job->cb) {
        job->cb(job->opaque, job->ret);
    }
    /* Emit events only if we actually started */
    if (block_job_started(job)) {
        if (block_job_is_cancelled(job)) {
            block_job_event_cancelled(job);
        } else {
            const char *msg = NULL;
            if (job->ret < 0) {
                msg = strerror(-job->ret);
            }
            block_job_event_completed(job, msg);
        }
    }
    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        block_job_txn_unref(job->txn);
    }
    block_job_unref(job);
@@ -321,7 +384,10 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
-    if (job->pause_count || job->cancelled || !job->driver->complete) {
+    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    if (job->pause_count || job->cancelled ||
        !block_job_started(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
@@ -335,13 +401,26 @@ void block_job_pause(BlockJob *job)
    job->pause_count++;
 }
 /* Flag the pause as user-requested, then pause @job via block_job_pause(). */
 void block_job_user_pause(BlockJob *job)
 {
    job->user_paused = true;
    block_job_pause(job);
 }
 /* True while @job has a positive pause count. */
 static bool block_job_should_pause(BlockJob *job)
 {
    bool pause_pending = job->pause_count > 0;
    return pause_pending;
 }
 /* Report the user_paused flag of @job; a NULL @job reads as not paused. */
 bool block_job_user_paused(BlockJob *job)
 {
    if (!job) {
        return false;
    }
    return job->user_paused;
 }
 void coroutine_fn block_job_pause_point(BlockJob *job)
 {
    assert(job && block_job_started(job));
    if (!block_job_should_pause(job)) {
        return;
    }
@@ -376,6 +455,14 @@ void block_job_resume(BlockJob *job)
    block_job_enter(job);
 }
 /*
  * Undo a user-requested pause.  Does nothing unless @job is non-NULL,
  * flagged as user-paused and still has a positive pause count.
  */
 void block_job_user_resume(BlockJob *job)
 {
    if (!job || !job->user_paused || !(job->pause_count > 0)) {
        return;
    }
    job->user_paused = false;
    block_job_resume(job);
 }
 void block_job_enter(BlockJob *job)
 {
    if (job->co && !job->busy) {
@@ -385,9 +472,13 @@ void block_job_enter(BlockJob *job)
 void block_job_cancel(BlockJob *job)
 {
-    job->cancelled = true;
+    if (block_job_started(job)) {
-    block_job_iostatus_reset(job);
+        job->cancelled = true;
-    block_job_enter(job);
+        block_job_iostatus_reset(job);
        block_job_enter(job);
    } else {
        block_job_completed(job, -ECANCELED);
    }
 }
 bool block_job_is_cancelled(BlockJob *job)
@@ -413,14 +504,21 @@ static int block_job_finish_sync(BlockJob *job,
    assert(blk_bs(job->blk)->job == job);
    block_job_ref(job);
    finish(job, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        block_job_unref(job);
        return -EBUSY;
    }
    /* block_job_drain calls block_job_enter, and it should be enough to
     * induce progress until the job completes or moves to the main thread.
    */
    while (!job->deferred_to_main_loop && !job->completed) {
        block_job_drain(job);
    }
    while (!job->completed) {
-        aio_poll(block_job_get_aio_context(job), true);
+        aio_poll(qemu_get_aio_context(), true);
    }
    ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
    block_job_unref(job);
@@ -494,9 +592,15 @@ void block_job_yield(BlockJob *job)
    block_job_pause_point(job);
 }
-BlockJobInfo *block_job_query(BlockJob *job)
+BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
 {
-    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    BlockJobInfo *info;
    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }
    info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
    info->device    = g_strdup(job->id);
    info->len       = job->len;
@@ -517,8 +621,12 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
    }
 }
-void block_job_event_cancelled(BlockJob *job)
+static void block_job_event_cancelled(BlockJob *job)
 {
    if (block_job_is_internal(job)) {
        return;
    }
    qapi_event_send_block_job_cancelled(job->driver->job_type,
                                        job->id,
                                        job->len,
@@ -527,8 +635,12 @@ void block_job_event_cancelled(BlockJob *job)
                                        &error_abort);
 }
-void block_job_event_completed(BlockJob *job, const char *msg)
+static void block_job_event_completed(BlockJob *job, const char *msg)
 {
    if (block_job_is_internal(job)) {
        return;
    }
    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
@@ -543,6 +655,10 @@ void block_job_event_ready(BlockJob *job)
 {
    job->ready = true;
    if (block_job_is_internal(job)) {
        return;
    }
    qapi_event_send_block_job_ready(job->driver->job_type,
                                    job->id,
                                    job->len,
@@ -573,14 +689,15 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
    default:
        abort();
    }
-    qapi_event_send_block_job_error(job->id,
+    if (!block_job_is_internal(job)) {
-                                    is_read ? IO_OPERATION_TYPE_READ :
+        qapi_event_send_block_job_error(job->id,
-                                    IO_OPERATION_TYPE_WRITE,
+                                        is_read ? IO_OPERATION_TYPE_READ :
-                                    action, &error_abort);
+                                        IO_OPERATION_TYPE_WRITE,
                                        action, &error_abort);
    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* make the pause user visible, which will be resumed from QMP. */
-        job->user_paused = true;
+        block_job_user_pause(job);
        block_job_pause(job);
        block_job_iostatus_set_err(job, error);
    }
    return action;
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -651,7 +651,7 @@ void cpu_loop(CPUSPARCState *env)
 static void usage(void)
 {
    printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION
-           ", " QEMU_COPYRIGHT "\n"
+           "\n" QEMU_COPYRIGHT "\n"
           "usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
           "BSD CPU emulator (compiled for %s emulation)\n"
           "\n"
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -42,6 +42,11 @@ void mmap_unlock(void)
    }
 }
 /* Report whether mmap_lock_count is currently positive. */
 bool have_mmap_lock(void)
 {
    return mmap_lock_count > 0;
 }
 /* Grab lock to make sure things are in a consistent state after fork().  */
 void mmap_fork_start(void)
 {
--- a/231
+++ b/231
@@ -28,8 +28,6 @@ TMPB="qemu-conf"
 TMPC="${TMPDIR1}/${TMPB}.c"
 TMPO="${TMPDIR1}/${TMPB}.o"
 TMPCXX="${TMPDIR1}/${TMPB}.cxx"
 TMPL="${TMPDIR1}/${TMPB}.lo"
 TMPA="${TMPDIR1}/lib${TMPB}.la"
 TMPE="${TMPDIR1}/${TMPB}.exe"
 TMPMO="${TMPDIR1}/${TMPB}.mo"
@@ -230,6 +228,7 @@ vhost_net="no"
 vhost_scsi="no"
 vhost_vsock="no"
 kvm="no"
 hax="no"
 rdma=""
 gprof="no"
 debug_tcg="no"
@@ -312,6 +311,7 @@ gnutls_rnd=""
 nettle=""
 nettle_kdf="no"
 gcrypt=""
 gcrypt_hmac="no"
 gcrypt_kdf="no"
 vte=""
 virglrenderer=""
@@ -562,6 +562,7 @@ CYGWIN*)
 ;;
 MINGW32*)
  mingw32="yes"
  hax="yes"
  audio_possible_drivers="dsound sdl"
  if check_include dsound.h; then
    audio_drv_list="dsound"
@@ -581,6 +582,8 @@ FreeBSD)
  audio_possible_drivers="oss sdl pa"
  # needed for kinfo_getvmmap(3) in libutil.h
  LIBS="-lutil $LIBS"
  # needed for kinfo_getproc
  libs_qga="-lutil $libs_qga"
  netmap=""  # enable netmap autodetect
  HOST_VARIANT_DIR="freebsd"
 ;;
@@ -609,6 +612,7 @@ OpenBSD)
 Darwin)
  bsd="yes"
  darwin="yes"
  hax="yes"
  LDFLAGS_SHARED="-bundle -undefined dynamic_lookup"
  if [ "$cpu" = "x86_64" ] ; then
    QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
@@ -918,6 +922,10 @@ for opt do
  ;;
  --enable-kvm) kvm="yes"
  ;;
  --disable-hax) hax="no"
  ;;
  --enable-hax) hax="yes"
  ;;
  --disable-tcg-interpreter) tcg_interpreter="no"
  ;;
  --enable-tcg-interpreter) tcg_interpreter="yes"
@@ -1216,7 +1224,10 @@ case "$cpu" in
           cc_i386='$(CC) -m32'
           ;;
    x86_64)
-           CPU_CFLAGS="-m64"
+           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
           # If we truly care, we should simply detect this case at
           # runtime and generate the fallback to serial emulation.
           CPU_CFLAGS="-m64 -mcx16"
           LDFLAGS="-m64 $LDFLAGS"
           cc_i386='$(CC) -m32'
           ;;
@@ -1363,6 +1374,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  fdt             fdt device tree
  bluez           bluez stack connectivity
  kvm             KVM acceleration support
  hax             HAX acceleration support
  rdma            RDMA-based migration support
  vde             support for vde network
  netmap          support for netmap network
@@ -2406,6 +2418,19 @@ EOF
        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
            gcrypt_kdf=yes
        fi
        cat > $TMPC << EOF
 #include <gcrypt.h>
 int main(void) {
  gcry_mac_hd_t handle;
  gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5,
                GCRY_MAC_FLAG_SECURE, NULL);
  return 0;
 }
 EOF
        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
            gcrypt_hmac=yes
        fi
    else
        if test "$gcrypt" = "yes"; then
            feature_not_found "gcrypt" "Install gcrypt devel"
@@ -2727,7 +2752,7 @@ if compile_prog "" "" ; then
 fi
 ##########################################
-# xfsctl() probe, used for raw-posix
+# xfsctl() probe, used for file-posix.c
 if test "$xfs" != "no" ; then
  cat > $TMPC << EOF
 #include <stddef.h>  /* NULL */
@@ -2914,25 +2939,41 @@ fi
 # curses probe
 if test "$curses" != "no" ; then
  if test "$mingw32" = "yes" ; then
-    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
+    curses_inc_list="$($pkg_config --cflags ncurses 2>/dev/null):"
    curses_lib_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
  else
-    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses"
+    curses_inc_list="$($pkg_config --cflags ncursesw 2>/dev/null):-I/usr/include/ncursesw:"
    curses_lib_list="$($pkg_config --libs ncursesw 2>/dev/null):-lncursesw:-lcursesw"
  fi
  curses_found=no
  cat > $TMPC << EOF
 #include <locale.h>
 #include <curses.h>
 #include <wchar.h>
 int main(void) {
  const char *s = curses_version();
  wchar_t wch = L'w';
  setlocale(LC_ALL, "");
  resize_term(0, 0);
  addwstr(L"wide chars\n");
  addnwstr(&wch, 1);
  add_wch(WACS_DEGREE);
  return s != 0;
 }
 EOF
  IFS=:
-  for curses_lib in $curses_list; do
+  for curses_inc in $curses_inc_list; do
-    unset IFS
+    IFS=:
-    if compile_prog "" "$curses_lib" ; then
+    for curses_lib in $curses_lib_list; do
-      curses_found=yes
+      unset IFS
-      libs_softmmu="$curses_lib $libs_softmmu"
+      if compile_prog "$curses_inc" "$curses_lib" ; then
        curses_found=yes
        QEMU_CFLAGS="$curses_inc $QEMU_CFLAGS"
        libs_softmmu="$curses_lib $libs_softmmu"
        break
      fi
    done
    if test "$curses_found" = yes ; then
      break
    fi
  done
@@ -3038,7 +3079,7 @@ fi
 # g_test_trap_subprocess added in 2.38. Used by some tests.
 glib_subprocess=yes
-if test "$mingw32" = "yes" || ! $pkg_config --atleast-version=2.38 glib-2.0; then
+if ! $pkg_config --atleast-version=2.38 glib-2.0; then
    glib_subprocess=no
 fi
@@ -3911,6 +3952,36 @@ if compile_prog "" "" ; then
  setns=yes
 fi
 # clock_adjtime probe
 clock_adjtime=no
 cat > $TMPC <<EOF
 #include <time.h>
 int main(void)
 {
    return clock_adjtime(0, 0);
 }
 EOF
 clock_adjtime=no
 if compile_prog "" "" ; then
  clock_adjtime=yes
 fi
 # syncfs probe
 syncfs=no
 cat > $TMPC <<EOF
 #include <unistd.h>
 int main(void)
 {
    return syncfs(0);
 }
 EOF
 syncfs=no
 if compile_prog "" "" ; then
  syncfs=yes
 fi
 # Check if tools are available to build documentation.
 if test "$docs" != "no" ; then
  if has makeinfo && has pod2man; then
@@ -4248,11 +4319,11 @@ if have_backend "ust"; then
 #include <lttng/tracepoint.h>
 int main(void) { return 0; }
 EOF
-  if compile_prog "" "" ; then
+  if compile_prog "" "-Wl,--no-as-needed -ldl" ; then
    if $pkg_config lttng-ust --exists; then
      lttng_ust_libs=$($pkg_config --libs lttng-ust)
    else
-      lttng_ust_libs="-llttng-ust"
+      lttng_ust_libs="-llttng-ust -ldl"
    fi
    if $pkg_config liburcu-bp --exists; then
      urcu_bp_libs=$($pkg_config --libs liburcu-bp)
@@ -4491,6 +4562,55 @@ if compile_prog "" "" ; then
    int128=yes
 fi
 #########################################
 # See if 128-bit atomic operations are supported.
 atomic128=no
 if test "$int128" = "yes"; then
  cat > $TMPC << EOF
 int main(void)
 {
  unsigned __int128 x = 0, y = 0;
  y = __atomic_load_16(&x, 0);
  __atomic_store_16(&x, y, 0);
  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
  return 0;
 }
 EOF
  if compile_prog "" "" ; then
    atomic128=yes
  fi
 fi
 #########################################
 # See if 64-bit atomic operations are supported.
 # Note that without __atomic builtins, we can only
 # assume atomic loads/stores max at pointer size.
 cat > $TMPC << EOF
 #include <stdint.h>
 int main(void)
 {
  uint64_t x = 0, y = 0;
 #ifdef __ATOMIC_RELAXED
  y = __atomic_load_8(&x, 0);
  __atomic_store_8(&x, y, 0);
  __atomic_compare_exchange_8(&x, &y, x, 0, 0, 0);
  __atomic_exchange_8(&x, y, 0);
  __atomic_fetch_add_8(&x, y, 0);
 #else
  typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
  __sync_lock_test_and_set(&x, y);
  __sync_val_compare_and_swap(&x, y, 0);
  __sync_fetch_and_add(&x, y);
 #endif
  return 0;
 }
 EOF
 if compile_prog "" "" ; then
  atomic64=yes
 fi
 ########################################
 # check if getauxval is available.
@@ -4575,6 +4695,33 @@ if compile_prog "" "" ; then
    have_rtnetlink=yes
 fi
 ##########################################
 # check for usable AF_VSOCK environment
 have_af_vsock=no
 cat > $TMPC << EOF
 #include <errno.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #if !defined(AF_VSOCK)
 # error missing AF_VSOCK flag
 #endif
 #include <linux/vm_sockets.h>
 int main(void) {
    int sock, ret;
    struct sockaddr_vm svm;
    socklen_t len = sizeof(svm);
    sock = socket(AF_VSOCK, SOCK_STREAM, 0);
    ret = getpeername(sock, (struct sockaddr *)&svm, &len);
    if ((ret == -1) && (errno == ENOTCONN)) {
        return 0;
    }
    return -1;
 }
 EOF
 if compile_prog "" "" ; then
    have_af_vsock=yes
 fi
 #################################################
 # Sparc implicitly links with --relax, which is
 # incompatible with -r, so --no-relax should be
@@ -4590,8 +4737,14 @@ EOF
 if ! compile_object ""; then
  error_exit "Failed to compile object file for LD_REL_FLAGS test"
 fi
-if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
+for i in '-Wl,-r -Wl,--no-relax' -Wl,-r -r; do
-  LD_REL_FLAGS="-Wl,--no-relax"
+  if do_cc -nostdlib $i -o $TMPMO $TMPO; then
    LD_REL_FLAGS=$i
    break
  fi
 done
 if test "$modules" = "yes" && test "$LD_REL_FLAGS" = ""; then
  feature_not_found "modules" "Cannot find how to build relocatable objects"
 fi
 ##########################################
@@ -4911,6 +5064,7 @@ echo "Linux AIO support $linux_aio"
 echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
 echo "HAX support       $hax"
 echo "RDMA support      $rdma"
 echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
@@ -5196,6 +5350,12 @@ fi
 if test "$setns" = "yes" ; then
  echo "CONFIG_SETNS=y" >> $config_host_mak
 fi
 if test "$clock_adjtime" = "yes" ; then
  echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak
 fi
 if test "$syncfs" = "yes" ; then
  echo "CONFIG_SYNCFS=y" >> $config_host_mak
 fi
 if test "$inotify" = "yes" ; then
  echo "CONFIG_INOTIFY=y" >> $config_host_mak
 fi
@@ -5241,6 +5401,9 @@ if test "$gnutls_rnd" = "yes" ; then
 fi
 if test "$gcrypt" = "yes" ; then
  echo "CONFIG_GCRYPT=y" >> $config_host_mak
  if test "$gcrypt_hmac" = "yes" ; then
    echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak
  fi
  if test "$gcrypt_kdf" = "yes" ; then
    echo "CONFIG_GCRYPT_KDF=y" >> $config_host_mak
  fi
@@ -5447,6 +5610,14 @@ if test "$int128" = "yes" ; then
  echo "CONFIG_INT128=y" >> $config_host_mak
 fi
 if test "$atomic128" = "yes" ; then
  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
 fi
 if test "$atomic64" = "yes" ; then
  echo "CONFIG_ATOMIC64=y" >> $config_host_mak
 fi
 if test "$getauxval" = "yes" ; then
  echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
@@ -5544,6 +5715,10 @@ if test "$replication" = "yes" ; then
  echo "CONFIG_REPLICATION=y" >> $config_host_mak
 fi
 if test "$have_af_vsock" = "yes" ; then
  echo "CONFIG_AF_VSOCK=y" >> $config_host_mak
 fi
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
@@ -5668,7 +5843,7 @@ target_name=$(echo $target | cut -d '-' -f 1)
 target_bigendian="no"
 case "$target_name" in
-  armeb|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
+  armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
  target_bigendian=yes
  ;;
 esac
@@ -5731,6 +5906,8 @@ case "$target_name" in
  ;;
  cris)
  ;;
  hppa)
  ;;
  lm32)
  ;;
  m68k)
@@ -5758,6 +5935,8 @@ case "$target_name" in
  ;;
  moxie)
  ;;
  nios2)
  ;;
  or32)
    TARGET_ARCH=openrisc
    TARGET_BASE_ARCH=openrisc
@@ -5873,6 +6052,15 @@ case "$target_name" in
      fi
    fi
 esac
 if test "$hax" = "yes" ; then
  if test "$target_softmmu" = "yes" ; then
    case "$target_name" in
    i386|x86_64)
      echo "CONFIG_HAX=y" >> $config_target_mak
    ;;
    esac
  fi
 fi
 if test "$target_bigendian" = "yes" ; then
  echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
@@ -5930,6 +6118,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
  cris)
    disas_config "CRIS"
  ;;
  hppa)
    disas_config "HPPA"
  ;;
  i386|x86_64|x32)
    disas_config "I386"
  ;;
@@ -5951,6 +6142,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
  moxie*)
    disas_config "MOXIE"
  ;;
  nios2)
    disas_config "NIOS2"
  ;;
  or32)
    disas_config "OPENRISC"
  ;;
@@ -6021,7 +6215,7 @@ fi
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
-DIRS="$DIRS fsdev"
+DIRS="$DIRS docs fsdev"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 DIRS="$DIRS qapi-generated"
@@ -6035,6 +6229,7 @@ FILES="$FILES roms/seabios/Makefile roms/vgabios/Makefile"
 FILES="$FILES pc-bios/qemu-icon.bmp"
 for bios_file in \
    $source_path/pc-bios/*.bin \
    $source_path/pc-bios/*.lid \
    $source_path/pc-bios/*.aml \
    $source_path/pc-bios/*.rom \
    $source_path/pc-bios/*.dtb \
--- a/contrib/libvhost-user/Makefile.objs
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
 libvhost-user-obj-y = libvhost-user.o
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
 /*
 * Vhost User library
 *
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
 #ifndef LIBVHOST_USER_H
 #define LIBVHOST_USER_H
 #include <stdint.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <linux/vhost.h>
 #include "standard-headers/linux/virtio_ring.h"
 /* Based on qemu/hw/virtio/vhost-user.c */
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 #define VHOST_LOG_PAGE 4096
 #define VHOST_MAX_NR_VIRTQUEUE 8
 #define VIRTQUEUE_MAX_SIZE 1024
 #define VHOST_MEMORY_MAX_NREGIONS 8
 enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_MAX
 };
 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
 typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_INPUT_GET_CONFIG = 20,
    VHOST_USER_MAX
 } VhostUserRequest;
 typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
 } VhostUserMemoryRegion;
 typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;
 typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
 } VhostUserLog;
 #if defined(_WIN32)
 # define VU_PACKED __attribute__((gcc_struct, packed))
 #else
 # define VU_PACKED __attribute__((packed))
 #endif
 typedef struct VhostUserMsg {
    VhostUserRequest request;
 #define VHOST_USER_VERSION_MASK     (0x3)
 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
 #define VHOST_USER_VRING_IDX_MASK   (0xff)
 #define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
    } payload;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    uint8_t *data;
 } VU_PACKED VhostUserMsg;
 typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
 } VuDevRegion;
 typedef struct VuDev VuDev;
 typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
 typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
 typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
 typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
 typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received */
    /* skip libvhost-user processing if return value != 0 */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
 } VuDevIface;
 typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
 typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
 } VuRing;
 /* Per-virtqueue state: the ring, index bookkeeping, the queue handler
  * and the call/kick/err file descriptors. */
 typedef struct VuVirtq {
    VuRing vring;
    /* Next head to pop */
    uint16_t last_avail_idx;
    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    uint16_t used_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;
    /* Whether signalled_used holds a valid value (presumably; confirm
     * against the users in libvhost-user.c) */
    bool signalled_used_valid;
    /* Notification enabled? */
    bool notification;
    int inuse;
    vu_queue_handler_cb handler;
    int call_fd;
    int kick_fd;
    int err_fd;
    unsigned int enable;
    bool started;
 } VuVirtq;
 enum VuWatchCondtion {
    VU_WATCH_IN = 1 << 0,
    VU_WATCH_OUT = 1 << 1,
    VU_WATCH_PRI = 1 << 2,
    VU_WATCH_ERR = 1 << 3,
    VU_WATCH_HUP = 1 << 4,
 };
 typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
 typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
 typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
 typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
 struct VuDev {
    int sock;
    uint32_t nregions;
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
    int log_call_fd;
    uint64_t log_size;
    uint8_t *log_table;
    uint64_t features;
    uint64_t protocol_features;
    bool broken;
    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */
    vu_set_watch_cb set_watch;
    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;
    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    const VuDevIface *iface;
 };
 typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
 } VuVirtqElement;
 /**
 * vu_init:
 * @dev: a VuDev context
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 **/
 void vu_init(VuDev *dev,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
 /**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
 void vu_deinit(VuDev *dev);
 /**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: TRUE on success, FALSE on failure.
 */
 bool vu_dispatch(VuDev *dev);
 /**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
 void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
 /**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns the queue number @qidx.
 */
 VuVirtq *vu_get_queue(VuDev *dev, int qidx);
 /**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
 void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);
 /**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: state
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
 void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
 /**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
 bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
 /**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is empty.
 */
 int vu_queue_empty(VuDev *dev, VuVirtq *vq);
 /**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
 void vu_queue_notify(VuDev *dev, VuVirtq *vq);
 /**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of struct to return (must be >= sizeof(VuVirtqElement))
 *
 * Returns: a VuVirtqElement filled from the queue or NULL.
 */
 void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
 /**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * vu_queue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
 bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
 /**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
 void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);
 /**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
 void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);
 /**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * num elements).
 */
 void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
 /**
 * vu_queue_get_avail_bytes:
 * @vdev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: in bytes
 * @out_bytes: out bytes
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 */
 void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);
 /**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 */
 bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);
 #endif /* LIBVHOST_USER_H */
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
    }
    siglongjmp(cpu->jmp_env, 1);
 }
 /*
  * Record EXCP_ATOMIC as the exception index of @cpu, then leave through
  * cpu_loop_exit_restore(), passing @pc on unchanged.
  */
 void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
 {
    cpu->exception_index = EXCP_ATOMIC;
    cpu_loop_exit_restore(cpu, pc);
 }
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -143,23 +143,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
    uint8_t *tb_ptr = itb->tc_ptr;
    qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
-                           "Trace %p [" TARGET_FMT_lx "] %s\n",
+                           "Trace %p [%d: " TARGET_FMT_lx "] %s\n",
-                           itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
+                           itb->tc_ptr, cpu->cpu_index, itb->pc,
                           lookup_symbol(itb->pc));
 #if defined(DEBUG_DISAS)
    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
        && qemu_log_in_addr_range(itb->pc)) {
        qemu_log_lock();
 #if defined(TARGET_I386)
        log_cpu_state(cpu, CPU_DUMP_CCOP);
 #elif defined(TARGET_M68K)
        /* ??? Should not modify env state for dumping.  */
        cpu_m68k_flush_flags(env, env->cc_op);
        env->cc_op = CC_OP_FLAGS;
        env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4);
        log_cpu_state(cpu, 0);
 #else
        log_cpu_state(cpu, 0);
 #endif
        qemu_log_unlock();
    }
 #endif /* DEBUG_DISAS */
@@ -210,17 +207,53 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
    if (max_cycles > CF_COUNT_MASK)
        max_cycles = CF_COUNT_MASK;
    tb_lock();
    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                     max_cycles | CF_NOCACHE
                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
    tb->orig_tb = orig_tb;
    tb_unlock();
    /* execute the generated code */
    trace_exec_tb_nocache(tb, tb->pc);
    cpu_tb_exec(cpu, tb);
    tb_lock();
    tb_phys_invalidate(tb, -1);
    tb_free(tb);
    tb_unlock();
 }
 #endif
 /* Translate and run one throw-away translation block for @cpu.
 *
 * The block is generated for the CPU's current (pc, cs_base, flags),
 * executed once, then invalidated and freed; it is flagged CF_NOCACHE.
 *
 * NOTE(review): unlike cpu_exec_nocache(), this function does not take
 * tb_lock() around tb_gen_code()/tb_free().  The caller visible in this
 * file, cpu_exec_step_atomic(), runs it between start_exclusive() and
 * end_exclusive() -- confirm that this is what makes it safe.
 */
 static void cpu_exec_step(CPUState *cpu)
 {
    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags;
    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    /* The low bits of the cflags hold a count (cpu_exec_nocache() puts
     * max_cycles there); here the count is 1 -- presumably one guest
     * instruction, TODO confirm. */
    tb = tb_gen_code(cpu, pc, cs_base, flags,
                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
    /* No original block stands behind this temporary one. */
    tb->orig_tb = NULL;
    /* execute the generated code */
    trace_exec_tb_nocache(tb, pc);
    cpu_tb_exec(cpu, tb);
    /* The block was single-use: drop it again. */
    tb_phys_invalidate(tb, -1);
    tb_free(tb);
 }
-#endif
+
 /* Run a single cpu_exec_step() on @cpu inside an exclusive section.
 *
 * The global parallel_cpus flag is cleared for the duration of the step
 * and set back to true afterwards, all between start_exclusive() and
 * end_exclusive().
 */
 void cpu_exec_step_atomic(CPUState *cpu)
 {
    start_exclusive();
    /* Since we got here, we know that parallel_cpus must be true.  */
    parallel_cpus = false;
    cpu_exec_step(cpu);
    /* Unconditionally restored to true (see the comment above). */
    parallel_cpus = true;
    end_exclusive();
 }
 struct tb_desc {
    target_ulong pc;
@@ -475,8 +508,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
           True when it is, and we should restart on a new TB,
           and via longjmp via cpu_loop_exit.  */
        else {
            replay_interrupt();
            if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
                replay_interrupt();
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -109,7 +109,7 @@ void cpu_list_remove(CPUState *cpu)
 struct qemu_work_item {
    struct qemu_work_item *next;
    run_on_cpu_func func;
-    void *data;
+    run_on_cpu_data data;
    bool free, exclusive, done;
 };
@@ -129,7 +129,7 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
    qemu_cpu_kick(cpu);
 }
-void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
                   QemuMutex *mutex)
 {
    struct qemu_work_item wi;
@@ -154,7 +154,7 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
    }
 }
-void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
 {
    struct qemu_work_item *wi;
@@ -296,7 +296,8 @@ void cpu_exec_end(CPUState *cpu)
    }
 }
-void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
                           run_on_cpu_data data)
 {
    struct qemu_work_item *wi;
--- a/cpus.c
+++ b/cpus.c
@@ -33,7 +33,9 @@
 #include "sysemu/block-backend.h"
 #include "exec/gdbstub.h"
 #include "sysemu/dma.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/kvm.h"
 #include "sysemu/hax.h"
 #include "qmp-commands.h"
 #include "exec/exec-all.h"
@@ -69,7 +71,6 @@
 #endif /* CONFIG_LINUX */
 static CPUState *next_cpu;
 int64_t max_delay;
 int64_t max_advance;
@@ -557,7 +558,7 @@ static const VMStateDescription vmstate_timers = {
    }
 };
-static void cpu_throttle_thread(CPUState *cpu, void *opaque)
+static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
    double pct;
    double throttle_ratio;
@@ -588,7 +589,8 @@ static void cpu_throttle_timer_tick(void *opaque)
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
-            async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
+            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }
@@ -915,7 +917,7 @@ void qemu_init_cpu_loop(void)
    qemu_thread_get_self(&io_thread);
 }
-void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
 {
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
 }
@@ -1055,12 +1057,102 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
 #endif
 }
-static void tcg_exec_all(void);
+static int64_t tcg_get_icount_limit(void)
 {
    int64_t deadline;
    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }
        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
 }
 /* With icount enabled, notify QEMU_CLOCK_VIRTUAL when its nearest
 * deadline is reported as exactly 0.  Does nothing without icount.
 */
 static void handle_icount_deadline(void)
 {
    int64_t ns_left;
    if (!use_icount) {
        return;
    }
    ns_left = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (ns_left != 0) {
        return;
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 }
 /* Run @cpu through cpu_exec() once and return cpu_exec()'s result.
 *
 * When use_icount is set, an instruction budget is installed before the
 * run and whatever is left of it is taken back out of
 * timers_state.qemu_icount afterwards.  With CONFIG_PROFILER the time
 * spent around cpu_exec() is added to tcg_time.
 */
 static int tcg_cpu_exec(CPUState *cpu)
 {
    int ret;
 #ifdef CONFIG_PROFILER
    int64_t ti;
 #endif
 #ifdef CONFIG_PROFILER
    ti = profile_getclock();
 #endif
    if (use_icount) {
        int64_t count;
        int decr;
        /* Remove any budget still sitting in the per-CPU counters from
         * the global count, and zero those counters. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        /* Charge the new budget to the global count up front. */
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        /* The 16-bit decrementer takes at most 0xffff; the remainder of
         * the budget goes to icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
 #ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
 #endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        /* Writing the whole u32 clears more than the u16.low half. */
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
 }
 /* Tear down a vCPU that has been unplugged and can no longer run.
 *
 * The CPU list is scanned in order and the scan stops at the first
 * match, so at most one vCPU is destroyed per call.  Waiters on
 * qemu_cpu_cond are signalled after 'created' has been cleared.
 */
 static void deal_with_unplugged_cpus(void)
 {
    CPUState *vcpu;
    CPU_FOREACH(vcpu) {
        if (!vcpu->unplug || cpu_can_run(vcpu)) {
            continue;
        }
        qemu_tcg_destroy_vcpu(vcpu);
        vcpu->created = false;
        qemu_cond_signal(&qemu_cpu_cond);
        break;
    }
 }
 static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
    CPUState *cpu = arg;
    CPUState *remove_cpu = NULL;
    rcu_register_thread();
@@ -1087,34 +1179,89 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    /* process any pending work */
    atomic_mb_set(&exit_request, 1);
    cpu = first_cpu;
    while (1) {
-        tcg_exec_all();
+        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();
-        if (use_icount) {
+        if (!cpu) {
-            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+            cpu = first_cpu;
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
-        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
+
-        CPU_FOREACH(cpu) {
+        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
-            if (cpu->unplug && !cpu_can_run(cpu)) {
+
-                remove_cpu = cpu;
+            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }
-        }
+
-        if (remove_cpu) {
+        } /* for cpu.. */
-            qemu_tcg_destroy_vcpu(remove_cpu);
+
-            cpu->created = false;
+        /* Pairs with smp_wmb in qemu_cpu_kick.  */
-            qemu_cond_signal(&qemu_cpu_cond);
+        atomic_mb_set(&exit_request, 0);
-            remove_cpu = NULL;
+
-        }
+        handle_icount_deadline();
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }
    return NULL;
 }
 /* Thread entry point for a HAX vCPU.
 *
 * @arg: the CPUState this thread drives
 *
 * Takes qemu_global_mutex, initialises the vCPU, signals qemu_cpu_cond
 * (qemu_hax_start_vcpu() waits on it for cpu->created) and then loops:
 * execute the guest while the CPU can run, sleep on cpu->halt_cond
 * while it is idle, then process pending I/O events.
 *
 * The loop has no exit, so the trailing 'return NULL' is not reached.
 */
 static void *qemu_hax_cpu_thread_fn(void *arg)
 {
    CPUState *cpu = arg;
    int r;
    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);
    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;
    hax_init_vcpu(cpu);
    /* Tell the creating thread that cpu->created is now set. */
    qemu_cond_signal(&qemu_cpu_cond);
    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        /* qemu_cond_wait() is handed qemu_global_mutex, which this
         * thread locked above. */
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
 #ifdef _WIN32
        /* NOTE(review): zero-length alertable sleep -- presumably gives
         * the APC queued by qemu_cpu_kick_thread() a chance to run;
         * confirm. */
        SleepEx(0, TRUE);
 #endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
 }
 #ifdef _WIN32
 /* APC callback that deliberately does nothing; @unused is ignored.
 * It is the routine qemu_cpu_kick_thread() hands to QueueUserAPC().
 */
 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
 {
 }
 #endif
 static void qemu_cpu_kick_thread(CPUState *cpu)
 {
 #ifndef _WIN32
@@ -1130,7 +1277,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
        exit(1);
    }
 #else /* _WIN32 */
-    abort();
+    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
 #endif
 }
@@ -1153,6 +1306,13 @@ void qemu_cpu_kick(CPUState *cpu)
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
 }
@@ -1207,17 +1367,17 @@ void qemu_mutex_unlock_iothread(void)
    qemu_mutex_unlock(&qemu_global_mutex);
 }
-static int all_vcpus_paused(void)
+static bool all_vcpus_paused(void)
 {
    CPUState *cpu;
    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
-            return 0;
+            return false;
        }
    }
-    return 1;
+    return true;
 }
 void pause_all_vcpus(void)
@@ -1313,6 +1473,26 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
    }
 }
 /* Create the thread that runs HAX vCPU @cpu and wait until it is up.
 *
 * Allocates cpu->thread and cpu->halt_cond, starts
 * qemu_hax_cpu_thread_fn() under the name "CPU <index>/HAX", and blocks
 * on qemu_cpu_cond until the new thread has set cpu->created.
 *
 * NOTE(review): qemu_cond_wait() is given qemu_global_mutex, so the
 * caller is presumably expected to hold it -- confirm at the call site.
 */
 static void qemu_hax_start_vcpu(CPUState *cpu)
 {
    char thread_name[VCPU_THREAD_NAME_SIZE];
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
 #ifdef _WIN32
    /* Kept for qemu_cpu_kick_thread(), which passes it to QueueUserAPC(). */
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
 #endif
    /* Re-check in a loop: the condition variable is not specific to
     * this vCPU's 'created' flag. */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
 }
 static void qemu_kvm_start_vcpu(CPUState *cpu)
 {
    char thread_name[VCPU_THREAD_NAME_SIZE];
@@ -1363,6 +1543,8 @@ void qemu_init_vcpu(CPUState *cpu)
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
@@ -1412,104 +1594,6 @@ int vm_stop_force_state(RunState state)
    }
 }
 /* Return the instruction budget for the next execution slice.
 *
 * In replay-play mode the budget is whatever replay_get_instructions()
 * reports.  Otherwise it is the nearest QEMU_CLOCK_VIRTUAL deadline,
 * clamped as described below and converted by qemu_icount_round().
 */
 static int64_t tcg_get_icount_limit(void)
 {
    int64_t ns_to_deadline;
    if (replay_mode == REPLAY_MODE_PLAY) {
        return replay_get_instructions();
    }
    ns_to_deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    /* Historical (possibly buggy) behaviour, kept on purpose: when no
     * deadline is set (there is no QEMU_CLOCK_VIRTUAL timer) or it lies
     * more than INT32_MAX nanoseconds ahead, INT32_MAX nanoseconds is
     * used instead.
     */
    if (!(ns_to_deadline >= 0 && ns_to_deadline <= INT32_MAX)) {
        ns_to_deadline = INT32_MAX;
    }
    return qemu_icount_round(ns_to_deadline);
 }
 /* Run @cpu through cpu_exec() once and return cpu_exec()'s result.
 *
 * When use_icount is set, an instruction budget is installed before the
 * run and whatever is left of it is taken back out of
 * timers_state.qemu_icount afterwards.  With CONFIG_PROFILER the time
 * spent around cpu_exec() is added to tcg_time.
 */
 static int tcg_cpu_exec(CPUState *cpu)
 {
    int ret;
 #ifdef CONFIG_PROFILER
    int64_t ti;
 #endif
 #ifdef CONFIG_PROFILER
    ti = profile_getclock();
 #endif
    if (use_icount) {
        int64_t count;
        int decr;
        /* Remove any budget still sitting in the per-CPU counters from
         * the global count, and zero those counters. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        /* Charge the new budget to the global count up front. */
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        /* The 16-bit decrementer takes at most 0xffff; the remainder of
         * the budget goes to icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
 #ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
 #endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        /* Writing the whole u32 clears more than the u16.low half. */
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
 }
 /* Run the vCPUs round-robin, starting from the file-level next_cpu.
 *
 * The walk resumes where the previous call stopped (next_cpu is static
 * state) and wraps to first_cpu once the end of the list was reached.
 * It ends when the list is exhausted, when exit_request becomes set,
 * when a CPU returns EXCP_DEBUG, or when a CPU that cannot run is
 * stopping or stopped.  exit_request is cleared before returning.
 */
 static void tcg_exec_all(void)
 {
    int r;
    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_account_warp_timer();
    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        /* The virtual clock is enabled only while this CPU's
         * singlestep_enabled lacks SSTEP_NOTIMER. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                /* 'break' skips the loop increment, so next_cpu still
                 * names this CPU on the next call. */
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            /* An unplugged CPU is stepped over by hand because the
             * 'break' below bypasses the loop increment. */
            if (cpu->unplug) {
                next_cpu = CPU_NEXT(cpu);
            }
            break;
        }
    }
    /* Pairs with smp_wmb in qemu_cpu_kick.  */
    atomic_mb_set(&exit_request, 0);
 }
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 {
    /* XXX: implement xxx_cpu_list for targets that still miss it */
--- a/cputlb.c
+++ b/cputlb.c
@@ -23,15 +23,14 @@
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
 #include "exec/cpu_ldst.h"
 #include "exec/cputlb.h"
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
 #include "qemu/error-report.h"
 #include "exec/log.h"
 #include "exec/helper-proto.h"
 #include "qemu/atomic.h"
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -61,24 +60,15 @@
 /* statistics */
 int tlb_flush_count;
-/* NOTE:
+/* This is OK because CPU architectures generally permit an
- * If flush_global is true (the usual case), flush all tlb entries.
+ * implementation to drop entries from the TLB at any time, so
- * If flush_global is false, flush (at least) all tlb entries not
+ * flushing more entries than required is only an efficiency issue,
- * marked global.
+ * not a correctness issue.
 *
 * Since QEMU doesn't currently implement a global/not-global flag
 * for tlb entries, at the moment tlb_flush() will also flush all
 * tlb entries in the flush_global == false case. This is OK because
 * CPU architectures generally permit an implementation to drop
 * entries from the TLB at any time, so flushing more entries than
 * required is only an efficiency issue, not a correctness issue.
 */
-void tlb_flush(CPUState *cpu, int flush_global)
+void tlb_flush(CPUState *cpu)
 {
    CPUArchState *env = cpu->env_ptr;
    tlb_debug("(%d)\n", flush_global);
    memset(env->tlb_table, -1, sizeof(env->tlb_table));
    memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -145,7 +135,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  env->tlb_flush_addr, env->tlb_flush_mask);
-        tlb_flush(cpu, 1);
+        tlb_flush(cpu);
        return;
    }
@@ -498,6 +488,43 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    return qemu_ram_addr_from_host_nofail(p);
 }
 /* Perform a load of @size through the memory region behind an IOTLB entry.
 *
 * @env:        CPU architecture state; the CPUState is derived from it
 * @iotlbentry: IOTLB entry supplying the region address and attributes
 * @addr:       added to the page-masked entry address to form the
 *              address handed to the region dispatcher
 * @retaddr:    recorded in cpu->mem_io_pc and passed to cpu_io_recompile()
 * @size:       passed through to memory_region_dispatch_read()
 *
 * Returns the value filled in by memory_region_dispatch_read(); the
 * dispatcher's own return value is not looked at.
 */
 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         target_ulong addr, uintptr_t retaddr, int size)
 {
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr physaddr = iotlbentry->addr;
    /* The region lookup uses the entry address before it is rewritten. */
    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
    uint64_t val;
    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    /* ROM and notdirty regions are exempt; for any other region, I/O
     * while can_do_io is clear goes to cpu_io_recompile(). */
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_vaddr = addr;
    memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
    return val;
 }
 /* Perform a store of @size through the memory region behind an IOTLB entry.
 *
 * @env:        CPU architecture state; the CPUState is derived from it
 * @iotlbentry: IOTLB entry supplying the region address and attributes
 * @val:        value handed to memory_region_dispatch_write()
 * @addr:       added to the page-masked entry address to form the
 *              address handed to the region dispatcher
 * @retaddr:    recorded in cpu->mem_io_pc and passed to cpu_io_recompile()
 * @size:       passed through to memory_region_dispatch_write()
 *
 * NOTE(review): io_readx() stores mem_io_pc before the can_do_io check,
 * this function stores it after -- confirm the difference is intended.
 */
 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      uint64_t val, target_ulong addr,
                      uintptr_t retaddr, int size)
 {
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr physaddr = iotlbentry->addr;
    /* The region lookup uses the entry address before it is rewritten. */
    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
    /* ROM and notdirty regions are exempt; for any other region, I/O
     * while can_do_io is clear goes to cpu_io_recompile(). */
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_vaddr = addr;
    cpu->mem_io_pc = retaddr;
    memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
 }
 /* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
@@ -527,34 +554,178 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)
 /* Check that a guest write to @addr would be allowed under @mmu_idx.
 *
 * If the access is not permitted, an exception is raised exactly as a
 * real write would raise it, and this function does not return.  When it
 * does return, the TLB holds a valid entry for the access.
 */
 void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
                 uintptr_t retaddr)
 {
    /* 'index' must keep this name: VICTIM_TLB_HIT() refers to it, along
     * with 'env' and 'mmu_idx'. */
    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    target_ulong cached_write = env->tlb_table[mmu_idx][index].addr_write;
    /* Main TLB entry already covers this page: nothing to do. */
    if ((addr & TARGET_PAGE_MASK)
        == (cached_write & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        return;
    }
    /* TLB entry is for a different page; try the victim TLB next. */
    if (VICTIM_TLB_HIT(addr_write, addr)) {
        return;
    }
    tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
 }
 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.
 *
 * @env:     CPU architecture state
 * @addr:    guest address of the access
 * @oi:      combined memop/mmu index; split with get_memop()/get_mmuidx()
 * @retaddr: return address; adjusted by GETPC_ADJ before use
 *
 * On the success path the result is @addr plus the TLB entry's addend.
 * Every other path leaves through cpu_unaligned_access(), tlb_fill() or
 * the stop_the_world label, which calls cpu_loop_exit_atomic().
 */
 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
 {
    /* 'env', 'mmu_idx' and 'index' are referenced by name from inside
     * the VICTIM_TLB_HIT() macro used below. */
    size_t mmu_idx = get_mmuidx(oi);
    size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
    target_ulong tlb_addr = tlbe->addr_write;
    TCGMemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;
    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }
    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }
    /* Check TLB entry and enforce page permissions.  */
    if ((addr & TARGET_PAGE_MASK)
        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
        }
        /* Re-read: the entry may have been refilled just above. */
        tlb_addr = tlbe->addr_write;
    }
    /* Notice an IO access, or a notdirty page.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }
    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != tlb_addr)) {
        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }
    return (void *)((uintptr_t)addr + tlbe->addend);
 stop_the_world:
    /* No return statement follows: cpu_loop_exit_atomic() ends in
     * cpu_loop_exit_restore(), which siglongjmp()s away. */
    cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
 }
 #ifdef TARGET_WORDS_BIGENDIAN
 # define TGT_BE(X)  (X)
 # define TGT_LE(X)  BSWAP(X)
 #else
 # define TGT_BE(X)  BSWAP(X)
 # define TGT_LE(X)  (X)
 #endif
 #define MMUSUFFIX _mmu
-#define SHIFT 0
+#define DATA_SIZE 1
 #include "softmmu_template.h"
-#define SHIFT 1
+#define DATA_SIZE 2
 #include "softmmu_template.h"
-#define SHIFT 2
+#define DATA_SIZE 4
 #include "softmmu_template.h"
-#define SHIFT 3
+#define DATA_SIZE 8
 #include "softmmu_template.h"
 /* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */
 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
 #define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, retaddr)
 #define DATA_SIZE 1
 #include "atomic_template.h"
 #define DATA_SIZE 2
 #include "atomic_template.h"
 #define DATA_SIZE 4
 #include "atomic_template.h"
 #ifdef CONFIG_ATOMIC64
 #define DATA_SIZE 8
 #include "atomic_template.h"
 #endif
 #ifdef CONFIG_ATOMIC128
 #define DATA_SIZE 16
 #include "atomic_template.h"
 #endif
 /* Second set of helpers are directly callable from TCG as helpers.  */
 #undef EXTRA_ARGS
 #undef ATOMIC_NAME
 #undef ATOMIC_MMU_LOOKUP
 #define EXTRA_ARGS         , TCGMemOpIdx oi
 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
 #define DATA_SIZE 1
 #include "atomic_template.h"
 #define DATA_SIZE 2
 #include "atomic_template.h"
 #define DATA_SIZE 4
 #include "atomic_template.h"
 #ifdef CONFIG_ATOMIC64
 #define DATA_SIZE 8
 #include "atomic_template.h"
 #endif
 /* Code access functions.  */
 #undef MMUSUFFIX
 #define MMUSUFFIX _cmmu
 #undef GETPC
 #define GETPC() ((uintptr_t)0)
 #define SOFTMMU_CODE_ACCESS
-#define SHIFT 0
+#define DATA_SIZE 1
 #include "softmmu_template.h"
-#define SHIFT 1
+#define DATA_SIZE 2
 #include "softmmu_template.h"
-#define SHIFT 2
+#define DATA_SIZE 4
 #include "softmmu_template.h"
-#define SHIFT 3
+#define DATA_SIZE 8
 #include "softmmu_template.h"
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -3,6 +3,10 @@ crypto-obj-y += hash.o
 crypto-obj-$(CONFIG_NETTLE) += hash-nettle.o
 crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += hash-gcrypt.o
 crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT),n,y)) += hash-glib.o
 crypto-obj-y += hmac.o
 crypto-obj-$(CONFIG_NETTLE) += hmac-nettle.o
 crypto-obj-$(CONFIG_GCRYPT_HMAC) += hmac-gcrypt.o
 crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT_HMAC),n,y)) += hmac-glib.o
 crypto-obj-y += aes.o
 crypto-obj-y += desrfb.o
 crypto-obj-y += cipher.o
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -400,14 +400,26 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
 }
-bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                             QCryptoCipherMode mode)
 {
    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
        break;
    default:
        return false;
    }
    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
    case QCRYPTO_CIPHER_MODE_XTS:
        return true;
    case QCRYPTO_CIPHER_MODE_CTR:
        return false;
    default:
        return false;
    }
@@ -421,6 +433,17 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
 {
    QCryptoCipher *cipher;
    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
    case QCRYPTO_CIPHER_MODE_XTS:
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %s",
                   QCryptoCipherMode_lookup[mode]);
        return NULL;
    }
    cipher = g_new0(QCryptoCipher, 1);
    cipher->alg = alg;
    cipher->mode = mode;
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -24,10 +24,12 @@
 #include <gcrypt.h>
-bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                             QCryptoCipherMode mode)
 {
    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
    case QCRYPTO_CIPHER_ALG_3DES:
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
@@ -37,6 +39,16 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
    case QCRYPTO_CIPHER_ALG_SERPENT_256:
    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
        break;
    default:
        return false;
    }
    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
    case QCRYPTO_CIPHER_MODE_XTS:
    case QCRYPTO_CIPHER_MODE_CTR:
        return true;
    default:
        return false;
@@ -48,6 +60,7 @@ struct QCryptoCipherGcrypt {
    gcry_cipher_hd_t handle;
    gcry_cipher_hd_t tweakhandle;
    size_t blocksize;
    /* Initialization vector or Counter */
    uint8_t *iv;
 };
@@ -69,6 +82,9 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    case QCRYPTO_CIPHER_MODE_CBC:
        gcrymode = GCRY_CIPHER_MODE_CBC;
        break;
    case QCRYPTO_CIPHER_MODE_CTR:
        gcrymode = GCRY_CIPHER_MODE_CTR;
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %s",
                   QCryptoCipherMode_lookup[mode]);
@@ -84,6 +100,10 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        gcryalg = GCRY_CIPHER_DES;
        break;
    case QCRYPTO_CIPHER_ALG_3DES:
        gcryalg = GCRY_CIPHER_3DES;
        break;
    case QCRYPTO_CIPHER_ALG_AES_128:
        gcryalg = GCRY_CIPHER_AES128;
        break;
@@ -185,6 +205,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        case QCRYPTO_CIPHER_ALG_TWOFISH_256:
            ctx->blocksize = 16;
            break;
        case QCRYPTO_CIPHER_ALG_3DES:
        case QCRYPTO_CIPHER_ALG_CAST5_128:
            ctx->blocksize = 8;
            break;
@@ -339,12 +360,21 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
    if (ctx->iv) {
        memcpy(ctx->iv, iv, niv);
    } else {
-        gcry_cipher_reset(ctx->handle);
+        if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) {
-        err = gcry_cipher_setiv(ctx->handle, iv, niv);
+            err = gcry_cipher_setctr(ctx->handle, iv, niv);
-        if (err != 0) {
+            if (err != 0) {
-            error_setg(errp, "Cannot set IV: %s",
+                error_setg(errp, "Cannot set Counter: %s",
-                   gcry_strerror(err));
+                       gcry_strerror(err));
-            return -1;
+                return -1;
            }
        } else {
            gcry_cipher_reset(ctx->handle);
            err = gcry_cipher_setiv(ctx->handle, iv, niv);
            if (err != 0) {
                error_setg(errp, "Cannot set IV: %s",
                       gcry_strerror(err));
                return -1;
            }
        }
    }
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -28,6 +28,7 @@
 #include <nettle/cast128.h>
 #include <nettle/serpent.h>
 #include <nettle/twofish.h>
 #include <nettle/ctr.h>
 typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx,
                                               size_t length,
@@ -77,6 +78,18 @@ static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
    des_decrypt(ctx, length, dst, src);
 }
 /* Adapter with the cipher_ctx_t/cipher_length_t signature: forwards all
 * four arguments unchanged to des3_encrypt().
 */
 static void des3_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des3_encrypt(ctx, length, dst, src);
 }
 /* Adapter with the cipher_ctx_t/cipher_length_t signature: forwards all
 * four arguments unchanged to des3_decrypt().
 */
 static void des3_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des3_decrypt(ctx, length, dst, src);
 }
 static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
@@ -139,6 +152,18 @@ static void des_decrypt_wrapper(const void *ctx, size_t length,
    des_decrypt(ctx, length, dst, src);
 }
 /* Adapter with the (const void *ctx, size_t length) signature: forwards
 * all four arguments unchanged to des3_encrypt().
 */
 static void des3_encrypt_wrapper(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des3_encrypt(ctx, length, dst, src);
 }
 /* Adapter with the (const void *ctx, size_t length) signature: forwards
 * all four arguments unchanged to des3_decrypt().
 */
 static void des3_decrypt_wrapper(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
    des3_decrypt(ctx, length, dst, src);
 }
 static void cast128_encrypt_wrapper(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
@@ -186,15 +211,17 @@ struct QCryptoCipherNettle {
    QCryptoCipherNettleFuncNative alg_decrypt_native;
    QCryptoCipherNettleFuncWrapper alg_encrypt_wrapper;
    QCryptoCipherNettleFuncWrapper alg_decrypt_wrapper;
-
+    /* Initialization vector or Counter */
    uint8_t *iv;
    size_t blocksize;
 };
-bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                             QCryptoCipherMode mode)
 {
    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
    case QCRYPTO_CIPHER_ALG_3DES:
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
@@ -205,6 +232,16 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
        break;
    default:
        return false;
    }
    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
    case QCRYPTO_CIPHER_MODE_XTS:
    case QCRYPTO_CIPHER_MODE_CTR:
        return true;
    default:
        return false;
@@ -225,6 +262,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    case QCRYPTO_CIPHER_MODE_ECB:
    case QCRYPTO_CIPHER_MODE_CBC:
    case QCRYPTO_CIPHER_MODE_XTS:
    case QCRYPTO_CIPHER_MODE_CTR:
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %s",
@@ -241,6 +279,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    cipher->mode = mode;
    ctx = g_new0(QCryptoCipherNettle, 1);
    cipher->opaque = ctx;
    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
@@ -257,6 +296,18 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        ctx->blocksize = DES_BLOCK_SIZE;
        break;
    case QCRYPTO_CIPHER_ALG_3DES:
        ctx->ctx = g_new0(struct des3_ctx, 1);
        des3_set_key(ctx->ctx, key);
        ctx->alg_encrypt_native = des3_encrypt_native;
        ctx->alg_decrypt_native = des3_decrypt_native;
        ctx->alg_encrypt_wrapper = des3_encrypt_wrapper;
        ctx->alg_decrypt_wrapper = des3_decrypt_wrapper;
        ctx->blocksize = DES3_BLOCK_SIZE;
        break;
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
@@ -371,13 +422,11 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
    }
    ctx->iv = g_new0(uint8_t, ctx->blocksize);
    cipher->opaque = ctx;
    return cipher;
 error:
-    g_free(cipher);
+    qcrypto_cipher_free(cipher);
    g_free(ctx);
    return NULL;
 }
@@ -430,6 +479,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
                    ctx->iv, len, out, in);
        break;
    case QCRYPTO_CIPHER_MODE_CTR:
        ctr_crypt(ctx->ctx, ctx->alg_encrypt_native,
                    ctx->blocksize, ctx->iv,
                    len, out, in);
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %s",
                   QCryptoCipherMode_lookup[cipher->mode]);
@@ -469,6 +524,11 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
                    ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper,
                    ctx->iv, len, out, in);
        break;
    case QCRYPTO_CIPHER_MODE_CTR:
        ctr_crypt(ctx->ctx, ctx->alg_encrypt_native,
                    ctx->blocksize, ctx->iv,
                    len, out, in);
        break;
    default:
        error_setg(errp, "Unsupported cipher mode %s",
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -28,6 +28,7 @@ static size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = {
    [QCRYPTO_CIPHER_ALG_AES_192] = 24,
    [QCRYPTO_CIPHER_ALG_AES_256] = 32,
    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
    [QCRYPTO_CIPHER_ALG_3DES] = 24,
    [QCRYPTO_CIPHER_ALG_CAST5_128] = 16,
    [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
    [QCRYPTO_CIPHER_ALG_SERPENT_192] = 24,
@@ -42,6 +43,7 @@ static size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = {
    [QCRYPTO_CIPHER_ALG_AES_192] = 16,
    [QCRYPTO_CIPHER_ALG_AES_256] = 16,
    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
    [QCRYPTO_CIPHER_ALG_3DES] = 8,
    [QCRYPTO_CIPHER_ALG_CAST5_128] = 8,
    [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
    [QCRYPTO_CIPHER_ALG_SERPENT_192] = 16,
@@ -55,6 +57,7 @@ static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = {
    [QCRYPTO_CIPHER_MODE_ECB] = false,
    [QCRYPTO_CIPHER_MODE_CBC] = true,
    [QCRYPTO_CIPHER_MODE_XTS] = true,
    [QCRYPTO_CIPHER_MODE_CTR] = true,
 };
@@ -106,8 +109,9 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
    }
    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-        if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
+        if (alg == QCRYPTO_CIPHER_ALG_DES_RFB
-            error_setg(errp, "XTS mode not compatible with DES-RFB");
+                || alg == QCRYPTO_CIPHER_ALG_3DES) {
            error_setg(errp, "XTS mode not compatible with DES-RFB/3DES");
            return false;
        }
        if (nkey % 2) {
--- a/crypto/hmac-gcrypt.c
+++ b/crypto/hmac-gcrypt.c
@@ -0,0 +1,152 @@
 /*
 * QEMU Crypto hmac algorithms (based on libgcrypt)
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * Authors:
 *    Longpeng(Mike) <longpeng2@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.  See the COPYING file in the
 * top-level directory.
 *
 */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
 #include <gcrypt.h>
 /*
  * Lookup table indexed by QCryptoHashAlgorithm giving the libgcrypt MAC
  * algorithm identifier handed to gcry_mac_open().  An entry equal to
  * GCRY_MAC_NONE is reported as unsupported by qcrypto_hmac_supports().
  */
 static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = {
    [QCRYPTO_HASH_ALG_MD5] = GCRY_MAC_HMAC_MD5,
    [QCRYPTO_HASH_ALG_SHA1] = GCRY_MAC_HMAC_SHA1,
    [QCRYPTO_HASH_ALG_SHA224] = GCRY_MAC_HMAC_SHA224,
    [QCRYPTO_HASH_ALG_SHA256] = GCRY_MAC_HMAC_SHA256,
    [QCRYPTO_HASH_ALG_SHA384] = GCRY_MAC_HMAC_SHA384,
    [QCRYPTO_HASH_ALG_SHA512] = GCRY_MAC_HMAC_SHA512,
    [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MAC_HMAC_RMD160,
 };
 /*
  * Backend-private state; qcrypto_hmac_new() stores a pointer to it in
  * QCryptoHmac.opaque.
  */
 typedef struct QCryptoHmacGcrypt QCryptoHmacGcrypt;
 struct QCryptoHmacGcrypt {
    gcry_mac_hd_t handle; /* MAC handle opened by gcry_mac_open() */
 };
 /*
  * Tell whether @alg can be used for HMAC with this backend: it has to
  * index inside qcrypto_hmac_alg_map and map to something other than
  * GCRY_MAC_NONE.
  */
 bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
 {
    return alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) &&
           qcrypto_hmac_alg_map[alg] != GCRY_MAC_NONE;
 }
 /*
  * Create an HMAC object for @alg keyed with the @nkey bytes at @key.
  *
  * Opens a libgcrypt MAC handle for the mapped algorithm and installs the
  * key.  On any failure an error is stored in @errp, every resource
  * acquired so far is released, and NULL is returned.
  *
  * Returns: the new object (release with qcrypto_hmac_free()), or NULL.
  */
 QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
                              const uint8_t *key, size_t nkey,
                              Error **errp)
 {
    QCryptoHmac *hmac;
    QCryptoHmacGcrypt *ctx;
    gcry_error_t err;
    if (!qcrypto_hmac_supports(alg)) {
        error_setg(errp, "Unsupported hmac algorithm %s",
                   QCryptoHashAlgorithm_lookup[alg]);
        return NULL;
    }
    hmac = g_new0(QCryptoHmac, 1);
    hmac->alg = alg;
    ctx = g_new0(QCryptoHmacGcrypt, 1);
    err = gcry_mac_open(&ctx->handle, qcrypto_hmac_alg_map[alg],
                        GCRY_MAC_FLAG_SECURE, NULL);
    if (err != 0) {
        error_setg(errp, "Cannot initialize hmac: %s",
                   gcry_strerror(err));
        goto error;
    }
    err = gcry_mac_setkey(ctx->handle, (const void *)key, nkey);
    if (err != 0) {
        error_setg(errp, "Cannot set key: %s",
                   gcry_strerror(err));
        /*
         * The handle was opened successfully above; close it here because
         * the shared error path only frees the wrapper structures.
         */
        gcry_mac_close(ctx->handle);
        goto error;
    }
    hmac->opaque = ctx;
    return hmac;
 error:
    g_free(ctx);
    g_free(hmac);
    return NULL;
 }
 /*
  * Close the libgcrypt MAC handle and free both the backend context and
  * the @hmac wrapper.  A NULL @hmac is accepted and ignored.
  */
 void qcrypto_hmac_free(QCryptoHmac *hmac)
 {
    if (hmac != NULL) {
        QCryptoHmacGcrypt *backend = hmac->opaque;

        gcry_mac_close(backend->handle);
        g_free(backend);
        g_free(hmac);
    }
 }
 /*
  * Compute the HMAC over the @niov buffers in @iov.
  *
  * If *@resultlen is 0 on entry, a buffer of the MAC length is allocated
  * and returned through *@result / *@resultlen.  Otherwise *@resultlen
  * must equal the MAC length and the caller's buffer at *@result is
  * filled in.  On success the handle is reset so the object can be used
  * for another computation.
  *
  * On failure an error is stored in @errp, a buffer allocated by this
  * call is freed again (*@result is set to NULL and *@resultlen to 0),
  * and a best-effort reset of the handle is attempted so that data
  * already written is not carried into a later call.
  *
  * Returns: 0 on success, -1 on error
  */
 int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
                        const struct iovec *iov,
                        size_t niov,
                        uint8_t **result,
                        size_t *resultlen,
                        Error **errp)
 {
    QCryptoHmacGcrypt *ctx;
    gcry_error_t err;
    uint32_t ret;
    size_t i;
    bool allocated = false;
    ctx = hmac->opaque;
    for (i = 0; i < niov; i++) {
        err = gcry_mac_write(ctx->handle, iov[i].iov_base, iov[i].iov_len);
        if (err != 0) {
            error_setg(errp, "Cannot write hmac data: %s",
                       gcry_strerror(err));
            goto error_reset;
        }
    }
    /* ret is a length, not a gcrypt error code; 0 signals failure */
    ret = gcry_mac_get_algo_maclen(qcrypto_hmac_alg_map[hmac->alg]);
    if (ret == 0) {
        error_setg(errp, "Unable to get hmac length");
        goto error_reset;
    }
    if (*resultlen == 0) {
        *resultlen = ret;
        *result = g_new0(uint8_t, *resultlen);
        allocated = true;
    } else if (*resultlen != ret) {
        error_setg(errp, "Result buffer size %zu is smaller than hmac %u",
                   *resultlen, ret);
        goto error_reset;
    }
    err = gcry_mac_read(ctx->handle, *result, resultlen);
    if (err != 0) {
        error_setg(errp, "Cannot get result: %s",
                   gcry_strerror(err));
        goto error_reset;
    }
    err = gcry_mac_reset(ctx->handle);
    if (err != 0) {
        error_setg(errp, "Cannot reset hmac context: %s",
                   gcry_strerror(err));
        goto error_free;
    }
    return 0;
 error_reset:
    /* Best effort: the primary error has already been reported */
    gcry_mac_reset(ctx->handle);
 error_free:
    if (allocated) {
        /* Callers do not free *result when -1 is returned */
        g_free(*result);
        *result = NULL;
        *resultlen = 0;
    }
    return -1;
 }
--- a/crypto/hmac-glib.c
+++ b/crypto/hmac-glib.c
@@ -0,0 +1,166 @@
 /*
 * QEMU Crypto hmac algorithms (based on glib)
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * Authors:
 *    Longpeng(Mike) <longpeng2@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.  See the COPYING file in the
 * top-level directory.
 *
 */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
 /* Support for HMAC Algos has been added in GLib 2.30 */
 #if GLIB_CHECK_VERSION(2, 30, 0)
 /*
  * Lookup table indexed by QCryptoHashAlgorithm giving the GChecksumType
  * handed to g_hmac_new().  A value of -1 marks an algorithm that
  * qcrypto_hmac_supports() reports as unsupported.
  */
 static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = {
    [QCRYPTO_HASH_ALG_MD5] = G_CHECKSUM_MD5,
    [QCRYPTO_HASH_ALG_SHA1] = G_CHECKSUM_SHA1,
    [QCRYPTO_HASH_ALG_SHA256] = G_CHECKSUM_SHA256,
 /* Support for HMAC SHA-512 in GLib 2.42 */
 #if GLIB_CHECK_VERSION(2, 42, 0)
    [QCRYPTO_HASH_ALG_SHA512] = G_CHECKSUM_SHA512,
 #else
    [QCRYPTO_HASH_ALG_SHA512] = -1,
 #endif
    [QCRYPTO_HASH_ALG_SHA224] = -1,
    [QCRYPTO_HASH_ALG_SHA384] = -1,
    [QCRYPTO_HASH_ALG_RIPEMD160] = -1,
 };
 /*
  * Backend-private state; qcrypto_hmac_new() stores a pointer to it in
  * QCryptoHmac.opaque.
  */
 typedef struct QCryptoHmacGlib QCryptoHmacGlib;
 struct QCryptoHmacGlib {
    GHmac *ghmac; /* created by g_hmac_new(), dropped by g_hmac_unref() */
 };
 /*
  * Tell whether @alg can be used for HMAC with this backend: it has to
  * index inside qcrypto_hmac_alg_map and must not be mapped to -1.
  */
 bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
 {
    return alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) &&
           qcrypto_hmac_alg_map[alg] != -1;
 }
 /*
  * Create an HMAC object for @alg keyed with the @nkey bytes at @key,
  * backed by a GHmac.  On failure an error is stored in @errp and NULL
  * is returned.
  */
 QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
                              const uint8_t *key, size_t nkey,
                              Error **errp)
 {
    QCryptoHmacGlib *backend;
    QCryptoHmac *obj;

    if (!qcrypto_hmac_supports(alg)) {
        error_setg(errp, "Unsupported hmac algorithm %s",
                   QCryptoHashAlgorithm_lookup[alg]);
        return NULL;
    }

    obj = g_new0(QCryptoHmac, 1);
    obj->alg = alg;
    backend = g_new0(QCryptoHmacGlib, 1);
    backend->ghmac = g_hmac_new(qcrypto_hmac_alg_map[alg],
                                (const uint8_t *)key, nkey);
    if (backend->ghmac) {
        obj->opaque = backend;
        return obj;
    }

    /* GHmac creation failed: report it and undo both allocations */
    error_setg(errp, "Cannot initialize hmac and set key");
    g_free(backend);
    g_free(obj);
    return NULL;
 }
 /*
  * Drop the GHmac reference and free both the backend context and the
  * @hmac wrapper.  A NULL @hmac is accepted and ignored.
  */
 void qcrypto_hmac_free(QCryptoHmac *hmac)
 {
    if (hmac != NULL) {
        QCryptoHmacGlib *backend = hmac->opaque;

        g_hmac_unref(backend->ghmac);
        g_free(backend);
        g_free(hmac);
    }
 }
 /*
  * Compute the HMAC over the @niov buffers in @iov using the GHmac.
  *
  * If *@resultlen is 0 on entry, a buffer of the digest length is
  * allocated and returned through *@result / *@resultlen; otherwise
  * *@resultlen must equal the digest length and the caller's buffer is
  * filled in.
  *
  * Returns: 0 on success, -1 on error (with @errp set)
  */
 int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
                        const struct iovec *iov,
                        size_t niov,
                        uint8_t **result,
                        size_t *resultlen,
                        Error **errp)
 {
    QCryptoHmacGlib *backend = hmac->opaque;
    int idx, digest_len;

    for (idx = 0; idx < niov; idx++) {
        const struct iovec *seg = &iov[idx];

        g_hmac_update(backend->ghmac, seg->iov_base, seg->iov_len);
    }

    digest_len = g_checksum_type_get_length(qcrypto_hmac_alg_map[hmac->alg]);
    if (digest_len < 0) {
        error_setg(errp, "Unable to get hmac length");
        return -1;
    }

    /* A caller-supplied buffer has to match the digest size exactly */
    if (*resultlen != 0 && *resultlen != digest_len) {
        error_setg(errp, "Result buffer size %zu is smaller than hmac %d",
                   *resultlen, digest_len);
        return -1;
    }
    if (*resultlen == 0) {
        *resultlen = digest_len;
        *result = g_new0(uint8_t, *resultlen);
    }

    g_hmac_get_digest(backend->ghmac, *result, resultlen);
    return 0;
 }
 #else
 /*
  * Fallback compiled when GLIB_CHECK_VERSION(2, 30, 0) is false: no
  * algorithm is reported as supported.
  */
 bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
 {
    return false;
 }
 /*
  * Fallback compiled when GLIB_CHECK_VERSION(2, 30, 0) is false: HMAC
  * objects cannot be created.  Always stores an error in @errp and
  * returns NULL, so callers that rely on @errp being set whenever NULL
  * is returned keep working.
  */
 QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
                              const uint8_t *key, size_t nkey,
                              Error **errp)
 {
    error_setg(errp, "HMAC support requires GLib >= 2.30");
    return NULL;
 }
 /*
  * Fallback compiled when GLIB_CHECK_VERSION(2, 30, 0) is false: the
  * matching qcrypto_hmac_new() never returns an object, so there is
  * nothing to release.
  */
 void qcrypto_hmac_free(QCryptoHmac *hmac)
 {
 }
 /*
  * Fallback compiled when GLIB_CHECK_VERSION(2, 30, 0) is false: no HMAC
  * can be computed.  Always stores an error in @errp and returns -1; the
  * output parameters are left untouched.
  */
 int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
                        const struct iovec *iov,
                        size_t niov,
                        uint8_t **result,
                        size_t *resultlen,
                        Error **errp)
 {
    error_setg(errp, "HMAC support requires GLib >= 2.30");
    return -1;
 }
 #endif
--- a/crypto/hmac-nettle.c
+++ b/crypto/hmac-nettle.c
@@ -0,0 +1,175 @@
 /*
 * QEMU Crypto hmac algorithms (based on nettle)
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * Authors:
 *    Longpeng(Mike) <longpeng2@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.  See the COPYING file in the
 * top-level directory.
 *
 */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
 #include <nettle/hmac.h>
 /*
  * Generic callback signatures taking an untyped context pointer.  The
  * algorithm-specific hmac_*_set_key / hmac_*_update / hmac_*_digest
  * functions are cast to these types in qcrypto_hmac_alg_map so they can
  * be called uniformly.
  */
 typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx,
              size_t key_length, const uint8_t *key);
 typedef void (*qcrypto_nettle_hmac_update)(void *ctx,
              size_t length, const uint8_t *data);
 typedef void (*qcrypto_nettle_hmac_digest)(void *ctx,
              size_t length, uint8_t *digest);
 /*
  * Backend-private state; qcrypto_hmac_new() stores a pointer to it in
  * QCryptoHmac.opaque.  The union holds one context per algorithm family,
  * and its address (&ctx->u) is what gets passed to the callbacks.
  */
 typedef struct QCryptoHmacNettle QCryptoHmacNettle;
 struct QCryptoHmacNettle {
    union qcrypto_nettle_hmac_ctx {
        struct hmac_md5_ctx md5_ctx;
        struct hmac_sha1_ctx sha1_ctx;
        struct hmac_sha256_ctx sha256_ctx; /* equals hmac_sha224_ctx */
        struct hmac_sha512_ctx sha512_ctx; /* equals hmac_sha384_ctx */
        struct hmac_ripemd160_ctx ripemd160_ctx;
    } u;
 };
 /*
  * Per-algorithm dispatch table indexed by QCryptoHashAlgorithm.  Each
  * entry bundles the key-setup, update and digest callbacks together with
  * the digest size in bytes.  An entry whose setkey pointer is NULL is
  * reported as unsupported by qcrypto_hmac_supports().
  */
 struct qcrypto_nettle_hmac_alg {
    qcrypto_nettle_hmac_setkey setkey;
    qcrypto_nettle_hmac_update update;
    qcrypto_nettle_hmac_digest digest;
    size_t len; /* digest length in bytes */
 } qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = {
    [QCRYPTO_HASH_ALG_MD5] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_md5_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_md5_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_md5_digest,
        .len = MD5_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_SHA1] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha1_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_sha1_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_sha1_digest,
        .len = SHA1_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_SHA224] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha224_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_sha224_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_sha224_digest,
        .len = SHA224_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_SHA256] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha256_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_sha256_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_sha256_digest,
        .len = SHA256_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_SHA384] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha384_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_sha384_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_sha384_digest,
        .len = SHA384_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_SHA512] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_sha512_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_sha512_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_sha512_digest,
        .len = SHA512_DIGEST_SIZE,
    },
    [QCRYPTO_HASH_ALG_RIPEMD160] = {
        .setkey = (qcrypto_nettle_hmac_setkey)hmac_ripemd160_set_key,
        .update = (qcrypto_nettle_hmac_update)hmac_ripemd160_update,
        .digest = (qcrypto_nettle_hmac_digest)hmac_ripemd160_digest,
        .len = RIPEMD160_DIGEST_SIZE,
    },
 };
 /*
  * Tell whether @alg can be used for HMAC with this backend: it has to
  * index inside qcrypto_hmac_alg_map and have a setkey callback.
  */
 bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
 {
    return alg < G_N_ELEMENTS(qcrypto_hmac_alg_map) &&
           qcrypto_hmac_alg_map[alg].setkey != NULL;
 }
 /*
  * Create an HMAC object for @alg and install the @nkey bytes at @key
  * through the algorithm's setkey callback.  For an unsupported @alg an
  * error is stored in @errp and NULL is returned.
  */
 QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
                              const uint8_t *key, size_t nkey,
                              Error **errp)
 {
    if (qcrypto_hmac_supports(alg)) {
        QCryptoHmac *obj = g_new0(QCryptoHmac, 1);
        QCryptoHmacNettle *backend;

        obj->alg = alg;
        backend = g_new0(QCryptoHmacNettle, 1);
        qcrypto_hmac_alg_map[alg].setkey(&backend->u, nkey, key);
        obj->opaque = backend;
        return obj;
    }

    error_setg(errp, "Unsupported hmac algorithm %s",
               QCryptoHashAlgorithm_lookup[alg]);
    return NULL;
 }
 /*
  * Free the backend context and the @hmac wrapper.  A NULL @hmac is
  * accepted and ignored.
  */
 void qcrypto_hmac_free(QCryptoHmac *hmac)
 {
    if (hmac != NULL) {
        QCryptoHmacNettle *backend = hmac->opaque;

        g_free(backend);
        g_free(hmac);
    }
 }
 /*
  * Compute the HMAC over the @niov buffers in @iov.
  *
  * Every buffer is fed to the algorithm's update callback in pieces of
  * at most UINT_MAX bytes.  If *@resultlen is 0 on entry, a buffer of the
  * digest length is allocated and returned through *@result /
  * *@resultlen; otherwise *@resultlen must equal the digest length and
  * the caller's buffer is filled in.
  *
  * Returns: 0 on success, -1 on error (with @errp set)
  */
 int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
                        const struct iovec *iov,
                        size_t niov,
                        uint8_t **result,
                        size_t *resultlen,
                        Error **errp)
 {
    QCryptoHmacNettle *ctx;
    size_t i;
    ctx = (QCryptoHmacNettle *)hmac->opaque;
    for (i = 0; i < niov; ++i) {
        size_t len = iov[i].iov_len;
        uint8_t *base = iov[i].iov_base;
        while (len) {
            size_t shortlen = MIN(len, UINT_MAX);
            /*
             * Hash exactly the current piece and step past it; using the
             * remaining length here would re-hash data and run beyond
             * the end of the buffer.
             */
            qcrypto_hmac_alg_map[hmac->alg].update(&ctx->u, shortlen, base);
            len -= shortlen;
            base += shortlen;
        }
    }
    if (*resultlen == 0) {
        *resultlen = qcrypto_hmac_alg_map[hmac->alg].len;
        *result = g_new0(uint8_t, *resultlen);
    } else if (*resultlen != qcrypto_hmac_alg_map[hmac->alg].len) {
        error_setg(errp,
                   "Result buffer size %zu is smaller than hash %zu",
                   *resultlen, qcrypto_hmac_alg_map[hmac->alg].len);
        return -1;
    }
    qcrypto_hmac_alg_map[hmac->alg].digest(&ctx->u, *resultlen, *result);
    return 0;
 }
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -0,0 +1,72 @@
 /*
 * QEMU Crypto hmac algorithms
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.  See the COPYING file in the
 * top-level directory.
 *
 */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hmac.h"
 /* Lower-case hex digits, indexed by nibble value in qcrypto_hmac_digestv() */
 static const char hex[] = "0123456789abcdef";
 /*
  * Single-buffer convenience form of qcrypto_hmac_bytesv(): wraps
  * @buf/@len in a one-element iovec and forwards the call.
  */
 int qcrypto_hmac_bytes(QCryptoHmac *hmac,
                       const char *buf,
                       size_t len,
                       uint8_t **result,
                       size_t *resultlen,
                       Error **errp)
 {
    struct iovec single;

    single.iov_base = (char *)buf;
    single.iov_len = len;
    return qcrypto_hmac_bytesv(hmac, &single, 1, result, resultlen, errp);
 }
 /*
  * Compute the HMAC over @iov via qcrypto_hmac_bytesv() and return it
  * through *@digest as a newly allocated, NUL-terminated lower-case hex
  * string.  The raw byte buffer is freed before returning.
  *
  * Returns: 0 on success, -1 if qcrypto_hmac_bytesv() failed
  */
 int qcrypto_hmac_digestv(QCryptoHmac *hmac,
                         const struct iovec *iov,
                         size_t niov,
                         char **digest,
                         Error **errp)
 {
    uint8_t *raw = NULL;
    size_t rawlen = 0;
    size_t pos;
    char *text;

    if (qcrypto_hmac_bytesv(hmac, iov, niov, &raw, &rawlen, errp) < 0) {
        return -1;
    }

    /* two hex characters per byte plus the terminator */
    text = g_new0(char, (rawlen * 2) + 1);
    for (pos = 0; pos < rawlen; pos++) {
        uint8_t byte = raw[pos];

        text[pos * 2] = hex[(byte >> 4) & 0xf];
        text[(pos * 2) + 1] = hex[byte & 0xf];
    }
    text[rawlen * 2] = '\0';
    *digest = text;

    g_free(raw);
    return 0;
 }
 /*
  * Single-buffer convenience form of qcrypto_hmac_digestv(): wraps
  * @buf/@len in a one-element iovec and forwards the call.
  */
 int qcrypto_hmac_digest(QCryptoHmac *hmac,
                        const char *buf,
                        size_t len,
                        char **digest,
                        Error **errp)
 {
    struct iovec single;

    single.iov_base = (char *)buf;
    single.iov_len = len;
    return qcrypto_hmac_digestv(hmac, &single, 1, digest, errp);
 }
--- a/crypto/hmac.h
+++ b/crypto/hmac.h
@@ -0,0 +1,166 @@
 /*
 * QEMU Crypto hmac algorithms
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.  See the COPYING file in the
 * top-level directory.
 *
 */
 #ifndef QCRYPTO_HMAC_H
 #define QCRYPTO_HMAC_H
 #include "qapi-types.h"
 /*
  * Backend-independent HMAC object handed out by qcrypto_hmac_new().
  */
 typedef struct QCryptoHmac QCryptoHmac;
 struct QCryptoHmac {
    QCryptoHashAlgorithm alg; /* algorithm selected at creation time */
    void *opaque;             /* backend-private context */
 };
 /**
 * qcrypto_hmac_supports:
 * @alg: the hmac algorithm
 *
 * Determine if @alg hmac algorithm is supported by
 * the current configured build
 *
 * Returns:
 *  true if the algorithm is supported, false otherwise
 */
 bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg);
 /**
 * qcrypto_hmac_new:
 * @alg: the hmac algorithm
 * @key: the key bytes
 * @nkey: the length of @key
 * @errp: pointer to a NULL-initialized error object
 *
 * Creates a new hmac object with the algorithm @alg
 *
 * The @key parameter provides the bytes representing
 * the secret key to use. The @nkey parameter specifies
 * the length of @key in bytes
 *
 * Note: must use qcrypto_hmac_free() to release the
 * returned hmac object when no longer required
 *
 * Returns:
 *  a new hmac object, or NULL on error
 */
 QCryptoHmac *qcrypto_hmac_new(QCryptoHashAlgorithm alg,
                              const uint8_t *key, size_t nkey,
                              Error **errp);
 /**
 * qcrypto_hmac_free:
 * @hmac: the hmac object
 *
 * Release the memory associated with @hmac that was
 * previously allocated by qcrypto_hmac_new()
 */
 void qcrypto_hmac_free(QCryptoHmac *hmac);
 /**
 * qcrypto_hmac_bytesv:
 * @hmac: the hmac object
 * @iov: the array of memory regions to hmac
 * @niov: the length of @iov
 * @result: pointer to hold output hmac
 * @resultlen: pointer to hold length of @result
 * @errp: pointer to a NULL-initialized error object
 *
 * Computes the hmac across all the memory regions
 * present in @iov. The @result pointer will be
 * filled with raw bytes representing the computed
 * hmac, which will have length @resultlen.
 *
 * If *@resultlen is 0 on entry, a buffer of the hmac
 * length is allocated, stored in *@result, and
 * *@resultlen is updated; that memory must be released
 * with a call to g_free() when no longer required.
 * If *@resultlen is non-zero on entry, it must equal
 * the hmac length and *@result must point to a buffer
 * of that size supplied by the caller; any other size
 * is reported as an error.
 *
 * Returns:
 *  0 on success, -1 on error
 */
 int qcrypto_hmac_bytesv(QCryptoHmac *hmac,
                        const struct iovec *iov,
                        size_t niov,
                        uint8_t **result,
                        size_t *resultlen,
                        Error **errp);
 /**
 * qcrypto_hmac_bytes:
 * @hmac: the hmac object
 * @buf: the memory region to hmac
 * @len: the length of @buf
 * @result: pointer to hold output hmac
 * @resultlen: pointer to hold length of @result
 * @errp: pointer to a NULL-initialized error object
 *
 * Computes the hmac across the entire memory region
 * @buf of length @len. The @result pointer will be
 * filled with raw bytes representing the computed
 * hmac, which will have length @resultlen. The
 * memory pointer in @result must be released
 * with a call to g_free() when no longer required.
 *
 * Returns:
 *  0 on success, -1 on error
 */
 int qcrypto_hmac_bytes(QCryptoHmac *hmac,
                       const char *buf,
                       size_t len,
                       uint8_t **result,
                       size_t *resultlen,
                       Error **errp);
 /**
 * qcrypto_hmac_digestv:
 * @hmac: the hmac object
 * @iov: the array of memory regions to hmac
 * @niov: the length of @iov
 * @digest: pointer to hold output hmac
 * @errp: pointer to a NULL-initialized error object
 *
 * Computes the hmac across all the memory regions
 * present in @iov. The @digest pointer will be
 * filled with the printable hex digest of the computed
 * hmac, which will be terminated by '\0'. The
 * memory pointer in @digest must be released
 * with a call to g_free() when no longer required.
 *
 * Returns:
 *  0 on success, -1 on error
 */
 int qcrypto_hmac_digestv(QCryptoHmac *hmac,
                         const struct iovec *iov,
                         size_t niov,
                         char **digest,
                         Error **errp);
 /**
 * qcrypto_hmac_digest:
 * @hmac: the hmac object
 * @buf: the memory region to hmac
 * @len: the length of @buf
 * @digest: pointer to hold output hmac
 * @errp: pointer to a NULL-initialized error object
 *
 * Computes the hmac across the entire memory region
 * @buf of length @len. The @digest pointer will be
 * filled with the printable hex digest of the computed
 * hmac, which will be terminated by '\0'. The
 * memory pointer in @digest must be released
 * with a call to g_free() when no longer required.
 *
 * Returns: 0 on success, -1 on error
 */
 int qcrypto_hmac_digest(QCryptoHmac *hmac,
                        const char *buf,
                        size_t len,
                        char **digest,
                        Error **errp);
 #endif
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -119,6 +119,10 @@ static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = {
 int qcrypto_init(Error **errp)
 {
 #ifdef QCRYPTO_INIT_GCRYPT_THREADS
    gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl);
 #endif /* QCRYPTO_INIT_GCRYPT_THREADS */
 #ifdef CONFIG_GNUTLS
    int ret;
    ret = gnutls_global_init();
@@ -139,9 +143,6 @@ int qcrypto_init(Error **errp)
        error_setg(errp, "Unable to initialize gcrypt");
        return -1;
    }
 #ifdef QCRYPTO_INIT_GCRYPT_THREADS
    gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl);
 #endif /* QCRYPTO_INIT_GCRYPT_THREADS */
    gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
 #endif
--- a/default-configs/hppa-linux-user.mak
+++ b/default-configs/hppa-linux-user.mak
@@ -0,0 +1 @@
 # Default configuration for hppa-linux-user
--- a/default-configs/m68k-softmmu.mak
+++ b/default-configs/m68k-softmmu.mak
@@ -1,6 +1,4 @@
 # Default configuration for m68k-softmmu
 include pci.mak
 include usb.mak
 CONFIG_COLDFIRE=y
 CONFIG_PTIMER=y
--- a/default-configs/mips-softmmu-common.mak
+++ b/default-configs/mips-softmmu-common.mak
@@ -17,6 +17,7 @@ CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_ACPI_X86=y
 CONFIG_ACPI_MEMORY_HOTPLUG=y
 CONFIG_ACPI_NVDIMM=y
 CONFIG_ACPI_CPU_HOTPLUG=y
 CONFIG_APM=y
 CONFIG_I8257=y
--- a/default-configs/nios2-linux-user.mak
+++ b/default-configs/nios2-linux-user.mak
@@ -0,0 +1 @@
 # Default configuration for nios2-linux-user
--- a/default-configs/nios2-softmmu.mak
+++ b/default-configs/nios2-softmmu.mak
@@ -0,0 +1,6 @@
 # Default configuration for nios2-softmmu
 CONFIG_NIOS2=y
 CONFIG_SERIAL=y
 CONFIG_PTIMER=y
 CONFIG_ALTERA_TIMER=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -39,6 +39,7 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_PSERIES=y
 CONFIG_POWERNV=y
 CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`# Default configuration for hppa-linux-user`
		`@@ -0,0 +1 @@`
							`# Default configuration for nios2-linux-user`