ui: add ability to specify multiple VNC listen addresses

This change allows the listen address and websocket address options for -vnc to be repeated. This causes the VNC server to listen on multiple addresses. e.g.

  $ $QEMU -vnc vnc=localhost:1,vnc=unix:/tmp/vnc,\
               websocket=127.0.0.1:8080,websocket=[::]:8081

results in listening on 127.0.0.1:5901, 127.0.0.1:8080, ::1:5901, :::8081 & /tmp/vnc

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170203120649.15637-9-berrange@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
util: add iterators for QemuOpts values
2017-02-09 17:28:49 +01:00 · 2017-02-09 17:28:49 +01:00 · 2017-02-09 17:28:49 +01:00 · 2017-02-09 17:28:49 +01:00 · 2017-02-09 17:28:45 +01:00 · 2017-02-08 14:59:37 +01:00
1720 changed files with 91604 additions and 33645 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,18 +6,12 @@
 /config.status
 /config-temp
 /trace-events-all
-/trace/generated-tracers.h
-/trace/generated-tracers.c
-/trace/generated-tracers-dtrace.h
-/trace/generated-tracers.dtrace
 /trace/generated-events.h
 /trace/generated-events.c
 /trace/generated-helpers-wrappers.h
 /trace/generated-helpers.h
 /trace/generated-helpers.c
 /trace/generated-tcg-tracers.h
-/trace/generated-ust-provider.h
-/trace/generated-ust.c
 /ui/shader/texture-blit-frag.h
 /ui/shader/texture-blit-vert.h
 *-timestamp
@@ -40,6 +34,7 @@
 /qmp-marshal.c
 /qemu-doc.html
 /qemu-doc.info
+/qemu-doc.txt
 /qemu-img
 /qemu-nbd
 /qemu-options.def
@@ -60,7 +55,6 @@
 *.a
 *.aux
 *.cp
-*.dvi
 *.exe
 *.msi
 *.dll
@@ -82,10 +76,6 @@
 *.d
 !/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
-*.lo
-*.la
-*.pc
-.libs
 .sdk
 *.gcda
 *.gcno
@@ -109,9 +99,34 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
+/docs/qemu-ga-ref.html
+/docs/qemu-ga-ref.txt
+/docs/qemu-qmp-ref.html
+/docs/qemu-qmp-ref.txt
+docs/qemu-ga-ref.info*
+docs/qemu-qmp-ref.info*
+/qemu-ga-qapi.texi
+/qemu-qapi.texi
+*.tps
 .stgit-*
 cscope.*
 tags
 TAGS
 docker-src.*
 *~
+trace.h
+trace.c
+trace-ust.h
+trace-ust.h
+trace-dtrace.h
+trace-dtrace.dtrace
+trace-root.h
+trace-root.c
+trace-ust-root.h
+trace-ust-root.h
+trace-ust-all.h
+trace-ust-all.c
+trace-dtrace-root.h
+trace-dtrace-root.dtrace
+trace-ust-all.h
+trace-ust-all.c
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,3 +31,6 @@
 [submodule "roms/u-boot"]
 	path = roms/u-boot
 	url = git://git.qemu-project.org/u-boot.git
+[submodule "roms/skiboot"]
+	path = roms/skiboot
+	url = git://git.qemu.org/skiboot.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,6 @@ python:
  - "2.4"
 compiler:
  - gcc
-  - clang
 cache: ccache
 addons:
  apt:
@@ -68,6 +67,9 @@ script:
  - make -j3 && ${TEST_CMD}
 matrix:
  include:
+    # Test with Clang for compile portability
+    - env: CONFIG=""
+      compiler: clang
    # gprof/gcov are GCC features
    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
      compiler: gcc
@@ -101,6 +103,26 @@ matrix:
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
+    # Trusty build with latest stable clang
+    - env: CONFIG=""
+      sudo: required
+      addons:
+      dist: trusty
+      language: generic
+      compiler: none
+      env:
+        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
+        - CONFIG="--cc=clang-3.9 --cxx=clang++-3.9"
+      before_install:
+        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
+        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq -y clang-3.9
+        - sudo apt-get build-dep -qq qemu
+        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
+        - git submodule update --init --recursive
+      before_script:
+        - ./configure ${CONFIG} || cat config.log
    # Using newer GCC with sanitizers
    - addons:
        apt:
--- a/HACKING
+++ b/HACKING
@@ -1,10 +1,28 @@
 1. Preprocessor

+1.1. Variadic macros
+
 For variadic macros, stick with this C99-like syntax:

 #define DPRINTF(fmt, ...)                                       \
    do { printf("IRQ: " fmt, ## __VA_ARGS__); } while (0)

+1.2. Include directives
+
+Order include directives as follows:
+
+#include "qemu/osdep.h"  /* Always first... */
+#include <...>           /* then system headers... */
+#include "..."           /* and finally QEMU headers. */
+
+The "qemu/osdep.h" header contains preprocessor macros that affect the behavior
+of core system headers like <stdint.h>.  It must be the first include so that
+core system headers included by external libraries get the preprocessor macros
+that QEMU depends on.
+
+Do not include "qemu/osdep.h" from header files since the .c file will have
+already included it.
+
 2. C types

 It should be common sense to use the right type, but we have collected
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -63,6 +63,17 @@ W: http://wiki.qemu.org/SecurityProcess
 M: Michael S. Tsirkin <mst@redhat.com>
 L: secalert@redhat.com

+Trivial patches
+---------------
+Trivial patches
+M: Michael Tokarev <mjt@tls.msk.ru>
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+L: qemu-trivial@nongnu.org
+K: ^Subject:.*(?i)trivial
+T: git git://git.corpit.ru/qemu.git trivial-patches
+T: git git://github.com/vivier/qemu.git trivial-patches
+
 Guest CPU cores (TCG):
 ----------------------
 Overall
@@ -95,7 +106,7 @@ F: include/fpu/
 Alpha
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: target-alpha/
+F: target/alpha/
 F: hw/alpha/
 F: tests/tcg/alpha/
 F: disas/alpha.c
@@ -104,9 +115,10 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target-arm/
+F: target/arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
+F: include/hw/cpu/a*mpcore.h
 F: disas/arm.c
 F: disas/arm-a64.cc
 F: disas/libvixl/
@@ -114,16 +126,22 @@ F: disas/libvixl/
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target-cris/
+F: target/cris/
 F: hw/cris/
 F: include/hw/cris/
 F: tests/tcg/cris/
 F: disas/cris.c

+HPPA (PA-RISC)
+M: Richard Henderson <rth@twiddle.net>
+S: Maintained
+F: target/hppa/
+F: disas/hppa.c
+
 LM32
 M: Michael Walle <michael@walle.cc>
 S: Maintained
-F: target-lm32/
+F: target/lm32/
 F: disas/lm32.c
 F: hw/lm32/
 F: hw/*/lm32_*
@@ -133,14 +151,15 @@ F: include/hw/lm32/
 F: tests/tcg/lm32/

 M68K
-S: Orphan
-F: target-m68k/
-F: hw/m68k/
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+F: target/m68k/
+F: disas/m68k.c

 MicroBlaze
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: target-microblaze/
+F: target/microblaze/
 F: hw/microblaze/
 F: disas/microblaze.c

@@ -148,7 +167,7 @@ MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
 M: Yongbok Kim <yongbok.kim@imgtec.com>
 S: Maintained
-F: target-mips/
+F: target/mips/
 F: hw/mips/
 F: hw/misc/mips_*
 F: hw/intc/mips_gic.c
@@ -163,15 +182,23 @@ F: disas/mips.c
 Moxie
 M: Anthony Green <green@moxielogic.com>
 S: Maintained
-F: target-moxie/
+F: target/moxie/
 F: disas/moxie.c
 F: hw/moxie/
 F: default-configs/moxie-softmmu.mak

+NiosII
+M: Chris Wulff <crwulff@gmail.com>
+M: Marek Vasut <marex@denx.de>
+S: Maintained
+F: target/nios2/
+F: hw/nios2/
+F: disas/nios2.c
+
 OpenRISC
 M: Jia Liu <proljc@gmail.com>
 S: Maintained
-F: target-openrisc/
+F: target/openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/

@@ -180,7 +207,7 @@ M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
-F: target-ppc/
+F: target/ppc/
 F: hw/ppc/
 F: include/hw/ppc/
 F: disas/ppc.c
@@ -189,14 +216,14 @@ S390
 M: Richard Henderson <rth@twiddle.net>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-s390x/
+F: target/s390x/
 F: hw/s390x/
 F: disas/s390.c

 SH4
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
-F: target-sh4/
+F: target/sh4/
 F: hw/sh4/
 F: disas/sh4.c
 F: include/hw/sh4/
@@ -205,7 +232,7 @@ SPARC
 M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
 M: Artyom Tarasenko <atar4qemu@gmail.com>
 S: Maintained
-F: target-sparc/
+F: target/sparc/
 F: hw/sparc/
 F: hw/sparc64/
 F: disas/sparc.c
@@ -213,7 +240,7 @@ F: disas/sparc.c
 UniCore32
 M: Guan Xuetao <gxt@mprc.pku.edu.cn>
 S: Maintained
-F: target-unicore32/
+F: target/unicore32/
 F: hw/unicore32/
 F: include/hw/unicore32/

@@ -222,7 +249,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Richard Henderson <rth@twiddle.net>
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
-F: target-i386/
+F: target/i386/
 F: hw/i386/
 F: disas/i386.c

@@ -230,14 +257,14 @@ Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
 W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
 S: Maintained
-F: target-xtensa/
+F: target/xtensa/
 F: hw/xtensa/
 F: tests/tcg/xtensa/

 TriCore
 M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
 S: Maintained
-F: target-tricore/
+F: target/tricore/
 F: hw/tricore/
 F: include/hw/tricore/

@@ -256,26 +283,26 @@ ARM
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
-F: target-arm/kvm.c
+F: target/arm/kvm.c

 MIPS
 M: James Hogan <james.hogan@imgtec.com>
 S: Maintained
-F: target-mips/kvm.c
+F: target/mips/kvm.c

 PPC
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-ppc/kvm.c
+F: target/ppc/kvm.c

 S390
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: target-s390x/kvm.c
-F: target-s390x/ioinst.[ch]
-F: target-s390x/machine.c
+F: target/s390x/kvm.c
+F: target/s390x/ioinst.[ch]
+F: target/s390x/machine.c
 F: hw/intc/s390_flic.c
 F: hw/intc/s390_flic_kvm.c
 F: include/hw/s390x/s390_flic.h
@@ -288,7 +315,7 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: target-i386/kvm.c
+F: target/i386/kvm.c

 Guest CPU Cores (Xen):
 ----------------------
@@ -296,7 +323,7 @@ Guest CPU Cores (Xen):
 X86
 M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
-L: xen-devel@lists.xensource.com
+L: xen-devel@lists.xenproject.org
 S: Supported
 F: xen-*
 F: */xen*
@@ -408,6 +435,7 @@ M: Peter Chubb <peter.chubb@nicta.com.au>
 L: qemu-arm@nongnu.org
 S: Odd fixes
 F: hw/*/imx*
+F: include/hw/*/imx*
 F: hw/arm/kzm.c
 F: include/hw/arm/fsl-imx31.h

@@ -416,6 +444,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/integratorcp.c
+F: hw/misc/arm_integrator_debug.c

 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
@@ -440,6 +469,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/realview*
+F: hw/cpu/realview_mpcore.c
 F: hw/intc/realview_gic.c
 F: include/hw/intc/realview_gic.h

@@ -452,6 +482,7 @@ F: hw/arm/spitz.c
 F: hw/arm/tosa.c
 F: hw/arm/z2.c
 F: hw/*/pxa2xx*
+F: hw/misc/mst_fpga.c
 F: include/hw/arm/pxa.h

 Stellaris
@@ -473,7 +504,8 @@ L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/*/xilinx_*
 F: hw/*/cadence_*
-F: hw/misc/zynq_slcr.c
+F: hw/misc/zynq*
+F: include/hw/misc/zynq*
 X: hw/ssi/xilinx_*

 Xilinx ZynqMP
@@ -490,7 +522,6 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h

 STM32F205
 M: Alistair Francis <alistair@alistair23.me>
@@ -532,6 +563,7 @@ M68K Machines
 an5206
 S: Orphan
 F: hw/m68k/an5206.c
+F: hw/m68k/mcf5206.c

 dummy_m68k
 S: Orphan
@@ -540,6 +572,9 @@ F: hw/m68k/dummy_m68k.c
 mcf5208
 S: Orphan
 F: hw/m68k/mcf5208.c
+F: hw/m68k/mcf_intc.c
+F: hw/char/mcf_uart.c
+F: hw/net/mcf_fec.c

 MicroBlaze Machines
 -------------------
@@ -636,10 +671,13 @@ F: hw/misc/macio/
 F: hw/intc/heathrow_pic.c

 PReP
+M: Hervé Poussineau <hpoussin@reactos.org>
 L: qemu-devel@nongnu.org
 L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Maintained
 F: hw/ppc/prep.c
+F: hw/ppc/prep_systemio.c
+F: hw/ppc/rs6000_mc.c
 F: hw/pci-host/prep.[hc]
 F: hw/isa/pc87312.[hc]
 F: pc-bios/ppc_rom.bin
@@ -656,6 +694,7 @@ F: include/hw/*/xics*
 F: pc-bios/spapr-rtas/*
 F: pc-bios/spapr-rtas.bin
 F: pc-bios/slof.bin
+F: pc-bios/skiboot.lid
 F: docs/specs/ppc-spapr-hcalls.txt
 F: docs/specs/ppc-spapr-hotplug.txt
 F: tests/spapr*
@@ -691,6 +730,8 @@ S: Maintained
 F: hw/sparc/sun4m.c
 F: hw/dma/sparc32_dma.c
 F: hw/dma/sun4m_iommu.c
+F: hw/misc/eccmemctl.c
+F: hw/misc/slavio_misc.c
 F: include/hw/sparc/sparc32_dma.h
 F: include/hw/sparc/sun4m.h
 F: pc-bios/openbios-sparc32
@@ -701,6 +742,13 @@ S: Maintained
 F: hw/sparc64/sun4u.c
 F: pc-bios/openbios-sparc64

+Sun4v
+M: Artyom Tarasenko <atar4qemu@gmail.com>
+S: Maintained
+F: hw/sparc64/sun4v.c
+F: hw/timer/sun4v-rtc.c
+F: include/hw/timer/sun4v-rtc.h
+
 Leon3
 M: Fabien Chouteau <chouteau@adacore.com>
 S: Maintained
@@ -782,6 +830,7 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 M: Marcel Apfelbaum <marcel@redhat.com>
 S: Supported
 F: hw/core/machine.c
+F: hw/core/null-machine.c
 F: include/hw/boards.h

 Xtensa Machines
@@ -860,7 +909,6 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h

 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -1003,6 +1051,13 @@ F: include/sysemu/rng*.h
 F: backends/rng*.c
 F: tests/virtio-rng-test.c

+virtio-crypto
+M: Gonglei <arei.gonglei@huawei.com>
+S: Supported
+F: hw/virtio/virtio-crypto.c
+F: hw/virtio/virtio-crypto-pci.c
+F: include/hw/virtio/virtio-crypto.h
+
 nvme
 M: Keith Busch <keith.busch@intel.com>
 L: qemu-block@nongnu.org
@@ -1061,6 +1116,13 @@ S: Maintained
 F: hw/core/generic-loader.c
 F: include/hw/core/generic-loader.h

+CHRP NVRAM
+M: Thomas Huth <thuth@redhat.com>
+S: Maintained
+F: hw/nvram/chrp_nvram.c
+F: include/hw/nvram/chrp_nvram.h
+F: tests/prom-env-test.c
+
 Subsystems
 ----------
 Audio
@@ -1119,10 +1181,25 @@ F: block/qapi.c
 F: qapi/block*.json
 T: git git://repo.or.cz/qemu/armbru.git block-next

+Dirty Bitmaps
+M: Fam Zheng <famz@redhat.com>
+M: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: util/hbitmap.c
+F: block/dirty-bitmap.c
+F: include/qemu/hbitmap.h
+F: include/block/dirty-bitmap.h
+F: tests/test-hbitmap.c
+F: docs/bitmaps.md
+T: git git://github.com/famz/qemu.git bitmaps
+T: git git://github.com/jnsnow/qemu.git bitmaps
+
 Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
 S: Maintained
-F: qemu-char.c
+F: chardev/
 F: backends/msmouse.c
 F: backends/testdev.c

@@ -1248,6 +1325,12 @@ S: Maintained
 F: backends/hostmem*.c
 F: include/sysemu/hostmem.h

+Cryptodev Backends
+M: Gonglei <arei.gonglei@huawei.com>
+S: Maintained
+F: include/sysemu/cryptodev*.h
+F: backends/cryptodev*.c
+
 QAPI
 M: Markus Armbruster <armbru@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1349,6 +1432,7 @@ F: scripts/checkpatch.pl
 Migration
 M: Juan Quintela <quintela@redhat.com>
 M: Amit Shah <amit.shah@redhat.com>
+M: Dr. David Alan Gilbert <dgilbert@redhat.com>
 S: Maintained
 F: include/migration/
 F: migration/
@@ -1413,6 +1497,14 @@ F: util/uuid.c
 F: include/qemu/uuid.h
 F: tests/test-uuid.c

+COLO Framework
+M: zhanghailiang <zhang.zhanghailiang@huawei.com>
+S: Maintained
+F: migration/colo*
+F: include/migration/colo.h
+F: include/migration/failover.h
+F: docs/COLO-FT.txt
+
 COLO Proxy
 M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
 M: Li Zhijian <lizhijian@cn.fujitsu.com>
@@ -1429,6 +1521,7 @@ M: Riku Voipio <riku.voipio@iki.fi>
 S: Maintained
 F: thunk.c
 F: user-exec.c
+F: user-exec-stub.c

 BSD user
 S: Orphan
@@ -1483,8 +1576,8 @@ F: tcg/mips/
 F: disas/mips.c

 PPC
-M: Vassili Karpov (malc) <av1474@comtv.ru>
-S: Maintained
+M: Richard Henderson <rth@twiddle.net>
+S: Odd Fixes
 F: tcg/ppc/
 F: disas/ppc.c

@@ -1507,28 +1600,6 @@ F: tcg/tci/
 F: tci.c
 F: disas/tci.c

-Stable branches
---------------
-Stable 1.0
-L: qemu-stable@nongnu.org
-T: git git://git.qemu-project.org/qemu-stable-1.0.git
-S: Orphan
-
-Stable 0.15
-L: qemu-stable@nongnu.org
-T: git git://git.qemu-project.org/qemu-stable-0.15.git
-S: Orphan
-
-Stable 0.14
-L: qemu-stable@nongnu.org
-T: git git://git.qemu-project.org/qemu-stable-0.14.git
-S: Orphan
-
-Stable 0.10
-L: qemu-stable@nongnu.org
-T: git git://git.qemu-project.org/qemu-stable-0.10.git
-S: Orphan
-
 Block drivers
 -------------
 VMDK
@@ -1575,6 +1646,7 @@ M: Peter Lieven <pl@kamp.de>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/iscsi.c
+F: block/iscsi-opts.c

 NFS
 M: Jeff Cody <jcody@redhat.com>
@@ -1675,9 +1747,9 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/linux-aio.c
 F: include/block/raw-aio.h
-F: block/raw-posix.c
-F: block/raw-win32.c
-F: block/raw_bsd.c
+F: block/raw-format.c
+F: block/file-posix.c
+F: block/file-win32.c
 F: block/win32-aio.c

 qcow2
--- a/Makefile
+++ b/Makefile
@@ -56,32 +56,143 @@ GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
 GENERATED_HEADERS += qmp-introspect.h
 GENERATED_SOURCES += qmp-introspect.c

-GENERATED_HEADERS += trace/generated-tracers.h
-ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
-GENERATED_HEADERS += trace/generated-tracers-dtrace.h
-endif
-GENERATED_SOURCES += trace/generated-tracers.c
-
 GENERATED_HEADERS += trace/generated-tcg-tracers.h

 GENERATED_HEADERS += trace/generated-helpers-wrappers.h
 GENERATED_HEADERS += trace/generated-helpers.h
 GENERATED_SOURCES += trace/generated-helpers.c

-ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
-GENERATED_HEADERS += trace/generated-ust-provider.h
-GENERATED_SOURCES += trace/generated-ust.c
+ifdef CONFIG_TRACE_UST
+GENERATED_HEADERS += trace-ust-all.h
+GENERATED_SOURCES += trace-ust-all.c
 endif

 GENERATED_HEADERS += module_block.h

+TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
+TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
+TRACE_DTRACE =
+ifdef CONFIG_TRACE_DTRACE
+TRACE_HEADERS += trace-dtrace-root.h $(trace-events-subdirs:%=%/trace-dtrace.h)
+TRACE_DTRACE += trace-dtrace-root.dtrace $(trace-events-subdirs:%=%/trace-dtrace.dtrace)
+endif
+ifdef CONFIG_TRACE_UST
+TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
+endif
+
+GENERATED_HEADERS += $(TRACE_HEADERS)
+GENERATED_SOURCES += $(TRACE_SOURCES)
+
+trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
+
+%/trace.h: %/trace.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace.c: %/trace.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace.c-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=c \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-ust.h: %/trace-ust.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace-ust.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-dtrace.dtrace: %/trace-dtrace.dtrace-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace-dtrace.dtrace-timestamp: $(SRC_PATH)/%/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=d \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y)
+	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
+
+%/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y)
+
+
+trace-root.h: trace-root.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-root.c: trace-root.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-root.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=c \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-root.h: trace-ust-root.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-all.h: trace-ust-all.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-all.h-timestamp: $(trace-events-files) $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-all.c: trace-ust-all.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-all.c-timestamp: $(trace-events-files) $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
+		--format=ust-events-c \
+		--backends=$(TRACE_BACKENDS) \
+		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-dtrace-root.dtrace: trace-dtrace-root.dtrace-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=d \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-dtrace-root.h: trace-dtrace-root.dtrace
+	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
+
+trace-dtrace-root.o: trace-dtrace-root.dtrace
+
 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
 configure: ;

-.PHONY: all clean cscope distclean dvi html info install install-doc \
-	pdf recurse-all speed test dist msi FORCE
+.PHONY: all clean cscope distclean html info install install-doc \
+	pdf txt recurse-all speed test dist msi FORCE

 $(call set-vpath, $(SRC_PATH))

@@ -90,7 +201,9 @@ LIBS+=-lz $(LIBS_TOOLS)
 HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
+DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
+DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7
+DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -145,10 +258,12 @@ endif

 dummy := $(call unnest-vars,, \
                stub-obj-y \
+                chardev-obj-y \
                util-obj-y \
                qga-obj-y \
                ivshmem-client-obj-y \
                ivshmem-server-obj-y \
+                libvhost-user-obj-y \
                qga-vss-dll-obj-y \
                block-obj-y \
                block-obj-m \
@@ -157,7 +272,8 @@ dummy := $(call unnest-vars,, \
                qom-obj-y \
                io-obj-y \
                common-obj-y \
-                common-obj-m)
+                common-obj-m \
+                trace-obj-y)

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile.include
@@ -220,7 +336,8 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
+	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -231,12 +348,10 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o")
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
-	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo")

-Makefile: $(version-obj-y) $(version-lobj-y)
+Makefile: $(version-obj-y)

 ######################################################################
 # Build libraries
@@ -246,15 +361,17 @@ libqemuutil.a: $(util-obj-y)

 ######################################################################

+COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
+
 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)

-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap

 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
@@ -266,6 +383,7 @@ qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated
 gen-out-type = $(subst .,-,$(suffix $@))

 qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
+qapi-py += $(SRC_PATH)/scripts/qapi2texi.py

 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -318,7 +436,7 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)

-qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
+qemu-ga$(EXESUF): $(qga-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)

 ifdef QEMU_GA_MSI_ENABLED
@@ -343,9 +461,9 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif

-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
-ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)

 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -358,10 +476,9 @@ clean:
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
 	rm -f *.msi
-	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
-	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
 	@# May not be present in GENERATED_HEADERS
@@ -389,12 +506,17 @@ distclean: clean
 	rm -f config-all-devices.mak config-all-disas.mak config.status
 	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
-	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
+	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
 	rm -f qemu-doc.log qemu-doc.pdf qemu-doc.pg qemu-doc.toc qemu-doc.tp
-	rm -f qemu-doc.vr
+	rm -f qemu-doc.vr qemu-doc.txt
 	rm -f config.log
 	rm -f linux-headers/asm
+	rm -f qemu-ga-qapi.texi qemu-qapi.texi
+	rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
+	rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
+	rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
+	rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
        done
@@ -421,7 +543,7 @@ qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
 s390-ccw.img \
-spapr-rtas.bin slof.bin \
+spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
 u-boot.e500
 else
@@ -431,10 +553,14 @@ endif
 install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)"
-	$(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
+	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7"
+	$(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7"
 ifneq ($(TOOLS),)
 	$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
@@ -442,6 +568,9 @@ ifneq ($(TOOLS),)
 endif
 ifneq (,$(findstring qemu-ga,$(TOOLS)))
 	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
+	$(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
 endif
 endif
 ifdef CONFIG_VIRTFS
@@ -530,20 +659,22 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \

 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-headers --no-split --number-sections
-TEXIFLAG=$(if $(V),,--quiet)
-%.dvi: %.texi
-	$(call quiet-command,texi2dvi $(TEXIFLAG) -I . $<,"GEN","$@")
+MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
+TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'

 %.html: %.texi
-	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --html $< -o $@, \
-	"GEN","$@")
+	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
+	--html $< -o $@,"GEN","$@")

 %.info: %.texi
-	$(call quiet-command,$(MAKEINFO) $< -o $@,"GEN","$@")
+	$(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@")
+
+%.txt: %.texi
+	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
+	--plaintext $< -o $@,"GEN","$@")

 %.pdf: %.texi
-	$(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<,"GEN","$@")
+	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")

 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -557,47 +688,36 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")

+qemu-qapi.texi: $(qapi-modules) $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
+
+qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
+
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
-	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
-	  "GEN","$@")
 qemu.1: qemu-option-trace.texi
-
 qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
-	  $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
-	  "GEN","$@")
-
 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \
-	  $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
-	  "GEN","$@")
-
 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
-	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
-	  "GEN","$@")
-
 qemu-ga.8: qemu-ga.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
-	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
-	  "GEN","$@")

-dvi: qemu-doc.dvi
-html: qemu-doc.html
-info: qemu-doc.info
-pdf: qemu-doc.pdf
+html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html
+info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info
+pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
+txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt

-qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
+qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
 	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
 	qemu-monitor-info.texi

+docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
+docs/qemu-ga-ref.texi qemu-ga-qapi.texi
+
+docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
+docs/qemu-qmp-ref.texi qemu-qapi.texi
+
+
 ifdef CONFIG_WIN32

 INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
@@ -660,6 +780,10 @@ ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fa
 Makefile: $(GENERATED_HEADERS)
 endif

+.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
+	$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
+	$(TRACE_DTRACE) $(TRACE_DTRACE:%=%-timestamp)
+
 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
 -include $(wildcard *.d tests/*.d)
@@ -690,12 +814,12 @@ help:
 	@echo  '  docker          - Help about targets running tests inside Docker containers'
 	@echo  ''
 	@echo  'Documentation targets:'
-	@echo  '  dvi html info pdf'
+	@echo  '  html info pdf txt'
 	@echo  '                  - Build documentation in specified format'
 	@echo  ''
 ifdef CONFIG_WIN32
 	@echo  'Windows targets:'
-	@echo  '  installer       - Build NSIS-based installer for qemu-ga'
+	@echo  '  installer       - Build NSIS-based installer for QEMU'
 ifdef QEMU_GA_MSI_ENABLED
 	@echo  '  msi             - Build MSI-based installer for qemu-ga'
 endif
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -4,6 +4,8 @@ stub-obj-y = stubs/ crypto/
 util-obj-y = util/ qobject/ qapi/
 util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

+chardev-obj-y = chardev/
+
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

@@ -51,8 +53,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration/
-common-obj-y += qemu-char.o #aio.o
-common-obj-y += page_cache.o
+common-obj-y += page_cache.o #aio.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

@@ -89,7 +90,7 @@ endif

 #######################################################################
 # Target-independent parts used in system and user emulation
-common-obj-y += tcg-runtime.o cpus-common.o
+common-obj-y += cpus-common.o
 common-obj-y += hw/
 common-obj-y += qom/
 common-obj-y += disas/
@@ -97,7 +98,6 @@ common-obj-y += disas/
 ######################################################################
 # Resource file for Windows executables
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
-version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo

 ######################################################################
 # tracing
@@ -116,50 +116,61 @@ qga-vss-dll-obj-y = qga/
 # contrib
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/

 ######################################################################
-trace-events-y = trace-events
-trace-events-y += util/trace-events
-trace-events-y += crypto/trace-events
-trace-events-y += io/trace-events
-trace-events-y += migration/trace-events
-trace-events-y += block/trace-events
-trace-events-y += hw/block/trace-events
-trace-events-y += hw/char/trace-events
-trace-events-y += hw/intc/trace-events
-trace-events-y += hw/net/trace-events
-trace-events-y += hw/virtio/trace-events
-trace-events-y += hw/audio/trace-events
-trace-events-y += hw/misc/trace-events
-trace-events-y += hw/usb/trace-events
-trace-events-y += hw/scsi/trace-events
-trace-events-y += hw/nvram/trace-events
-trace-events-y += hw/display/trace-events
-trace-events-y += hw/input/trace-events
-trace-events-y += hw/timer/trace-events
-trace-events-y += hw/dma/trace-events
-trace-events-y += hw/sparc/trace-events
-trace-events-y += hw/sd/trace-events
-trace-events-y += hw/isa/trace-events
-trace-events-y += hw/mem/trace-events
-trace-events-y += hw/i386/trace-events
-trace-events-y += hw/9pfs/trace-events
-trace-events-y += hw/ppc/trace-events
-trace-events-y += hw/pci/trace-events
-trace-events-y += hw/s390x/trace-events
-trace-events-y += hw/vfio/trace-events
-trace-events-y += hw/acpi/trace-events
-trace-events-y += hw/arm/trace-events
-trace-events-y += hw/alpha/trace-events
-trace-events-y += ui/trace-events
-trace-events-y += audio/trace-events
-trace-events-y += net/trace-events
-trace-events-y += target-arm/trace-events
-trace-events-y += target-i386/trace-events
-trace-events-y += target-sparc/trace-events
-trace-events-y += target-s390x/trace-events
-trace-events-y += target-ppc/trace-events
-trace-events-y += qom/trace-events
-trace-events-y += linux-user/trace-events
-trace-events-y += qapi/trace-events
+trace-events-subdirs =
+trace-events-subdirs += util
+trace-events-subdirs += crypto
+trace-events-subdirs += io
+trace-events-subdirs += migration
+trace-events-subdirs += block
+trace-events-subdirs += hw/block
+trace-events-subdirs += hw/block/dataplane
+trace-events-subdirs += hw/char
+trace-events-subdirs += hw/intc
+trace-events-subdirs += hw/net
+trace-events-subdirs += hw/virtio
+trace-events-subdirs += hw/audio
+trace-events-subdirs += hw/misc
+trace-events-subdirs += hw/usb
+trace-events-subdirs += hw/scsi
+trace-events-subdirs += hw/nvram
+trace-events-subdirs += hw/display
+trace-events-subdirs += hw/input
+trace-events-subdirs += hw/timer
+trace-events-subdirs += hw/dma
+trace-events-subdirs += hw/sparc
+trace-events-subdirs += hw/sd
+trace-events-subdirs += hw/isa
+trace-events-subdirs += hw/mem
+trace-events-subdirs += hw/i386
+trace-events-subdirs += hw/i386/xen
+trace-events-subdirs += hw/9pfs
+trace-events-subdirs += hw/ppc
+trace-events-subdirs += hw/pci
+trace-events-subdirs += hw/s390x
+trace-events-subdirs += hw/vfio
+trace-events-subdirs += hw/acpi
+trace-events-subdirs += hw/arm
+trace-events-subdirs += hw/alpha
+trace-events-subdirs += hw/xen
+trace-events-subdirs += ui
+trace-events-subdirs += audio
+trace-events-subdirs += net
+trace-events-subdirs += target/arm
+trace-events-subdirs += target/i386
+trace-events-subdirs += target/sparc
+trace-events-subdirs += target/s390x
+trace-events-subdirs += target/ppc
+trace-events-subdirs += qom
+trace-events-subdirs += linux-user
+trace-events-subdirs += qapi
+
+trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
+
+trace-obj-y = trace-root.o
+trace-obj-y += $(trace-events-subdirs:%=%/trace.o)
+trace-obj-$(CONFIG_TRACE_UST) += trace-ust-all.o
+trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace-root.o
+trace-obj-$(CONFIG_TRACE_DTRACE) += $(trace-events-subdirs:%=%/trace-dtrace.o)
--- a/Makefile.target
+++ b/Makefile.target
@@ -11,7 +11,7 @@ $(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
-QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -DNEED_CPU_H
+QEMU_CFLAGS += -I.. -I$(SRC_PATH)/target/$(TARGET_BASE_ARCH) -DNEED_CPU_H

 QEMU_CFLAGS+=-I$(SRC_PATH)/include

@@ -50,6 +50,7 @@ endif

 $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(bindir)/$(QEMU_PROG) \
@@ -59,6 +60,7 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(realpath .)/$(QEMU_PROG) \
@@ -68,6 +70,7 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=simpletrace-stap \
 		--backends=$(TRACE_BACKENDS) \
 		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
@@ -76,6 +79,7 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 else
 stap:
 endif
+.PHONY: stap

 all: $(PROGS) stap

@@ -92,9 +96,11 @@ obj-$(CONFIG_TCG_INTERPRETER) += tci.o
 obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
-obj-y += target-$(TARGET_BASE_ARCH)/
+obj-y += target/$(TARGET_BASE_ARCH)/
 obj-y += disas.o
+obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
+obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
@@ -113,7 +119,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
             -I$(SRC_PATH)/linux-user

 obj-y += linux-user/
-obj-y += gdbstub.o thunk.o user-exec.o
+obj-y += gdbstub.o thunk.o user-exec.o user-exec-stub.o

 endif #CONFIG_LINUX_USER

@@ -126,7 +132,7 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
 			 -I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)

 obj-y += bsd-user/
-obj-y += gdbstub.o user-exec.o
+obj-y += gdbstub.o user-exec.o user-exec-stub.o

 endif #CONFIG_BSD_USER

@@ -169,31 +175,36 @@ all-obj-y := $(obj-y)
 target-obj-y :=
 block-obj-y :=
 common-obj-y :=
+chardev-obj-y :=
 include $(SRC_PATH)/Makefile.objs
 dummy := $(call unnest-vars,,target-obj-y)
 target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
+               chardev-obj-y \
               crypto-obj-y \
               crypto-aes-obj-y \
               qom-obj-y \
               io-obj-y \
               common-obj-y \
-               common-obj-m)
+               common-obj-m \
+               trace-obj-y)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
 all-obj-y += $(qom-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) $(chardev-obj-y)
 all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

+COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
+
 # build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
+$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $(filter-out %.mak, $^))
 ifdef CONFIG_DARWIN
 	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@")
--- a/1
+++ b/1
@@ -45,6 +45,7 @@ of other UNIX targets. The simple steps to build QEMU are:
 Additional information can also be found online via the QEMU website:

  http://qemu-project.org/Hosts/Linux
+  http://qemu-project.org/Hosts/Mac
  http://qemu-project.org/Hosts/W32


--- a/2
+++ b/2
@@ -1 +1 @@
-2.7.50
+2.8.50
--- a/accel.c
+++ b/accel.c
@@ -33,7 +33,6 @@
 #include "sysemu/qtest.h"
 #include "hw/xen/xen.h"
 #include "qom/object.h"
-#include "hw/boards.h"

 int tcg_tb_size;
 static bool tcg_allowed = true;
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -16,8 +16,10 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
-#include "qemu/queue.h"
+#include "qemu/rcu_queue.h"
 #include "qemu/sockets.h"
+#include "qemu/cutils.h"
+#include "trace-root.h"
 #ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif
@@ -27,6 +29,9 @@ struct AioHandler
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
+    AioPollFn *io_poll;
+    IOHandler *io_poll_begin;
+    IOHandler *io_poll_end;
    int deleted;
    void *opaque;
    bool is_external;
@@ -61,7 +66,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
    AioHandler *node;
    struct epoll_event event;

-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int r;
        if (node->deleted || !node->pfd.events) {
            continue;
@@ -81,29 +86,22 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
 {
    struct epoll_event event;
    int r;
+    int ctl;

    if (!ctx->epoll_enabled) {
        return;
    }
    if (!node->pfd.events) {
-        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
-        if (r) {
-            aio_epoll_disable(ctx);
-        }
+        ctl = EPOLL_CTL_DEL;
    } else {
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
-        if (is_new) {
-            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
-            if (r) {
-                aio_epoll_disable(ctx);
-            }
-        } else {
-            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
-            if (r) {
-                aio_epoll_disable(ctx);
-            }
-        }
+        ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+    }
+
+    r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
+    if (r) {
+        aio_epoll_disable(ctx);
    }
 }

@@ -207,45 +205,61 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
+                        AioPollFn *io_poll,
                        void *opaque)
 {
    AioHandler *node;
    bool is_new = false;
    bool deleted = false;

+    qemu_lockcnt_lock(&ctx->list_lock);
+
    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            g_source_remove_poll(&ctx->source, &node->pfd);
+    if (!io_read && !io_write && !io_poll) {
+        if (node == NULL) {
+            qemu_lockcnt_unlock(&ctx->list_lock);
+            return;
+        }

-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
-                 */
-                QLIST_REMOVE(node, node);
-                deleted = true;
-            }
+        g_source_remove_poll(&ctx->source, &node->pfd);
+
+        /* If the list is being walked, just mark the node as deleted */
+        if (qemu_lockcnt_count(&ctx->list_lock)) {
+            node->deleted = 1;
+            node->pfd.revents = 0;
+        } else {
+            /* Otherwise, delete it for real.  We can't just mark it as
+             * deleted because deleted nodes are only cleaned up while
+             * no one is walking the handlers list.
+             */
+            QLIST_REMOVE(node, node);
+            deleted = true;
+        }
+
+        if (!node->io_poll) {
+            ctx->poll_disable_cnt--;
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
            is_new = true;
+
+            ctx->poll_disable_cnt += !io_poll;
+        } else {
+            ctx->poll_disable_cnt += !io_poll - !node->io_poll;
        }
+
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
+        node->io_poll = io_poll;
        node->opaque = opaque;
        node->is_external = is_external;

@@ -254,71 +268,132 @@ void aio_set_fd_handler(AioContext *ctx,
    }

    aio_epoll_update(ctx, node, is_new);
+    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
+
    if (deleted) {
        g_free(node);
    }
 }

+void aio_set_fd_poll(AioContext *ctx, int fd,
+                     IOHandler *io_poll_begin,
+                     IOHandler *io_poll_end)
+{
+    AioHandler *node = find_aio_handler(ctx, fd);
+
+    if (!node) {
+        return;
+    }
+
+    node->io_poll_begin = io_poll_begin;
+    node->io_poll_end = io_poll_end;
+}
+
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
-                            EventNotifierHandler *io_read)
+                            EventNotifierHandler *io_read,
+                            AioPollFn *io_poll)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       is_external, (IOHandler *)io_read, NULL, notifier);
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
+                       (IOHandler *)io_read, NULL, io_poll, notifier);
 }

+void aio_set_event_notifier_poll(AioContext *ctx,
+                                 EventNotifier *notifier,
+                                 EventNotifierHandler *io_poll_begin,
+                                 EventNotifierHandler *io_poll_end)
+{
+    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
+                    (IOHandler *)io_poll_begin,
+                    (IOHandler *)io_poll_end);
+}
+
+static void poll_set_started(AioContext *ctx, bool started)
+{
+    AioHandler *node;
+
+    if (started == ctx->poll_started) {
+        return;
+    }
+
+    ctx->poll_started = started;
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        IOHandler *fn;
+
+        if (node->deleted) {
+            continue;
+        }
+
+        if (started) {
+            fn = node->io_poll_begin;
+        } else {
+            fn = node->io_poll_end;
+        }
+
+        if (fn) {
+            fn(node->opaque);
+        }
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+}
+
+
 bool aio_prepare(AioContext *ctx)
 {
+    /* Poll mode cannot be used with glib's event loop, disable it. */
+    poll_set_started(ctx, false);
+
    return false;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        int revents;
-
-        revents = node->pfd.revents & node->pfd.events;
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
-            aio_node_check(ctx, node->is_external)) {
-            return true;
-        }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
-            aio_node_check(ctx, node->is_external)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool aio_dispatch(AioContext *ctx)
-{
-    AioHandler *node;
-    bool progress = false;
-
-    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        progress = true;
-    }
+    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    node = QLIST_FIRST(&ctx->aio_handlers);
-    while (node) {
-        AioHandler *tmp;
+    qemu_lockcnt_inc(&ctx->list_lock);
+
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

-        ctx->walking_handlers++;
+        revents = node->pfd.revents & node->pfd.events;
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
+            aio_node_check(ctx, node->is_external)) {
+            result = true;
+            break;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
+            aio_node_check(ctx, node->is_external)) {
+            result = true;
+            break;
+        }
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
+    return result;
+}
+
+static bool aio_dispatch_handlers(AioContext *ctx)
+{
+    AioHandler *node, *tmp;
+    bool progress = false;
+
+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+        int revents;

        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;
@@ -342,17 +417,38 @@ bool aio_dispatch(AioContext *ctx)
            progress = true;
        }

-        tmp = node;
-        node = QLIST_NEXT(node, node);
-
-        ctx->walking_handlers--;
-
-        if (!ctx->walking_handlers && tmp->deleted) {
-            QLIST_REMOVE(tmp, node);
-            g_free(tmp);
+        if (node->deleted) {
+            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+                QLIST_REMOVE(node, node);
+                g_free(node);
+                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+            }
        }
    }

+    qemu_lockcnt_dec(&ctx->list_lock);
+    return progress;
+}
+
+/*
+ * Note that dispatch_fds == false has the side effect of postponing the
+ * freeing of deleted handlers.
+ */
+bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
+{
+    bool progress;
+
+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for aio_poll loops).
+     */
+    progress = aio_bh_poll(ctx);
+
+    if (dispatch_fds) {
+        progress |= aio_dispatch_handlers(ctx);
+    }
+
    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

@@ -405,12 +501,101 @@ static void add_pollfd(AioHandler *node)
    npfd++;
 }

+static bool run_poll_handlers_once(AioContext *ctx)
+{
+    bool progress = false;
+    AioHandler *node;
+
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+        if (!node->deleted && node->io_poll &&
+            aio_node_check(ctx, node->is_external) &&
+            node->io_poll(node->opaque)) {
+            progress = true;
+        }
+
+        /* Caller handles freeing deleted nodes.  Don't do it here. */
+    }
+
+    return progress;
+}
+
+/* run_poll_handlers:
+ * @ctx: the AioContext
+ * @max_ns: maximum time to poll for, in nanoseconds
+ *
+ * Polls for a given time.
+ *
+ * Note that ctx->notify_me must be non-zero so this function can detect
+ * aio_notify().
+ *
+ * Note that the caller must have incremented ctx->list_lock.
+ *
+ * Returns: true if progress was made, false otherwise
+ */
+static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
+{
+    bool progress;
+    int64_t end_time;
+
+    assert(ctx->notify_me);
+    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);
+    assert(ctx->poll_disable_cnt == 0);
+
+    trace_run_poll_handlers_begin(ctx, max_ns);
+
+    end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
+
+    do {
+        progress = run_poll_handlers_once(ctx);
+    } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
+
+    trace_run_poll_handlers_end(ctx, progress);
+
+    return progress;
+}
+
+/* try_poll_mode:
+ * @ctx: the AioContext
+ * @blocking: busy polling is only attempted when blocking is true
+ *
+ * ctx->notify_me must be non-zero so this function can detect aio_notify().
+ *
+ * Note that the caller must have incremented ctx->list_lock.
+ *
+ * Returns: true if progress was made, false otherwise
+ */
+static bool try_poll_mode(AioContext *ctx, bool blocking)
+{
+    if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
+        /* See qemu_soonest_timeout() uint64_t hack */
+        int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
+                             (uint64_t)ctx->poll_ns);
+
+        if (max_ns) {
+            poll_set_started(ctx, true);
+
+            if (run_poll_handlers(ctx, max_ns)) {
+                return true;
+            }
+        }
+    }
+
+    poll_set_started(ctx, false);
+
+    /* Even if we don't run busy polling, try polling once in case it can make
+     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
+     */
+    return run_poll_handlers_once(ctx);
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    int i, ret;
+    int i;
+    int ret = 0;
    bool progress;
    int64_t timeout;
+    int64_t start = 0;

    aio_context_acquire(ctx);
    progress = false;
@@ -426,43 +611,93 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

-    ctx->walking_handlers++;
+    qemu_lockcnt_inc(&ctx->list_lock);

-    assert(npfd == 0);
+    if (ctx->poll_max_ns) {
+        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }

-    /* fill pollfds */
+    if (try_poll_mode(ctx, blocking)) {
+        progress = true;
+    } else {
+        assert(npfd == 0);

-    if (!aio_epoll_enabled(ctx)) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-            if (!node->deleted && node->pfd.events
-                && aio_node_check(ctx, node->is_external)) {
-                add_pollfd(node);
+        /* fill pollfds */
+
+        if (!aio_epoll_enabled(ctx)) {
+            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
+                if (!node->deleted && node->pfd.events
+                    && aio_node_check(ctx, node->is_external)) {
+                    add_pollfd(node);
+                }
            }
        }
+
+        timeout = blocking ? aio_compute_timeout(ctx) : 0;
+
+        /* wait until next event */
+        if (timeout) {
+            aio_context_release(ctx);
+        }
+        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+            AioHandler epoll_handler;
+
+            epoll_handler.pfd.fd = ctx->epollfd;
+            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+            npfd = 0;
+            add_pollfd(&epoll_handler);
+            ret = aio_epoll(ctx, pollfds, npfd, timeout);
+        } else  {
+            ret = qemu_poll_ns(pollfds, npfd, timeout);
+        }
+        if (timeout) {
+            aio_context_acquire(ctx);
+        }
    }

-    timeout = blocking ? aio_compute_timeout(ctx) : 0;
-
-    /* wait until next event */
-    if (timeout) {
-        aio_context_release(ctx);
-    }
-    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
-        AioHandler epoll_handler;
-
-        epoll_handler.pfd.fd = ctx->epollfd;
-        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
-        npfd = 0;
-        add_pollfd(&epoll_handler);
-        ret = aio_epoll(ctx, pollfds, npfd, timeout);
-    } else  {
-        ret = qemu_poll_ns(pollfds, npfd, timeout);
-    }
    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }
-    if (timeout) {
-        aio_context_acquire(ctx);
+
+    /* Adjust polling time */
+    if (ctx->poll_max_ns) {
+        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
+
+        if (block_ns <= ctx->poll_ns) {
+            /* This is the sweet spot, no adjustment needed */
+        } else if (block_ns > ctx->poll_max_ns) {
+            /* We'd have to poll for too long, poll less */
+            int64_t old = ctx->poll_ns;
+
+            if (ctx->poll_shrink) {
+                ctx->poll_ns /= ctx->poll_shrink;
+            } else {
+                ctx->poll_ns = 0;
+            }
+
+            trace_poll_shrink(ctx, old, ctx->poll_ns);
+        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+                   block_ns < ctx->poll_max_ns) {
+            /* There is room to grow, poll longer */
+            int64_t old = ctx->poll_ns;
+            int64_t grow = ctx->poll_grow;
+
+            if (grow == 0) {
+                grow = 2;
+            }
+
+            if (ctx->poll_ns) {
+                ctx->poll_ns *= grow;
+            } else {
+                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+            }
+
+            if (ctx->poll_ns > ctx->poll_max_ns) {
+                ctx->poll_ns = ctx->poll_max_ns;
+            }
+
+            trace_poll_grow(ctx, old, ctx->poll_ns);
+        }
    }

    aio_notify_accept(ctx);
@@ -475,10 +710,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    npfd = 0;
-    ctx->walking_handlers--;
+    qemu_lockcnt_dec(&ctx->list_lock);

    /* Run dispatch even if there were no readable fds to run timers */
-    if (aio_dispatch(ctx)) {
+    if (aio_dispatch(ctx, ret > 0)) {
        progress = true;
    }

@@ -489,6 +724,13 @@ bool aio_poll(AioContext *ctx, bool blocking)

 void aio_context_setup(AioContext *ctx)
 {
+    /* TODO remove this in final patch submission */
+    if (getenv("QEMU_AIO_POLL_MAX_NS")) {
+        fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
+                "been replaced with -object iothread,poll-max-ns=NUM\n");
+        exit(1);
+    }
+
 #ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
@@ -500,3 +742,17 @@ void aio_context_setup(AioContext *ctx)
    }
 #endif
 }
+
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
+{
+    /* No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
+     */
+    ctx->poll_max_ns = max_ns;
+    ctx->poll_ns = 0;
+    ctx->poll_grow = grow;
+    ctx->poll_shrink = shrink;
+
+    aio_notify(ctx);
+}
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -20,6 +20,8 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu/rcu_queue.h"

 struct AioHandler {
    EventNotifier *e;
@@ -38,11 +40,13 @@ void aio_set_fd_handler(AioContext *ctx,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
+                        AioPollFn *io_poll,
                        void *opaque)
 {
    /* fd is a SOCKET in our case */
    AioHandler *node;

+    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
@@ -52,14 +56,14 @@ void aio_set_fd_handler(AioContext *ctx,
    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
+            /* If aio_poll is in progress, just mark the node as deleted */
+            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
+                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -72,7 +76,7 @@ void aio_set_fd_handler(AioContext *ctx,
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        }

        node->pfd.events = 0;
@@ -97,16 +101,26 @@ void aio_set_fd_handler(AioContext *ctx,
                       FD_CONNECT | FD_WRITE | FD_OOB);
    }

+    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

+void aio_set_fd_poll(AioContext *ctx, int fd,
+                     IOHandler *io_poll_begin,
+                     IOHandler *io_poll_end)
+{
+    /* Not implemented: @io_poll_begin and @io_poll_end are ignored and
+     * nothing in @ctx is changed.
+     */
+}
+
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
-                            EventNotifierHandler *io_notify)
+                            EventNotifierHandler *io_notify,
+                            AioPollFn *io_poll)
 {
    AioHandler *node;

+    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
@@ -118,14 +132,14 @@ void aio_set_event_notifier(AioContext *ctx,
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
+            /* If aio_poll is in progress, just mark the node as deleted */
+            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
+                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
@@ -139,7 +153,7 @@ void aio_set_event_notifier(AioContext *ctx,
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
@@ -147,9 +161,18 @@ void aio_set_event_notifier(AioContext *ctx,
        node->io_notify = io_notify;
    }

+    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
 }

+void aio_set_event_notifier_poll(AioContext *ctx,
+                                 EventNotifier *notifier,
+                                 EventNotifierHandler *io_poll_begin,
+                                 EventNotifierHandler *io_poll_end)
+{
+    /* Not implemented: @io_poll_begin and @io_poll_end are ignored and
+     * nothing in @ctx or @notifier is changed.
+     */
+}
+
 bool aio_prepare(AioContext *ctx)
 {
    static struct timeval tv0;
@@ -157,10 +180,16 @@ bool aio_prepare(AioContext *ctx)
    bool have_select_revents = false;
    fd_set rfds, wfds;

+    /*
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+
    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
@@ -170,7 +199,7 @@ bool aio_prepare(AioContext *ctx)
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
@@ -184,45 +213,55 @@ bool aio_prepare(AioContext *ctx)
        }
    }

+    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
 }

 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
+    bool result = false;

-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    /*
+     * Report whether any handler has recorded events (pfd.revents) for
+     * which it also has a matching callback: io_notify for any event,
+     * io_read for G_IO_IN, io_write for G_IO_OUT.
+     *
+     * We have to walk very carefully in case aio_set_fd_handler is
+     * called while we're walking.
+     *
+     * While the list_lock count taken below is non-zero,
+     * aio_set_fd_handler() and aio_set_event_notifier() only mark a node
+     * as deleted instead of freeing it.  The loop therefore leaves with
+     * "break" rather than returning directly, so that the count is always
+     * dropped again before the result is returned.
+     */
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
-            return true;
+            result = true;
+            break;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            return true;
+            result = true;
+            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            return true;
+            result = true;
+            break;
        }
    }

-    return false;
+    qemu_lockcnt_dec(&ctx->list_lock);
+    return result;
 }

 static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
 {
    AioHandler *node;
    bool progress = false;
+    AioHandler *tmp;
+
+    qemu_lockcnt_inc(&ctx->list_lock);

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
-    node = QLIST_FIRST(&ctx->aio_handlers);
-    while (node) {
-        AioHandler *tmp;
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents = node->pfd.revents;

-        ctx->walking_handlers++;
-
        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
@@ -257,26 +296,27 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }

-        tmp = node;
-        node = QLIST_NEXT(node, node);
-
-        ctx->walking_handlers--;
-
-        if (!ctx->walking_handlers && tmp->deleted) {
-            QLIST_REMOVE(tmp, node);
-            g_free(tmp);
+        if (node->deleted) {
+            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+                QLIST_REMOVE(node, node);
+                g_free(node);
+                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+            }
        }
    }

+    qemu_lockcnt_dec(&ctx->list_lock);
    return progress;
 }

-bool aio_dispatch(AioContext *ctx)
+bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
 {
    bool progress;

    progress = aio_bh_poll(ctx);
-    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    if (dispatch_fds) { /* bottom halves and timers run either way */
+        progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
+    }
    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
 }
@@ -303,20 +343,19 @@ bool aio_poll(AioContext *ctx, bool blocking)
        atomic_add(&ctx->notify_me, 2);
    }

+    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

-    ctx->walking_handlers++;
-
    /* fill fd sets */
    count = 0;
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

-    ctx->walking_handlers--;
+    qemu_lockcnt_dec(&ctx->list_lock);
    first = true;

    /* ctx->notifier is always registered.  */
@@ -374,3 +413,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
 void aio_context_setup(AioContext *ctx)
 {
 }
+
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
+{ /* only sets @errp; @ctx and the polling values are not used */
+    error_setg(errp, "AioContext polling is not implemented on Windows");
+}
--- a/arch_init.c
+++ b/arch_init.c
@@ -28,7 +28,6 @@
 #include "sysemu/arch_init.h"
 #include "hw/pci/pci.h"
 #include "hw/audio/audio.h"
-#include "hw/smbios/smbios.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qmp-commands.h"
@@ -64,6 +63,8 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_MIPS
 #elif defined(TARGET_MOXIE)
 #define QEMU_ARCH QEMU_ARCH_MOXIE
+#elif defined(TARGET_NIOS2)
+#define QEMU_ARCH QEMU_ARCH_NIOS2
 #elif defined(TARGET_OPENRISC)
 #define QEMU_ARCH QEMU_ARCH_OPENRISC
 #elif defined(TARGET_PPC)
@@ -84,33 +85,6 @@ int graphic_depth = 32;

 const uint32_t arch_type = QEMU_ARCH;

-static struct defconfig_file {
-    const char *filename;
-    /* Indicates it is an user config file (disabled by -no-user-config) */
-    bool userconfig;
-} default_config_files[] = {
-    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
-    { NULL }, /* end of list */
-};
-
-int qemu_read_default_config_files(bool userconfig)
-{
-    int ret;
-    struct defconfig_file *f;
-
-    for (f = default_config_files; f->filename; f++) {
-        if (!userconfig && f->userconfig) {
-            continue;
-        }
-        ret = qemu_read_config_file(f->filename);
-        if (ret < 0 && ret != -ENOENT) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
 struct soundhw {
    const char *name;
    const char *descr;
@@ -235,26 +209,6 @@ void audio_init(void)
    }
 }

-void do_acpitable_option(const QemuOpts *opts)
-{
-#ifdef TARGET_I386
-    Error *err = NULL;
-
-    acpi_table_add(opts, &err);
-    if (err) {
-        error_reportf_err(err, "Wrong acpi table provided: ");
-        exit(1);
-    }
-#endif
-}
-
-void do_smbios_option(QemuOpts *opts)
-{
-#ifdef TARGET_I386
-    smbios_entry_add(opts);
-#endif
-}
-
 int kvm_available(void)
 {
 #ifdef CONFIG_KVM
--- a/async.c
+++ b/async.c
@@ -53,14 +53,15 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);
+    aio_notify(ctx);
 }

 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
@@ -72,12 +73,12 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
        .cb = cb,
        .opaque = opaque,
    };
-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);
    return bh;
 }

@@ -91,14 +92,13 @@ int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
    int ret;
+    bool deleted = false;

-    ctx->walking_bh++;
+    qemu_lockcnt_inc(&ctx->list_lock);

    ret = 0;
-    for (bh = ctx->first_bh; bh; bh = next) {
-        /* Make sure that fetching bh happens before accessing its members */
-        smp_read_barrier_depends();
-        next = bh->next;
+    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
+        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
@@ -106,20 +106,25 @@ int aio_bh_poll(AioContext *ctx)
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
-            /* Idle BHs and the notify BH don't count as progress */
-            if (!bh->idle && bh != ctx->notify_dummy_bh) {
+            /* Idle BHs don't count as progress */
+            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
+        if (bh->deleted) {
+            deleted = true;
+        }
    }

-    ctx->walking_bh--;
-
    /* remove deleted bhs */
-    if (!ctx->walking_bh) {
-        qemu_mutex_lock(&ctx->bh_lock);
+    if (!deleted) {
+        qemu_lockcnt_dec(&ctx->list_lock);
+        return ret;
+    }
+
+    if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
@@ -130,9 +135,8 @@ int aio_bh_poll(AioContext *ctx)
                bhp = &bh->next;
            }
        }
-        qemu_mutex_unlock(&ctx->bh_lock);
+        qemu_lockcnt_unlock(&ctx->list_lock);
    }
-
    return ret;
 }

@@ -186,7 +190,8 @@ aio_compute_timeout(AioContext *ctx)
    int timeout = -1;
    QEMUBH *bh;

-    for (bh = ctx->first_bh; bh; bh = bh->next) {
+    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
+         bh = atomic_rcu_read(&bh->next)) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
@@ -250,7 +255,7 @@ aio_ctx_dispatch(GSource     *source,
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
-    aio_dispatch(ctx);
+    aio_dispatch(ctx, true);
    return true;
 }

@@ -259,7 +264,6 @@ aio_ctx_finalize(GSource     *source)
 {
    AioContext *ctx = (AioContext *) source;

-    qemu_bh_delete(ctx->notify_dummy_bh);
    thread_pool_free(ctx->thread_pool);

 #ifdef CONFIG_LINUX_AIO
@@ -270,7 +274,8 @@ aio_ctx_finalize(GSource     *source)
    }
 #endif

-    qemu_mutex_lock(&ctx->bh_lock);
+    qemu_lockcnt_lock(&ctx->list_lock);
+    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

@@ -280,12 +285,12 @@ aio_ctx_finalize(GSource     *source)
        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
-    qemu_mutex_unlock(&ctx->bh_lock);
+    qemu_lockcnt_unlock(&ctx->list_lock);

-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
-    rfifolock_destroy(&ctx->lock);
-    qemu_mutex_destroy(&ctx->bh_lock);
+    qemu_rec_mutex_destroy(&ctx->lock);
+    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
 }

@@ -345,23 +350,19 @@ static void aio_timerlist_notify(void *opaque)
    aio_notify(opaque);
 }

-static void aio_rfifolock_cb(void *opaque)
-{
-    AioContext *ctx = opaque;
-
-    /* Kick owner thread in case they are blocked in aio_poll() */
-    qemu_bh_schedule(ctx->notify_dummy_bh);
-}
-
-static void notify_dummy_bh(void *opaque)
-{
-    /* Do nothing, we were invoked just to force the event loop to iterate */
-}
-
 static void event_notifier_dummy_cb(EventNotifier *e)
 {
 }

+/* Returns true if aio_notify() was called (e.g. a BH was scheduled)
+ *
+ * Passed as the io_poll callback when ctx->notifier is registered in
+ * aio_context_new().  @opaque is the EventNotifier embedded in an
+ * AioContext; the context is recovered with container_of() and the result
+ * is an atomic read of ctx->notified.
+ */
+static bool event_notifier_poll(void *opaque)
+{
+    EventNotifier *e = opaque;
+    AioContext *ctx = container_of(e, AioContext, notifier);
+
+    return atomic_read(&ctx->notified);
+}
+
 AioContext *aio_context_new(Error **errp)
 {
    int ret;
@@ -376,19 +377,23 @@ AioContext *aio_context_new(Error **errp)
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
+    qemu_lockcnt_init(&ctx->list_lock);
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
-                           event_notifier_dummy_cb);
+                           event_notifier_dummy_cb,
+                           event_notifier_poll);
 #ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
 #endif
    ctx->thread_pool = NULL;
-    qemu_mutex_init(&ctx->bh_lock);
-    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
+    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
+    ctx->poll_ns = 0;
+    ctx->poll_max_ns = 0;
+    ctx->poll_grow = 0;
+    ctx->poll_shrink = 0;

    return ctx;
 fail:
@@ -408,10 +413,10 @@ void aio_context_unref(AioContext *ctx)

 void aio_context_acquire(AioContext *ctx)
 {
-    rfifolock_lock(&ctx->lock);
+    qemu_rec_mutex_lock(&ctx->lock); /* initialized in aio_context_new() */
 }

 void aio_context_release(AioContext *ctx)
 {
-    rfifolock_unlock(&ctx->lock);
+    qemu_rec_mutex_unlock(&ctx->lock); /* pairs with aio_context_acquire() */
 }
--- a/atomic_template.h
+++ b/atomic_template.h
@@ -0,0 +1,215 @@
+/*
+ * Atomic helper templates
+ * Included from tcg-runtime.c and cputlb.c.
+ *
+ * Copyright (c) 2016 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if DATA_SIZE == 16
+# define SUFFIX     o
+# define DATA_TYPE  Int128
+# define BSWAP      bswap128
+#elif DATA_SIZE == 8
+# define SUFFIX     q
+# define DATA_TYPE  uint64_t
+# define BSWAP      bswap64
+#elif DATA_SIZE == 4
+# define SUFFIX     l
+# define DATA_TYPE  uint32_t
+# define BSWAP      bswap32
+#elif DATA_SIZE == 2
+# define SUFFIX     w
+# define DATA_TYPE  uint16_t
+# define BSWAP      bswap16
+#elif DATA_SIZE == 1
+# define SUFFIX     b
+# define DATA_TYPE  uint8_t
+# define BSWAP
+#else
+# error unsupported data size
+#endif
+
+#if DATA_SIZE >= 4
+# define ABI_TYPE  DATA_TYPE
+#else
+# define ABI_TYPE  uint32_t
+#endif
+
+/* Define host-endian atomic operations.  Note that END is used within
+   the ATOMIC_NAME macro, and redefined below.  */
+#if DATA_SIZE == 1
+# define END
+#elif defined(HOST_WORDS_BIGENDIAN)
+# define END  _be
+#else
+# define END  _le
+#endif
+
+/* Host-endian compare-and-swap helper.
+ *
+ * Operates on the DATA_TYPE object that ATOMIC_MMU_LOOKUP yields (that
+ * macro, ATOMIC_NAME and EXTRA_ARGS are not defined in this template and
+ * are expected to come from the including file).  The value produced by
+ * atomic_cmpxchg__nocheck() is returned, converted to ABI_TYPE.
+ */
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE prev = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+
+    return prev;
+}
+
+#if DATA_SIZE >= 16
+/* Host-endian atomic load, only built when DATA_SIZE >= 16.
+ *
+ * Reads the DATA_TYPE object that ATOMIC_MMU_LOOKUP yields with
+ * __atomic_load() and __ATOMIC_RELAXED ordering and returns it unchanged.
+ */
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE loaded;
+
+    __atomic_load(haddr, &loaded, __ATOMIC_RELAXED);
+    return loaded;
+}
+
+/* Host-endian atomic store, only built when DATA_SIZE >= 16.
+ *
+ * Writes @val as-is to the DATA_TYPE object that ATOMIC_MMU_LOOKUP yields,
+ * using __atomic_store() with __ATOMIC_RELAXED ordering.
+ */
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+                     ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr;
+
+    haddr = ATOMIC_MMU_LOOKUP;
+    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+}
+#else
+/* Host-endian atomic exchange, built when DATA_SIZE < 16.
+ *
+ * Passes @val to atomic_xchg__nocheck() for the DATA_TYPE object that
+ * ATOMIC_MMU_LOOKUP yields and returns that macro's result, converted to
+ * ABI_TYPE.
+ */
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+                           ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE prev = atomic_xchg__nocheck(haddr, val);
+
+    return prev;
+}
+
+/* Generate the host-endian helper for the read-modify-write operation X.
+ *
+ * The generated function applies atomic_X() to the DATA_TYPE object that
+ * ATOMIC_MMU_LOOKUP yields, with @val as the operand, and returns the
+ * result of atomic_X().
+ *
+ * The closing brace carries no trailing backslash: the definition ends
+ * there and does not rely on the following source line being blank.
+ */
+#define GEN_ATOMIC_HELPER(X)                                        \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                 ABI_TYPE val EXTRA_ARGS)                           \
+{                                                                   \
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
+    return atomic_##X(haddr, val);                                  \
+}
+
+GEN_ATOMIC_HELPER(fetch_add)
+GEN_ATOMIC_HELPER(fetch_and)
+GEN_ATOMIC_HELPER(fetch_or)
+GEN_ATOMIC_HELPER(fetch_xor)
+GEN_ATOMIC_HELPER(add_fetch)
+GEN_ATOMIC_HELPER(and_fetch)
+GEN_ATOMIC_HELPER(or_fetch)
+GEN_ATOMIC_HELPER(xor_fetch)
+
+#undef GEN_ATOMIC_HELPER
+#endif /* DATA_SIZE >= 16 */
+
+#undef END
+
+#if DATA_SIZE > 1
+
+/* Define reverse-host-endian atomic operations.  Note that END is used
+   within the ATOMIC_NAME macro.  */
+#ifdef HOST_WORDS_BIGENDIAN
+# define END  _le
+#else
+# define END  _be
+#endif
+
+/* Reverse-host-endian compare-and-swap helper.
+ *
+ * @cmpv and @newv are byte-swapped with BSWAP before being handed to
+ * atomic_cmpxchg__nocheck(), and the value it produces is byte-swapped
+ * again before it is returned.
+ */
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
+                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE prev;
+
+    prev = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
+    return BSWAP(prev);
+}
+
+#if DATA_SIZE >= 16
+/* Reverse-host-endian atomic load, only built when DATA_SIZE >= 16.
+ *
+ * Reads the DATA_TYPE object that ATOMIC_MMU_LOOKUP yields with
+ * __atomic_load() and __ATOMIC_RELAXED ordering, then returns the
+ * byte-swapped value.
+ */
+ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE loaded;
+
+    __atomic_load(haddr, &loaded, __ATOMIC_RELAXED);
+    return BSWAP(loaded);
+}
+
+/* Reverse-host-endian atomic store, only built when DATA_SIZE >= 16.
+ *
+ * Byte-swaps @val and writes the swapped value to the DATA_TYPE object that
+ * ATOMIC_MMU_LOOKUP yields, using __atomic_store() with __ATOMIC_RELAXED
+ * ordering.  (ABI_TYPE is DATA_TYPE for every DATA_SIZE >= 4, so the
+ * swapped copy has the same type as the parameter.)
+ */
+void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
+                     ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE swapped = BSWAP(val);
+
+    __atomic_store(haddr, &swapped, __ATOMIC_RELAXED);
+}
+#else
+/* Reverse-host-endian atomic exchange, built when DATA_SIZE < 16.
+ *
+ * @val is byte-swapped with BSWAP before it is passed to
+ * atomic_xchg__nocheck(), and the value that macro produces is byte-swapped
+ * again before it is returned.
+ */
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
+                           ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE prev = atomic_xchg__nocheck(haddr, BSWAP(val));
+
+    return BSWAP(prev);
+}
+
+/* Generate the reverse-host-endian helper for the operation X.
+ *
+ * The operand is byte-swapped with BSWAP before atomic_X() is applied to
+ * the DATA_TYPE object that ATOMIC_MMU_LOOKUP yields, and the result of
+ * atomic_X() is byte-swapped again before it is returned.  This template
+ * instantiates the macro only for the and/or/xor operations.
+ */
+#define GEN_ATOMIC_HELPER(X)                                        \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                 ABI_TYPE val EXTRA_ARGS)                           \
+{                                                                   \
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
+    DATA_TYPE res = atomic_##X(haddr, BSWAP(val));                  \
+                                                                    \
+    return BSWAP(res);                                              \
+}
+
+GEN_ATOMIC_HELPER(fetch_and)
+GEN_ATOMIC_HELPER(fetch_or)
+GEN_ATOMIC_HELPER(fetch_xor)
+GEN_ATOMIC_HELPER(and_fetch)
+GEN_ATOMIC_HELPER(or_fetch)
+GEN_ATOMIC_HELPER(xor_fetch)
+
+#undef GEN_ATOMIC_HELPER
+
+/* Reverse-host-endian fetch-and-add.
+ *
+ * Note that for addition, we need to use a separate cmpxchg loop instead
+ * of bswaps for the reverse-host-endian helpers.  Each pass byte-swaps the
+ * last value seen in memory, adds @val, byte-swaps the sum back and tries
+ * to install it with atomic_cmpxchg__nocheck(); the loop repeats with the
+ * value the cmpxchg reported until the exchange succeeds.
+ *
+ * Returns the byte-swapped value that was in memory before the addition.
+ */
+ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
+                         ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE seen = atomic_read__nocheck(haddr);
+
+    for (;;) {
+        DATA_TYPE expected = seen;
+        DATA_TYPE ret = BSWAP(expected);
+        DATA_TYPE sto = BSWAP(ret + val);
+
+        seen = atomic_cmpxchg__nocheck(haddr, expected, sto);
+        if (seen == expected) {
+            return ret;
+        }
+    }
+}
+
+/* Reverse-host-endian add-and-fetch.
+ *
+ * Each pass byte-swaps the last value seen in memory, adds @val, byte-swaps
+ * the sum back and tries to install it with atomic_cmpxchg__nocheck(); the
+ * loop repeats with the value the cmpxchg reported until the exchange
+ * succeeds.
+ *
+ * Returns the sum (truncated to DATA_TYPE) that was stored.
+ */
+ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
+                         ABI_TYPE val EXTRA_ARGS)
+{
+    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
+    DATA_TYPE seen = atomic_read__nocheck(haddr);
+
+    for (;;) {
+        DATA_TYPE expected = seen;
+        DATA_TYPE ret = BSWAP(expected) + val;
+        DATA_TYPE sto = BSWAP(ret);
+
+        seen = atomic_cmpxchg__nocheck(haddr, expected, sto);
+        if (seen == expected) {
+            return ret;
+        }
+    }
+}
+#endif /* DATA_SIZE >= 16 */
+
+#undef END
+#endif /* DATA_SIZE > 1 */
+
+#undef BSWAP
+#undef ABI_TYPE
+#undef DATA_TYPE
+#undef SUFFIX
+#undef DATA_SIZE
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -9,3 +9,6 @@ common-obj-$(CONFIG_TPM) += tpm.o

 common-obj-y += hostmem.o hostmem-ram.o
 common-obj-$(CONFIG_LINUX) += hostmem-file.o
+
+common-obj-y += cryptodev.o
+common-obj-y += cryptodev-builtin.o
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -1,7 +1,7 @@
 /*
 * QEMU Baum Braille Device
 *
- * Copyright (c) 2008 Samuel Thibault
+ * Copyright (c) 2008, 2010-2011, 2016 Samuel Thibault
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -27,12 +27,10 @@
 #include "sysemu/char.h"
 #include "qemu/timer.h"
 #include "hw/usb.h"
+#include "ui/console.h"
 #include <brlapi.h>
 #include <brlapi_constants.h>
 #include <brlapi_keycodes.h>
-#ifdef CONFIG_SDL
-#include <SDL_syswm.h>
-#endif

 #if 0
 #define DPRINTF(fmt, ...) \
@@ -87,11 +85,12 @@
 #define BUF_SIZE 256

 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;

    brlapi_handle_t *brlapi;
    int brlapi_fd;
    unsigned int x, y;
+    bool deferred_init;

    uint8_t in_buf[BUF_SIZE];
    uint8_t in_buf_used;
@@ -99,11 +98,17 @@ typedef struct {
    uint8_t out_buf_used, out_buf_ptr;

    QEMUTimer *cellCount_timer;
-} BaumDriverState;
+} BaumChardev;
+
+#define TYPE_CHARDEV_BRAILLE "chardev-braille"
+#define BAUM_CHARDEV(obj) OBJECT_CHECK(BaumChardev, (obj), TYPE_CHARDEV_BRAILLE)

 /* Let's assume NABCC by default */
-static const uint8_t nabcc_translation[256] = {
-    [0] = ' ',
+enum way {
+    DOTS2ASCII,
+    ASCII2DOTS
+};
+static const uint8_t nabcc_translation[2][256] = {
 #ifndef BRLAPI_DOTS
 #define BRLAPI_DOTS(d1,d2,d3,d4,d5,d6,d7,d8) \
    ((d1?BRLAPI_DOT1:0)|\
@@ -115,111 +120,145 @@ static const uint8_t nabcc_translation[256] = {
     (d7?BRLAPI_DOT7:0)|\
     (d8?BRLAPI_DOT8:0))
 #endif
-    [BRLAPI_DOTS(1,0,0,0,0,0,0,0)] = 'a',
-    [BRLAPI_DOTS(1,1,0,0,0,0,0,0)] = 'b',
-    [BRLAPI_DOTS(1,0,0,1,0,0,0,0)] = 'c',
-    [BRLAPI_DOTS(1,0,0,1,1,0,0,0)] = 'd',
-    [BRLAPI_DOTS(1,0,0,0,1,0,0,0)] = 'e',
-    [BRLAPI_DOTS(1,1,0,1,0,0,0,0)] = 'f',
-    [BRLAPI_DOTS(1,1,0,1,1,0,0,0)] = 'g',
-    [BRLAPI_DOTS(1,1,0,0,1,0,0,0)] = 'h',
-    [BRLAPI_DOTS(0,1,0,1,0,0,0,0)] = 'i',
-    [BRLAPI_DOTS(0,1,0,1,1,0,0,0)] = 'j',
-    [BRLAPI_DOTS(1,0,1,0,0,0,0,0)] = 'k',
-    [BRLAPI_DOTS(1,1,1,0,0,0,0,0)] = 'l',
-    [BRLAPI_DOTS(1,0,1,1,0,0,0,0)] = 'm',
-    [BRLAPI_DOTS(1,0,1,1,1,0,0,0)] = 'n',
-    [BRLAPI_DOTS(1,0,1,0,1,0,0,0)] = 'o',
-    [BRLAPI_DOTS(1,1,1,1,0,0,0,0)] = 'p',
-    [BRLAPI_DOTS(1,1,1,1,1,0,0,0)] = 'q',
-    [BRLAPI_DOTS(1,1,1,0,1,0,0,0)] = 'r',
-    [BRLAPI_DOTS(0,1,1,1,0,0,0,0)] = 's',
-    [BRLAPI_DOTS(0,1,1,1,1,0,0,0)] = 't',
-    [BRLAPI_DOTS(1,0,1,0,0,1,0,0)] = 'u',
-    [BRLAPI_DOTS(1,1,1,0,0,1,0,0)] = 'v',
-    [BRLAPI_DOTS(0,1,0,1,1,1,0,0)] = 'w',
-    [BRLAPI_DOTS(1,0,1,1,0,1,0,0)] = 'x',
-    [BRLAPI_DOTS(1,0,1,1,1,1,0,0)] = 'y',
-    [BRLAPI_DOTS(1,0,1,0,1,1,0,0)] = 'z',
+#define DO(dots, ascii) \
+    [DOTS2ASCII][dots] = ascii, \
+    [ASCII2DOTS][ascii] = dots
+    DO(0, ' '),
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 0, 0), 'a'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 0, 0), 'b'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 0, 0), 'c'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 0, 0), 'd'),
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 0, 0), 'e'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 0, 0), 'f'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 0, 0), 'g'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 0, 0), 'h'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 0, 0), 'i'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 0, 0), 'j'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 0, 0), 'k'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 0, 0), 'l'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 0, 0), 'm'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 0, 0), 'n'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 0, 0), 'o'),
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 0, 0), 'p'),
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 0, 0), 'q'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 0, 0), 'r'),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 0, 0), 's'),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 0, 0), 't'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 0, 0), 'u'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 0, 0), 'v'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 0, 0), 'w'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 0, 0), 'x'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 0, 0), 'y'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 0, 0), 'z'),

-    [BRLAPI_DOTS(1,0,0,0,0,0,1,0)] = 'A',
-    [BRLAPI_DOTS(1,1,0,0,0,0,1,0)] = 'B',
-    [BRLAPI_DOTS(1,0,0,1,0,0,1,0)] = 'C',
-    [BRLAPI_DOTS(1,0,0,1,1,0,1,0)] = 'D',
-    [BRLAPI_DOTS(1,0,0,0,1,0,1,0)] = 'E',
-    [BRLAPI_DOTS(1,1,0,1,0,0,1,0)] = 'F',
-    [BRLAPI_DOTS(1,1,0,1,1,0,1,0)] = 'G',
-    [BRLAPI_DOTS(1,1,0,0,1,0,1,0)] = 'H',
-    [BRLAPI_DOTS(0,1,0,1,0,0,1,0)] = 'I',
-    [BRLAPI_DOTS(0,1,0,1,1,0,1,0)] = 'J',
-    [BRLAPI_DOTS(1,0,1,0,0,0,1,0)] = 'K',
-    [BRLAPI_DOTS(1,1,1,0,0,0,1,0)] = 'L',
-    [BRLAPI_DOTS(1,0,1,1,0,0,1,0)] = 'M',
-    [BRLAPI_DOTS(1,0,1,1,1,0,1,0)] = 'N',
-    [BRLAPI_DOTS(1,0,1,0,1,0,1,0)] = 'O',
-    [BRLAPI_DOTS(1,1,1,1,0,0,1,0)] = 'P',
-    [BRLAPI_DOTS(1,1,1,1,1,0,1,0)] = 'Q',
-    [BRLAPI_DOTS(1,1,1,0,1,0,1,0)] = 'R',
-    [BRLAPI_DOTS(0,1,1,1,0,0,1,0)] = 'S',
-    [BRLAPI_DOTS(0,1,1,1,1,0,1,0)] = 'T',
-    [BRLAPI_DOTS(1,0,1,0,0,1,1,0)] = 'U',
-    [BRLAPI_DOTS(1,1,1,0,0,1,1,0)] = 'V',
-    [BRLAPI_DOTS(0,1,0,1,1,1,1,0)] = 'W',
-    [BRLAPI_DOTS(1,0,1,1,0,1,1,0)] = 'X',
-    [BRLAPI_DOTS(1,0,1,1,1,1,1,0)] = 'Y',
-    [BRLAPI_DOTS(1,0,1,0,1,1,1,0)] = 'Z',
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 1, 0), 'A'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 1, 0), 'B'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 1, 0), 'C'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 1, 0), 'D'),
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 1, 0), 'E'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 1, 0), 'F'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 1, 0), 'G'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 1, 0), 'H'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 1, 0), 'I'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 1, 0), 'J'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 1, 0), 'K'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 1, 0), 'L'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 1, 0), 'M'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 1, 0), 'N'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 1, 0), 'O'),
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 1, 0), 'P'),
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 1, 0), 'Q'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 1, 0), 'R'),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 1, 0), 'S'),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 1, 0), 'T'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 1, 0), 'U'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 1, 0), 'V'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 1, 0), 'W'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 1, 0), 'X'),
+    DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 1, 0), 'Y'),
+    DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 1, 0), 'Z'),

-    [BRLAPI_DOTS(0,0,1,0,1,1,0,0)] = '0',
-    [BRLAPI_DOTS(0,1,0,0,0,0,0,0)] = '1',
-    [BRLAPI_DOTS(0,1,1,0,0,0,0,0)] = '2',
-    [BRLAPI_DOTS(0,1,0,0,1,0,0,0)] = '3',
-    [BRLAPI_DOTS(0,1,0,0,1,1,0,0)] = '4',
-    [BRLAPI_DOTS(0,1,0,0,0,1,0,0)] = '5',
-    [BRLAPI_DOTS(0,1,1,0,1,0,0,0)] = '6',
-    [BRLAPI_DOTS(0,1,1,0,1,1,0,0)] = '7',
-    [BRLAPI_DOTS(0,1,1,0,0,1,0,0)] = '8',
-    [BRLAPI_DOTS(0,0,1,0,1,0,0,0)] = '9',
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 1, 0, 0), '0'),
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 0, 0, 0), '1'),
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 0, 0, 0), '2'),
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 0, 0, 0), '3'),
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 1, 0, 0), '4'),
+    DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 1, 0, 0), '5'),
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 0, 0, 0), '6'),
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 1, 0, 0), '7'),
+    DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 1, 0, 0), '8'),
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 0, 0, 0), '9'),

-    [BRLAPI_DOTS(0,0,0,1,0,1,0,0)] = '.',
-    [BRLAPI_DOTS(0,0,1,1,0,1,0,0)] = '+',
-    [BRLAPI_DOTS(0,0,1,0,0,1,0,0)] = '-',
-    [BRLAPI_DOTS(1,0,0,0,0,1,0,0)] = '*',
-    [BRLAPI_DOTS(0,0,1,1,0,0,0,0)] = '/',
-    [BRLAPI_DOTS(1,1,1,0,1,1,0,0)] = '(',
-    [BRLAPI_DOTS(0,1,1,1,1,1,0,0)] = ')',
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 1, 0, 0), '.'),
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 1, 0, 0), '+'),
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 1, 0, 0), '-'),
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 1, 0, 0), '*'),
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 0, 0, 0), '/'),
+    DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 1, 0, 0), '('),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 1, 0, 0), ')'),

-    [BRLAPI_DOTS(1,1,1,1,0,1,0,0)] = '&',
-    [BRLAPI_DOTS(0,0,1,1,1,1,0,0)] = '#',
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 1, 0, 0), '&'),
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 1, 0, 0), '#'),

-    [BRLAPI_DOTS(0,0,0,0,0,1,0,0)] = ',',
-    [BRLAPI_DOTS(0,0,0,0,1,1,0,0)] = ';',
-    [BRLAPI_DOTS(1,0,0,0,1,1,0,0)] = ':',
-    [BRLAPI_DOTS(0,1,1,1,0,1,0,0)] = '!',
-    [BRLAPI_DOTS(1,0,0,1,1,1,0,0)] = '?',
-    [BRLAPI_DOTS(0,0,0,0,1,0,0,0)] = '"',
-    [BRLAPI_DOTS(0,0,1,0,0,0,0,0)] ='\'',
-    [BRLAPI_DOTS(0,0,0,1,0,0,0,0)] = '`',
-    [BRLAPI_DOTS(0,0,0,1,1,0,1,0)] = '^',
-    [BRLAPI_DOTS(0,0,0,1,1,0,0,0)] = '~',
-    [BRLAPI_DOTS(0,1,0,1,0,1,1,0)] = '[',
-    [BRLAPI_DOTS(1,1,0,1,1,1,1,0)] = ']',
-    [BRLAPI_DOTS(0,1,0,1,0,1,0,0)] = '{',
-    [BRLAPI_DOTS(1,1,0,1,1,1,0,0)] = '}',
-    [BRLAPI_DOTS(1,1,1,1,1,1,0,0)] = '=',
-    [BRLAPI_DOTS(1,1,0,0,0,1,0,0)] = '<',
-    [BRLAPI_DOTS(0,0,1,1,1,0,0,0)] = '>',
-    [BRLAPI_DOTS(1,1,0,1,0,1,0,0)] = '$',
-    [BRLAPI_DOTS(1,0,0,1,0,1,0,0)] = '%',
-    [BRLAPI_DOTS(0,0,0,1,0,0,1,0)] = '@',
-    [BRLAPI_DOTS(1,1,0,0,1,1,0,0)] = '|',
-    [BRLAPI_DOTS(1,1,0,0,1,1,1,0)] ='\\',
-    [BRLAPI_DOTS(0,0,0,1,1,1,0,0)] = '_',
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 0, 1, 0, 0), ','),
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 1, 0, 0), ';'),
+    DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 1, 0, 0), ':'),
+    DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 1, 0, 0), '!'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 1, 0, 0), '?'),
+    DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 0, 0, 0), '"'),
+    DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 0, 0, 0), '\''),
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 0, 0), '`'),
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 1, 0), '^'),
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 0, 0), '~'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 1, 0), '['),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 1, 0), ']'),
+    DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 0, 0), '{'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 0, 0), '}'),
+    DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 1, 0, 0), '='),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 1, 0, 0), '<'),
+    DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 0, 0, 0), '>'),
+    DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 1, 0, 0), '$'),
+    DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 1, 0, 0), '%'),
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 1, 0), '@'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 0, 0), '|'),
+    DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 1, 0), '\\'),
+    DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 1, 0, 0), '_'),
 };

-/* The serial port can receive more of our data */
-static void baum_accept_input(struct CharDriverState *chr)
+/* The guest OS has started discussing with us, finish initializing BrlAPI */
+static int baum_deferred_init(BaumChardev *baum)
 {
-    BaumDriverState *baum = chr->opaque;
+    int tty = BRLAPI_TTY_DEFAULT;
+    QemuConsole *con;
+
+    if (baum->deferred_init) {
+        return 1;
+    }
+
+    if (brlapi__getDisplaySize(baum->brlapi, &baum->x, &baum->y) == -1) {
+        brlapi_perror("baum: brlapi__getDisplaySize");
+        return 0;
+    }
+
+    con = qemu_console_lookup_by_index(0);
+    if (con && qemu_console_is_graphic(con)) {
+        tty = qemu_console_get_window_id(con);
+        if (tty == -1)
+            tty = BRLAPI_TTY_DEFAULT;
+    }
+
+    if (brlapi__enterTtyMode(baum->brlapi, tty, NULL) == -1) {
+        brlapi_perror("baum: brlapi__enterTtyMode");
+        return 0;
+    }
+    baum->deferred_init = 1;
+    return 1;
+}
+
+/* The serial port can receive more of our data */
+static void baum_chr_accept_input(struct Chardev *chr)
+{
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    int room, first;

    if (!baum->out_buf_used)
@@ -243,24 +282,25 @@ static void baum_accept_input(struct CharDriverState *chr)
 }

 /* We want to send a packet */
-static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len)
+static void baum_write_packet(BaumChardev *baum, const uint8_t *buf, int len)
 {
+    Chardev *chr = CHARDEV(baum);
    uint8_t io_buf[1 + 2 * len], *cur = io_buf;
    int room;
    *cur++ = ESC;
    while (len--)
        if ((*cur++ = *buf++) == ESC)
            *cur++ = ESC;
-    room = qemu_chr_be_can_write(baum->chr);
+    room = qemu_chr_be_can_write(chr);
    len = cur - io_buf;
    if (len <= room) {
        /* Fits */
-        qemu_chr_be_write(baum->chr, io_buf, len);
+        qemu_chr_be_write(chr, io_buf, len);
    } else {
        int first;
        uint8_t out;
        /* Can't fit all, send what can be, and store the rest. */
-        qemu_chr_be_write(baum->chr, io_buf, room);
+        qemu_chr_be_write(chr, io_buf, room);
        len -= room;
        cur = io_buf + room;
        if (len > BUF_SIZE - baum->out_buf_used) {
@@ -285,14 +325,14 @@ static void baum_write_packet(BaumDriverState *baum, const uint8_t *buf, int len
 /* Called when the other end seems to have a wrong idea of our display size */
 static void baum_cellCount_timer_cb(void *opaque)
 {
-    BaumDriverState *baum = opaque;
+    BaumChardev *baum = BAUM_CHARDEV(opaque);
    uint8_t cell_count[] = { BAUM_RSP_CellCount, baum->x * baum->y };
    DPRINTF("Timeout waiting for DisplayData, sending cell count\n");
    baum_write_packet(baum, cell_count, sizeof(cell_count));
 }

 /* Try to interpret a whole incoming packet */
-static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
+static int baum_eat_packet(BaumChardev *baum, const uint8_t *buf, int len)
 {
    const uint8_t *cur = buf;
    uint8_t req = 0;
@@ -346,8 +386,10 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
                cursor = i + 1;
                c &= ~(BRLAPI_DOT7|BRLAPI_DOT8);
            }
-            if (!(c = nabcc_translation[c]))
+            c = nabcc_translation[DOTS2ASCII][c];
+            if (!c) {
                c = '?';
+            }
            text[i] = c;
        }
        timer_del(baum->cellCount_timer);
@@ -431,15 +473,17 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
+static int baum_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
-    BaumDriverState *baum = chr->opaque;
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    int tocopy, cur, eaten, orig_len = len;

    if (!len)
        return 0;
    if (!baum->brlapi)
        return len;
+    if (!baum_deferred_init(baum))
+        return len;

    while (len) {
        /* Complete our buffer as much as possible */
@@ -470,20 +514,31 @@ static int baum_write(CharDriverState *chr, const uint8_t *buf, int len)
 }

 /* Send the key code to the other end */
-static void baum_send_key(BaumDriverState *baum, uint8_t type, uint8_t value) {
+static void baum_send_key(BaumChardev *baum, uint8_t type, uint8_t value)
+{
    uint8_t packet[] = { type, value };
    DPRINTF("writing key %x %x\n", type, value);
    baum_write_packet(baum, packet, sizeof(packet));
 }

+static void baum_send_key2(BaumChardev *baum, uint8_t type, uint8_t value,
+                           uint8_t value2)
+{
+    uint8_t packet[] = { type, value, value2 };
+    DPRINTF("writing key %x %x\n", type, value);
+    baum_write_packet(baum, packet, sizeof(packet));
+}
+
 /* We got some data on the BrlAPI socket */
 static void baum_chr_read(void *opaque)
 {
-    BaumDriverState *baum = opaque;
+    BaumChardev *baum = BAUM_CHARDEV(opaque);
    brlapi_keyCode_t code;
    int ret;
    if (!baum->brlapi)
        return;
+    if (!baum_deferred_init(baum))
+        return;
    while ((ret = brlapi__readKey(baum->brlapi, 0, &code)) == 1) {
        DPRINTF("got key %"BRLAPI_PRIxKEYCODE"\n", code);
        /* Emulate */
@@ -540,7 +595,17 @@ static void baum_chr_read(void *opaque)
            }
            break;
        case BRLAPI_KEY_TYPE_SYM:
-            break;
+            {
+                brlapi_keyCode_t keysym = code & BRLAPI_KEY_CODE_MASK;
+                if (keysym < 0x100) {
+                    uint8_t dots = nabcc_translation[ASCII2DOTS][keysym];
+                    if (dots) {
+                        baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, dots);
+                        baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, 0);
+                    }
+                }
+                break;
+            }
        }
    }
    if (ret == -1 && (brlapi_errno != BRLAPI_ERROR_LIBCERR || errno != EINTR)) {
@@ -551,46 +616,24 @@ static void baum_chr_read(void *opaque)
    }
 }

-static void baum_free(struct CharDriverState *chr)
+static void char_braille_finalize(Object *obj)
 {
-    BaumDriverState *baum = chr->opaque;
+    BaumChardev *baum = BAUM_CHARDEV(obj);

    timer_free(baum->cellCount_timer);
    if (baum->brlapi) {
        brlapi__closeConnection(baum->brlapi);
        g_free(baum->brlapi);
    }
-    g_free(baum);
 }

-static CharDriverState *chr_baum_init(const char *id,
-                                      ChardevBackend *backend,
-                                      ChardevReturn *ret,
-                                      bool *be_opened,
-                                      Error **errp)
+static void baum_chr_open(Chardev *chr,
+                          ChardevBackend *backend,
+                          bool *be_opened,
+                          Error **errp)
 {
-    ChardevCommon *common = backend->u.braille.data;
-    BaumDriverState *baum;
-    CharDriverState *chr;
+    BaumChardev *baum = BAUM_CHARDEV(chr);
    brlapi_handle_t *handle;
-#if defined(CONFIG_SDL)
-#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
-    SDL_SysWMinfo info;
-#endif
-#endif
-    int tty;
-
-    chr = qemu_chr_alloc(common, errp);
-    if (!chr) {
-        return NULL;
-    }
-    baum = g_malloc0(sizeof(BaumDriverState));
-    baum->chr = chr;
-
-    chr->opaque = baum;
-    chr->chr_write = baum_write;
-    chr->chr_accept_input = baum_accept_input;
-    chr->chr_free = baum_free;

    handle = g_malloc0(brlapi_getHandleSize());
    baum->brlapi = handle;
@@ -599,52 +642,37 @@ static CharDriverState *chr_baum_init(const char *id,
    if (baum->brlapi_fd == -1) {
        error_setg(errp, "brlapi__openConnection: %s",
                   brlapi_strerror(brlapi_error_location()));
-        goto fail_handle;
+        g_free(handle);
+        baum->brlapi = NULL; /* finalize must not close/free it again */
+        return;
    }
+    baum->deferred_init = 0;

    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);

-    if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
-        error_setg(errp, "brlapi__getDisplaySize: %s",
-                   brlapi_strerror(brlapi_error_location()));
-        goto fail;
-    }
-
-#if defined(CONFIG_SDL)
-#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
-    memset(&info, 0, sizeof(info));
-    SDL_VERSION(&info.version);
-    if (SDL_GetWMInfo(&info))
-        tty = info.info.x11.wmwindow;
-    else
-#endif
-#endif
-        tty = BRLAPI_TTY_DEFAULT;
-
-    if (brlapi__enterTtyMode(handle, tty, NULL) == -1) {
-        error_setg(errp, "brlapi__enterTtyMode: %s",
-                   brlapi_strerror(brlapi_error_location()));
-        goto fail;
-    }
-
    qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum);
-
-    return chr;
-
-fail:
-    timer_free(baum->cellCount_timer);
-    brlapi__closeConnection(handle);
-fail_handle:
-    g_free(handle);
-    g_free(chr);
-    g_free(baum);
-    return NULL;
 }

+static void char_braille_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = baum_chr_open;
+    cc->chr_write = baum_chr_write;
+    cc->chr_accept_input = baum_chr_accept_input;
+}
+
+static const TypeInfo char_braille_type_info = {
+    .name = TYPE_CHARDEV_BRAILLE,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(BaumChardev),
+    .instance_finalize = char_braille_finalize,
+    .class_init = char_braille_class_init,
+};
+
 static void register_types(void)
 {
-    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
-                         chr_baum_init);
+    type_register_static(&char_braille_type_info);
 }

 type_init(register_types);
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -0,0 +1,400 @@
+/*
+ * QEMU Cryptodev backend for QEMU cipher APIs
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Gonglei <arei.gonglei@huawei.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/cryptodev.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
+#include "standard-headers/linux/virtio_crypto.h"
+#include "crypto/cipher.h"
+
+
+/**
+ * @TYPE_CRYPTODEV_BACKEND_BUILTIN:
+ * name of backend that uses QEMU cipher API
+ */
+#define TYPE_CRYPTODEV_BACKEND_BUILTIN "cryptodev-backend-builtin"
+
+#define CRYPTODEV_BACKEND_BUILTIN(obj) \
+    OBJECT_CHECK(CryptoDevBackendBuiltin, \
+                 (obj), TYPE_CRYPTODEV_BACKEND_BUILTIN)
+
+typedef struct CryptoDevBackendBuiltin
+                         CryptoDevBackendBuiltin;
+
+typedef struct CryptoDevBackendBuiltinSession {
+    QCryptoCipher *cipher;
+    uint8_t direction; /* encryption or decryption */
+    uint8_t type; /* cipher? hash? aead? */
+    QTAILQ_ENTRY(CryptoDevBackendBuiltinSession) next;
+} CryptoDevBackendBuiltinSession;
+
+/* Max number of symmetric sessions */
+#define MAX_NUM_SESSIONS 256
+
+#define CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN    512
+#define CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN  64
+
+struct CryptoDevBackendBuiltin {
+    CryptoDevBackend parent_obj;
+
+    CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS];
+};
+
+/*
+ * Init hook: register the single client/queue this backend supports and
+ * advertise its capabilities.  On an unsupported "queues" value @errp is
+ * set and nothing is registered.
+ */
+static void cryptodev_builtin_init(
+             CryptoDevBackend *backend, Error **errp)
+{
+    int queues = backend->conf.peers.queues;
+    CryptoDevBackendClient *cc;
+
+    /* Only a single queue is implemented */
+    if (queues != 1) {
+        error_setg(errp,
+                  "Only support one queue in cryptodev-builtin backend");
+        return;
+    }
+
+    cc = cryptodev_backend_new_client("cryptodev-builtin", NULL);
+    cc->info_str = g_strdup("cryptodev-builtin0");
+    cc->queue_index = 0;
+    backend->conf.peers.ccs[0] = cc;
+
+    backend->conf.crypto_services =
+                         1u << VIRTIO_CRYPTO_SERVICE_CIPHER |
+                         1u << VIRTIO_CRYPTO_SERVICE_HASH |
+                         1u << VIRTIO_CRYPTO_SERVICE_MAC;
+    backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC;
+    backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1;
+    /* Cap so that request size plus op-info header cannot overflow */
+    backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo);
+    backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN;
+    backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN;
+
+    cryptodev_backend_set_ready(backend, true);
+}
+
+static int
+cryptodev_builtin_get_unused_session_index(
+                 CryptoDevBackendBuiltin *builtin)
+{
+    size_t i;
+
+    for (i = 0; i < MAX_NUM_SESSIONS; i++) {
+        if (builtin->sessions[i] == NULL) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+#define AES_KEYSIZE_128 16
+#define AES_KEYSIZE_192 24
+#define AES_KEYSIZE_256 32
+#define AES_KEYSIZE_128_XTS AES_KEYSIZE_256
+#define AES_KEYSIZE_256_XTS 64
+
+/*
+ * Map an AES key length in bytes and a QCRYPTO_CIPHER_MODE_* value to a
+ * QCRYPTO_CIPHER_ALG_AES_* value.  Returns -1 and sets @errp when the
+ * key length is not accepted.
+ */
+static int
+cryptodev_builtin_get_aes_algo(uint32_t key_len, int mode, Error **errp)
+{
+    bool xts = mode == QCRYPTO_CIPHER_MODE_XTS;
+
+    switch (key_len) {
+    case AES_KEYSIZE_128:
+        return QCRYPTO_CIPHER_ALG_AES_128;
+    case AES_KEYSIZE_192:
+        return QCRYPTO_CIPHER_ALG_AES_192;
+    case AES_KEYSIZE_256: /* equals AES_KEYSIZE_128_XTS */
+        return xts ? QCRYPTO_CIPHER_ALG_AES_128 : QCRYPTO_CIPHER_ALG_AES_256;
+    case AES_KEYSIZE_256_XTS:
+        /* This length is accepted in XTS mode only */
+        if (xts) {
+            return QCRYPTO_CIPHER_ALG_AES_256;
+        }
+        break;
+    default:
+        break;
+    }
+
+    /* Reached for an unknown length, or AES_KEYSIZE_256_XTS outside XTS */
+    error_setg(errp, "Unsupported key length :%u", key_len);
+    return -1;
+}
+
+static int cryptodev_builtin_create_cipher_session(
+                    CryptoDevBackendBuiltin *builtin,
+                    CryptoDevBackendSymSessionInfo *sess_info,
+                    Error **errp)
+{
+    int algo;
+    int mode;
+    QCryptoCipher *cipher;
+    int index;
+    CryptoDevBackendBuiltinSession *sess;
+
+    if (sess_info->op_type != VIRTIO_CRYPTO_SYM_OP_CIPHER) {
+        error_setg(errp, "Unsupported optype :%u", sess_info->op_type);
+        return -1;
+    }
+
+    index = cryptodev_builtin_get_unused_session_index(builtin);
+    if (index < 0) {
+        error_setg(errp, "Total number of sessions created exceeds %u",
+                  MAX_NUM_SESSIONS);
+        return -1;
+    }
+
+    switch (sess_info->cipher_alg) {
+    case VIRTIO_CRYPTO_CIPHER_AES_ECB:
+        mode = QCRYPTO_CIPHER_MODE_ECB;
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                    mode, errp);
+        if (algo < 0)  {
+            return -1;
+        }
+        break;
+    case VIRTIO_CRYPTO_CIPHER_AES_CBC:
+        mode = QCRYPTO_CIPHER_MODE_CBC;
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                    mode, errp);
+        if (algo < 0)  {
+            return -1;
+        }
+        break;
+    case VIRTIO_CRYPTO_CIPHER_AES_CTR:
+        mode = QCRYPTO_CIPHER_MODE_CTR;
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                    mode, errp);
+        if (algo < 0)  {
+            return -1;
+        }
+        break;
+    case VIRTIO_CRYPTO_CIPHER_AES_XTS:
+        mode = QCRYPTO_CIPHER_MODE_XTS;
+        algo = cryptodev_builtin_get_aes_algo(sess_info->key_len,
+                                                    mode, errp);
+        if (algo < 0)  {
+            return -1;
+        }
+        break;
+    case VIRTIO_CRYPTO_CIPHER_3DES_ECB:
+        mode = QCRYPTO_CIPHER_MODE_ECB;
+        algo = QCRYPTO_CIPHER_ALG_3DES;
+        break;
+    case VIRTIO_CRYPTO_CIPHER_3DES_CBC:
+        mode = QCRYPTO_CIPHER_MODE_CBC;
+        algo = QCRYPTO_CIPHER_ALG_3DES;
+        break;
+    case VIRTIO_CRYPTO_CIPHER_3DES_CTR:
+        mode = QCRYPTO_CIPHER_MODE_CTR;
+        algo = QCRYPTO_CIPHER_ALG_3DES;
+        break;
+    default:
+        error_setg(errp, "Unsupported cipher alg :%u",
+                   sess_info->cipher_alg);
+        return -1;
+    }
+
+    cipher = qcrypto_cipher_new(algo, mode,
+                               sess_info->cipher_key,
+                               sess_info->key_len,
+                               errp);
+    if (!cipher) {
+        return -1;
+    }
+
+    sess = g_new0(CryptoDevBackendBuiltinSession, 1);
+    sess->cipher = cipher;
+    sess->direction = sess_info->direction;
+    sess->type = sess_info->op_type;
+
+    builtin->sessions[index] = sess;
+
+    return index;
+}
+
+static int64_t cryptodev_builtin_sym_create_session(
+           CryptoDevBackend *backend,
+           CryptoDevBackendSymSessionInfo *sess_info,
+           uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendBuiltin *builtin =
+                      CRYPTODEV_BACKEND_BUILTIN(backend);
+    int64_t session_id = -1;
+    int ret;
+
+    switch (sess_info->op_code) {
+    case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
+        ret = cryptodev_builtin_create_cipher_session(
+                           builtin, sess_info, errp);
+        if (ret < 0) {
+            return ret;
+        } else {
+            session_id = ret;
+        }
+        break;
+    case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
+    case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
+    default:
+        error_setg(errp, "Unsupported opcode :%" PRIu32 "",
+                   sess_info->op_code);
+        return -1;
+    }
+
+    return session_id;
+}
+
+static int cryptodev_builtin_sym_close_session(
+           CryptoDevBackend *backend,
+           uint64_t session_id,
+           uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendBuiltin *builtin =
+                      CRYPTODEV_BACKEND_BUILTIN(backend);
+
+    if (session_id >= MAX_NUM_SESSIONS ||
+              builtin->sessions[session_id] == NULL) {
+        error_setg(errp, "Cannot find a valid session id: %" PRIu64 "",
+                      session_id);
+        return -1;
+    }
+
+    qcrypto_cipher_free(builtin->sessions[session_id]->cipher);
+    g_free(builtin->sessions[session_id]);
+    builtin->sessions[session_id] = NULL;
+    return 0;
+}
+
+/*
+ * Run one cipher request on an existing session.  Returns VIRTIO_CRYPTO_OK,
+ * or a negated VIRTIO_CRYPTO_* status with @errp set on failure.
+ */
+static int cryptodev_builtin_sym_operation(
+                 CryptoDevBackend *backend,
+                 CryptoDevBackendSymOpInfo *op_info,
+                 uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendBuiltin *builtin =
+                      CRYPTODEV_BACKEND_BUILTIN(backend);
+    CryptoDevBackendBuiltinSession *sess;
+    int ret;
+
+    if (op_info->session_id >= MAX_NUM_SESSIONS ||
+              builtin->sessions[op_info->session_id] == NULL) {
+        error_setg(errp, "Cannot find a valid session id: %" PRIu64 "",
+                   op_info->session_id);
+        return -VIRTIO_CRYPTO_INVSESS;
+    }
+
+    if (op_info->op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
+        error_setg(errp,
+               "Algorithm chain is unsupported for cryptodev-builtin");
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+
+    sess = builtin->sessions[op_info->session_id];
+    /* Only set an IV when one was supplied: IV-less modes (ECB) reject it */
+    if (op_info->iv_len > 0) {
+        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+                                   op_info->iv_len, errp);
+        if (ret < 0) {
+            return -VIRTIO_CRYPTO_ERR;
+        }
+    }
+
+    if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
+        ret = qcrypto_cipher_encrypt(sess->cipher, op_info->src,
+                                     op_info->dst, op_info->src_len, errp);
+    } else {
+        ret = qcrypto_cipher_decrypt(sess->cipher, op_info->src,
+                                     op_info->dst, op_info->src_len, errp);
+    }
+    return ret < 0 ? -VIRTIO_CRYPTO_ERR : VIRTIO_CRYPTO_OK;
+}
+
+/*
+ * Cleanup hook: close every open session, release the client registered
+ * by cryptodev_builtin_init() and mark the backend not ready.
+ */
+static void cryptodev_builtin_cleanup(CryptoDevBackend *backend,
+                                      Error **errp)
+{
+    CryptoDevBackendBuiltin *builtin =
+                      CRYPTODEV_BACKEND_BUILTIN(backend);
+    CryptoDevBackendClient *cc = backend->conf.peers.ccs[0];
+    size_t i;
+
+    for (i = 0; i < MAX_NUM_SESSIONS; i++) {
+        if (builtin->sessions[i] != NULL) {
+            cryptodev_builtin_sym_close_session(backend, i, 0, errp);
+        }
+    }
+
+    /*
+     * This also runs from finalize after init rejected queues != 1, so do
+     * not assert on the queue count; init only ever populates ccs[0].
+     */
+    if (cc) {
+        cryptodev_backend_free_client(cc);
+        backend->conf.peers.ccs[0] = NULL;
+    }
+
+    cryptodev_backend_set_ready(backend, false);
+}
+
+static void
+cryptodev_builtin_class_init(ObjectClass *oc, void *data)
+{
+    CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_CLASS(oc);
+
+    bc->init = cryptodev_builtin_init;
+    bc->cleanup = cryptodev_builtin_cleanup;
+    bc->create_session = cryptodev_builtin_sym_create_session;
+    bc->close_session = cryptodev_builtin_sym_close_session;
+    bc->do_sym_op = cryptodev_builtin_sym_operation;
+}
+
+static const TypeInfo cryptodev_builtin_info = {
+    .name = TYPE_CRYPTODEV_BACKEND_BUILTIN,
+    .parent = TYPE_CRYPTODEV_BACKEND,
+    .class_init = cryptodev_builtin_class_init,
+    .instance_size = sizeof(CryptoDevBackendBuiltin),
+};
+
+static void
+cryptodev_builtin_register_types(void)
+{
+    type_register_static(&cryptodev_builtin_info);
+}
+
+type_init(cryptodev_builtin_register_types);
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -0,0 +1,271 @@
+/*
+ * QEMU Crypto Device Implementation
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Gonglei <arei.gonglei@huawei.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/cryptodev.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qapi-types.h"
+#include "qapi-visit.h"
+#include "qemu/config-file.h"
+#include "qom/object_interfaces.h"
+#include "hw/virtio/virtio-crypto.h"
+
+
+static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients;
+
+
+CryptoDevBackendClient *
+cryptodev_backend_new_client(const char *model,
+                                    const char *name)
+{
+    CryptoDevBackendClient *cc;
+
+    cc = g_malloc0(sizeof(CryptoDevBackendClient));
+    cc->model = g_strdup(model);
+    if (name) {
+        cc->name = g_strdup(name);
+    }
+
+    QTAILQ_INSERT_TAIL(&crypto_clients, cc, next);
+
+    return cc;
+}
+
+void cryptodev_backend_free_client(
+                  CryptoDevBackendClient *cc)
+{
+    QTAILQ_REMOVE(&crypto_clients, cc, next);
+    g_free(cc->name);
+    g_free(cc->model);
+    g_free(cc->info_str);
+    g_free(cc);
+}
+
+void cryptodev_backend_cleanup(
+             CryptoDevBackend *backend,
+             Error **errp)
+{
+    CryptoDevBackendClass *bc =
+                  CRYPTODEV_BACKEND_GET_CLASS(backend);
+
+    if (bc->cleanup) {
+        bc->cleanup(backend, errp);
+    }
+}
+
+int64_t cryptodev_backend_sym_create_session(
+           CryptoDevBackend *backend,
+           CryptoDevBackendSymSessionInfo *sess_info,
+           uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendClass *bc =
+                      CRYPTODEV_BACKEND_GET_CLASS(backend);
+
+    if (bc->create_session) {
+        return bc->create_session(backend, sess_info, queue_index, errp);
+    }
+
+    return -1;
+}
+
+int cryptodev_backend_sym_close_session(
+           CryptoDevBackend *backend,
+           uint64_t session_id,
+           uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendClass *bc =
+                      CRYPTODEV_BACKEND_GET_CLASS(backend);
+
+    if (bc->close_session) {
+        return bc->close_session(backend, session_id, queue_index, errp);
+    }
+
+    return -1;
+}
+
+static int cryptodev_backend_sym_operation(
+                 CryptoDevBackend *backend,
+                 CryptoDevBackendSymOpInfo *op_info,
+                 uint32_t queue_index, Error **errp)
+{
+    CryptoDevBackendClass *bc =
+                      CRYPTODEV_BACKEND_GET_CLASS(backend);
+
+    if (bc->do_sym_op) {
+        return bc->do_sym_op(backend, op_info, queue_index, errp);
+    }
+
+    return -VIRTIO_CRYPTO_ERR;
+}
+
+/*
+ * Dispatch a request (@opaque is a VirtIOCryptoReq) by algorithm class.
+ * Only symmetric requests are handled; any other class sets @errp and
+ * returns -VIRTIO_CRYPTO_NOTSUPP.
+ */
+int cryptodev_backend_crypto_operation(
+                 CryptoDevBackend *backend,
+                 void *opaque,
+                 uint32_t queue_index, Error **errp)
+{
+    VirtIOCryptoReq *req = opaque;
+
+    if (req->flags != CRYPTODEV_BACKEND_ALG_SYM) {
+        error_setg(errp, "Unsupported cryptodev alg type: %" PRIu32 "",
+                   req->flags);
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+
+    return cryptodev_backend_sym_operation(backend, req->u.sym_op_info,
+                                           queue_index, errp);
+}
+
+static void
+cryptodev_backend_get_queues(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    uint32_t value = backend->conf.peers.queues;
+
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void
+cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, name, &value, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (!value) {
+        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
+                   PRIu32 "'", object_get_typename(obj), name, value);
+        goto out;
+    }
+    backend->conf.peers.queues = value;
+out:
+    error_propagate(errp, local_err);
+}
+
+static void
+cryptodev_backend_complete(UserCreatable *uc, Error **errp)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
+    CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc);
+    Error *local_err = NULL;
+
+    if (bc->init) {
+        bc->init(backend, &local_err);
+        if (local_err) {
+            goto out;
+        }
+    }
+
+    return;
+
+out:
+    error_propagate(errp, local_err);
+}
+
+void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used)
+{
+    backend->is_used = used;
+}
+
+bool cryptodev_backend_is_used(CryptoDevBackend *backend)
+{
+    return backend->is_used;
+}
+
+void cryptodev_backend_set_ready(CryptoDevBackend *backend, bool ready)
+{
+    backend->ready = ready;
+}
+
+bool cryptodev_backend_is_ready(CryptoDevBackend *backend)
+{
+    return backend->ready;
+}
+
+static bool
+cryptodev_backend_can_be_deleted(UserCreatable *uc, Error **errp)
+{
+    return !cryptodev_backend_is_used(CRYPTODEV_BACKEND(uc));
+}
+
+/* Register the "queues" property (a uint32) and default it to 1 */
+static void cryptodev_backend_instance_init(Object *obj)
+{
+    object_property_add(obj, "queues", "uint32",
+                        cryptodev_backend_get_queues,
+                        cryptodev_backend_set_queues, NULL, NULL, NULL);
+    /* The setter rejects 0, so start from the smallest valid value */
+    object_property_set_int(obj, 1, "queues", NULL);
+}
+
+static void cryptodev_backend_finalize(Object *obj)
+{
+    CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj);
+
+    cryptodev_backend_cleanup(backend, NULL);
+}
+
+static void
+cryptodev_backend_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = cryptodev_backend_complete;
+    ucc->can_be_deleted = cryptodev_backend_can_be_deleted;
+
+    QTAILQ_INIT(&crypto_clients);
+}
+
+static const TypeInfo cryptodev_backend_info = {
+    .name = TYPE_CRYPTODEV_BACKEND,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(CryptoDevBackend),
+    .instance_init = cryptodev_backend_instance_init,
+    .instance_finalize = cryptodev_backend_finalize,
+    .class_size = sizeof(CryptoDevBackendClass),
+    .class_init = cryptodev_backend_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void
+cryptodev_backend_register_types(void)
+{
+    type_register_static(&cryptodev_backend_info);
+}
+
+type_init(cryptodev_backend_register_types);
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -348,6 +348,24 @@ host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
    }
 }

+/* Getter for the "id" string property: returns a g_strdup() copy of backend->id. */
+static char *get_id(Object *o, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+    return g_strdup(backend->id);
+}
+
+/*
+ * Setter for the "id" string property.  The id is write-once: the first
+ * call stores a private copy of @str, any later call leaves the stored
+ * value alone and reports an error through @errp.
+ */
+static void set_id(Object *o, const char *str, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+    if (!backend->id) {
+        backend->id = g_strdup(str);
+    } else {
+        error_setg(errp, "cannot change property value");
+    }
+}
+
 static void
 host_memory_backend_class_init(ObjectClass *oc, void *data)
 {
@@ -377,6 +395,13 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
        HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
+    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
+}
+
+/* Instance finalizer: frees the id string stored by set_id(). */
+static void host_memory_backend_finalize(Object *o)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    g_free(backend->id);
 }

 static const TypeInfo host_memory_backend_info = {
@@ -387,6 +412,7 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
+    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -31,18 +31,23 @@
 #define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)

+/*
+ * Instance state of the msmouse character device.  The parent Chardev
+ * is embedded as the first member; the remaining fields hold the input
+ * handler handle and the mouse state.
+ */
 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;
+
    QemuInputHandlerState *hs;
    int axis[INPUT_AXIS__MAX];
    bool btns[INPUT_BUTTON__MAX];
    bool btnc[INPUT_BUTTON__MAX];
    uint8_t outbuf[32];
    int outlen;
-} MouseState;
+} MouseChardev;

-static void msmouse_chr_accept_input(CharDriverState *chr)
+#define TYPE_CHARDEV_MSMOUSE "chardev-msmouse"
+#define MOUSE_CHARDEV(obj)                                      \
+    OBJECT_CHECK(MouseChardev, (obj), TYPE_CHARDEV_MSMOUSE)
+
+static void msmouse_chr_accept_input(Chardev *chr)
 {
-    MouseState *mouse = chr->opaque;
+    MouseChardev *mouse = MOUSE_CHARDEV(chr);
    int len;

    len = qemu_chr_be_can_write(chr);
@@ -60,7 +65,7 @@ static void msmouse_chr_accept_input(CharDriverState *chr)
    }
 }

-static void msmouse_queue_event(MouseState *mouse)
+static void msmouse_queue_event(MouseChardev *mouse)
 {
    unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
    int dx, dy, count = 3;
@@ -97,7 +102,7 @@ static void msmouse_queue_event(MouseState *mouse)
 static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
                                InputEvent *evt)
 {
-    MouseState *mouse = (MouseState *)dev;
+    MouseChardev *mouse = MOUSE_CHARDEV(dev);
    InputMoveEvent *move;
    InputBtnEvent *btn;

@@ -121,24 +126,24 @@ static void msmouse_input_event(DeviceState *dev, QemuConsole *src,

+/*
+ * Input handler "sync" callback.  @dev is the MouseChardev that
+ * msmouse_chr_open() registered (cast to DeviceState *); queue the
+ * pending event and then call msmouse_chr_accept_input() on the chardev.
+ */
 static void msmouse_input_sync(DeviceState *dev)
 {
-    MouseState *mouse = (MouseState *)dev;
+    MouseChardev *mouse = MOUSE_CHARDEV(dev);
+    Chardev *chr = CHARDEV(dev);

    msmouse_queue_event(mouse);
-    msmouse_chr_accept_input(mouse->chr);
+    msmouse_chr_accept_input(chr);
 }

-static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
+/* chr_write hook: discards the data and reports all @len bytes as written. */
+static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
 {
    /* Ignore writes to mouse port */
    return len;
 }

-static void msmouse_chr_free(struct CharDriverState *chr)
+/*
+ * Instance finalizer: drops the input handler registered by
+ * msmouse_chr_open().  mouse->hs is only assigned there, so it is still
+ * NULL when the object is destroyed before the open callback has run;
+ * skip the unregister call in that case instead of passing NULL.
+ */
+static void char_msmouse_finalize(Object *obj)
 {
-    MouseState *mouse = chr->opaque;
+    MouseChardev *mouse = MOUSE_CHARDEV(obj);

-    qemu_input_handler_unregister(mouse->hs);
-    g_free(mouse);
+    if (mouse->hs) {
+        qemu_input_handler_unregister(mouse->hs);
+    }
 }

 static QemuInputHandler msmouse_handler = {
@@ -148,39 +153,38 @@ static QemuInputHandler msmouse_handler = {
    .sync  = msmouse_input_sync,
 };

-static CharDriverState *qemu_chr_open_msmouse(const char *id,
-                                              ChardevBackend *backend,
-                                              ChardevReturn *ret,
-                                              bool *be_opened,
-                                              Error **errp)
+/*
+ * ChardevClass "open" hook: sets *be_opened to false and registers the
+ * device as an input handler using msmouse_handler, keeping the
+ * returned handle in mouse->hs.  @backend and @errp are not used.
+ */
+static void msmouse_chr_open(Chardev *chr,
+                             ChardevBackend *backend,
+                             bool *be_opened,
+                             Error **errp)
 {
-    ChardevCommon *common = backend->u.msmouse.data;
-    MouseState *mouse;
-    CharDriverState *chr;
+    MouseChardev *mouse = MOUSE_CHARDEV(chr);

-    chr = qemu_chr_alloc(common, errp);
-    if (!chr) {
-        return NULL;
-    }
-    chr->chr_write = msmouse_chr_write;
-    chr->chr_free = msmouse_chr_free;
-    chr->chr_accept_input = msmouse_chr_accept_input;
    *be_opened = false;
-
-    mouse = g_new0(MouseState, 1);
    mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
                                            &msmouse_handler);
-
-    mouse->chr = chr;
-    chr->opaque = mouse;
-
-    return chr;
 }

+/* Class initializer: installs the open, chr_write and chr_accept_input hooks. */
+static void char_msmouse_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = msmouse_chr_open;
+    cc->chr_write = msmouse_chr_write;
+    cc->chr_accept_input = msmouse_chr_accept_input;
+}
+
+/* Type description for TYPE_CHARDEV_MSMOUSE, a TYPE_CHARDEV subtype. */
+static const TypeInfo char_msmouse_type_info = {
+    .name = TYPE_CHARDEV_MSMOUSE,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(MouseChardev),
+    .instance_finalize = char_msmouse_finalize,
+    .class_init = char_msmouse_class_init,
+};
+
+/* Register the msmouse chardev type; hooked up through type_init() below. */
 static void register_types(void)
 {
-    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
-                         qemu_chr_open_msmouse);
+    type_register_static(&char_msmouse_type_info);
 }

 type_init(register_types);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -86,7 +86,7 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);
-    CharDriverState *chr;
+    Chardev *chr;

    if (s->chr_name == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
@@ -125,7 +125,7 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
 static char *rng_egd_get_chardev(Object *obj, Error **errp)
 {
    RngEgd *s = RNG_EGD(obj);
-    CharDriverState *chr = qemu_chr_fe_get_driver(&s->chr);
+    Chardev *chr = qemu_chr_fe_get_driver(&s->chr);

    if (chr && chr->label) {
        return g_strdup(chr->label);
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -30,13 +30,18 @@
 #define BUF_SIZE 32

+/*
+ * Instance state of the testdev character device: the embedded parent
+ * Chardev followed by the buffer of incoming bytes and its fill level.
+ */
 typedef struct {
-    CharDriverState *chr;
+    Chardev parent;
+
    uint8_t in_buf[32];
    int in_buf_used;
-} TestdevCharState;
+} TestdevChardev;
+
+#define TYPE_CHARDEV_TESTDEV "chardev-testdev"
+#define TESTDEV_CHARDEV(obj)                                    \
+    OBJECT_CHECK(TestdevChardev, (obj), TYPE_CHARDEV_TESTDEV)

 /* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevCharState *testdev)
+static int testdev_eat_packet(TestdevChardev *testdev)
 {
    const uint8_t *cur = testdev->in_buf;
    int len = testdev->in_buf_used;
@@ -77,9 +82,9 @@ static int testdev_eat_packet(TestdevCharState *testdev)
 }

 /* The other end is writing some data.  Store it and try to interpret */
-static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
+static int testdev_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
-    TestdevCharState *testdev = chr->opaque;
+    TestdevChardev *testdev = TESTDEV_CHARDEV(chr);
    int tocopy, eaten, orig_len = len;

    while (len) {
@@ -102,36 +107,23 @@ static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
    return orig_len;
 }

-static void testdev_free(struct CharDriverState *chr)
+/* Class initializer: installs testdev_chr_write() as the chr_write hook. */
+static void char_testdev_class_init(ObjectClass *oc, void *data)
 {
-    TestdevCharState *testdev = chr->opaque;
+    ChardevClass *cc = CHARDEV_CLASS(oc);

-    g_free(testdev);
+    cc->chr_write = testdev_chr_write;
 }

-static CharDriverState *chr_testdev_init(const char *id,
-                                         ChardevBackend *backend,
-                                         ChardevReturn *ret,
-                                         bool *be_opened,
-                                         Error **errp)
-{
-    TestdevCharState *testdev;
-    CharDriverState *chr;
-
-    testdev = g_new0(TestdevCharState, 1);
-    testdev->chr = chr = g_new0(CharDriverState, 1);
-
-    chr->opaque = testdev;
-    chr->chr_write = testdev_write;
-    chr->chr_free = testdev_free;
-
-    return chr;
-}
+/* Type description for TYPE_CHARDEV_TESTDEV, a TYPE_CHARDEV subtype. */
+static const TypeInfo char_testdev_type_info = {
+    .name = TYPE_CHARDEV_TESTDEV,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(TestdevChardev),
+    .class_init = char_testdev_class_init,
+};

+/* Register the testdev chardev type; hooked up through type_init() below. */
 static void register_types(void)
 {
-    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
-                         chr_testdev_init);
+    type_register_static(&char_testdev_type_info);
 }

 type_init(register_types);
--- a/balloon.c
+++ b/balloon.c
@@ -29,7 +29,7 @@
 #include "exec/cpu-common.h"
 #include "sysemu/kvm.h"
 #include "sysemu/balloon.h"
-#include "trace.h"
+#include "trace-root.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
--- a/block.c
+++ b/block.c
@@ -22,7 +22,7 @@
 * THE SOFTWARE.
 */
 #include "qemu/osdep.h"
-#include "trace.h"
+#include "block/trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
 #include "block/nbd.h"
@@ -1428,9 +1428,11 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(backing_hd, bs->backing_blocker);
-    /* Otherwise we won't be able to commit due to check in bdrv_commit */
+    /* Otherwise we won't be able to commit or stream */
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+                    bs->backing_blocker);
    /*
     * We do backup in 3 ways:
     * 1. drive backup
@@ -1849,7 +1851,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
    bdrv_refresh_filename(bs);

    /* Check if any unknown options were used */
-    if (options && (qdict_size(options) != 0)) {
+    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
@@ -2082,7 +2084,7 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
 * to all devices.
 *
 */
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
 {
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
@@ -2090,7 +2092,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)

    assert(bs_queue != NULL);

-    bdrv_drain_all();
+    aio_context_release(ctx);
+    bdrv_drain_all_begin();
+    aio_context_acquire(ctx);

    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
@@ -2120,6 +2124,9 @@ cleanup:
        g_free(bs_entry);
    }
    g_free(bs_queue);
+
+    bdrv_drain_all_end();
+
    return ret;
 }

@@ -2131,7 +2138,7 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);

-    ret = bdrv_reopen_multiple(queue, &local_err);
+    ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
@@ -2789,7 +2796,7 @@ const char *bdrv_get_format_name(BlockDriverState *bs)

+/*
+ * qsort() comparator for an array of C strings.  qsort() hands the
+ * comparator pointers to the array elements, and the elements are
+ * themselves string pointers, so each argument is dereferenced once
+ * before being compared with strcmp().
+ */
 static int qsort_strcmp(const void *a, const void *b)
 {
-    return strcmp(a, b);
+    return strcmp(*(char *const *)a, *(char *const *)b);
 }

 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
@@ -2815,6 +2822,24 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
        }
    }

+    for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
+        const char *format_name = block_driver_modules[i].format_name;
+
+        if (format_name) {
+            bool found = false;
+            int j = count;
+
+            while (formats && j && !found) {
+                found = !strcmp(formats[--j], format_name);
+            }
+
+            if (!found) {
+                formats = g_renew(const char *, formats, count + 1);
+                formats[count++] = format_name;
+            }
+        }
+    }
+
    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);

    for (i = 0; i < count; i++) {
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -6,14 +6,15 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += file-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o

 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
 block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
--- a/block/backup.c
+++ b/block/backup.c
@@ -16,7 +16,7 @@
 #include "trace.h"
 #include "block/block.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_backup.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
@@ -242,6 +242,14 @@ static void backup_abort(BlockJob *job)
    }
 }

+/*
+ * "clean" callback of the backup job driver: drops the job's reference
+ * to the target BlockBackend and clears the pointer.  Asserts that a
+ * target is still set on entry.
+ */
+static void backup_clean(BlockJob *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    assert(s->target);
+    blk_unref(s->target);
+    s->target = NULL;
+}
+
 static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
@@ -300,14 +308,20 @@ void backup_cow_request_end(CowRequest *req)
    cow_request_end(req);
 }

-static const BlockJobDriver backup_job_driver = {
-    .instance_size          = sizeof(BackupBlockJob),
-    .job_type               = BLOCK_JOB_TYPE_BACKUP,
-    .set_speed              = backup_set_speed,
-    .commit                 = backup_commit,
-    .abort                  = backup_abort,
-    .attached_aio_context   = backup_attached_aio_context,
-};
+/*
+ * "drain" callback of the backup job driver: drains the target
+ * BlockBackend, if the job still has one.
+ */
+static void backup_drain(BlockJob *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    BlockBackend *target = s->target;
+
+    if (!target) {
+        return;
+    }
+
+    /* Hold our own reference across the drain: blk_drain() may trigger
+     * execution of backup_complete...
+     */
+    blk_ref(target);
+    blk_drain(target);
+    blk_unref(target);
+}

 static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
@@ -327,11 +341,8 @@ typedef struct {

+/*
+ * Deferred completion callback scheduled by backup_run(): reports the
+ * saved return code through block_job_completed() and frees the
+ * BackupCompleteData passed as @opaque.
+ */
 static void backup_complete(BlockJob *job, void *opaque)
 {
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
    BackupCompleteData *data = opaque;

-    blk_unref(s->target);
-
    block_job_completed(job, data->ret);
    g_free(data);
 }
@@ -429,7 +440,6 @@ static void coroutine_fn backup_run(void *opaque)
    BackupBlockJob *job = opaque;
    BackupCompleteData *data;
    BlockDriverState *bs = blk_bs(job->common.blk);
-    BlockBackend *target = job->target;
    int64_t start, end;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;
@@ -516,19 +526,30 @@ static void coroutine_fn backup_run(void *opaque)
    qemu_co_rwlock_unlock(&job->flush_rwlock);
    g_free(job->done_bitmap);

-    bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
-
    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&job->common, backup_complete, data);
 }

-void backup_start(const char *job_id, BlockDriverState *bs,
+/*
+ * Callback table for backup jobs, handed to block_job_create() by
+ * backup_job_create().  It is defined after backup_run() because it
+ * names that function as its .start entry.
+ */
+static const BlockJobDriver backup_job_driver = {
+    .instance_size          = sizeof(BackupBlockJob),
+    .job_type               = BLOCK_JOB_TYPE_BACKUP,
+    .start                  = backup_run,
+    .set_speed              = backup_set_speed,
+    .commit                 = backup_commit,
+    .abort                  = backup_abort,
+    .clean                  = backup_clean,
+    .attached_aio_context   = backup_attached_aio_context,
+    .drain                  = backup_drain,
+};
+
+BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
                  bool compress,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
+                  int creation_flags,
                  BlockCompletionFunc *cb, void *opaque,
                  BlockJobTxn *txn, Error **errp)
 {
@@ -542,52 +563,52 @@ void backup_start(const char *job_id, BlockDriverState *bs,

    if (bs == target) {
        error_setg(errp, "Source and target cannot be the same");
-        return;
+        return NULL;
    }

    if (!bdrv_is_inserted(bs)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(bs));
-        return;
+        return NULL;
    }

    if (!bdrv_is_inserted(target)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(target));
-        return;
+        return NULL;
    }

    if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
        error_setg(errp, "Compression is not supported for this drive %s",
                   bdrv_get_device_name(target));
-        return;
+        return NULL;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return;
+        return NULL;
    }

    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
-        return;
+        return NULL;
    }

    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        if (!sync_bitmap) {
            error_setg(errp, "must provide a valid bitmap name for "
                             "\"incremental\" sync mode");
-            return;
+            return NULL;
        }

        /* Create a new bitmap, and freeze/disable this one. */
        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
-            return;
+            return NULL;
        }
    } else if (sync_bitmap) {
        error_setg(errp,
                   "a sync_bitmap was provided to backup_run, "
                   "but received an incompatible sync_mode (%s)",
                   MirrorSyncMode_lookup[sync_mode]);
-        return;
+        return NULL;
    }

    len = bdrv_getlength(bs);
@@ -598,7 +619,7 @@ void backup_start(const char *job_id, BlockDriverState *bs,
    }

    job = block_job_create(job_id, &backup_job_driver, bs, speed,
-                           cb, opaque, errp);
+                           creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }
@@ -631,19 +652,20 @@ void backup_start(const char *job_id, BlockDriverState *bs,
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }

-    bdrv_op_block_all(target, job->common.blocker);
+    block_job_add_bdrv(&job->common, target);
    job->common.len = len;
-    job->common.co = qemu_coroutine_create(backup_run, job);
    block_job_txn_add_job(txn, &job->common);
-    qemu_coroutine_enter(job->common.co);
-    return;
+
+    return &job->common;

 error:
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
    }
    if (job) {
-        blk_unref(job->target);
+        backup_clean(&job->common);
        block_job_unref(&job->common);
    }
+
+    return NULL;
 }
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;

-static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
-};
-
 enum {
    ACTION_INJECT_ERROR,
    ACTION_SET_STATE,
@@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
            int error;
            int immediately;
            int once;
-            int64_t sector;
+            int64_t offset;
        } inject;
        struct {
            int new_state;
@@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
    const char* event_name;
    BlkdebugEvent event;
    struct BlkdebugRule *rule;
+    int64_t sector;

    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
@@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
        rule->options.inject.immediately =
            qemu_opt_get_bool(opts, "immediately", 0);
-        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+        sector = qemu_opt_get_number(opts, "sector", -1);
+        rule->options.inject.offset =
+            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
        break;

    case ACTION_SET_STATE:
@@ -408,17 +407,14 @@ out:

+/*
+ * Bottom-half callback scheduled by inject_error(): @opaque is the
+ * coroutine that yielded there; re-enter it.
+ */
 static void error_callback_bh(void *opaque)
 {
-    struct BlkdebugAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    Coroutine *co = opaque;
+    qemu_coroutine_enter(co);
 }

-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
-    struct BlkdebugAIOCB *acb;
    bool immediately = rule->options.inject.immediately;

    if (rule->options.inject.once) {
@@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
        remove_rule(rule);
    }

-    if (immediately) {
-        return NULL;
+    if (!immediately) {
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
+                                qemu_coroutine_self());
+        qemu_coroutine_yield();
    }

-    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
-    acb->ret = -error;
-
-    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
-
-    return &acb->common;
+    return -error;
 }

-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+/*
+ * Read handler.  Walks the active rules and stops at the first one
+ * whose inject offset is -1 or lies inside [offset, offset + bytes).
+ * If that rule carries a non-zero error, the result of inject_error()
+ * is returned; otherwise the read is forwarded to bs->file.
+ */
+static int coroutine_fn
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                   QEMUIOVector *qiov, int flags)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (inject_offset >= offset && inject_offset < offset + bytes))
+        {
            break;
        }
    }

    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }

-    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
-                          cb, opaque);
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }

-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+/*
+ * Write handler.  Walks the active rules and stops at the first one
+ * whose inject offset is -1 or lies inside [offset, offset + bytes).
+ * If that rule carries a non-zero error, the result of inject_error()
+ * is returned; otherwise the write is forwarded to bs->file.
+ */
+static int coroutine_fn
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (inject_offset >= offset && inject_offset < offset + bytes))
+        {
            break;
        }
    }

    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }

-    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
-                           cb, opaque);
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }

-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
+/*
+ * Flush handler.  Only rules whose inject offset is -1 apply to
+ * flushes; if the first such rule carries a non-zero error, the result
+ * of inject_error() is returned, otherwise the flush is forwarded to
+ * bs->file->bs.  Marked coroutine_fn like the read/write handlers
+ * because inject_error() may yield.
+ */
+static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;

    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
+        if (rule->options.inject.offset == -1) {
            break;
        }
    }

    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
    }

-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
+    return bdrv_co_flush(bs->file->bs);
 }


@@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
    .bdrv_refresh_limits    = blkdebug_refresh_limits,

-    .bdrv_aio_readv         = blkdebug_aio_readv,
-    .bdrv_aio_writev        = blkdebug_aio_writev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,
+    .bdrv_co_preadv         = blkdebug_co_preadv,
+    .bdrv_co_pwritev        = blkdebug_co_pwritev,
+    .bdrv_co_flush_to_disk  = blkdebug_co_flush,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,38 +19,36 @@ typedef struct {
    BdrvChild *test_file;
 } BDRVBlkverifyState;

-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
-struct BlkverifyAIOCB {
-    BlockAIOCB common;
+/*
+ * State of one blkverify request, shared between the issuing coroutine
+ * (co) and the two sub-request coroutines that run request_fn against
+ * the test image and the raw image.
+ */
+typedef struct BlkverifyRequest {
+    Coroutine *co;
+    BlockDriverState *bs;

    /* Request metadata */
    bool is_write;
-    int64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;
+    int flags;
+
+    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
+                      BdrvRequestFlags);
+
+    int ret;                    /* test image result */
+    int raw_ret;                /* raw image result */

-    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */

    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
-    void *buf;                  /* buffer for raw file I/O */
+    QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
+} BlkverifyRequest;

-    void (*verify)(BlkverifyAIOCB *acb);
-};
-
-static const AIOCBInfo blkverify_aiocb_info = {
-    .aiocb_size         = sizeof(BlkverifyAIOCB),
-};
-
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
                                             const char *fmt, ...)
 {
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->is_write ? "write" : "read", acb->sector_num,
-            acb->nb_sectors);
+    fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
+            r->is_write ? "write" : "read", r->offset, r->bytes);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
    return bdrv_getlength(s->test_file->bs);
 }

-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
-                                         int64_t sector_num, QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque)
+/*
+ * Coroutine entry point for the test-image half of a request: runs
+ * request_fn on s->test_file with the user qiov, stores the result in
+ * r->ret, bumps the completion counter and re-enters the issuing
+ * coroutine if it is inactive.
+ */
+static void coroutine_fn blkverify_do_test_req(void *opaque)
 {
-    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
+    BlkverifyRequest *r = opaque;
+    BDRVBlkverifyState *s = r->bs->opaque;

-    acb->is_write = is_write;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->ret = -EINPROGRESS;
-    acb->done = 0;
-    acb->qiov = qiov;
-    acb->buf = NULL;
-    acb->verify = NULL;
-    return acb;
+    r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
+                           r->flags);
+    r->done++;
+    qemu_coroutine_enter_if_inactive(r->co);
 }

-static void blkverify_aio_bh(void *opaque)
+/*
+ * Coroutine entry point for the raw-image half of a request: runs
+ * request_fn on r->bs->file with the raw qiov, stores the result in
+ * r->raw_ret, bumps the completion counter and re-enters the issuing
+ * coroutine if it is inactive.
+ */
+static void coroutine_fn blkverify_do_raw_req(void *opaque)
 {
-    BlkverifyAIOCB *acb = opaque;
+    BlkverifyRequest *r = opaque;

-    if (acb->buf) {
-        qemu_iovec_destroy(&acb->raw_qiov);
-        qemu_vfree(acb->buf);
+    r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
+                               r->flags);
+    r->done++;
+    qemu_coroutine_enter_if_inactive(r->co);
+}
+
+/*
+ * Common read/write path.  Fills in *r, starts one coroutine per image
+ * (test image first, then raw image) and yields until both have
+ * incremented r->done.  A difference between the two return values is
+ * reported through blkverify_err().  Returns the test image's result.
+ */
+static int coroutine_fn
+blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
+                  uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
+                  int flags, bool is_write)
+{
+    Coroutine *co_a, *co_b;
+
+    *r = (BlkverifyRequest) {
+        .co         = qemu_coroutine_self(),
+        .bs         = bs,
+        .offset     = offset,
+        .bytes      = bytes,
+        .qiov       = qiov,
+        .raw_qiov   = raw_qiov,
+        .flags      = flags,
+        .is_write   = is_write,
+        .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
+    };
+
+    co_a = qemu_coroutine_create(blkverify_do_test_req, r);
+    co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
+
+    qemu_coroutine_enter(co_a);
+    qemu_coroutine_enter(co_b);
+
+    while (r->done < 2) {
+        qemu_coroutine_yield();
    }
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}

-static void blkverify_aio_cb(void *opaque, int ret)
-{
-    BlkverifyAIOCB *acb = opaque;
-
-    switch (++acb->done) {
-    case 1:
-        acb->ret = ret;
-        break;
-
-    case 2:
-        if (acb->ret != ret) {
-            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
-        }
-
-        if (acb->verify) {
-            acb->verify(acb);
-        }
-
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
-                                blkverify_aio_bh, acb);
-        break;
+    if (r->ret != r->raw_ret) {
+        blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
    }
+
+    return r->ret;
 }

-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+/*
+ * Read handler.  Reads the test image into the caller's @qiov and the
+ * raw image into a clone of it backed by a temporary buffer, then
+ * compares the two vectors and reports the first differing offset
+ * through blkverify_err().  The clone and the buffer are released
+ * before returning the result of blkverify_co_prwv().
+ */
+static int coroutine_fn
+blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
-    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
-    if (offset != -1) {
-        blkverify_err(acb, "contents mismatch in sector %" PRId64,
-                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+    BlkverifyRequest r;
+    QEMUIOVector raw_qiov;
+    void *buf;
+    ssize_t cmp_offset;
+    int ret;
+
+    buf = qemu_blockalign(bs->file->bs, qiov->size);
+    qemu_iovec_init(&raw_qiov, qiov->niov);
+    qemu_iovec_clone(&raw_qiov, qiov, buf);
+
+    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
+                            false);
+
+    cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
+    if (cmp_offset != -1) {
+        blkverify_err(&r, "contents mismatch at offset %" PRId64,
+                      offset + cmp_offset);
    }
+
+    qemu_iovec_destroy(&raw_qiov);
+    qemu_vfree(buf);
+
+    return ret;
 }

-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+/*
+ * Write handler: sends the same @qiov to both images through
+ * blkverify_co_prwv() and returns its result.
+ */
+static int coroutine_fn
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                     QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
-    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
-
-    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    return &acb->common;
+    BlkverifyRequest r;
+    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
 }

-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    return &acb->common;
-}
-
-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+/*
+ * Flush handler: flushes only the test image and returns the result of
+ * bdrv_co_flush().  Marked coroutine_fn like the read/write handlers
+ * because it calls a coroutine function.
+ */
+static int coroutine_fn blkverify_co_flush(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

    /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
+    return bdrv_co_flush(s->test_file->bs);
 }

 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_getlength                   = blkverify_getlength,
    .bdrv_refresh_filename            = blkverify_refresh_filename,

-    .bdrv_aio_readv                   = blkverify_aio_readv,
-    .bdrv_aio_writev                  = blkverify_aio_writev,
-    .bdrv_aio_flush                   = blkverify_aio_flush,
+    .bdrv_co_preadv                   = blkverify_co_preadv,
+    .bdrv_co_pwritev                  = blkverify_co_pwritev,
+    .bdrv_co_flush                    = blkverify_co_flush,

    .is_filter                        = true,
    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -799,20 +799,25 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               BdrvRequestFlags flags)
 {
    int ret;
+    BlockDriverState *bs = blk_bs(blk);

-    trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+    trace_blk_co_preadv(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

+    bdrv_inc_in_flight(bs);
+
    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

-    return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+    bdrv_dec_in_flight(bs);
+    return ret;
 }

 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
@@ -820,14 +825,17 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                BdrvRequestFlags flags)
 {
    int ret;
+    BlockDriverState *bs = blk_bs(blk);

-    trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

+    bdrv_inc_in_flight(bs);
+
    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
@@ -837,7 +845,9 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
        flags |= BDRV_REQ_FUA;
    }

-    return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+    bdrv_dec_in_flight(bs);
+    return ret;
 }

 typedef struct BlkRwCo {
@@ -868,7 +878,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
 {
-    AioContext *aio_context;
    QEMUIOVector qiov;
    struct iovec iov;
    Coroutine *co;
@@ -890,11 +899,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,

    co = qemu_coroutine_create(co_entry, &rwco);
    qemu_coroutine_enter(co);
-
-    aio_context = blk_get_aio_context(blk);
-    while (rwco.ret == NOT_DONE) {
-        aio_poll(aio_context, true);
-    }
+    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);

    return rwco.ret;
 }
@@ -930,6 +935,8 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
 static void error_callback_bh(void *opaque)
 {
    struct BlockBackendAIOCB *acb = opaque;
+
+    bdrv_dec_in_flight(acb->common.bs);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
 }
@@ -940,6 +947,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
 {
    struct BlockBackendAIOCB *acb;

+    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;
@@ -962,6 +970,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
 {
    if (acb->has_returned) {
+        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
@@ -983,6 +992,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
    BlkAioEmAIOCB *acb;
    Coroutine *co;

+    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
@@ -1099,26 +1109,36 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                        blk_aio_write_entry, flags, cb, opaque);
 }

+static void blk_aio_flush_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+    /* Entry point that blk_aio_flush() hands to blk_aio_prwv(). */
+    rwco->ret = blk_co_flush(rwco->blk);    /* result is later passed to the callback */
+    blk_aio_complete(acb);      /* only fires the callback once has_returned is set */
+}
+
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
 {
-    if (!blk_is_available(blk)) {
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
-    }
+    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+}

-    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
+static void blk_aio_pdiscard_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+    blk_aio_complete(acb);
 }

 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
 {
-    int ret = blk_check_byte_request(blk, offset, count);
-    if (ret < 0) {
-        return blk_abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
+    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
+                        cb, opaque);
 }

 void blk_aio_cancel(BlockAIOCB *acb)
@@ -1131,23 +1151,50 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
    bdrv_aio_cancel_async(acb);
 }

-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

-    return bdrv_ioctl(blk_bs(blk), req, buf);
+    return bdrv_co_ioctl(blk_bs(blk), req, buf);
+}
+
+static void blk_ioctl_entry(void *opaque)
+{   /* Entry point that blk_ioctl() hands to blk_prw(). */
+    BlkRwCo *rwco = opaque;
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,   /* offset field is used as the request code */
+                             rwco->qiov->iov[0].iov_base);  /* first iovec base is used as the ioctl buffer */
+}
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+{   /* Synchronous ioctl: req is passed in blk_prw()'s offset slot, buf in its buffer slot, bytes = 0. */
+    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+}
+
+static void blk_aio_ioctl_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
+                             rwco->qiov->iov[0].iov_base);
+    blk_aio_complete(acb);
 }

 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
 {
-    if (!blk_is_available(blk)) {
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
-    }
+    QEMUIOVector qiov;
+    struct iovec iov;

-    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
+    iov = (struct iovec) {
+        .iov_base = buf,
+        .iov_len = 0,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
 }

 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
@@ -1169,13 +1216,15 @@ int blk_co_flush(BlockBackend *blk)
    return bdrv_co_flush(blk_bs(blk));
 }

+static void blk_flush_entry(void *opaque)
+{   /* Entry point that blk_flush() hands to blk_prw(). */
+    BlkRwCo *rwco = opaque;
+    rwco->ret = blk_co_flush(rwco->blk);    /* blk_prw() returns this value to the caller */
+}
+
 int blk_flush(BlockBackend *blk)
 {
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_flush(blk_bs(blk));
+    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
 }

 void blk_drain(BlockBackend *blk)
@@ -1344,13 +1393,14 @@ void blk_eject(BlockBackend *blk, bool eject_flag)

    if (bs) {
        bdrv_eject(bs, eject_flag);
-
-        id = blk_get_attached_dev_id(blk);
-        qapi_event_send_device_tray_moved(blk_name(blk), id,
-                                          eject_flag, &error_abort);
-        g_free(id);
-
    }
+
+    /* Whether or not we ejected on the backend,
+     * the frontend experienced a tray event. */
+    id = blk_get_attached_dev_id(blk);
+    qapi_event_send_device_tray_moved(blk_name(blk), id,
+                                      eject_flag, &error_abort);
+    g_free(id);
 }

 int blk_get_flags(BlockBackend *blk)
@@ -1555,14 +1605,15 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
    return bdrv_truncate(blk_bs(blk), offset);
 }

+static void blk_pdiscard_entry(void *opaque)
+{   /* Entry point that blk_pdiscard() hands to blk_prw(). */
+    BlkRwCo *rwco = opaque; /* length is read from qiov->size; presumably set from blk_prw()'s bytes -- confirm */
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+}
+
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
 {
-    int ret = blk_check_byte_request(blk, offset, count);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_pdiscard(blk_bs(blk), offset, count);
+    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
 }

 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
--- a/block/commit.c
+++ b/block/commit.c
@@ -15,7 +15,7 @@
 #include "qemu/osdep.h"
 #include "trace.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -205,17 +205,19 @@ static const BlockJobDriver commit_job_driver = {
    .instance_size = sizeof(CommitBlockJob),
    .job_type      = BLOCK_JOB_TYPE_COMMIT,
    .set_speed     = commit_set_speed,
+    .start         = commit_run,
 };

 void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockCompletionFunc *cb,
-                  void *opaque, const char *backing_file_str, Error **errp)
+                  BlockdevOnError on_error, const char *backing_file_str,
+                  Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
    int orig_overlay_flags;
    int orig_base_flags;
+    BlockDriverState *iter;
    BlockDriverState *overlay_bs;
    Error *local_err = NULL;

@@ -233,7 +235,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }

    s = block_job_create(job_id, &commit_job_driver, bs, speed,
-                         cb, opaque, errp);
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -251,7 +253,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
                                         orig_overlay_flags | BDRV_O_RDWR);
    }
    if (reopen_queue) {
-        bdrv_reopen_multiple(reopen_queue, &local_err);
+        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
        if (local_err != NULL) {
            error_propagate(errp, local_err);
            block_job_unref(&s->common);
@@ -260,6 +262,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }


+    /* Block all nodes between top and base, because they will
+     * disappear from the chain after this operation. */
+    assert(bdrv_chain_contains(top, base));
+    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
+        block_job_add_bdrv(&s->common, iter);
+    }
+    /* overlay_bs must be blocked because it needs to be modified to
+     * update the backing image string, but if it's the root node then
+     * don't block it again */
+    if (bs != overlay_bs) {
+        block_job_add_bdrv(&s->common, overlay_bs);
+    }
+
    s->base = blk_new();
    blk_insert_bs(s->base, base);

@@ -274,10 +289,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    s->backing_file_str = g_strdup(backing_file_str);

    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(commit_run, s);

-    trace_commit_start(bs, base, top, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co);
+    trace_commit_start(bs, base, top, s);
+    block_job_start(&s->common);
 }


--- a/block/curl.c
+++ b/block/curl.c
@@ -68,12 +68,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #endif

 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
-                   CURLPROTO_FTP | CURLPROTO_FTPS | \
-                   CURLPROTO_TFTP)
+                   CURLPROTO_FTP | CURLPROTO_FTPS)

 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
-#define SECTOR_SIZE     512
 #define READ_AHEAD_DEFAULT (256 * 1024)
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000
@@ -105,12 +103,17 @@ typedef struct CURLAIOCB {
    size_t end;
 } CURLAIOCB;

+typedef struct CURLSocket {     /* one socket tracked for a CURLState */
+    int fd;                     /* descriptor passed to curl_sock_cb() */
+    QLIST_ENTRY(CURLSocket) next;   /* link in CURLState.sockets */
+} CURLSocket;
+
 typedef struct CURLState
 {
    struct BDRVCURLState *s;
    CURLAIOCB *acb[CURL_NUM_ACB];
    CURL *curl;
-    curl_socket_t sock_fd;
+    QLIST_HEAD(, CURLSocket) sockets;
    char *orig_buf;
    size_t buf_start;
    size_t buf_off;
@@ -164,27 +167,44 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
 {
    BDRVCURLState *s;
    CURLState *state = NULL;
+    CURLSocket *socket;
+
    curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
-    state->sock_fd = fd;
    s = state->s;

+    QLIST_FOREACH(socket, &state->sockets, next) {
+        if (socket->fd == fd) {
+            if (action == CURL_POLL_REMOVE) {
+                QLIST_REMOVE(socket, next);
+                g_free(socket);
+            }
+            break;
+        }
+    }
+    if (!socket) {
+        socket = g_new0(CURLSocket, 1);
+        socket->fd = fd;
+        QLIST_INSERT_HEAD(&state->sockets, socket, next);
+    }
+    socket = NULL;
+
    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, state);
+                               curl_multi_read, NULL, NULL, state);
            break;
        case CURL_POLL_OUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, state);
+                               NULL, curl_multi_do, NULL, state);
            break;
        case CURL_POLL_INOUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, state);
+                               curl_multi_read, curl_multi_do, NULL, state);
            break;
        case CURL_POLL_REMOVE:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL);
+                               NULL, NULL, NULL, NULL);
            break;
    }

@@ -213,12 +233,13 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)

    DPRINTF("CURL: Just reading %zd bytes\n", realsize);

-    if (!s || !s->orig_buf)
-        return 0;
+    if (!s || !s->orig_buf) {
+        goto read_end;
+    }

    if (s->buf_off >= s->buf_len) {
        /* buffer full, read nothing */
-        return 0;
+        goto read_end;
    }
    realsize = MIN(realsize, s->buf_len - s->buf_off);
    memcpy(s->orig_buf + s->buf_off, ptr, realsize);
@@ -231,15 +252,26 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            continue;

        if ((s->buf_off >= acb->end)) {
+            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;
+
            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
+
+            if (acb->end - acb->start < request_length) {
+                size_t offset = acb->end - acb->start;
+                qemu_iovec_memset(acb->qiov, offset, 0,
+                                  request_length - offset);
+            }
+
            acb->common.cb(acb->common.opaque, 0);
            qemu_aio_unref(acb);
            s->acb[i] = NULL;
        }
    }

-    return realsize;
+read_end:
+    /* curl will error out if we do not return this value */
+    return size * nmemb;
 }

 static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
@@ -247,6 +279,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
 {
    int i;
    size_t end = start + len;
+    size_t clamped_end = MIN(end, s->len);
+    size_t clamped_len = clamped_end - start;

    for (i=0; i<CURL_NUM_STATES; i++) {
        CURLState *state = &s->states[i];
@@ -261,12 +295,15 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        // Does the existing buffer cover our section?
        if ((start >= state->buf_start) &&
            (start <= buf_end) &&
-            (end >= state->buf_start) &&
-            (end <= buf_end))
+            (clamped_end >= state->buf_start) &&
+            (clamped_end <= buf_end))
        {
            char *buf = state->orig_buf + (start - state->buf_start);

-            qemu_iovec_from_buf(acb->qiov, 0, buf, len);
+            qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len);
+            if (clamped_len < len) {
+                qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
+            }
            acb->common.cb(acb->common.opaque, 0);

            return FIND_RET_OK;
@@ -276,13 +313,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        if (state->in_use &&
            (start >= state->buf_start) &&
            (start <= buf_fend) &&
-            (end >= state->buf_start) &&
-            (end <= buf_fend))
+            (clamped_end >= state->buf_start) &&
+            (clamped_end <= buf_fend))
        {
            int j;

            acb->start = start - state->buf_start;
-            acb->end = acb->start + len;
+            acb->end = acb->start + clamped_len;

            for (j=0; j<CURL_NUM_ACB; j++) {
                if (!state->acb[j]) {
@@ -352,6 +389,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 static void curl_multi_do(void *arg)
 {
    CURLState *s = (CURLState *)arg;
+    CURLSocket *socket, *next_socket;
    int running;
    int r;

@@ -359,10 +397,13 @@ static void curl_multi_do(void *arg)
        return;
    }

-    do {
-        r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
+    /* Need to use _SAFE because curl_multi_socket_action() may trigger
+     * curl_sock_cb() which might modify this list */
+    QLIST_FOREACH_SAFE(socket, &s->sockets, next, next_socket) {
+        do {
+            r = curl_multi_socket_action(s->s->multi, socket->fd, 0, &running);
+        } while (r == CURLM_CALL_MULTI_PERFORM);
+    }
 }

 static void curl_multi_read(void *arg)
@@ -466,6 +507,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
 #endif
    }

+    QLIST_INIT(&state->sockets);
    state->s = s;

    return state;
@@ -475,6 +517,14 @@ static void curl_clean_state(CURLState *s)
 {
    if (s->s->multi)
        curl_multi_remove_handle(s->s->multi, s->curl);
+
+    while (!QLIST_EMPTY(&s->sockets)) {
+        CURLSocket *socket = QLIST_FIRST(&s->sockets);
+
+        QLIST_REMOVE(socket, next);
+        g_free(socket);
+    }
+
    s->in_use = 0;
 }

@@ -738,12 +788,12 @@ static void curl_readv_bh_cb(void *p)
    CURLAIOCB *acb = p;
    BDRVCURLState *s = acb->common.bs->opaque;

-    size_t start = acb->sector_num * SECTOR_SIZE;
+    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
    size_t end;

    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
+    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
            qemu_aio_unref(acb);
            // fall through
@@ -762,13 +812,13 @@ static void curl_readv_bh_cb(void *p)
    }

    acb->start = 0;
-    acb->end = (acb->nb_sectors * SECTOR_SIZE);
+    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);

    state->buf_off = 0;
    g_free(state->orig_buf);
    state->buf_start = start;
-    state->buf_len = acb->end + s->readahead_size;
-    end = MIN(start + state->buf_len, s->len) - 1;
+    state->buf_len = MIN(acb->end + s->readahead_size, s->len - start);
+    end = start + state->buf_len - 1;
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
@@ -779,8 +829,8 @@ static void curl_readv_bh_cb(void *p)
    state->acb[0] = acb;

    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n",
-            (acb->nb_sectors * SECTOR_SIZE), start, state->range);
+    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
+            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);
@@ -886,29 +936,12 @@ static BlockDriver bdrv_ftps = {
    .bdrv_attach_aio_context    = curl_attach_aio_context,
 };

-static BlockDriver bdrv_tftp = {
-    .format_name                = "tftp",
-    .protocol_name              = "tftp",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
 static void curl_block_init(void)
 {
    bdrv_register(&bdrv_http);
    bdrv_register(&bdrv_https);
    bdrv_register(&bdrv_ftp);
    bdrv_register(&bdrv_ftps);
-    bdrv_register(&bdrv_tftp);
 }

 block_init(curl_block_init);
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -542,7 +542,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
 {
    BDRVRawState *s;
-    BDRVRawReopenState *raw_s;
+    BDRVRawReopenState *rs;
    int ret = 0;
    Error *local_err = NULL;

@@ -552,15 +552,15 @@ static int raw_reopen_prepare(BDRVReopenState *state,
    s = state->bs->opaque;

    state->opaque = g_new0(BDRVRawReopenState, 1);
-    raw_s = state->opaque;
+    rs = state->opaque;

    if (s->type == FTYPE_CD) {
-        raw_s->open_flags |= O_NONBLOCK;
+        rs->open_flags |= O_NONBLOCK;
    }

-    raw_parse_flags(state->flags, &raw_s->open_flags);
+    raw_parse_flags(state->flags, &rs->open_flags);

-    raw_s->fd = -1;
+    rs->fd = -1;

    int fcntl_flags = O_APPEND | O_NONBLOCK;
 #ifdef O_NOATIME
@@ -569,35 +569,35 @@ static int raw_reopen_prepare(BDRVReopenState *state,

 #ifdef O_ASYNC
    /* Not all operating systems have O_ASYNC, and those that don't
-     * will not let us track the state into raw_s->open_flags (typically
+     * will not let us track the state into rs->open_flags (typically
     * you achieve the same effect with an ioctl, for example I_SETSIG
     * on Solaris). But we do not use O_ASYNC, so that's fine.
     */
    assert((s->open_flags & O_ASYNC) == 0);
 #endif

-    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+    if ((rs->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
        /* dup the original fd */
-        raw_s->fd = qemu_dup(s->fd);
-        if (raw_s->fd >= 0) {
-            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+        rs->fd = qemu_dup(s->fd);
+        if (rs->fd >= 0) {
+            ret = fcntl_setfl(rs->fd, rs->open_flags);
            if (ret) {
-                qemu_close(raw_s->fd);
-                raw_s->fd = -1;
+                qemu_close(rs->fd);
+                rs->fd = -1;
            }
        }
    }

    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
-    if (raw_s->fd == -1) {
+    if (rs->fd == -1) {
        const char *normalized_filename = state->bs->filename;
        ret = raw_normalize_devicepath(&normalized_filename);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not normalize device path");
        } else {
-            assert(!(raw_s->open_flags & O_CREAT));
-            raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
-            if (raw_s->fd == -1) {
+            assert(!(rs->open_flags & O_CREAT));
+            rs->fd = qemu_open(normalized_filename, rs->open_flags);
+            if (rs->fd == -1) {
                error_setg_errno(errp, errno, "Could not reopen file");
                ret = -1;
            }
@@ -606,11 +606,11 @@ static int raw_reopen_prepare(BDRVReopenState *state,

    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
     * alignment with the new fd. */
-    if (raw_s->fd != -1) {
-        raw_probe_alignment(state->bs, raw_s->fd, &local_err);
+    if (rs->fd != -1) {
+        raw_probe_alignment(state->bs, rs->fd, &local_err);
        if (local_err) {
-            qemu_close(raw_s->fd);
-            raw_s->fd = -1;
+            qemu_close(rs->fd);
+            rs->fd = -1;
            error_propagate(errp, local_err);
            ret = -EINVAL;
        }
@@ -621,13 +621,13 @@ static int raw_reopen_prepare(BDRVReopenState *state,

 static void raw_reopen_commit(BDRVReopenState *state)
 {
-    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawReopenState *rs = state->opaque;
    BDRVRawState *s = state->bs->opaque;

-    s->open_flags = raw_s->open_flags;
+    s->open_flags = rs->open_flags;

    qemu_close(s->fd);
-    s->fd = raw_s->fd;
+    s->fd = rs->fd;

    g_free(state->opaque);
    state->opaque = NULL;
@@ -636,27 +636,30 @@ static void raw_reopen_commit(BDRVReopenState *state)

 static void raw_reopen_abort(BDRVReopenState *state)
 {
-    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawReopenState *rs = state->opaque;

     /* nothing to do if NULL, we didn't get far enough */
-    if (raw_s == NULL) {
+    if (rs == NULL) {
        return;
    }

-    if (raw_s->fd >= 0) {
-        qemu_close(raw_s->fd);
-        raw_s->fd = -1;
+    if (rs->fd >= 0) {
+        qemu_close(rs->fd);
+        rs->fd = -1;
    }
    g_free(state->opaque);
    state->opaque = NULL;
 }

-static int hdev_get_max_transfer_length(int fd)
+static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
 {
 #ifdef BLKSECTGET
-    int max_sectors = 0;
-    if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
-        return max_sectors;
+    int max_bytes = 0;
+    short max_sectors = 0;
+    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
+        return max_bytes;
+    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+        return max_sectors << BDRV_SECTOR_BITS;
    } else {
        return -errno;
    }
@@ -671,10 +674,10 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    struct stat st;

    if (!fstat(s->fd, &st)) {
-        if (S_ISBLK(st.st_mode)) {
-            int ret = hdev_get_max_transfer_length(s->fd);
-            if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
-                bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
+        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
+            int ret = hdev_get_max_transfer_length(bs, s->fd);
+            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
+                bs->bl.max_transfer = pow2floor(ret);
            }
        }
    }
@@ -2069,13 +2072,23 @@ static bool hdev_is_sg(BlockDriverState *bs)

 #if defined(__linux__)

+    BDRVRawState *s = bs->opaque;
    struct stat st;
    struct sg_scsi_id scsiid;
    int sg_version;
+    int ret;

-    if (stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) &&
-        !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) &&
-        !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) {
+    if (stat(bs->filename, &st) < 0 || !S_ISCHR(st.st_mode)) {
+        return false;
+    }
+
+    ret = ioctl(s->fd, SG_GET_VERSION_NUM, &sg_version);
+    if (ret < 0) {
+        return false;
+    }
+
+    ret = ioctl(s->fd, SG_GET_SCSI_ID, &scsiid);
+    if (ret >= 0) {
        DPRINTF("SG device found: type=%d, version=%d\n",
            scsiid.scsi_type, sg_version);
        return true;
--- a/block/file-win32.c
+++ b/block/file-win32.c
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -14,6 +14,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"

 #define GLUSTER_OPT_FILENAME        "filename"
 #define GLUSTER_OPT_VOLUME          "volume"
@@ -47,7 +48,7 @@ typedef struct BDRVGlusterState {
    struct glfs_fd *fd;
    char *logfile;
    bool supports_seek_data;
-    int debug_level;
+    int debug;
 } BDRVGlusterState;

 typedef struct BDRVGlusterReopenState {
@@ -56,6 +57,19 @@ typedef struct BDRVGlusterReopenState {
 } BDRVGlusterReopenState;


+typedef struct GlfsPreopened {
+    char *volume;   /* g_strdup()ed volume name; the key glfs_find_preopened() compares */
+    glfs_t *fs;     /* glfs instance cached for that volume */
+    int ref;        /* reference count; glfs_fini() is called when it drops to 0 */
+} GlfsPreopened;
+
+typedef struct ListElement {
+    QLIST_ENTRY(ListElement) list;  /* link in the file-scope glfs_list */
+    GlfsPreopened saved;            /* cached glfs_t with its volume name and refcount */
+} ListElement;
+
+static QLIST_HEAD(glfs_list, ListElement) glfs_list;
+
 static QemuOptsList qemu_gluster_create_opts = {
    .name = "qemu-gluster-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
@@ -172,7 +186,7 @@ static QemuOptsList runtime_tcp_opts = {
        },
        {
            .name = GLUSTER_OPT_PORT,
-            .type = QEMU_OPT_NUMBER,
+            .type = QEMU_OPT_STRING,
            .help = "port number on which glusterd is listening (default 24007)",
        },
        {
@@ -194,6 +208,58 @@ static QemuOptsList runtime_tcp_opts = {
    },
 };

+static void glfs_set_preopened(const char *volume, glfs_t *fs)
+{
+    ListElement *entry = NULL;
+    /* Register fs in glfs_list so glfs_find_preopened() can hand it out again. */
+    entry = g_new(ListElement, 1);
+    /* Private copy of the name; released in glfs_clear_preopened(). */
+    entry->saved.volume = g_strdup(volume);
+
+    entry->saved.fs = fs;
+    entry->saved.ref = 1;   /* the registering caller holds the first reference */
+
+    QLIST_INSERT_HEAD(&glfs_list, entry, list);
+}
+
+static glfs_t *glfs_find_preopened(const char *volume)
+{
+    ListElement *entry = NULL;
+    /* Look up a cached glfs_t by exact volume name; a hit takes a reference. */
+     QLIST_FOREACH(entry, &glfs_list, list) {
+        if (strcmp(entry->saved.volume, volume) == 0) {
+            entry->saved.ref++;     /* dropped again via glfs_clear_preopened() */
+            return entry->saved.fs;
+        }
+     }
+    /* No entry registered for this volume. */
+    return NULL;
+}
+
+static void glfs_clear_preopened(glfs_t *fs)
+{
+    ListElement *entry = NULL;
+    ListElement *next;
+    /* Drop one reference on fs; the last one unlinks and destroys the entry. */
+    if (fs == NULL) {
+        return;
+    }
+    /* _SAFE variant: the matching entry is removed and freed inside the loop. */
+    QLIST_FOREACH_SAFE(entry, &glfs_list, list, next) {
+        if (entry->saved.fs == fs) {
+            if (--entry->saved.ref) {
+                return;     /* other references remain */
+            }
+            /* Last reference gone: tear the entry down. */
+            QLIST_REMOVE(entry, list);
+
+            glfs_fini(entry->saved.fs);
+            g_free(entry->saved.volume);
+            g_free(entry);
+        }
+    }
+}
+
 static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 {
    char *p, *q;
@@ -330,22 +396,37 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    int ret;
    int old_errno;
    GlusterServerList *server;
+    unsigned long long port;
+
+    glfs = glfs_find_preopened(gconf->volume);
+    if (glfs) {
+        return glfs;
+    }

    glfs = glfs_new(gconf->volume);
    if (!glfs) {
        goto out;
    }

+    glfs_set_preopened(gconf->volume, glfs);
+
    for (server = gconf->server; server; server = server->next) {
        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
            ret = glfs_set_volfile_server(glfs,
                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.q_unix.path, 0);
        } else {
+            if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
+                port > 65535) {
+                error_setg(errp, "'%s' is not a valid port number",
+                           server->value->u.tcp.port);
+                errno = EINVAL;
+                goto out;
+            }
            ret = glfs_set_volfile_server(glfs,
                                   GlusterTransport_lookup[server->value->type],
                                   server->value->u.tcp.host,
-                                   atoi(server->value->u.tcp.port));
+                                   (int)port);
        }

        if (ret < 0) {
@@ -353,7 +434,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        }
    }

-    ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug_level);
+    ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug);
    if (ret < 0) {
        goto out;
    }
@@ -387,7 +468,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
 out:
    if (glfs) {
        old_errno = errno;
-        glfs_fini(glfs);
+        glfs_clear_preopened(glfs);
        errno = old_errno;
    }
    return NULL;
@@ -668,7 +749,10 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
 */
 static bool qemu_gluster_test_seek(struct glfs_fd *fd)
 {
-    off_t ret, eof;
+    off_t ret = 0;
+
+#if defined SEEK_HOLE && defined SEEK_DATA
+    off_t eof;

    eof = glfs_lseek(fd, 0, SEEK_END);
    if (eof < 0) {
@@ -678,6 +762,8 @@ static bool qemu_gluster_test_seek(struct glfs_fd *fd)

    /* this should always fail with ENXIO if SEEK_DATA is supported */
    ret = glfs_lseek(fd, eof, SEEK_DATA);
+#endif
+
    return (ret < 0) && (errno == ENXIO);
 }

@@ -702,17 +788,17 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,

    filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);

-    s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
-                                         GLUSTER_DEBUG_DEFAULT);
-    if (s->debug_level < 0) {
-        s->debug_level = 0;
-    } else if (s->debug_level > GLUSTER_DEBUG_MAX) {
-        s->debug_level = GLUSTER_DEBUG_MAX;
+    s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
+                                   GLUSTER_DEBUG_DEFAULT);
+    if (s->debug < 0) {
+        s->debug = 0;
+    } else if (s->debug > GLUSTER_DEBUG_MAX) {
+        s->debug = GLUSTER_DEBUG_MAX;
    }

    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = s->debug_level;
-    gconf->has_debug_level = true;
+    gconf->debug = s->debug;
+    gconf->has_debug = true;

    logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE);
    s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT);
@@ -762,9 +848,9 @@ out:
    if (s->fd) {
        glfs_close(s->fd);
    }
-    if (s->glfs) {
-        glfs_fini(s->glfs);
-    }
+
+    glfs_clear_preopened(s->glfs);
+
    return ret;
 }

@@ -788,8 +874,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
    qemu_gluster_parse_flags(state->flags, &open_flags);

    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = s->debug_level;
-    gconf->has_debug_level = true;
+    gconf->debug = s->debug;
+    gconf->has_debug = true;
    gconf->logfile = g_strdup(s->logfile);
    gconf->has_logfile = true;
    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
@@ -831,9 +917,8 @@ static void qemu_gluster_reopen_commit(BDRVReopenState *state)
    if (s->fd) {
        glfs_close(s->fd);
    }
-    if (s->glfs) {
-        glfs_fini(s->glfs);
-    }
+
+    glfs_clear_preopened(s->glfs);

    /* use the newly opened image / connection */
    s->fd         = reop_s->fd;
@@ -858,9 +943,7 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
        glfs_close(reop_s->fd);
    }

-    if (reop_s->glfs) {
-        glfs_fini(reop_s->glfs);
-    }
+    glfs_clear_preopened(reop_s->glfs);

    g_free(state->opaque);
    state->opaque = NULL;
@@ -928,14 +1011,14 @@ static int qemu_gluster_create(const char *filename,
    char *tmp = NULL;

    gconf = g_new0(BlockdevOptionsGluster, 1);
-    gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
-                                                 GLUSTER_DEBUG_DEFAULT);
-    if (gconf->debug_level < 0) {
-        gconf->debug_level = 0;
-    } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
-        gconf->debug_level = GLUSTER_DEBUG_MAX;
+    gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
+                                           GLUSTER_DEBUG_DEFAULT);
+    if (gconf->debug < 0) {
+        gconf->debug = 0;
+    } else if (gconf->debug > GLUSTER_DEBUG_MAX) {
+        gconf->debug = GLUSTER_DEBUG_MAX;
    }
-    gconf->has_debug_level = true;
+    gconf->has_debug = true;

    gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE);
    if (!gconf->logfile) {
@@ -984,9 +1067,7 @@ static int qemu_gluster_create(const char *filename,
 out:
    g_free(tmp);
    qapi_free_BlockdevOptionsGluster(gconf);
-    if (glfs) {
-        glfs_fini(glfs);
-    }
+    glfs_clear_preopened(glfs);
    return ret;
 }

@@ -1059,7 +1140,7 @@ static void qemu_gluster_close(BlockDriverState *bs)
        glfs_close(s->fd);
        s->fd = NULL;
    }
-    glfs_fini(s->glfs);
+    glfs_clear_preopened(s->glfs);
 }

 static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
@@ -1172,18 +1253,20 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
 * If we can't find out, return a negative errno other than -ENXIO.
 *
- * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from file-posix.c, only minuscule adaptations.)
 */
 static int find_allocation(BlockDriverState *bs, off_t start,
                           off_t *data, off_t *hole)
 {
    BDRVGlusterState *s = bs->opaque;
-    off_t offs;

    if (!s->supports_seek_data) {
-        return -ENOTSUP;
+        goto exit;
    }

+#if defined SEEK_HOLE && defined SEEK_DATA
+    off_t offs;
+
    /*
     * SEEK_DATA cases:
     * D1. offs == start: start is in data
@@ -1247,6 +1330,10 @@ static int find_allocation(BlockDriverState *bs, off_t start,

    /* D1 and H1 */
    return -EBUSY;
+#endif
+
+exit:
+    return -ENOTSUP;
 }

 /*
@@ -1262,7 +1349,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
- * (Based on raw_co_get_block_status() from raw-posix.c.)
+ * (Based on raw_co_get_block_status() from file-posix.c.)
 */
 static int64_t coroutine_fn qemu_gluster_co_get_block_status(
        BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
--- a/block/io.c
+++ b/block/io.c
@@ -143,7 +143,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 {
    BdrvChild *child;

-    if (!QLIST_EMPTY(&bs->tracked_requests)) {
+    if (atomic_read(&bs->in_flight)) {
        return true;
    }

@@ -156,16 +156,22 @@ bool bdrv_requests_pending(BlockDriverState *bs)
    return false;
 }

-static void bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
    BdrvChild *child;
+    bool waited;
+
+    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

    if (bs->drv && bs->drv->bdrv_drain) {
        bs->drv->bdrv_drain(bs);
    }
+
    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_drain_recurse(child->bs);
+        waited |= bdrv_drain_recurse(child->bs);
    }
+
+    return waited;
 }

 typedef struct {
@@ -174,23 +180,14 @@ typedef struct {
    bool done;
 } BdrvCoDrainData;

-static void bdrv_drain_poll(BlockDriverState *bs)
-{
-    bool busy = true;
-
-    while (busy) {
-        /* Keep iterating */
-        busy = bdrv_requests_pending(bs);
-        busy |= aio_poll(bdrv_get_aio_context(bs), busy);
-    }
-}
-
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
    BdrvCoDrainData *data = opaque;
    Coroutine *co = data->co;
+    BlockDriverState *bs = data->bs;

-    bdrv_drain_poll(data->bs);
+    bdrv_dec_in_flight(bs);
+    bdrv_drained_begin(bs);
    data->done = true;
    qemu_coroutine_enter(co);
 }
@@ -209,6 +206,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
        .bs = bs,
        .done = false,
    };
+    bdrv_inc_in_flight(bs);
    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
                            bdrv_co_drain_bh_cb, &data);

@@ -220,19 +218,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)

 void bdrv_drained_begin(BlockDriverState *bs)
 {
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(bs);
+        return;
+    }
+
    if (!bs->quiesce_counter++) {
        aio_disable_external(bdrv_get_aio_context(bs));
        bdrv_parent_drained_begin(bs);
    }

-    bdrv_io_unplugged_begin(bs);
    bdrv_drain_recurse(bs);
-    if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs);
-    } else {
-        bdrv_drain_poll(bs);
-    }
-    bdrv_io_unplugged_end(bs);
 }

 void bdrv_drained_end(BlockDriverState *bs)
@@ -275,11 +271,17 @@ void bdrv_drain(BlockDriverState *bs)
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
+ *
+ * This pauses all block jobs and disables external clients. It must
+ * be paired with bdrv_drain_all_end().
+ *
+ * NOTE: no new block jobs or BlockDriverStates can be created between
+ * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
 */
-void bdrv_drain_all(void)
+void bdrv_drain_all_begin(void)
 {
    /* Always run first iteration so any pending completion BHs run */
-    bool busy = true;
+    bool waited = true;
    BlockDriverState *bs;
    BdrvNextIterator it;
    BlockJob *job = NULL;
@@ -298,8 +300,7 @@ void bdrv_drain_all(void)

        aio_context_acquire(aio_context);
        bdrv_parent_drained_begin(bs);
-        bdrv_io_unplugged_begin(bs);
-        bdrv_drain_recurse(bs);
+        aio_disable_external(aio_context);
        aio_context_release(aio_context);

        if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -313,8 +314,8 @@ void bdrv_drain_all(void)
     * request completion.  Therefore we must keep looping until there was no
     * more activity rather than simply draining each device independently.
     */
-    while (busy) {
-        busy = false;
+    while (waited) {
+        waited = false;

        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
            AioContext *aio_context = ctx->data;
@@ -322,28 +323,31 @@ void bdrv_drain_all(void)
            aio_context_acquire(aio_context);
            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                if (aio_context == bdrv_get_aio_context(bs)) {
-                    if (bdrv_requests_pending(bs)) {
-                        busy = true;
-                        aio_poll(aio_context, busy);
-                    }
+                    waited |= bdrv_drain_recurse(bs);
                }
            }
-            busy |= aio_poll(aio_context, false);
            aio_context_release(aio_context);
        }
    }

+    g_slist_free(aio_ctxs);
+}
+
+void bdrv_drain_all_end(void)
+{
+    BlockDriverState *bs;
+    BdrvNextIterator it;
+    BlockJob *job = NULL;
+
    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
-        bdrv_io_unplugged_end(bs);
+        aio_enable_external(aio_context);
        bdrv_parent_drained_end(bs);
        aio_context_release(aio_context);
    }
-    g_slist_free(aio_ctxs);

-    job = NULL;
    while ((job = block_job_next(job))) {
        AioContext *aio_context = blk_get_aio_context(job->blk);

@@ -353,6 +357,12 @@ void bdrv_drain_all(void)
    }
 }

+/*
+ * Run bdrv_drain_all_begin() immediately followed by bdrv_drain_all_end().
+ */
+void bdrv_drain_all(void)
+{
+    bdrv_drain_all_begin();
+    bdrv_drain_all_end();
+}
+
 /**
 * Remove an active request from the tracked requests list
 *
@@ -476,6 +486,28 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
    return true;
 }

+/*
+ * Atomically increment @bs's in_flight counter.  Each call is expected to
+ * be balanced by a later bdrv_dec_in_flight() on the same node.
+ */
+void bdrv_inc_in_flight(BlockDriverState *bs)
+{
+    atomic_inc(&bs->in_flight);
+}
+
+/* Bottom half that deliberately does nothing; scheduled by bdrv_wakeup(). */
+static void dummy_bh_cb(void *opaque)
+{
+}
+
+/*
+ * If @bs->wakeup is set, schedule a one-shot no-op bottom half in the
+ * AioContext returned by qemu_get_aio_context().
+ *
+ * NOTE(review): presumably the empty bottom half exists only to make that
+ * context's event loop run another iteration so a waiter re-checks its
+ * condition -- confirm against the code that sets bs->wakeup.
+ */
+void bdrv_wakeup(BlockDriverState *bs)
+{
+    if (bs->wakeup) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
+    }
+}
+
+/*
+ * Atomically decrement @bs's in_flight counter, then call bdrv_wakeup(bs).
+ * Counterpart of bdrv_inc_in_flight().
+ */
+void bdrv_dec_in_flight(BlockDriverState *bs)
+{
+    atomic_dec(&bs->in_flight);
+    /* Issued after the decrement so the updated counter is what gets seen. */
+    bdrv_wakeup(bs);
+}
+
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 {
    BlockDriverState *bs = self->bs;
@@ -583,13 +615,9 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
-        AioContext *aio_context = bdrv_get_aio_context(child->bs);
-
        co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
        qemu_coroutine_enter(co);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
    }
    return rwco.ret;
 }
@@ -1097,6 +1125,8 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
        return ret;
    }

+    bdrv_inc_in_flight(bs);
+
    /* Don't do copy-on-read if we read data before write operation */
    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
        flags |= BDRV_REQ_COPY_ON_READ;
@@ -1132,6 +1162,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
+    bdrv_dec_in_flight(bs);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
@@ -1179,6 +1210,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
+    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+                                    MAX_WRITE_ZEROES_BOUNCE_BUFFER);

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
@@ -1194,9 +1227,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
         * boundaries.
         */
        if (head) {
-            /* Make a small request up to the first aligned sector.  */
-            num = MIN(count, alignment - head);
-            head = 0;
+            /* Make a small request up to the first aligned sector. For
+             * convenience, limit this request to max_transfer even if
+             * we don't need to fall back to writes.  */
+            num = MIN(MIN(count, max_transfer), alignment - head);
+            head = (head + num) % alignment;
+            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
            /* Shorten the request to the last aligned sector.  */
            num -= tail;
@@ -1222,8 +1258,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
-            int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
-                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;

            if ((flags & BDRV_REQ_FUA) &&
@@ -1480,6 +1514,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        return ret;
    }

+    bdrv_inc_in_flight(bs);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
@@ -1581,6 +1616,7 @@ fail:
    qemu_vfree(tail_buf);
 out:
    tracked_request_end(&req);
+    bdrv_dec_in_flight(bs);
    return ret;
 }

@@ -1705,17 +1741,19 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    }

    *file = NULL;
+    bdrv_inc_in_flight(bs);
    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
                                            file);
    if (ret < 0) {
        *pnum = 0;
-        return ret;
+        goto out;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
-                                     *pnum, pnum, file);
+        ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+                                    *pnum, pnum, file);
+        goto out;
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
@@ -1757,6 +1795,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
        }
    }

+out:
+    bdrv_dec_in_flight(bs);
    return ret;
 }

@@ -1822,14 +1862,10 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_above_co_entry(&data);
    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
                                   &data);
        qemu_coroutine_enter(co);
-        while (!data.done) {
-            aio_poll(aio_context, true);
-        }
+        BDRV_POLL_WHILE(bs, !data.done);
    }
    return data.ret;
 }
@@ -2102,6 +2138,7 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = {
 static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
 {
    if (!acb->need_bh) {
+        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->req.error);
        qemu_aio_unref(acb);
    }
@@ -2152,6 +2189,9 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

+    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
+    bdrv_inc_in_flight(child->bs);
+
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
    acb->child = child;
    acb->need_bh = true;
@@ -2185,6 +2225,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

+    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
+    bdrv_inc_in_flight(bs);
+
    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;
@@ -2196,35 +2239,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    return &acb->common;
 }

-static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-
-    acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes);
-    bdrv_co_complete(acb);
-}
-
-BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
-                              BlockCompletionFunc *cb, void *opaque)
-{
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->need_bh = true;
-    acb->req.error = -EINPROGRESS;
-    acb->req.offset = offset;
-    acb->req.bytes = count;
-    co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
-    qemu_coroutine_enter(co);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque)
 {
@@ -2273,23 +2287,22 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
    int ret;
-    BdrvTrackedRequest req;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
        bdrv_is_sg(bs)) {
        return 0;
    }

-    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+    bdrv_inc_in_flight(bs);

    int current_gen = bs->write_gen;

    /* Wait until any previous flushes are completed */
-    while (bs->active_flush_req != NULL) {
+    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue);
    }

-    bs->active_flush_req = &req;
+    bs->active_flush_req = true;

    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
@@ -2358,12 +2371,14 @@ flush_parent:
    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 out:
    /* Notify any pending flushes that we have completed */
-    bs->flushed_gen = current_gen;
-    bs->active_flush_req = NULL;
+    if (ret == 0) {
+        bs->flushed_gen = current_gen;
+    }
+    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);

-    tracked_request_end(&req);
+    bdrv_dec_in_flight(bs);
    return ret;
 }

@@ -2379,13 +2394,9 @@ int bdrv_flush(BlockDriverState *bs)
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&flush_co);
    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
        co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
        qemu_coroutine_enter(co);
-        while (flush_co.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
    }

    return flush_co.ret;
@@ -2409,7 +2420,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
 {
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
-    int head, align;
+    int head, tail, align;

    if (!bs->drv) {
        return -ENOMEDIUM;
@@ -2432,20 +2443,17 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
        return 0;
    }

-    /* Discard is advisory, so ignore any unaligned head or tail */
+    /* Discard is advisory, but some devices track and coalesce
+     * unaligned requests, so we must pass everything down rather than
+     * round here.  Still, most devices will just silently ignore
+     * unaligned requests (by returning -ENOTSUP), so we must fragment
+     * the request accordingly.  */
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
-    if (head) {
-        head = MIN(count, align - head);
-        count -= head;
-        offset += head;
-    }
-    count = QEMU_ALIGN_DOWN(count, align);
-    if (!count) {
-        return 0;
-    }
+    tail = (offset + count) % align;

+    bdrv_inc_in_flight(bs);
    tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
@@ -2455,11 +2463,34 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,

    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
-    assert(max_pdiscard);
+    assert(max_pdiscard >= bs->bl.request_alignment);

    while (count > 0) {
        int ret;
-        int num = MIN(count, max_pdiscard);
+        int num = count;
+
+        if (head) {
+            /* Make small requests to get to alignment boundaries. */
+            num = MIN(count, align - head);
+            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
+                num %= bs->bl.request_alignment;
+            }
+            head = (head + num) % align;
+            assert(num < max_pdiscard);
+        } else if (tail) {
+            if (num > align) {
+                /* Shorten the request to the last aligned cluster.  */
+                num -= tail;
+            } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) &&
+                       tail > bs->bl.request_alignment) {
+                tail %= bs->bl.request_alignment;
+                num -= tail;
+            }
+        }
+        /* limit request size */
+        if (num > max_pdiscard) {
+            num = max_pdiscard;
+        }

        if (bs->drv->bdrv_co_pdiscard) {
            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
@@ -2492,6 +2523,7 @@ out:
    bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                   req.bytes >> BDRV_SECTOR_BITS);
    tracked_request_end(&req);
+    bdrv_dec_in_flight(bs);
    return ret;
 }

@@ -2509,106 +2541,41 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
        /* Fast-path if already in coroutine context */
        bdrv_pdiscard_co_entry(&rwco);
    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
        co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
        qemu_coroutine_enter(co);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
+        BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
    }

    return rwco.ret;
 }

-static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
+int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
 {
    BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest tracked_req;
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

-    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
-    if (!drv || !drv->bdrv_aio_ioctl) {
+    bdrv_inc_in_flight(bs);
+    if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
        co.ret = -ENOTSUP;
        goto out;
    }

-    acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
-    if (!acb) {
-        co.ret = -ENOTSUP;
-        goto out;
-    }
-    qemu_coroutine_yield();
-out:
-    tracked_request_end(&tracked_req);
-    return co.ret;
-}
-
-typedef struct {
-    BlockDriverState *bs;
-    int req;
-    void *buf;
-    int ret;
-} BdrvIoctlCoData;
-
-static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
-{
-    BdrvIoctlCoData *data = opaque;
-    data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf);
-}
-
-/* needed for generic scsi interface */
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    BdrvIoctlCoData data = {
-        .bs = bs,
-        .req = req,
-        .buf = buf,
-        .ret = -EINPROGRESS,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_co_ioctl_entry(&data);
+    if (drv->bdrv_co_ioctl) {
+        co.ret = drv->bdrv_co_ioctl(bs, req, buf);
    } else {
-        Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data);
-
-        qemu_coroutine_enter(co);
-        while (data.ret == -EINPROGRESS) {
-            aio_poll(bdrv_get_aio_context(bs), true);
+        acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
+        if (!acb) {
+            co.ret = -ENOTSUP;
+            goto out;
        }
+        qemu_coroutine_yield();
    }
-    return data.ret;
-}
-
-static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
-                                      acb->req.req, acb->req.buf);
-    bdrv_co_complete(acb);
-}
-
-BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info,
-                                            bs, cb, opaque);
-    Coroutine *co;
-
-    acb->need_bh = true;
-    acb->req.error = -EINPROGRESS;
-    acb->req.req = req;
-    acb->req.buf = buf;
-    co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry, acb);
-    qemu_coroutine_enter(co);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
+out:
+    bdrv_dec_in_flight(bs);
+    return co.ret;
 }

 void *qemu_blockalign(BlockDriverState *bs, size_t size)
@@ -2679,7 +2646,7 @@ void bdrv_io_plug(BlockDriverState *bs)
        bdrv_io_plug(child->bs);
    }

-    if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
+    if (bs->io_plugged++ == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_plug) {
            drv->bdrv_io_plug(bs);
@@ -2692,7 +2659,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
    BdrvChild *child;

    assert(bs->io_plugged);
-    if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+    if (--bs->io_plugged == 0) {
        BlockDriver *drv = bs->drv;
        if (drv && drv->bdrv_io_unplug) {
            drv->bdrv_io_unplug(bs);
@@ -2703,36 +2670,3 @@ void bdrv_io_unplug(BlockDriverState *bs)
        bdrv_io_unplug(child->bs);
    }
 }
-
-void bdrv_io_unplugged_begin(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_io_unplug) {
-            drv->bdrv_io_unplug(bs);
-        }
-    }
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_io_unplugged_begin(child->bs);
-    }
-}
-
-void bdrv_io_unplugged_end(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    assert(bs->io_plug_disabled);
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_io_unplugged_end(child->bs);
-    }
-
-    if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_io_plug) {
-            drv->bdrv_io_plug(bs);
-        }
-    }
-}
--- a/block/iscsi-opts.c
+++ b/block/iscsi-opts.c
@@ -0,0 +1,69 @@
+/*
+ * QEMU Block driver for iSCSI images (static options)
+ *
+ * Copyright (c) 2017 Peter Lieven <pl@kamp.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+
+/*
+ * Option list named "iscsi".  Every entry is a string option except
+ * "timeout", which is numeric.  Registered by iscsi_block_opts_init().
+ */
+static QemuOptsList qemu_iscsi_opts = {
+    .name = "iscsi",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
+    .desc = {
+        {
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+            .help = "username for CHAP authentication to target",
+        },{
+            .name = "password",
+            .type = QEMU_OPT_STRING,
+            .help = "password for CHAP authentication to target",
+        },{
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the secret providing password for CHAP "
+                    "authentication to target",
+        },{
+            .name = "header-digest",
+            .type = QEMU_OPT_STRING,
+            .help = "HeaderDigest setting. "
+                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
+        },{
+            .name = "initiator-name",
+            .type = QEMU_OPT_STRING,
+            .help = "Initiator iqn name to use when connecting",
+        },{
+            .name = "timeout",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Request timeout in seconds (default 0 = no timeout)",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Register qemu_iscsi_opts with qemu_add_opts().  The function is hooked
+ * up through the block_init() invocation below.
+ */
+static void iscsi_block_opts_init(void)
+{
+    qemu_add_opts(&qemu_iscsi_opts);
+}
+
+block_init(iscsi_block_opts_init);
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -362,6 +362,7 @@ iscsi_set_events(IscsiLun *iscsilun)
                           false,
                           (ev & POLLIN) ? iscsi_process_read : NULL,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
+                           NULL,
                           iscsilun);
        iscsilun->events = ev;
    }
@@ -498,14 +499,18 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
    if (allocated) {
        bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
    } else {
-        bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
+        if (nb_cls_shrunk > 0) {
+            bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
+        }
    }

    if (iscsilun->allocmap_valid == NULL) {
        return;
    }
    if (valid) {
-        bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+        if (nb_cls_shrunk > 0) {
+            bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+        }
    } else {
        bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
                     nb_cls_expanded);
@@ -1083,7 +1088,9 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    struct IscsiTask iTask;
    struct unmap_list list;

-    assert(is_byte_request_lun_aligned(offset, count, iscsilun));
+    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
+        return -ENOTSUP;
+    }

    if (!iscsilun->lbp.lbpu) {
        /* UNMAP is not supported by the target */
@@ -1524,7 +1531,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;

    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    iscsilun->events = 0;

    if (iscsilun->nop_timer) {
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -255,6 +255,20 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }

+static bool qemu_laio_poll_cb(void *opaque)
+{
+    EventNotifier *e = opaque;
+    LinuxAioState *s = container_of(e, LinuxAioState, e);
+    struct io_event *events;
+
+    if (!io_getevents_peek(s->ctx, &events)) {
+        return false;
+    }
+
+    qemu_laio_process_completions_and_submit(s);
+    return true;
+}
+
 static void laio_cancel(BlockAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -439,7 +453,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,

 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
    qemu_bh_delete(s->completion_bh);
 }

@@ -448,7 +462,8 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb);
+                           qemu_laio_completion_cb,
+                           qemu_laio_poll_cb);
 }

 LinuxAioState *laio_init(void)
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -13,7 +13,7 @@

 #include "qemu/osdep.h"
 #include "trace.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
@@ -469,7 +469,11 @@ static void mirror_free_init(MirrorBlockJob *s)
    }
 }

-static void mirror_drain(MirrorBlockJob *s)
+/* This is also used for the .pause callback. There is no matching
+ * mirror_resume() because mirror_run() will begin iterating again
+ * when the job is resumed.
+ */
+static void mirror_wait_for_all_io(MirrorBlockJob *s)
 {
    while (s->in_flight > 0) {
        mirror_wait_for_io(s);
@@ -526,8 +530,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
-    bdrv_op_unblock_all(target_bs, s->common.blocker);
    blk_unref(s->target);
+    s->target = NULL;
    block_job_completed(&s->common, data->ret);
    g_free(data);
    bdrv_drained_end(src);
@@ -582,7 +586,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            sector_num += nb_sectors;
        }

-        mirror_drain(s);
+        mirror_wait_for_all_io(s);
    }

    /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -611,12 +615,27 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
    return 0;
 }

+/* Called when going out of the streaming phase to flush the bulk of the
+ * data to the medium, or just before completing.
+ */
+static int mirror_flush(MirrorBlockJob *s)
+{
+    int ret = blk_flush(s->target);
+    if (ret < 0) {
+        if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
+            s->ret = ret;
+        }
+    }
+    return ret;
+}
+
 static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
    BlockDriverState *bs = blk_bs(s->common.blk);
    BlockDriverState *target_bs = blk_bs(s->target);
+    bool need_drain = true;
    int64_t length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
@@ -722,27 +741,23 @@ static void coroutine_fn mirror_run(void *opaque)
        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
-            ret = blk_flush(s->target);
-            if (ret < 0) {
-                if (mirror_error_action(s, false, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    goto immediate_exit;
+            if (!s->synced) {
+                if (mirror_flush(s) < 0) {
+                    /* Go check s->ret.  */
+                    continue;
                }
-            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
-                if (!s->synced) {
-                    block_job_event_ready(&s->common);
-                    s->synced = true;
-                }
-
-                should_complete = s->should_complete ||
-                    block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
+                block_job_event_ready(&s->common);
+                s->synced = true;
            }
+
+            should_complete = s->should_complete ||
+                block_job_is_cancelled(&s->common);
+            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        if (cnt == 0 && should_complete) {
@@ -752,11 +767,26 @@ static void coroutine_fn mirror_run(void *opaque)
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
-             * mirror_populate runs.
+             * mirror_populate runs, so pause it now.  Before deciding
+             * whether to switch to the target, check one last time whether
+             * I/O has arrived in the meantime; if not, flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);
-            bdrv_co_drain(bs);
+
+            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
+            if (cnt > 0 || mirror_flush(s) < 0) {
+                bdrv_drained_end(bs);
+                continue;
+            }
+
+            /* The two disks are in sync.  Exit and report successful
+             * completion.
+             */
+            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.cancelled = false;
+            need_drain = false;
+            break;
        }

        ret = 0;
@@ -769,13 +799,6 @@ static void coroutine_fn mirror_run(void *opaque)
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-        } else if (cnt == 0) {
-            /* The two disks are in sync.  Exit and report successful
-             * completion.
-             */
-            assert(QLIST_EMPTY(&bs->tracked_requests));
-            s->common.cancelled = false;
-            break;
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
@@ -787,7 +810,8 @@ immediate_exit:
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
-        mirror_drain(s);
+        assert(need_drain);
+        mirror_wait_for_all_io(s);
    }

    assert(s->in_flight == 0);
@@ -799,9 +823,10 @@ immediate_exit:

    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    /* Before we switch to target in mirror_exit, make sure data doesn't
-     * change. */
-    bdrv_drained_begin(bs);
+
+    if (need_drain) {
+        bdrv_drained_begin(bs);
+    }
    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
 }

@@ -872,14 +897,11 @@ static void mirror_complete(BlockJob *job, Error **errp)
    block_job_enter(&s->common);
 }

-/* There is no matching mirror_resume() because mirror_run() will begin
- * iterating again when the job is resumed.
- */
-static void coroutine_fn mirror_pause(BlockJob *job)
+static void mirror_pause(BlockJob *job)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

-    mirror_drain(s);
+    mirror_wait_for_all_io(s);
 }

 static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
@@ -889,28 +911,47 @@ static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
    blk_set_aio_context(s->target, new_context);
 }

+static void mirror_drain(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    /* Need to keep a reference in case blk_drain triggers execution
+     * of mirror_complete...
+     */
+    if (s->target) {
+        BlockBackend *target = s->target;
+        blk_ref(target);
+        blk_drain(target);
+        blk_unref(target);
+    }
+}
+
 static const BlockJobDriver mirror_job_driver = {
    .instance_size          = sizeof(MirrorBlockJob),
    .job_type               = BLOCK_JOB_TYPE_MIRROR,
    .set_speed              = mirror_set_speed,
+    .start                  = mirror_run,
    .complete               = mirror_complete,
    .pause                  = mirror_pause,
    .attached_aio_context   = mirror_attached_aio_context,
+    .drain                  = mirror_drain,
 };

 static const BlockJobDriver commit_active_job_driver = {
    .instance_size          = sizeof(MirrorBlockJob),
    .job_type               = BLOCK_JOB_TYPE_COMMIT,
    .set_speed              = mirror_set_speed,
+    .start                  = mirror_run,
    .complete               = mirror_complete,
    .pause                  = mirror_pause,
    .attached_aio_context   = mirror_attached_aio_context,
+    .drain                  = mirror_drain,
 };

 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
-                             BlockDriverState *target, const char *replaces,
-                             int64_t speed, uint32_t granularity,
-                             int64_t buf_size,
+                             int creation_flags, BlockDriverState *target,
+                             const char *replaces, int64_t speed,
+                             uint32_t granularity, int64_t buf_size,
                             BlockMirrorBackingMode backing_mode,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
@@ -938,7 +979,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }

-    s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
+    s = block_job_create(job_id, driver, bs, speed, creation_flags,
+                         cb, opaque, errp);
    if (!s) {
        return;
    }
@@ -967,11 +1009,18 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        return;
    }

-    bdrv_op_block_all(target, s->common.blocker);
+    block_job_add_bdrv(&s->common, target);
+    /* In commit_active_start() all intermediate nodes disappear, so
+     * any jobs in them must be blocked */
+    if (bdrv_chain_contains(bs, target)) {
+        BlockDriverState *iter;
+        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
+            block_job_add_bdrv(&s->common, iter);
+        }
+    }

-    s->common.co = qemu_coroutine_create(mirror_run, s);
-    trace_mirror_start(bs, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co);
+    trace_mirror_start(bs, s, opaque);
+    block_job_start(&s->common);
 }

 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -980,9 +1029,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  bool unmap,
-                  BlockCompletionFunc *cb,
-                  void *opaque, Error **errp)
+                  bool unmap, Error **errp)
 {
    bool is_none_mode;
    BlockDriverState *base;
@@ -993,17 +1040,16 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
-    mirror_start_job(job_id, bs, target, replaces,
+    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, cb, opaque, errp,
+                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
                     &mirror_job_driver, is_none_mode, base, false);
 }

 void commit_active_start(const char *job_id, BlockDriverState *bs,
-                         BlockDriverState *base, int64_t speed,
-                         BlockdevOnError on_error,
-                         BlockCompletionFunc *cb,
-                         void *opaque, Error **errp,
+                         BlockDriverState *base, int creation_flags,
+                         int64_t speed, BlockdevOnError on_error,
+                         BlockCompletionFunc *cb, void *opaque, Error **errp,
                         bool auto_complete)
 {
    int64_t length, base_length;
@@ -1042,9 +1088,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
        }
    }

-    mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
+    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, false, cb, opaque, &local_err,
+                     on_error, on_error, true, cb, opaque, &local_err,
                     &commit_active_job_driver, false, base, auto_complete);
    if (local_err) {
        error_propagate(errp, local_err);
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,6 +1,7 @@
 /*
 * QEMU Block driver for  NBD
 *
+ * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
@@ -32,7 +33,7 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))

-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
    int i;

@@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)

 static void nbd_teardown_connection(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);

    if (!client->ioc) { /* Already closed */
        return;
@@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 static void nbd_reply_ready(void *opaque)
 {
    BlockDriverState *bs = opaque;
-    NbdClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = nbd_get_client_session(bs);
    uint64_t i;
    int ret;

@@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque)
 }

 static int nbd_co_send_request(BlockDriverState *bs,
-                               struct nbd_request *request,
+                               NBDRequest *request,
                               QEMUIOVector *qiov)
 {
-    NbdClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
    int rc, ret, i;

@@ -144,7 +145,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
    aio_context = bdrv_get_aio_context(bs);

    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, bs);
+                       nbd_reply_ready, nbd_restart_write, NULL, bs);
    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
@@ -160,15 +161,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
        rc = nbd_send_request(s->ioc, request);
    }
    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, bs);
+                       nbd_reply_ready, NULL, NULL, bs);
    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
 }

-static void nbd_co_receive_reply(NbdClientSession *s,
-                                 struct nbd_request *request,
-                                 struct nbd_reply *reply,
+static void nbd_co_receive_reply(NBDClientSession *s,
+                                 NBDRequest *request,
+                                 NBDReply *reply,
                                 QEMUIOVector *qiov)
 {
    int ret;
@@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
    }
 }

-static void nbd_coroutine_start(NbdClientSession *s,
-   struct nbd_request *request)
+static void nbd_coroutine_start(NBDClientSession *s,
+                                NBDRequest *request)
 {
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
-    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
-        qemu_co_mutex_lock(&s->free_sema);
+    if (s->in_flight == MAX_NBD_REQUESTS) {
+        qemu_co_queue_wait(&s->free_sema);
        assert(s->in_flight < MAX_NBD_REQUESTS);
    }
    s->in_flight++;
@@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s,
    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
 }

-static void nbd_coroutine_end(NbdClientSession *s,
-    struct nbd_request *request)
+static void nbd_coroutine_end(NBDClientSession *s,
+                              NBDRequest *request)
 {
    int i = HANDLE_TO_INDEX(s, request->handle);
    s->recv_coroutine[i] = NULL;
    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_unlock(&s->free_sema);
+        qemu_co_queue_next(&s->free_sema);
    }
 }

 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;

    if (flags & BDRV_REQ_FUA) {
        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
-        request.type |= NBD_CMD_FLAG_FUA;
+        request.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    return -reply.error;
 }

+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                int count, BdrvRequestFlags flags)
+{
+    ssize_t ret;
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
+        .type = NBD_CMD_WRITE_ZEROES,
+        .from = offset,
+        .len = count,
+    };
+    NBDReply reply;
+
+    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+        return -ENOTSUP;
+    }
+
+    if (flags & BDRV_REQ_FUA) {
+        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        request.flags |= NBD_CMD_FLAG_FUA;
+    }
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        request.flags |= NBD_CMD_FLAG_NO_HOLE;
+    }
+
+    nbd_coroutine_start(client, &request);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(client, &request, &reply, NULL);
+    }
+    nbd_coroutine_end(client, &request);
+    return -reply.error;
+}
+
 int nbd_client_co_flush(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_FLUSH };
-    struct nbd_reply reply;
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = { .type = NBD_CMD_FLUSH };
+    NBDReply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
@@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs)

 int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = count,
    };
-    struct nbd_reply reply;
+    NBDReply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
@@ -330,24 +366,20 @@ void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
    aio_set_fd_handler(bdrv_get_aio_context(bs),
                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, bs);
+                       false, nbd_reply_ready, NULL, NULL, bs);
 }

 void nbd_client_close(BlockDriverState *bs)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
-        .type = NBD_CMD_DISC,
-        .from = 0,
-        .len = 0
-    };
+    NBDClientSession *client = nbd_get_client_session(bs);
+    NBDRequest request = { .type = NBD_CMD_DISC };

    if (client->ioc == NULL) {
        return;
@@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs,
                    const char *hostname,
                    Error **errp)
 {
-    NbdClientSession *client = nbd_get_client_session(bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
    int ret;

    /* NBD handshake */
@@ -383,10 +415,14 @@ int nbd_client_init(BlockDriverState *bs,
    }
    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
+        bs->supported_zero_flags |= BDRV_REQ_FUA;
+    }
+    if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
+        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }

    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_mutex_init(&client->free_sema);
+    qemu_co_queue_init(&client->free_sema);
    client->sioc = sioc;
    object_ref(OBJECT(client->sioc));

--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -17,24 +17,24 @@

 #define MAX_NBD_REQUESTS    16

-typedef struct NbdClientSession {
+typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
    uint16_t nbdflags;
    off_t size;

    CoMutex send_mutex;
-    CoMutex free_sema;
+    CoQueue free_sema;
    Coroutine *send_coroutine;
    int in_flight;

    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-    struct nbd_reply reply;
+    NBDReply reply;

    bool is_unix;
-} NbdClientSession;
+} NBDClientSession;

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

 int nbd_client_init(BlockDriverState *bs,
                    QIOChannelSocket *sock,
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags);

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -32,6 +32,9 @@
 #include "qemu/uri.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi-visit.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qint.h"
@@ -41,10 +44,11 @@
 #define EN_OPTSTR ":exportname="

 typedef struct BDRVNBDState {
-    NbdClientSession client;
+    NBDClientSession client;

    /* For nbd_refresh_filename() */
-    char *path, *host, *port, *export, *tlscredsid;
+    SocketAddress *saddr;
+    char *export, *tlscredsid;
 } BDRVNBDState;

 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -90,9 +94,13 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            ret = -EINVAL;
            goto out;
        }
-        qdict_put(options, "path", qstring_from_str(qp->p[0].value));
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path",
+                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
+        char *port_str;
+
        /* nbd[+tcp]://host[:port]/export */
        if (!uri->server) {
            ret = -EINVAL;
@@ -107,12 +115,12 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            host = qstring_from_str(uri->server);
        }

-        qdict_put(options, "host", host);
-        if (uri->port) {
-            char* port_str = g_strdup_printf("%d", uri->port);
-            qdict_put(options, "port", qstring_from_str(port_str));
-            g_free(port_str);
-        }
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", host);
+
+        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
+        qdict_put(options, "server.data.port", qstring_from_str(port_str));
+        g_free(port_str);
    }

 out:
@@ -123,6 +131,26 @@ out:
    return ret;
 }

+static bool nbd_has_filename_options_conflict(QDict *options, Error **errp)
+{
+    const QDictEntry *e;
+
+    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+        if (!strcmp(e->key, "host") ||
+            !strcmp(e->key, "port") ||
+            !strcmp(e->key, "path") ||
+            !strcmp(e->key, "export") ||
+            strstart(e->key, "server.", NULL))
+        {
+            error_setg(errp, "Option '%s' cannot be used with a file name",
+                       e->key);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void nbd_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
@@ -131,12 +159,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
    const char *host_spec;
    const char *unixpath;

-    if (qdict_haskey(options, "host")
-        || qdict_haskey(options, "port")
-        || qdict_haskey(options, "path"))
-    {
-        error_setg(errp, "host/port/path and a file name may not be specified "
-                         "at the same time");
+    if (nbd_has_filename_options_conflict(options, errp)) {
        return;
    }

@@ -173,7 +196,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put(options, "path", qstring_from_str(unixpath));
+        qdict_put(options, "server.type", qstring_from_str("unix"));
+        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
    } else {
        InetSocketAddress *addr = NULL;

@@ -182,8 +206,9 @@ static void nbd_parse_filename(const char *filename, QDict *options,
            goto out;
        }

-        qdict_put(options, "host", qstring_from_str(addr->host));
-        qdict_put(options, "port", qstring_from_str(addr->port));
+        qdict_put(options, "server.type", qstring_from_str("inet"));
+        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }

@@ -191,51 +216,85 @@ out:
    g_free(file);
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
+static bool nbd_process_legacy_socket_options(QDict *output_options,
+                                              QemuOpts *legacy_opts,
+                                              Error **errp)
 {
-    SocketAddress *saddr;
+    const char *path = qemu_opt_get(legacy_opts, "path");
+    const char *host = qemu_opt_get(legacy_opts, "host");
+    const char *port = qemu_opt_get(legacy_opts, "port");
+    const QDictEntry *e;

-    s->path = g_strdup(qemu_opt_get(opts, "path"));
-    s->host = g_strdup(qemu_opt_get(opts, "host"));
-
-    if (!s->path == !s->host) {
-        if (s->path) {
-            error_setg(errp, "path and host may not be used at the same time.");
-        } else {
-            error_setg(errp, "one of path and host must be specified.");
-        }
-        return NULL;
+    if (!path && !host && !port) {
+        return true;
    }

-    saddr = g_new0(SocketAddress, 1);
-
-    if (s->path) {
-        UnixSocketAddress *q_unix;
-        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
-        q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
-        q_unix->path = g_strdup(s->path);
-    } else {
-        InetSocketAddress *inet;
-
-        s->port = g_strdup(qemu_opt_get(opts, "port"));
-
-        saddr->type = SOCKET_ADDRESS_KIND_INET;
-        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
-        inet->host = g_strdup(s->host);
-        inet->port = g_strdup(s->port);
-        if (!inet->port) {
-            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+    for (e = qdict_first(output_options); e; e = qdict_next(output_options, e))
+    {
+        if (strstart(e->key, "server.", NULL)) {
+            error_setg(errp, "Cannot use 'server' and path/host/port at the "
+                       "same time");
+            return false;
        }
    }

+    if (path && host) {
+        error_setg(errp, "path and host may not be used at the same time");
+        return false;
+    } else if (path) {
+        if (port) {
+            error_setg(errp, "port may not be used without host");
+            return false;
+        }
+
+        qdict_put(output_options, "server.type", qstring_from_str("unix"));
+        qdict_put(output_options, "server.data.path", qstring_from_str(path));
+    } else if (host) {
+        qdict_put(output_options, "server.type", qstring_from_str("inet"));
+        qdict_put(output_options, "server.data.host", qstring_from_str(host));
+        qdict_put(output_options, "server.data.port",
+                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
+    }
+
+    return true;
+}
+
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
+{
+    SocketAddress *saddr = NULL;
+    QDict *addr = NULL;
+    QObject *crumpled_addr = NULL;
+    Visitor *iv = NULL;
+    Error *local_err = NULL;
+
+    qdict_extract_subqdict(options, &addr, "server.");
+    if (!qdict_size(addr)) {
+        error_setg(errp, "NBD server address missing");
+        goto done;
+    }
+
+    crumpled_addr = qdict_crumple(addr, errp);
+    if (!crumpled_addr) {
+        goto done;
+    }
+
+    iv = qobject_input_visitor_new(crumpled_addr, true);
+    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto done;
+    }
+
    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;

-    s->export = g_strdup(qemu_opt_get(opts, "export"));
-
+done:
+    QDECREF(addr);
+    qobject_decref(crumpled_addr);
+    visit_free(iv);
    return saddr;
 }

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;
    return &s->client;
@@ -248,6 +307,7 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
    Error *local_err = NULL;

    sioc = qio_channel_socket_new();
+    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");

    qio_channel_socket_connect_sync(sioc,
                                    saddr,
@@ -332,7 +392,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
    QIOChannelSocket *sioc = NULL;
-    SocketAddress *saddr = NULL;
    QCryptoTLSCreds *tlscreds = NULL;
    const char *hostname = NULL;
    int ret = -EINVAL;
@@ -344,12 +403,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto error;
    }

-    /* Pop the config into our state object. Exit if invalid. */
-    saddr = nbd_config(s, opts, errp);
-    if (!saddr) {
+    /* Translate @host, @port, and @path to a SocketAddress */
+    if (!nbd_process_legacy_socket_options(options, opts, errp)) {
        goto error;
    }

+    /* Pop the config into our state object. Exit if invalid. */
+    s->saddr = nbd_config(s, options, errp);
+    if (!s->saddr) {
+        goto error;
+    }
+
+    s->export = g_strdup(qemu_opt_get(opts, "export"));
+
    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
    if (s->tlscredsid) {
        tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
@@ -357,17 +423,17 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }

-        if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = saddr->u.inet.data->host;
+        hostname = s->saddr->u.inet.data->host;
    }

    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    sioc = nbd_establish_connection(saddr, errp);
+    sioc = nbd_establish_connection(s->saddr, errp);
    if (!sioc) {
        ret = -ECONNREFUSED;
        goto error;
@@ -384,13 +450,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        object_unref(OBJECT(tlscreds));
    }
    if (ret < 0) {
-        g_free(s->path);
-        g_free(s->host);
-        g_free(s->port);
+        qapi_free_SocketAddress(s->saddr);
        g_free(s->export);
        g_free(s->tlscredsid);
    }
-    qapi_free_SocketAddress(saddr);
    qemu_opts_del(opts);
    return ret;
 }
@@ -403,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }

@@ -412,9 +476,7 @@ static void nbd_close(BlockDriverState *bs)

    nbd_client_close(bs);

-    g_free(s->path);
-    g_free(s->host);
-    g_free(s->port);
+    qapi_free_SocketAddress(s->saddr);
    g_free(s->export);
    g_free(s->tlscredsid);
 }
@@ -441,45 +503,52 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
 {
    BDRVNBDState *s = bs->opaque;
    QDict *opts = qdict_new();
+    QObject *saddr_qdict;
+    Visitor *ov;
+    const char *host = NULL, *port = NULL, *path = NULL;

-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
-
-    if (s->path && s->export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:///%s?socket=%s", s->export, s->path);
-    } else if (s->path && !s->export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix://?socket=%s", s->path);
-    } else if (!s->path && s->export && s->port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s/%s", s->host, s->port, s->export);
-    } else if (!s->path && s->export && !s->port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s/%s", s->host, s->export);
-    } else if (!s->path && !s->export && s->port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s", s->host, s->port);
-    } else if (!s->path && !s->export && !s->port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s", s->host);
+    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
+        const InetSocketAddress *inet = s->saddr->u.inet.data;
+        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
+            host = inet->host;
+            port = inet->port;
+        }
+    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
+        path = s->saddr->u.q_unix.data->path;
    }

-    if (s->path) {
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
-    } else if (s->port) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
-    } else {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+    qdict_put(opts, "driver", qstring_from_str("nbd"));
+
+    if (path && s->export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd+unix:///%s?socket=%s", s->export, path);
+    } else if (path && !s->export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd+unix://?socket=%s", path);
+    } else if (host && s->export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s:%s/%s", host, port, s->export);
+    } else if (host && !s->export) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nbd://%s:%s", host, port);
    }
+
+    ov = qobject_output_visitor_new(&saddr_qdict);
+    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
+    visit_complete(ov, &saddr_qdict);
+    visit_free(ov);
+    assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
+
+    qdict_put_obj(opts, "server", saddr_qdict);
+
    if (s->export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
+        qdict_put(opts, "export", qstring_from_str(s->export));
    }
    if (s->tlscredsid) {
-        qdict_put_obj(opts, "tls-creds",
-                      QOBJECT(qstring_from_str(s->tlscredsid)));
+        qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid));
    }

+    qdict_flatten(opts);
    bs->full_open_options = opts;
 }

@@ -491,6 +560,7 @@ static BlockDriver bdrv_nbd = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -509,6 +579,7 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
@@ -527,6 +598,7 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_file_open             = nbd_open,
    .bdrv_co_preadv             = nbd_client_co_preadv,
    .bdrv_co_pwritev            = nbd_client_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
    .bdrv_close                 = nbd_close,
    .bdrv_co_flush_to_os        = nbd_co_flush,
    .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -35,8 +35,15 @@
 #include "qemu/uri.h"
 #include "qemu/cutils.h"
 #include "sysemu/sysemu.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi-visit.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include <nfsc/libnfs.h>

+
 #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
@@ -49,9 +56,13 @@ typedef struct NFSClient {
    AioContext *aio_context;
    blkcnt_t st_blocks;
    bool cache_used;
+    NFSServer *server;
+    char *path;
+    int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug;
 } NFSClient;

 typedef struct NFSRPC {
+    BlockDriverState *bs;
    int ret;
    int complete;
    QEMUIOVector *iov;
@@ -60,6 +71,122 @@ typedef struct NFSRPC {
    NFSClient *client;
 } NFSRPC;

+static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
+{
+    URI *uri = NULL;
+    QueryParams *qp = NULL;
+    int ret = -EINVAL, i;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        error_setg(errp, "Invalid URI specified");
+        goto out;
+    }
+    if (strcmp(uri->scheme, "nfs") != 0) {
+        error_setg(errp, "URI scheme must be 'nfs'");
+        goto out;
+    }
+
+    if (!uri->server) {
+        error_setg(errp, "missing hostname in URI");
+        goto out;
+    }
+
+    if (!uri->path) {
+        error_setg(errp, "missing file path in URI");
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (!qp) {
+        error_setg(errp, "could not parse query parameters");
+        goto out;
+    }
+
+    qdict_put(options, "server.host", qstring_from_str(uri->server));
+    qdict_put(options, "server.type", qstring_from_str("inet"));
+    qdict_put(options, "path", qstring_from_str(uri->path));
+
+    for (i = 0; i < qp->n; i++) {
+        if (!qp->p[i].value) {
+            error_setg(errp, "Value for NFS parameter expected: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+        if (parse_uint_full(qp->p[i].value, NULL, 0)) {
+            error_setg(errp, "Illegal value for NFS parameter: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+        if (!strcmp(qp->p[i].name, "uid")) {
+            qdict_put(options, "user",
+                      qstring_from_str(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "gid")) {
+            qdict_put(options, "group",
+                      qstring_from_str(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+            qdict_put(options, "tcp-syn-count",
+                      qstring_from_str(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "readahead")) {
+            qdict_put(options, "readahead-size",
+                      qstring_from_str(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "pagecache")) {
+            qdict_put(options, "page-cache-size",
+                      qstring_from_str(qp->p[i].value));
+        } else if (!strcmp(qp->p[i].name, "debug")) {
+            qdict_put(options, "debug",
+                      qstring_from_str(qp->p[i].value));
+        } else {
+            error_setg(errp, "Unknown NFS parameter name: %s",
+                       qp->p[i].name);
+            goto out;
+        }
+    }
+    ret = 0;
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    if (uri) {
+        uri_free(uri);
+    }
+    return ret;
+}
+
+static bool nfs_has_filename_options_conflict(QDict *options, Error **errp)
+{
+    const QDictEntry *qe;
+
+    for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
+        if (!strcmp(qe->key, "host") ||
+            !strcmp(qe->key, "path") ||
+            !strcmp(qe->key, "user") ||
+            !strcmp(qe->key, "group") ||
+            !strcmp(qe->key, "tcp-syn-count") ||
+            !strcmp(qe->key, "readahead-size") ||
+            !strcmp(qe->key, "page-cache-size") ||
+            !strcmp(qe->key, "debug") ||
+            strstart(qe->key, "server.", NULL))
+        {
+            error_setg(errp, "Option %s cannot be used with a filename",
+                       qe->key);
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void nfs_parse_filename(const char *filename, QDict *options,
+                               Error **errp)
+{
+    if (nfs_has_filename_options_conflict(options, errp)) {
+        return;
+    }
+
+    nfs_parse_uri(filename, options, errp);
+}
+
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);

@@ -70,7 +197,8 @@ static void nfs_set_events(NFSClient *client)
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
                           false,
                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
+                           (ev & POLLOUT) ? nfs_process_write : NULL,
+                           NULL, client);

    }
    client->events = ev;
@@ -90,11 +218,12 @@ static void nfs_process_write(void *arg)
    nfs_set_events(client);
 }

-static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
+static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 {
    *task = (NFSRPC) {
        .co             = qemu_coroutine_self(),
-        .client         = client,
+        .bs             = bs,
+        .client         = bs->opaque,
    };
 }

@@ -111,6 +240,7 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
 {
    NFSRPC *task = private_data;
    task->ret = ret;
+    assert(!task->st);
    if (task->ret > 0 && task->iov) {
        if (task->ret <= task->iov->size) {
            qemu_iovec_from_buf(task->iov, 0, data, task->ret);
@@ -118,18 +248,11 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
            task->ret = -EIO;
        }
    }
-    if (task->ret == 0 && task->st) {
-        memcpy(task->st, data, sizeof(struct stat));
-    }
    if (task->ret < 0) {
        error_report("NFS Error: %s", nfs_get_error(nfs));
    }
-    if (task->co) {
-        aio_bh_schedule_oneshot(task->client->aio_context,
-                                nfs_co_generic_bh_cb, task);
-    } else {
-        task->complete = 1;
-    }
+    aio_bh_schedule_oneshot(task->client->aio_context,
+                            nfs_co_generic_bh_cb, task);
 }

 static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
@@ -139,7 +262,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
    NFSClient *client = bs->opaque;
    NFSRPC task;

-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);
    task.iov = iov;

    if (nfs_pread_async(client->context, client->fh,
@@ -149,8 +272,8 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
        return -ENOMEM;
    }

+    nfs_set_events(client);
    while (!task.complete) {
-        nfs_set_events(client);
        qemu_coroutine_yield();
    }

@@ -174,7 +297,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
    NFSRPC task;
    char *buf = NULL;

-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);

    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
    if (nb_sectors && buf == NULL) {
@@ -191,8 +314,8 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
        return -ENOMEM;
    }

+    nfs_set_events(client);
    while (!task.complete) {
-        nfs_set_events(client);
        qemu_coroutine_yield();
    }

@@ -210,30 +333,59 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
    NFSClient *client = bs->opaque;
    NFSRPC task;

-    nfs_co_init_task(client, &task);
+    nfs_co_init_task(bs, &task);

    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
        return -ENOMEM;
    }

+    nfs_set_events(client);
    while (!task.complete) {
-        nfs_set_events(client);
        qemu_coroutine_yield();
    }

    return task.ret;
 }

-/* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
    .name = "nfs",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
-            .name = "filename",
+            .name = "path",
            .type = QEMU_OPT_STRING,
-            .help = "URL to the NFS file",
+            .help = "Path of the image on the host",
+        },
+        {
+            .name = "uid",
+            .type = QEMU_OPT_NUMBER,
+            .help = "UID value to use when talking to the server",
+        },
+        {
+            .name = "gid",
+            .type = QEMU_OPT_NUMBER,
+            .help = "GID value to use when talking to the server",
+        },
+        {
+            .name = "tcp-syncnt",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Number of SYNs to send during the session establish",
+        },
+        {
+            .name = "readahead",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Set the readahead size in bytes",
+        },
+        {
+            .name = "pagecache",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Set the pagecache size in bytes",
+        },
+        {
+            .name = "debug",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Set the NFS debug level (max 2)",
        },
        { /* end of list */ }
    },
@@ -244,7 +396,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
    NFSClient *client = bs->opaque;

    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    client->events = 0;
 }

@@ -264,7 +416,7 @@ static void nfs_client_close(NFSClient *client)
            nfs_close(client->context, client->fh);
        }
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL);
+                           false, NULL, NULL, NULL, NULL);
        nfs_destroy_context(client->context);
    }
    memset(client, 0, sizeof(NFSClient));
@@ -276,25 +428,65 @@ static void nfs_file_close(BlockDriverState *bs)
    nfs_client_close(client);
 }

-static int64_t nfs_client_open(NFSClient *client, const char *filename,
+static NFSServer *nfs_config(QDict *options, Error **errp)
+{
+    NFSServer *server = NULL;
+    QDict *addr = NULL;
+    QObject *crumpled_addr = NULL;
+    Visitor *iv = NULL;
+    Error *local_error = NULL;
+
+    qdict_extract_subqdict(options, &addr, "server.");
+    if (!qdict_size(addr)) {
+        error_setg(errp, "NFS server address missing");
+        goto out;
+    }
+
+    crumpled_addr = qdict_crumple(addr, errp);
+    if (!crumpled_addr) {
+        goto out;
+    }
+
+    iv = qobject_input_visitor_new(crumpled_addr, true);
+    visit_type_NFSServer(iv, NULL, &server, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto out;
+    }
+
+out:
+    QDECREF(addr);
+    qobject_decref(crumpled_addr);
+    visit_free(iv);
+    return server;
+}
+
+
+static int64_t nfs_client_open(NFSClient *client, QDict *options,
                               int flags, Error **errp, int open_flags)
 {
-    int ret = -EINVAL, i;
+    int ret = -EINVAL;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
    struct stat st;
-    URI *uri;
-    QueryParams *qp = NULL;
    char *file = NULL, *strp = NULL;

-    uri = uri_parse(filename);
-    if (!uri) {
-        error_setg(errp, "Invalid URL specified");
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
        goto fail;
    }
-    if (!uri->server) {
-        error_setg(errp, "Invalid URL specified");
+
+    client->path = g_strdup(qemu_opt_get(opts, "path"));
+    if (!client->path) {
+        ret = -EINVAL;
+        error_setg(errp, "No path was specified");
        goto fail;
    }
-    strp = strrchr(uri->path, '/');
+
+    strp = strrchr(client->path, '/');
    if (strp == NULL) {
        error_setg(errp, "Invalid URL specified");
        goto fail;
@@ -302,85 +494,89 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    file = g_strdup(strp);
    *strp = 0;

+    /* Pop the config into our state object, Exit if invalid */
+    client->server = nfs_config(options, errp);
+    if (!client->server) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
    client->context = nfs_init_context();
    if (client->context == NULL) {
        error_setg(errp, "Failed to init NFS context");
        goto fail;
    }

-    qp = query_params_parse(uri->query);
-    for (i = 0; i < qp->n; i++) {
-        unsigned long long val;
-        if (!qp->p[i].value) {
-            error_setg(errp, "Value for NFS parameter expected: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (parse_uint_full(qp->p[i].value, &val, 0)) {
-            error_setg(errp, "Illegal value for NFS parameter: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (!strcmp(qp->p[i].name, "uid")) {
-            nfs_set_uid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "gid")) {
-            nfs_set_gid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            nfs_set_tcp_syncnt(client->context, val);
-#ifdef LIBNFS_FEATURE_READAHEAD
-        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            if (open_flags & BDRV_O_NOCACHE) {
-                error_setg(errp, "Cannot enable NFS readahead "
-                                 "if cache.direct = on");
-                goto fail;
-            }
-            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
-                error_report("NFS Warning: Truncating NFS readahead"
-                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
-                val = QEMU_NFS_MAX_READAHEAD_SIZE;
-            }
-            nfs_set_readahead(client->context, val);
-#ifdef LIBNFS_FEATURE_PAGECACHE
-            nfs_set_pagecache_ttl(client->context, 0);
-#endif
-            client->cache_used = true;
-#endif
-#ifdef LIBNFS_FEATURE_PAGECACHE
-            nfs_set_pagecache_ttl(client->context, 0);
-        } else if (!strcmp(qp->p[i].name, "pagecache")) {
-            if (open_flags & BDRV_O_NOCACHE) {
-                error_setg(errp, "Cannot enable NFS pagecache "
-                                 "if cache.direct = on");
-                goto fail;
-            }
-            if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) {
-                error_report("NFS Warning: Truncating NFS pagecache"
-                             " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
-                val = QEMU_NFS_MAX_PAGECACHE_SIZE;
-            }
-            nfs_set_pagecache(client->context, val);
-            nfs_set_pagecache_ttl(client->context, 0);
-            client->cache_used = true;
-#endif
-#ifdef LIBNFS_FEATURE_DEBUG
-        } else if (!strcmp(qp->p[i].name, "debug")) {
-            /* limit the maximum debug level to avoid potential flooding
-             * of our log files. */
-            if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
-                error_report("NFS Warning: Limiting NFS debug level"
-                             " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
-                val = QEMU_NFS_MAX_DEBUG_LEVEL;
-            }
-            nfs_set_debug(client->context, val);
-#endif
-        } else {
-            error_setg(errp, "Unknown NFS parameter name: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
+    if (qemu_opt_get(opts, "uid")) {
+        client->uid = qemu_opt_get_number(opts, "uid", 0);
+        nfs_set_uid(client->context, client->uid);
    }

-    ret = nfs_mount(client->context, uri->server, uri->path);
+    if (qemu_opt_get(opts, "gid")) {
+        client->gid = qemu_opt_get_number(opts, "gid", 0);
+        nfs_set_gid(client->context, client->gid);
+    }
+
+    if (qemu_opt_get(opts, "tcp-syncnt")) {
+        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
+        nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
+    }
+
+#ifdef LIBNFS_FEATURE_READAHEAD
+    if (qemu_opt_get(opts, "readahead")) {
+        if (open_flags & BDRV_O_NOCACHE) {
+            error_setg(errp, "Cannot enable NFS readahead "
+                             "if cache.direct = on");
+            goto fail;
+        }
+        client->readahead = qemu_opt_get_number(opts, "readahead", 0);
+        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
+            error_report("NFS Warning: Truncating NFS readahead "
+                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+            client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
+        }
+        nfs_set_readahead(client->context, client->readahead);
+#ifdef LIBNFS_FEATURE_PAGECACHE
+        nfs_set_pagecache_ttl(client->context, 0);
+#endif
+        client->cache_used = true;
+    }
+#endif
+
+#ifdef LIBNFS_FEATURE_PAGECACHE
+    if (qemu_opt_get(opts, "pagecache")) {
+        if (open_flags & BDRV_O_NOCACHE) {
+            error_setg(errp, "Cannot enable NFS pagecache "
+                             "if cache.direct = on");
+            goto fail;
+        }
+        client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
+        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
+            error_report("NFS Warning: Truncating NFS pagecache "
+                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+            client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
+        }
+        nfs_set_pagecache(client->context, client->pagecache);
+        nfs_set_pagecache_ttl(client->context, 0);
+        client->cache_used = true;
+    }
+#endif
+
+#ifdef LIBNFS_FEATURE_DEBUG
+    if (qemu_opt_get(opts, "debug")) {
+        client->debug = qemu_opt_get_number(opts, "debug", 0);
+        /* limit the maximum debug level to avoid potential flooding
+         * of our log files. */
+        if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
+            error_report("NFS Warning: Limiting NFS debug level "
+                         "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+            client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
+        }
+        nfs_set_debug(client->context, client->debug);
+    }
+#endif
+
+    ret = nfs_mount(client->context, client->server->host, client->path);
    if (ret < 0) {
        error_setg(errp, "Failed to mount nfs share: %s",
                   nfs_get_error(client->context));
@@ -413,14 +609,13 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
    client->st_blocks = st.st_blocks;
    client->has_zero_init = S_ISREG(st.st_mode);
+    *strp = '/';
    goto out;
+
 fail:
    nfs_client_close(client);
 out:
-    if (qp) {
-        query_params_free(qp);
-    }
-    uri_free(uri);
+    qemu_opts_del(opts);
    g_free(file);
    return ret;
 }
@@ -429,28 +624,17 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp) {
    NFSClient *client = bs->opaque;
    int64_t ret;
-    QemuOpts *opts;
-    Error *local_err = NULL;

    client->aio_context = bdrv_get_aio_context(bs);

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+    ret = nfs_client_open(client, options,
                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
                          errp, bs->open_flags);
    if (ret < 0) {
-        goto out;
+        return ret;
    }
    bs->total_sectors = ret;
    ret = 0;
-out:
-    qemu_opts_del(opts);
    return ret;
 }

@@ -472,6 +656,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
    int ret = 0;
    int64_t total_size = 0;
    NFSClient *client = g_new0(NFSClient, 1);
+    QDict *options = NULL;

    client->aio_context = qemu_get_aio_context();

@@ -479,13 +664,20 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);

-    ret = nfs_client_open(client, url, O_CREAT, errp, 0);
+    options = qdict_new();
+    ret = nfs_parse_uri(url, options, errp);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
    if (ret < 0) {
        goto out;
    }
    ret = nfs_ftruncate(client->context, client->fh, total_size);
    nfs_client_close(client);
 out:
+    QDECREF(options);
    g_free(client);
    return ret;
 }
@@ -496,6 +688,22 @@ static int nfs_has_zero_init(BlockDriverState *bs)
    return client->has_zero_init;
 }

+/* Completion callback for nfs_fstat_async(): record the result, wake up bs. */
+static void
+nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
+                               void *private_data)
+{
+    NFSRPC *rpc = private_data;
+    rpc->ret = ret;
+    if (rpc->ret < 0) {
+        error_report("NFS Error: %s", nfs_get_error(nfs));
+    } else if (rpc->ret == 0) {
+        memcpy(rpc->st, data, sizeof(struct stat));
+    }
+    rpc->complete = 1;
+    bdrv_wakeup(rpc->bs);
+}
+
 static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
 {
    NFSClient *client = bs->opaque;
@@ -507,16 +715,15 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
        return client->st_blocks * 512;
    }

+    task.bs = bs;
    task.st = &st;
-    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+    if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
                        &task) != 0) {
        return -ENOMEM;
    }

-    while (!task.complete) {
-        nfs_set_events(client);
-        aio_poll(client->aio_context, true);
-    }
+    nfs_set_events(client);
+    BDRV_POLL_WHILE(bs, !task.complete);

    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }
@@ -561,6 +768,67 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
    return 0;
 }

+/* Regenerate bs->exact_filename (an nfs:// URL) and bs->full_open_options. */
+static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
+{
+    NFSClient *client = bs->opaque;
+    const char *host = client->server->host;
+    QDict *full_opts = qdict_new();
+    QObject *server_obj;
+    Visitor *v;
+
+    qdict_put(full_opts, "driver", qstring_from_str("nfs"));
+
+    /* Only non-zero uid/gid values are appended as URL query parameters. */
+    if (client->uid && client->gid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64,
+                 host, client->path, client->uid, client->gid);
+    } else if (client->uid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?uid=%" PRId64, host, client->path, client->uid);
+    } else if (client->gid) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s?gid=%" PRId64, host, client->path, client->gid);
+    } else {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                 "nfs://%s%s", host, client->path);
+    }
+
+    /* Run client->server through the output visitor to get the "server" value */
+    v = qobject_output_visitor_new(&server_obj);
+    visit_type_NFSServer(v, NULL, &client->server, &error_abort);
+    visit_complete(v, &server_obj);
+    assert(qobject_type(server_obj) == QTYPE_QDICT);
+
+    qdict_put_obj(full_opts, "server", server_obj);
+    qdict_put(full_opts, "path", qstring_from_str(client->path));
+
+    /* Numeric options are emitted only when they are non-zero. */
+    if (client->uid) {
+        qdict_put(full_opts, "uid", qint_from_int(client->uid));
+    }
+    if (client->gid) {
+        qdict_put(full_opts, "gid", qint_from_int(client->gid));
+    }
+    if (client->tcp_syncnt) {
+        qdict_put(full_opts, "tcp-syncnt", qint_from_int(client->tcp_syncnt));
+    }
+    if (client->readahead) {
+        qdict_put(full_opts, "readahead", qint_from_int(client->readahead));
+    }
+    if (client->pagecache) {
+        qdict_put(full_opts, "pagecache", qint_from_int(client->pagecache));
+    }
+    if (client->debug) {
+        qdict_put(full_opts, "debug", qint_from_int(client->debug));
+    }
+
+    visit_free(v);
+    qdict_flatten(full_opts);
+    bs->full_open_options = full_opts;
+}
+
 #ifdef LIBNFS_FEATURE_PAGECACHE
 static void nfs_invalidate_cache(BlockDriverState *bs,
                                 Error **errp)
@@ -575,7 +843,7 @@ static BlockDriver bdrv_nfs = {
    .protocol_name                  = "nfs",

    .instance_size                  = sizeof(NFSClient),
-    .bdrv_needs_filename            = true,
+    .bdrv_parse_filename            = nfs_parse_filename,
    .create_opts                    = &nfs_create_opts,

    .bdrv_has_zero_init             = nfs_has_zero_init,
@@ -593,6 +861,7 @@ static BlockDriver bdrv_nfs = {

    .bdrv_detach_aio_context        = nfs_detach_aio_context,
    .bdrv_attach_aio_context        = nfs_attach_aio_context,
+    .bdrv_refresh_filename          = nfs_refresh_filename,

 #ifdef LIBNFS_FEATURE_PAGECACHE
    .bdrv_invalidate_cache          = nfs_invalidate_cache,
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -104,6 +104,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    unsigned int len, i, shift;
    int ret;
    QCowHeader header;
+    Error *local_err = NULL;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
@@ -252,7 +253,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        error_free(s->migration_blocker);
+        goto fail;
+    }

    qemu_co_mutex_init(&s->lock);
    return 0;
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -22,7 +22,6 @@
 * THE SOFTWARE.
 */

-/* Needed for CONFIG_MADVISE */
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu-common.h"
@@ -66,7 +65,8 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
 static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
                                      int i, int num_tables)
 {
-#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
+#ifdef CONFIG_LINUX
    BDRVQcow2State *s = bs->opaque;
    void *t = qcow2_cache_get_table_addr(bs, c, i);
    int align = getpagesize();
@@ -74,7 +74,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
    if (length > 0) {
-        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+        madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
    }
 #endif
 }
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -668,6 +668,14 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            s->cache_clean_interval);
+#ifndef CONFIG_LINUX
+    if (r->cache_clean_interval != 0) {
+        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
+                   " not supported on this host");
+        ret = -EINVAL;
+        goto fail;
+    }
+#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
@@ -1206,6 +1214,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
    }
    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
+    bs->bl.pdiscard_alignment = s->cluster_size;
 }

 static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -2490,6 +2499,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
    int ret;
    BDRVQcow2State *s = bs->opaque;

+    if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) {
+        assert(count < s->cluster_size);
+        return -ENOTSUP;
+    }
+
    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
                                 QCOW2_DISCARD_REQUEST, false);
@@ -2794,7 +2808,8 @@ static int qcow2_make_empty(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t start_sector;
-    int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
+    int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) /
+                       BDRV_SECTOR_SIZE);
    int l1_clusters, ret = 0;

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -473,8 +473,6 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
    return r1 > r2 ? r1 - r2 : r2 - r1;
 }

-// FIXME Need qcow2_ prefix to global functions
-
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors);
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -174,9 +174,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)

    qed_read_table(s, s->header.l1_table_offset,
                   s->l1_table, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);

    return ret;
 }
@@ -195,9 +193,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
    int ret = -EINPROGRESS;

    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);

    return ret;
 }
@@ -268,9 +264,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
    int ret = -EINPROGRESS;

    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);

    return ret;
 }
@@ -290,9 +284,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
    int ret = -EINPROGRESS;

    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
+    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);

    return ret;
 }
--- a/block/qed.c
+++ b/block/qed.c
@@ -336,7 +336,7 @@ static void qed_need_check_timer_cb(void *opaque)
    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
-    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+    bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
 }

 static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -378,6 +378,19 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
    }
 }

+/* .bdrv_drain hook: run a pending need-check timer callback right away. */
+static void bdrv_qed_drain(BlockDriverState *bs)
+{
+    BDRVQEDState *qed = bs->opaque;
+
+    if (!qed->need_check_timer || !timer_pending(qed->need_check_timer)) {
+        return;
+    }
+    /* Fire now so I/O can start as soon as the header is flushed. */
+    qed_cancel_need_check_timer(qed);
+    qed_need_check_timer_cb(qed);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
@@ -1668,6 +1681,7 @@ static BlockDriver bdrv_qed = {
    .bdrv_check               = bdrv_qed_check,
    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
+    .bdrv_drain               = bdrv_qed_drain,
 };

 static void bdrv_qed_init(void)
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
 * $children_count QuorumChildRequest.
 */
 typedef struct QuorumChildRequest {
-    BlockAIOCB *aiocb;
+    BlockDriverState *bs;
    QEMUIOVector qiov;
    uint8_t *buf;
    int ret;
@@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
 * used to do operations on each children and track overall progress.
 */
 struct QuorumAIOCB {
-    BlockAIOCB common;
+    BlockDriverState *bs;
+    Coroutine *co;

    /* Request metadata */
-    uint64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;

    QEMUIOVector *qiov;         /* calling IOV */

@@ -133,32 +134,15 @@ struct QuorumAIOCB {
    int children_read;          /* how many children have been read from */
 };

-static bool quorum_vote(QuorumAIOCB *acb);
-
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
-{
-    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
-
-    /* cancel all callbacks */
-    for (i = 0; i < s->num_children; i++) {
-        if (acb->qcrs[i].aiocb) {
-            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
-        }
-    }
-}
-
-static AIOCBInfo quorum_aiocb_info = {
-    .aiocb_size         = sizeof(QuorumAIOCB),
-    .cancel_async       = quorum_aio_cancel,
-};
+typedef struct QuorumCo {   /* argument handed to the per-child coroutines */
+    QuorumAIOCB *acb;       /* request the coroutine works on */
+    int idx;                /* index into s->children[] and acb->qcrs[] */
+} QuorumCo;

 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
-    acb->common.cb(acb->common.opaque, acb->vote_ret);
    g_free(acb->qcrs);
-    qemu_aio_unref(acb);
+    g_free(acb);    /* pairs with the g_new() in quorum_aio_get() */
 }

 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
    return a->l == b->l;
 }

-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
-                                   BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
                                   QEMUIOVector *qiov,
-                                   uint64_t sector_num,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
+                                   uint64_t offset,
+                                   uint64_t bytes)
 {
-    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
    int i;

-    acb->common.bs->opaque = s;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->qiov = qiov;
-    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
-    acb->count = 0;
-    acb->success_count = 0;
-    acb->rewrite_count = 0;
-    acb->votes.compare = quorum_sha256_compare;
-    QLIST_INIT(&acb->votes.vote_list);
-    acb->is_read = false;
-    acb->vote_ret = 0;
+    *acb = (QuorumAIOCB) {
+        .co                 = qemu_coroutine_self(),
+        .bs                 = bs,
+        .offset             = offset,
+        .bytes              = bytes,
+        .qiov               = qiov,
+        .votes.compare      = quorum_sha256_compare,
+        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
+    };

+    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
    for (i = 0; i < s->num_children; i++) {
        acb->qcrs[i].buf = NULL;
        acb->qcrs[i].ret = 0;
@@ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
    return acb;
 }

-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
-                              int nb_sectors, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
+                              uint64_t bytes, char *node_name, int ret)
 {
    const char *msg = NULL;
+    int64_t start_sector = offset / BDRV_SECTOR_SIZE;
+    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+    /* The event is sent in sectors: the byte range is rounded outwards */
    if (ret < 0) {
        msg = strerror(-ret);
    }

-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
-                                      sector_num, nb_sectors, &error_abort);
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+                                      end_sector - start_sector, &error_abort);
 }

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
-    qapi_event_send_quorum_failure(reference, acb->sector_num,
-                                   acb->nb_sectors, &error_abort);
+    const char *reference = bdrv_get_device_or_node_name(acb->bs);
+    int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
+    int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
+                                      BDRV_SECTOR_SIZE);
+    /* Send the request's byte range as a start sector and a sector count */
+    qapi_event_send_quorum_failure(reference, start_sector,
+                                   end_sector - start_sector, &error_abort);
 }

 static int quorum_vote_error(QuorumAIOCB *acb);

 static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;

    if (acb->success_count < s->threshold) {
        acb->vote_ret = quorum_vote_error(acb);
@@ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
    return false;
 }

-static void quorum_rewrite_aio_cb(void *opaque, int ret)
-{
-    QuorumAIOCB *acb = opaque;
-
-    /* one less rewrite to do */
-    acb->rewrite_count--;
-
-    /* wait until all rewrite callbacks have completed */
-    if (acb->rewrite_count) {
-        return;
-    }
-
-    quorum_aio_finalize(acb);
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
+static int read_fifo_child(QuorumAIOCB *acb);

 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
 {
    QuorumAIOCB *acb = sacb->parent;
    QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-    quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
-                      sacb->aiocb->bs->node_name, ret);
-}
-
-static void quorum_fifo_aio_cb(void *opaque, int ret)
-{
-    QuorumChildRequest *sacb = opaque;
-    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-
-    assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
-
-    if (ret < 0) {
-        quorum_report_bad_acb(sacb, ret);
-
-        /* We try to read next child in FIFO order if we fail to read */
-        if (acb->children_read < s->num_children) {
-            read_fifo_child(acb);
-            return;
-        }
-    }
-
-    acb->vote_ret = ret;
-
-    /* FIXME: rewrite failed children if acb->children_read > 1? */
-    quorum_aio_finalize(acb);
-}
-
-static void quorum_aio_cb(void *opaque, int ret)
-{
-    QuorumChildRequest *sacb = opaque;
-    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    bool rewrite = false;
-    int i;
-
-    sacb->ret = ret;
-    if (ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, ret);
-    }
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-    if (acb->count < s->num_children) {
-        return;
-    }
-
-    /* Do the vote on read */
-    if (acb->is_read) {
-        rewrite = quorum_vote(acb);
-        for (i = 0; i < s->num_children; i++) {
-            qemu_vfree(acb->qcrs[i].buf);
-            qemu_iovec_destroy(&acb->qcrs[i].qiov);
-        }
-    } else {
-        quorum_has_too_much_io_failed(acb);
-    }
-
-    /* if no rewrite is done the code will finish right away */
-    if (!rewrite) {
-        quorum_aio_finalize(acb);
-    }
+    quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
 }

 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
-                              acb->nb_sectors,
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
                              s->children[item->index]->bs->node_name, 0);
        }
    }
 }

-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+/* Coroutine entry point: rewrite the request data to child co->idx. */
+static void quorum_rewrite_entry(void *opaque)
+{
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int idx = co->idx;
+
+    /* Best-effort repair of already corrupted data: errors are ignored. */
+    bdrv_co_pwritev(s->children[idx], acb->offset, acb->bytes, acb->qiov, 0);
+
+    /* Whichever rewrite finishes last wakes up the caller */
+    acb->rewrite_count--;
+    if (acb->rewrite_count == 0) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
+}
+
+static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
                                        QuorumVoteValue *value)
 {
    QuorumVoteVersion *version;
@@ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
        }
    }

-    /* quorum_rewrite_aio_cb will count down this to zero */
+    /* quorum_rewrite_entry will count down this to zero */
    acb->rewrite_count = count;

    /* now fire the correcting rewrites */
@@ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->children[item->index], acb->sector_num,
-                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
-                            acb);
+            Coroutine *co;
+            QuorumCo data = {
+                .acb = acb,
+                .idx = item->index,
+            };
+
+            co = qemu_coroutine_create(quorum_rewrite_entry, &data);
+            qemu_coroutine_enter(co);
        }
    }

@@ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
    va_list ap;

    va_start(ap, fmt);
-    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->sector_num, acb->nb_sectors);
+    fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
+            acb->offset, acb->bytes);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
@@ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
                           QEMUIOVector *a,
                           QEMUIOVector *b)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    ssize_t offset;

    /* This driver will replace blkverify in this particular case */
    if (s->is_blkverify) {
        offset = qemu_iovec_compare(a, b);
        if (offset != -1) {
-            quorum_err(acb, "contents mismatch in sector %" PRId64,
-                       acb->sector_num +
-                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
+            quorum_err(acb, "contents mismatch at offset %" PRIu64,
+                       acb->offset + offset);
        }
        return true;
    }
@@ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
 /* Do a vote to get the error code */
 static int quorum_vote_error(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    QuorumVoteVersion *winner = NULL;
    QuorumVotes error_votes;
    QuorumVoteValue result_value;
@@ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
    return ret;
 }

-static bool quorum_vote(QuorumAIOCB *acb)
+static void quorum_vote(QuorumAIOCB *acb)
 {
    bool quorum = true;
-    bool rewrite = false;
    int i, j, ret;
    QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
    QuorumVoteVersion *winner;

    if (quorum_has_too_much_io_failed(acb)) {
-        return false;
+        return;
    }

    /* get the index of the first successful read */
@@ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
    /* Every successful read agrees */
    if (quorum) {
        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return false;
+        return;
    }

    /* compute hashes for each successful read, also store indexes */
@@ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb)

    /* corruption correction is enabled */
    if (s->rewrite_corrupted) {
-        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+        quorum_rewrite_bad_versions(acb, &winner->value);
    }

 free_exit:
    /* free lists */
    quorum_free_vote_list(&acb->votes);
-    return rewrite;
 }

-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static void read_quorum_children_entry(void *opaque)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int i = co->idx;
+    QuorumChildRequest *sacb = &acb->qcrs[i];
+    /* Coroutine entry point: read child i into that child's own qiov */
+    sacb->bs = s->children[i]->bs;
+    sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
+                               &acb->qcrs[i].qiov, 0);
+    /* Tally the outcome; a non-zero result is reported as a bad child */
+    if (sacb->ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad_acb(sacb, sacb->ret);
+    }
+    /* count tracks finished reads, whether or not they succeeded */
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+
+    /* Wake up the caller after the last read */
+    if (acb->count == s->num_children) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
+}
+
+static int read_quorum_children(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->bs->opaque;
+    int i, ret;

    acb->children_read = s->num_children;
    for (i = 0; i < s->num_children; i++) {
@@ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
    }

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
-                                            &acb->qcrs[i].qiov, acb->nb_sectors,
-                                            quorum_aio_cb, &acb->qcrs[i]);
+        Coroutine *co;
+        QuorumCo data = {
+            .acb = acb,
+            .idx = i,
+        };
+
+        co = qemu_coroutine_create(read_quorum_children_entry, &data);
+        qemu_coroutine_enter(co);
    }

-    return &acb->common;
+    while (acb->count < s->num_children) {
+        qemu_coroutine_yield();
+    }
+
+    /* Do the vote on read */
+    quorum_vote(acb);
+    for (i = 0; i < s->num_children; i++) {
+        qemu_vfree(acb->qcrs[i].buf);
+        qemu_iovec_destroy(&acb->qcrs[i].qiov);
+    }
+
+    while (acb->rewrite_count) {
+        qemu_coroutine_yield();
+    }
+
+    ret = acb->vote_ret;
+
+    return ret;
 }

-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static int read_fifo_child(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int n = acb->children_read++;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int n, ret;

-    acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
-                                        acb->qiov, acb->nb_sectors,
-                                        quorum_fifo_aio_cb, &acb->qcrs[n]);
+    /* Read one child at a time, moving on to the next child only on failure */
+    do {
+        n = acb->children_read++;
+        acb->qcrs[n].bs = s->children[n]->bs;
+        ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+                             acb->qiov, 0);
+        if (ret < 0) {
+            quorum_report_bad_acb(&acb->qcrs[n], ret);
+        }
+    } while (ret < 0 && acb->children_read < s->num_children);

-    return &acb->common;
+    /* FIXME: rewrite failed children if acb->children_read > 1? */
+    /* ret is the status of the last child that was tried */
+    return ret;
 }

-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                    int64_t sector_num,
-                                    QEMUIOVector *qiov,
-                                    int nb_sectors,
-                                    BlockCompletionFunc *cb,
-                                    void *opaque)
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
+                            uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
-                                      nb_sectors, cb, opaque);
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    int ret;
+    /* The flags argument is not used: both read helpers pass flags 0 */
    acb->is_read = true;
    acb->children_read = 0;

    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        return read_quorum_children(acb);
+        ret = read_quorum_children(acb);
+    } else {
+        ret = read_fifo_child(acb);
    }
+    quorum_aio_finalize(acb);   /* frees acb->qcrs and acb */

-    return read_fifo_child(acb);
+    return ret;
 }

-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     QEMUIOVector *qiov,
-                                     int nb_sectors,
-                                     BlockCompletionFunc *cb,
-                                     void *opaque)
+/* Coroutine entry point: write the request to child co->idx and tally it. */
+static void write_quorum_entry(void *opaque)
+{
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int idx = co->idx;
+    QuorumChildRequest *req = &acb->qcrs[idx];
+
+    req->bs = s->children[idx]->bs;
+    req->ret = bdrv_co_pwritev(s->children[idx], acb->offset, acb->bytes,
+                               acb->qiov, 0);
+    if (req->ret != 0) {
+        quorum_report_bad_acb(req, req->ret);
+    } else {
+        acb->success_count++;
+    }
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+    /* Only the final write to complete re-enters the waiting caller */
+    if (acb->count == s->num_children) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
+}
+
+static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                             uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
-                                      cb, opaque);
-    int i;
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    int i, ret;

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
-                                             qiov, nb_sectors, &quorum_aio_cb,
-                                             &acb->qcrs[i]);
+        Coroutine *co;
+        QuorumCo data = {
+            .acb = acb,
+            .idx = i,
+        };
+        /* data is on the stack: write_quorum_entry copies its fields on entry */
+        co = qemu_coroutine_create(write_quorum_entry, &data);
+        qemu_coroutine_enter(co);
    }

-    return &acb->common;
+    while (acb->count < s->num_children) {
+        qemu_coroutine_yield();
+    }
+    /* Sets acb->vote_ret when fewer than s->threshold writes succeeded */
+    quorum_has_too_much_io_failed(acb);
+    /* Save the result first: quorum_aio_finalize() frees acb */
+    ret = acb->vote_ret;
+    quorum_aio_finalize(acb);
+
+    return ret;
 }

 static int64_t quorum_getlength(BlockDriverState *bs)
@@ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
        result = bdrv_co_flush(s->children[i]->bs);
        if (result) {
            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
-                              bdrv_nb_sectors(s->children[i]->bs),
+                              bdrv_getlength(s->children[i]->bs),
                              s->children[i]->bs->node_name, result);
            result_value.l = result;
            quorum_count_vote(&error_votes, &result_value, i);
@@ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = {

    .bdrv_getlength                     = quorum_getlength,

-    .bdrv_aio_readv                     = quorum_aio_readv,
-    .bdrv_aio_writev                    = quorum_aio_writev,
+    .bdrv_co_preadv                     = quorum_co_preadv,
+    .bdrv_co_pwritev                    = quorum_co_pwritev,

    .bdrv_add_child                     = quorum_add_child,
    .bdrv_del_child                     = quorum_del_child,
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw"
+/* BlockDriver implementation for "raw" format driver
 *
 * Copyright (C) 2010-2016 Red Hat, Inc.
 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
@@ -31,6 +31,30 @@
 #include "qapi/error.h"
 #include "qemu/option.h"

+typedef struct BDRVRawState {
+    uint64_t offset;    /* bytes skipped at the start of bs->file */
+    uint64_t size;      /* length returned by raw_getlength(), in bytes */
+    bool has_size;      /* true when the "size" option was given */
+} BDRVRawState;
+
+static QemuOptsList raw_runtime_opts = { /* parsed by raw_read_options() */
+    .name = "raw",
+    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
+    .desc = {
+        {
+            .name = "offset",
+            .type = QEMU_OPT_SIZE,
+            .help = "offset in the disk where the image starts",
+        },
+        {
+            .name = "size",
+            .type = QEMU_OPT_SIZE,
+            .help = "virtual disk size",
+        },
+        { /* end of list */ }
+    },
+};
+
 static QemuOptsList raw_create_opts = {
    .name = "raw-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
@@ -44,16 +68,116 @@ static QemuOptsList raw_create_opts = {
    }
 };

+/* Parse "offset"/"size" from @options into @s, checked against bs->file's
+ * length. Returns 0 on success, a negative value (with @errp set) on error. */
+static int raw_read_options(QDict *options, BlockDriverState *bs,
+    BDRVRawState *s, Error **errp)
+{
+    Error *local_err = NULL;
+    QemuOpts *opts = NULL;
+    int64_t real_size = 0;
+    int ret;
+
+    real_size = bdrv_getlength(bs->file->bs);
+    if (real_size < 0) {
+        error_setg_errno(errp, -real_size, "Could not get image size");
+        return real_size;
+    }
+
+    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto end;
+    }
+
+    s->offset = qemu_opt_get_size(opts, "offset", 0);
+    if (s->offset > real_size) {
+        error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
+            "size of the containing file (%" PRId64 ")",
+            s->offset, real_size);
+        ret = -EINVAL;
+        goto end;
+    }
+
+    if (qemu_opt_find(opts, "size") != NULL) {
+        s->size = qemu_opt_get_size(opts, "size", 0);
+        s->has_size = true;
+    } else {
+        s->has_size = false;
+        s->size = real_size - s->offset;
+    }
+
+    /* Check size and offset */
+    if ((real_size - s->offset) < s->size) {
+        error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
+            "(%" PRIu64 ") has to be smaller or equal to the "
+            " actual size of the containing file (%" PRId64 ")",
+            s->offset, s->size, real_size);
+        ret = -EINVAL;
+        goto end;
+    }
+
+    /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
+     * up and leaking out of the specified area. */
+    if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) {
+        error_setg(errp, "Specified size is not multiple of %llu",
+            BDRV_SECTOR_SIZE);
+        ret = -EINVAL;
+        goto end;
+    }
+
+    ret = 0;
+
+end:
+    qemu_opts_del(opts);
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
                              BlockReopenQueue *queue, Error **errp)
 {
-    return 0;
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs != NULL);
+
+    reopen_state->opaque = g_new0(BDRVRawState, 1);
+
+    return raw_read_options(
+        reopen_state->options,
+        reopen_state->bs,
+        reopen_state->opaque,
+        errp);
+}
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawState *new_s = state->opaque;
+    BDRVRawState *s = state->bs->opaque;
+
+    memcpy(s, new_s, sizeof(BDRVRawState));
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+    g_free(state->opaque);
+    state->opaque = NULL;
 }

 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
                                      uint64_t bytes, QEMUIOVector *qiov,
                                      int flags)
 {
+    BDRVRawState *s = bs->opaque;
+
+    if (offset > UINT64_MAX - s->offset) {
+        return -EINVAL;
+    }
+    offset += s->offset;
+
    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
@@ -62,11 +186,23 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                       uint64_t bytes, QEMUIOVector *qiov,
                                       int flags)
 {
+    BDRVRawState *s = bs->opaque;
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

+    if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
+        /* There's not enough space for the data. Don't write anything and just
+         * fail to prevent leaking out of the size specified in options. */
+        return -ENOSPC;
+    }
+
+    if (offset > UINT64_MAX - s->offset) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
        /* Handling partial writes would be a pain - so we just
         * require that guests have 512-byte request alignment if
@@ -101,6 +237,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
        qiov = &local_qiov;
    }

+    offset += s->offset;
+
    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);

@@ -117,8 +255,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                            int nb_sectors, int *pnum,
                                            BlockDriverState **file)
 {
+    BDRVRawState *s = bs->opaque;
    *pnum = nb_sectors;
    *file = bs->file->bs;
+    sector_num += s->offset / BDRV_SECTOR_SIZE;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
           (sector_num << BDRV_SECTOR_BITS);
 }
@@ -127,18 +267,49 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int count,
                                             BdrvRequestFlags flags)
 {
+    BDRVRawState *s = bs->opaque;
+    if (offset > INT64_MAX - s->offset) { /* keep the signed sum in range */
+        return -EINVAL;
+    }
+    offset += s->offset;
    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
 }

 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
                                         int64_t offset, int count)
 {
+    BDRVRawState *s = bs->opaque;
+    if (offset > INT64_MAX - s->offset) { /* keep the signed sum in range */
+        return -EINVAL;
+    }
+    offset += s->offset;
    return bdrv_co_pdiscard(bs->file->bs, offset, count);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    int64_t len;
+    BDRVRawState *s = bs->opaque;
+
+    /* Update size. It should not change unless the file was externally
+     * modified. */
+    len = bdrv_getlength(bs->file->bs);
+    if (len < 0) {
+        return len;
+    }
+
+    if (len < s->offset) {
+        s->size = 0;
+    } else {
+        if (s->has_size) {
+            /* Try to honour the size */
+            s->size = MIN(s->size, len - s->offset);
+        } else {
+            s->size = len - s->offset;
+        }
+    }
+
+    return s->size;
 }

 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -158,6 +329,18 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
+    BDRVRawState *s = bs->opaque;
+
+    if (s->has_size) {
+        return -ENOTSUP;
+    }
+
+    if (INT64_MAX - offset < s->offset) {
+        return -EINVAL;
+    }
+
+    s->size = offset;
+    offset += s->offset;
    return bdrv_truncate(bs->file->bs, offset);
 }

@@ -176,12 +359,13 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked)
    bdrv_lock_medium(bs->file->bs, locked);
 }

-static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-                                 unsigned long int req, void *buf,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque)
+static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
-    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
+    BDRVRawState *s = bs->opaque;
+    if (s->offset || s->has_size) {
+        return -ENOTSUP;
+    }
+    return bdrv_co_ioctl(bs->file->bs, req, buf);
 }

 static int raw_has_zero_init(BlockDriverState *bs)
@@ -197,6 +381,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
    bs->sg = bs->file->bs->sg;
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
@@ -214,6 +401,16 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                bs->file->bs->filename);
    }

+    ret = raw_read_options(options, bs, s, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (bs->sg && (s->offset || s->has_size)) {
+        error_setg(errp, "Cannot use offset/size with SCSI generic devices");
+        return -EINVAL;
+    }
+
    return 0;
 }

@@ -231,18 +428,37 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)

 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
-    return bdrv_probe_blocksizes(bs->file->bs, bsz);
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+        return -ENOTSUP;
+    }
+
+    return 0;
 }

 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
+    BDRVRawState *s = bs->opaque;
+    if (s->offset || s->has_size) {
+        return -ENOTSUP;
+    }
    return bdrv_probe_geometry(bs->file->bs, geo);
 }

 BlockDriver bdrv_raw = {
    .format_name          = "raw",
+    .instance_size        = sizeof(BDRVRawState),
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
+    .bdrv_reopen_commit   = &raw_reopen_commit,
+    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
    .bdrv_create          = &raw_create,
@@ -261,7 +477,7 @@ BlockDriver bdrv_raw = {
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
-    .bdrv_aio_ioctl       = &raw_aio_ioctl,
+    .bdrv_co_ioctl        = &raw_co_ioctl,
    .create_opts          = &raw_create_opts,
    .bdrv_has_zero_init   = &raw_has_zero_init
 };
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -365,45 +365,44 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
        rados_conf_read_file(cluster, NULL);
    } else if (conf[0] != '\0' &&
               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
-        rados_shutdown(cluster);
        error_propagate(errp, local_err);
-        return -EIO;
+        ret = -EIO;
+        goto shutdown;
    }

    if (conf[0] != '\0' &&
        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
-        rados_shutdown(cluster);
        error_propagate(errp, local_err);
-        return -EIO;
+        ret = -EIO;
+        goto shutdown;
    }

    if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
-        rados_shutdown(cluster);
-        return -EIO;
+        ret = -EIO;
+        goto shutdown;
    }

    ret = rados_connect(cluster);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error connecting");
-        rados_shutdown(cluster);
-        return ret;
+        goto shutdown;
    }

    ret = rados_ioctx_create(cluster, pool, &io_ctx);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error opening pool %s", pool);
-        rados_shutdown(cluster);
-        return ret;
+        goto shutdown;
    }

    ret = rbd_create(io_ctx, name, bytes, &obj_order);
-    rados_ioctx_destroy(io_ctx);
-    rados_shutdown(cluster);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error rbd create");
-        return ret;
    }

+    rados_ioctx_destroy(io_ctx);
+
+shutdown:
+    rados_shutdown(cluster);
    return ret;
 }

--- a/block/replication.c
+++ b/block/replication.c
@@ -138,6 +138,9 @@ static void replication_close(BlockDriverState *bs)
    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
        replication_stop(s->rs, false, NULL);
    }
+    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
+        block_job_cancel_sync(s->active_disk->bs->job);
+    }

    if (s->mode == REPLICATION_MODE_SECONDARY) {
        g_free(s->top_id);
@@ -319,9 +322,10 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
    }
 }

-static void reopen_backing_file(BDRVReplicationState *s, bool writable,
+static void reopen_backing_file(BlockDriverState *bs, bool writable,
                                Error **errp)
 {
+    BDRVReplicationState *s = bs->opaque;
    BlockReopenQueue *reopen_queue = NULL;
    int orig_hidden_flags, orig_secondary_flags;
    int new_hidden_flags, new_secondary_flags;
@@ -356,13 +360,15 @@ static void reopen_backing_file(BDRVReplicationState *s, bool writable,
    }

    if (reopen_queue) {
-        bdrv_reopen_multiple(reopen_queue, &local_err);
+        bdrv_reopen_multiple(bdrv_get_aio_context(bs),
+                             reopen_queue, &local_err);
        error_propagate(errp, local_err);
    }
 }

-static void backup_job_cleanup(BDRVReplicationState *s)
+static void backup_job_cleanup(BlockDriverState *bs)
 {
+    BDRVReplicationState *s = bs->opaque;
    BlockDriverState *top_bs;

    top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
@@ -371,19 +377,20 @@ static void backup_job_cleanup(BDRVReplicationState *s)
    }
    bdrv_op_unblock_all(top_bs, s->blocker);
    error_free(s->blocker);
-    reopen_backing_file(s, false, NULL);
+    reopen_backing_file(bs, false, NULL);
 }

 static void backup_job_completed(void *opaque, int ret)
 {
-    BDRVReplicationState *s = opaque;
+    BlockDriverState *bs = opaque;
+    BDRVReplicationState *s = bs->opaque;

    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
        /* The backup job is cancelled unexpectedly */
        s->error = -EIO;
    }

-    backup_job_cleanup(s);
+    backup_job_cleanup(bs);
 }

 static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
@@ -414,6 +421,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
    int64_t active_length, hidden_length, disk_length;
    AioContext *aio_context;
    Error *local_err = NULL;
+    BlockJob *job;

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
@@ -479,7 +487,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        }

        /* reopen the backing file in r/w mode */
-        reopen_backing_file(s, true, &local_err);
+        reopen_backing_file(bs, true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            aio_context_release(aio_context);
@@ -494,23 +502,25 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        if (!top_bs || !bdrv_is_root_node(top_bs) ||
            !check_top_bs(top_bs, bs)) {
            error_setg(errp, "No top_bs or it is invalid");
-            reopen_backing_file(s, false, NULL);
+            reopen_backing_file(bs, false, NULL);
            aio_context_release(aio_context);
            return;
        }
        bdrv_op_block_all(top_bs, s->blocker);
        bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);

-        backup_start("replication-backup", s->secondary_disk->bs,
-                     s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL, false,
-                     BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-                     backup_job_completed, s, NULL, &local_err);
+        job = backup_job_create(NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+                                0, MIRROR_SYNC_MODE_NONE, NULL, false,
+                                BLOCKDEV_ON_ERROR_REPORT,
+                                BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL,
+                                backup_job_completed, bs, NULL, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
-            backup_job_cleanup(s);
+            backup_job_cleanup(bs);
            aio_context_release(aio_context);
            return;
        }
+        block_job_start(job);
        break;
    default:
        aio_context_release(aio_context);
@@ -626,10 +636,9 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
        }

        s->replication_state = BLOCK_REPLICATION_FAILOVER;
-        commit_active_start("replication-commit", s->active_disk->bs,
-                            s->secondary_disk->bs, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            replication_done,
-                            bs, errp, true);
+        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
+                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
+                            replication_done, bs, errp, true);
        break;
    default:
        aio_context_release(aio_context);
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -306,6 +306,7 @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
    } while (0)

 typedef struct SheepdogAIOCB SheepdogAIOCB;
+typedef struct BDRVSheepdogState BDRVSheepdogState;

 typedef struct AIOReq {
    SheepdogAIOCB *aiocb;
@@ -334,7 +335,7 @@ enum AIOCBState {
       || y->max_affect_data_idx < x->min_affect_data_idx))

 struct SheepdogAIOCB {
-    BlockAIOCB common;
+    BDRVSheepdogState *s;

    QEMUIOVector *qiov;

@@ -345,9 +346,6 @@ struct SheepdogAIOCB {
    enum AIOCBState aiocb_type;

    Coroutine *coroutine;
-    void (*aio_done_func)(SheepdogAIOCB *);
-
-    bool cancelable;
    int nr_pending;

    uint32_t min_affect_data_idx;
@@ -365,7 +363,7 @@ struct SheepdogAIOCB {
    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
 };

-typedef struct BDRVSheepdogState {
+struct BDRVSheepdogState {
    BlockDriverState *bs;
    AioContext *aio_context;

@@ -392,7 +390,7 @@ typedef struct BDRVSheepdogState {

    CoQueue overlapping_queue;
    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
-} BDRVSheepdogState;
+};

 typedef struct BDRVSheepdogReopenState {
    int fd;
@@ -450,14 +448,13 @@ static const char * sd_strerror(int err)
 *
 * 1. In sd_co_rw_vector, we send the I/O requests to the server and
 *    link the requests to the inflight_list in the
- *    BDRVSheepdogState.  The function exits without waiting for
+ *    BDRVSheepdogState.  The function yields while waiting for
 *    receiving the response.
 *
 * 2. We receive the response in aio_read_response, the fd handler to
- *    the sheepdog connection.  If metadata update is needed, we send
- *    the write request to the vdi object in sd_write_done, the write
- *    completion function.  We switch back to sd_co_readv/writev after
- *    all the requests belonging to the AIOCB are finished.
+ *    the sheepdog connection.  We switch back to sd_co_readv/sd_co_writev
+ *    after all the requests belonging to the AIOCB are finished.  If
+ *    needed, sd_co_writev will send further requests for the vdi object.
 */

 static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
@@ -482,94 +479,34 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
    return aio_req;
 }

-static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
+static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 {
-    SheepdogAIOCB *acb = aio_req->aiocb;
+    SheepdogAIOCB *cb;

-    acb->cancelable = false;
-    QLIST_REMOVE(aio_req, aio_siblings);
-    g_free(aio_req);
-
-    acb->nr_pending--;
-}
-
-static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
-{
-    qemu_coroutine_enter(acb->coroutine);
-    qemu_aio_unref(acb);
-}
-
-/*
- * Check whether the specified acb can be canceled
- *
- * We can cancel aio when any request belonging to the acb is:
- *  - Not processed by the sheepdog server.
- *  - Not linked to the inflight queue.
- */
-static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq;
-
-    if (!acb->cancelable) {
-        return false;
-    }
-
-    QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
-        if (aioreq->aiocb == acb) {
-            return false;
+retry:
+    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
+        if (AIOCBOverlapping(acb, cb)) {
+            qemu_co_queue_wait(&s->overlapping_queue);
+            goto retry;
        }
    }
-
-    return true;
 }

-static void sd_aio_cancel(BlockAIOCB *blockacb)
+static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
+                         QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
+                         int type)
 {
-    SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq, *next;
-
-    if (sd_acb_cancelable(acb)) {
-        /* Remove outstanding requests from failed queue.  */
-        QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
-            }
-        }
-
-        assert(acb->nr_pending == 0);
-        if (acb->common.cb) {
-            acb->common.cb(acb->common.opaque, -ECANCELED);
-        }
-        sd_finish_aiocb(acb);
-    }
-}
-
-static const AIOCBInfo sd_aiocb_info = {
-    .aiocb_size     = sizeof(SheepdogAIOCB),
-    .cancel_async   = sd_aio_cancel,
-};
-
-static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
-                                   int64_t sector_num, int nb_sectors)
-{
-    SheepdogAIOCB *acb;
    uint32_t object_size;
-    BDRVSheepdogState *s = bs->opaque;

    object_size = (UINT32_C(1) << s->inode.block_size_shift);

-    acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
+    acb->s = s;

    acb->qiov = qiov;

    acb->sector_num = sector_num;
    acb->nb_sectors = nb_sectors;

-    acb->aio_done_func = NULL;
-    acb->cancelable = true;
    acb->coroutine = qemu_coroutine_self();
    acb->ret = 0;
    acb->nr_pending = 0;
@@ -580,8 +517,14 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,

    acb->min_dirty_data_idx = UINT32_MAX;
    acb->max_dirty_data_idx = 0;
+    acb->aiocb_type = type;

-    return acb;
+    if (type == AIOCB_FLUSH_CACHE) {
+        return;
+    }
+
+    wait_for_overlapping_aiocb(s, acb);
+    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
 }

 /* Return -EIO in case of error, file descriptor on success */
@@ -641,6 +584,7 @@ static void restart_co_req(void *opaque)

 typedef struct SheepdogReqCo {
    int sockfd;
+    BlockDriverState *bs;
    AioContext *aio_context;
    SheepdogReq *hdr;
    void *data;
@@ -663,7 +607,7 @@ static coroutine_fn void do_co_req(void *opaque)

    co = qemu_coroutine_self();
    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, restart_co_req, co);
+                       NULL, restart_co_req, NULL, co);

    ret = send_co_req(sockfd, hdr, data, wlen);
    if (ret < 0) {
@@ -671,7 +615,7 @@ static coroutine_fn void do_co_req(void *opaque)
    }

    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       restart_co_req, NULL, co);
+                       restart_co_req, NULL, NULL, co);

    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
    if (ret != sizeof(*hdr)) {
@@ -697,10 +641,13 @@ out:
    /* there is at most one request for this sockfd, so it is safe to
     * set each handler to NULL. */
    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL);

    srco->ret = ret;
    srco->finished = true;
+    if (srco->bs) {
+        bdrv_wakeup(srco->bs);
+    }
 }

 /*
@@ -708,13 +655,14 @@ out:
 *
 * Return 0 on success, -errno in case of error.
 */
-static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr,
                  void *data, unsigned int *wlen, unsigned int *rlen)
 {
    Coroutine *co;
    SheepdogReqCo srco = {
        .sockfd = sockfd,
-        .aio_context = aio_context,
+        .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(),
+        .bs = bs,
        .hdr = hdr,
        .data = data,
        .wlen = wlen,
@@ -727,9 +675,14 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
        do_co_req(&srco);
    } else {
        co = qemu_coroutine_create(do_co_req, &srco);
-        qemu_coroutine_enter(co);
-        while (!srco.finished) {
-            aio_poll(aio_context, true);
+        if (bs) {
+            qemu_coroutine_enter(co);
+            BDRV_POLL_WHILE(bs, !srco.finished);
+        } else {
+            qemu_coroutine_enter(co);
+            while (!srco.finished) {
+                aio_poll(qemu_get_aio_context(), true);
+            }
        }
    }

@@ -750,7 +703,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
    AIOReq *aio_req, *next;

    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL);
+                       NULL, NULL, NULL);
    close(s->fd);
    s->fd = -1;

@@ -787,7 +740,6 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
    while (!QLIST_EMPTY(&s->failed_aio_head)) {
        aio_req = QLIST_FIRST(&s->failed_aio_head);
        QLIST_REMOVE(aio_req, aio_siblings);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
        resend_aioreq(s, aio_req);
    }
 }
@@ -830,9 +782,6 @@ static void coroutine_fn aio_read_response(void *opaque)

    switch (acb->aiocb_type) {
    case AIOCB_WRITE_UDATA:
-        /* this coroutine context is no longer suitable for co_recv
-         * because we may send data to update vdi objects */
-        s->co_recv = NULL;
        if (!is_data_obj(aio_req->oid)) {
            break;
        }
@@ -880,6 +829,12 @@ static void coroutine_fn aio_read_response(void *opaque)
        }
    }

+    /* No more data for this aio_req (reload_inode below uses its own file
+     * descriptor handler which doesn't use co_recv).
+    */
+    s->co_recv = NULL;
+
+    QLIST_REMOVE(aio_req, aio_siblings);
    switch (rsp.result) {
    case SD_RES_SUCCESS:
        break;
@@ -897,26 +852,26 @@ static void coroutine_fn aio_read_response(void *opaque)
            aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
        }
        resend_aioreq(s, aio_req);
-        goto out;
+        return;
    default:
        acb->ret = -EIO;
        error_report("%s", sd_strerror(rsp.result));
        break;
    }

-    free_aio_req(s, aio_req);
-    if (!acb->nr_pending) {
+    g_free(aio_req);
+
+    if (!--acb->nr_pending) {
        /*
         * We've finished all requests which belong to the AIOCB, so
         * we can switch back to sd_co_readv/writev now.
         */
-        acb->aio_done_func(acb);
+        qemu_coroutine_enter(acb->coroutine);
    }
-out:
-    s->co_recv = NULL;
+
    return;
+
 err:
-    s->co_recv = NULL;
    reconnect_to_sdog(opaque);
 }

@@ -954,7 +909,7 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
    }

    aio_set_fd_handler(s->aio_context, fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
    return fd;
 }

@@ -1125,7 +1080,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
    hdr.snapid = snapid;
    hdr.flags = SD_FLAG_CMD_WRITE;

-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
    if (ret) {
        error_setg_errno(errp, -ret, "cannot get vdi info");
        goto out;
@@ -1166,6 +1121,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    uint64_t old_oid = aio_req->base_oid;
    bool create = aio_req->create;

+    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+
    if (!nr_copies) {
        error_report("bug");
    }
@@ -1216,7 +1173,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    qemu_co_mutex_lock(&s->lock);
    s->co_send = qemu_coroutine_self();
    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, co_write_request, s);
+                       co_read_response, co_write_request, NULL, s);
    socket_set_cork(s->fd, 1);

    /* send a header */
@@ -1235,12 +1192,12 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 out:
    socket_set_cork(s->fd, 0);
    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
    s->co_send = NULL;
    qemu_co_mutex_unlock(&s->lock);
 }

-static int read_write_object(int fd, AioContext *aio_context, char *buf,
+static int read_write_object(int fd, BlockDriverState *bs, char *buf,
                             uint64_t oid, uint8_t copies,
                             unsigned int datalen, uint64_t offset,
                             bool write, bool create, uint32_t cache_flags)
@@ -1274,7 +1231,7 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf,
    hdr.offset = offset;
    hdr.copies = copies;

-    ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
    if (ret) {
        error_report("failed to send a request to the sheep");
        return ret;
@@ -1289,22 +1246,22 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf,
    }
 }

-static int read_object(int fd, AioContext *aio_context, char *buf,
+static int read_object(int fd, BlockDriverState *bs, char *buf,
                       uint64_t oid, uint8_t copies,
                       unsigned int datalen, uint64_t offset,
                       uint32_t cache_flags)
 {
-    return read_write_object(fd, aio_context, buf, oid, copies,
+    return read_write_object(fd, bs, buf, oid, copies,
                             datalen, offset, false,
                             false, cache_flags);
 }

-static int write_object(int fd, AioContext *aio_context, char *buf,
+static int write_object(int fd, BlockDriverState *bs, char *buf,
                        uint64_t oid, uint8_t copies,
                        unsigned int datalen, uint64_t offset, bool create,
                        uint32_t cache_flags)
 {
-    return read_write_object(fd, aio_context, buf, oid, copies,
+    return read_write_object(fd, bs, buf, oid, copies,
                             datalen, offset, true,
                             create, cache_flags);
 }
@@ -1331,7 +1288,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
        goto out;
    }

-    ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid),
                      s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
                      s->cache_flags);
    if (ret < 0) {
@@ -1386,7 +1343,7 @@ static void sd_detach_aio_context(BlockDriverState *bs)
    BDRVSheepdogState *s = bs->opaque;

    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL);
+                       NULL, NULL, NULL);
 }

 static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1396,7 +1353,7 @@ static void sd_attach_aio_context(BlockDriverState *bs,

    s->aio_context = new_context;
    aio_set_fd_handler(new_context, s->fd, false,
-                       co_read_response, NULL, s);
+                       co_read_response, NULL, NULL, s);
 }

 /* TODO Convert to fine grained options */
@@ -1489,7 +1446,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
                      0, SD_INODE_SIZE, 0, s->cache_flags);

    closesocket(fd);
@@ -1510,7 +1467,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    return 0;
 out:
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    if (s->fd >= 0) {
        closesocket(s->fd);
    }
@@ -1549,7 +1506,7 @@ static void sd_reopen_commit(BDRVReopenState *state)

    if (s->fd) {
        aio_set_fd_handler(s->aio_context, s->fd, false,
-                           NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL);
        closesocket(s->fd);
    }

@@ -1573,7 +1530,7 @@ static void sd_reopen_abort(BDRVReopenState *state)

    if (re_s->fd) {
        aio_set_fd_handler(s->aio_context, re_s->fd, false,
-                           NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL);
        closesocket(re_s->fd);
    }

@@ -1618,7 +1575,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
    hdr.copies = s->inode.nr_copies;
    hdr.block_size_shift = s->inode.block_size_shift;

-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+    ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen);

    closesocket(fd);

@@ -1886,7 +1843,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
        hdr.proto_ver = SD_PROTO_VER;

-        ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+        ret = do_req(fd, NULL, (SheepdogReq *)&hdr,
                     NULL, &wlen, &rlen);
        closesocket(fd);
        if (ret) {
@@ -1951,7 +1908,7 @@ static void sd_close(BlockDriverState *bs)
    hdr.data_length = wlen;
    hdr.flags = SD_FLAG_CMD_WRITE;

-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 s->name, &wlen, &rlen);

    closesocket(fd);
@@ -1962,7 +1919,7 @@ static void sd_close(BlockDriverState *bs)
    }

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    closesocket(s->fd);
    g_free(s->host_spec);
 }
@@ -2000,7 +1957,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
    /* we don't need to update entire object */
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
    s->inode.vdi_size = offset;
-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+    ret = write_object(fd, s->bs, (char *)&s->inode,
                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
                       datalen, 0, false, s->cache_flags);
    close(fd);
@@ -2015,11 +1972,10 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
 /*
 * This function is called after writing data objects.  If we need to
 * update metadata, this sends a write request to the vdi object.
- * Otherwise, this switches back to sd_co_readv/writev.
 */
 static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
 {
-    BDRVSheepdogState *s = acb->common.bs->opaque;
+    BDRVSheepdogState *s = acb->s;
    struct iovec iov;
    AIOReq *aio_req;
    uint32_t offset, data_len, mn, mx;
@@ -2028,6 +1984,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
    mx = acb->max_dirty_data_idx;
    if (mn <= mx) {
        /* we need to update the vdi object. */
+        ++acb->nr_pending;
        offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
            mn * sizeof(s->inode.data_vdi_id[0]);
        data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
@@ -2039,15 +1996,11 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
        iov.iov_len = sizeof(s->inode);
        aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
                                data_len, offset, 0, false, 0, offset);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
-
-        acb->aio_done_func = sd_finish_aiocb;
-        acb->aiocb_type = AIOCB_WRITE_UDATA;
-        return;
+        if (--acb->nr_pending) {
+            qemu_coroutine_yield();
+        }
    }
-
-    sd_finish_aiocb(acb);
 }

 /* Delete current working VDI on the snapshot chain */
@@ -2070,7 +2023,7 @@ static bool sd_delete(BDRVSheepdogState *s)
        return false;
    }

-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 s->name, &wlen, &rlen);
    closesocket(fd);
    if (ret) {
@@ -2126,7 +2079,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
        goto out;
    }

-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);

    closesocket(fd);
@@ -2159,16 +2112,15 @@ out:
 * Returns 1 when we need to wait a response, 0 when there is no sent
 * request and -errno in error cases.
 */
-static int coroutine_fn sd_co_rw_vector(void *p)
+static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
 {
-    SheepdogAIOCB *acb = p;
    int ret = 0;
    unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
    unsigned long idx;
    uint32_t object_size;
    uint64_t oid;
    uint64_t offset;
-    BDRVSheepdogState *s = acb->common.bs->opaque;
+    BDRVSheepdogState *s = acb->s;
    SheepdogInode *inode = &s->inode;
    AIOReq *aio_req;

@@ -2180,7 +2132,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        ret = sd_create_branch(s);
        if (ret) {
            acb->ret = -EIO;
-            goto out;
+            return;
        }
    }

@@ -2245,8 +2197,6 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                                old_oid,
                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
                                0 : done);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-
        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
                        acb->aiocb_type);
    done:
@@ -2254,31 +2204,25 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        idx++;
        done += len;
    }
-out:
-    if (!--acb->nr_pending) {
-        return acb->ret;
+    if (--acb->nr_pending) {
+        qemu_coroutine_yield();
    }
-    return 1;
 }

-static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static void sd_aio_complete(SheepdogAIOCB *acb)
 {
-    SheepdogAIOCB *cb;
-
-    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
-        if (AIOCBOverlapping(aiocb, cb)) {
-            return true;
-        }
+    if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
+        return;
    }

-    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
-    return false;
+    QLIST_REMOVE(acb, aiocb_siblings);
+    qemu_co_queue_restart_all(&acb->s->overlapping_queue);
 }

 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
 {
-    SheepdogAIOCB *acb;
+    SheepdogAIOCB acb;
    int ret;
    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
    BDRVSheepdogState *s = bs->opaque;
@@ -2290,85 +2234,50 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
        }
    }

-    acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
-    acb->aio_done_func = sd_write_done;
-    acb->aiocb_type = AIOCB_WRITE_UDATA;
+    sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
+    sd_co_rw_vector(&acb);
+    sd_write_done(&acb);
+    sd_aio_complete(&acb);

-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
-    }
-
-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-
-    return acb->ret;
+    return acb.ret;
 }

 static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
                       int nb_sectors, QEMUIOVector *qiov)
 {
-    SheepdogAIOCB *acb;
-    int ret;
+    SheepdogAIOCB acb;
    BDRVSheepdogState *s = bs->opaque;

-    acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
-    acb->aiocb_type = AIOCB_READ_UDATA;
-    acb->aio_done_func = sd_finish_aiocb;
+    sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
+    sd_co_rw_vector(&acb);
+    sd_aio_complete(&acb);

-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
-    }
-
-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-    return acb->ret;
+    return acb.ret;
 }

 static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
 {
    BDRVSheepdogState *s = bs->opaque;
-    SheepdogAIOCB *acb;
+    SheepdogAIOCB acb;
    AIOReq *aio_req;

    if (s->cache_flags != SD_FLAG_CMD_CACHE) {
        return 0;
    }

-    acb = sd_aio_setup(bs, NULL, 0, 0);
-    acb->aiocb_type = AIOCB_FLUSH_CACHE;
-    acb->aio_done_func = sd_finish_aiocb;
+    sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE);

-    aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
+    acb.nr_pending++;
+    aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
                            0, 0, 0, false, 0, 0);
-    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-    add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
+    add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);

-    qemu_coroutine_yield();
-    return acb->ret;
+    if (--acb.nr_pending) {
+        qemu_coroutine_yield();
+    }
+
+    sd_aio_complete(&acb);
+    return acb.ret;
 }

 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
@@ -2411,7 +2320,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }

-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
+    ret = write_object(fd, s->bs, (char *)&s->inode,
                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
                       datalen, 0, false, s->cache_flags);
    if (ret < 0) {
@@ -2426,7 +2335,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto cleanup;
    }

-    ret = read_object(fd, s->aio_context, (char *)inode,
+    ret = read_object(fd, s->bs, (char *)inode,
                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
                      s->cache_flags);

@@ -2528,7 +2437,7 @@ static bool remove_objects(BDRVSheepdogState *s)
            i++;
        }

-        ret = write_object(fd, s->aio_context,
+        ret = write_object(fd, s->bs,
                           (char *)&inode->data_vdi_id[start_idx],
                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
                           (i - start_idx) * sizeof(uint32_t),
@@ -2600,7 +2509,7 @@ static int sd_snapshot_delete(BlockDriverState *bs,
        return -1;
    }

-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
                 buf, &wlen, &rlen);
    closesocket(fd);
    if (ret) {
@@ -2652,8 +2561,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    req.opcode = SD_OP_READ_VDIS;
    req.data_length = max;

-    ret = do_req(fd, s->aio_context, &req,
-                 vdi_inuse, &wlen, &rlen);
+    ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen);

    closesocket(fd);
    if (ret) {
@@ -2679,7 +2587,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        }

        /* we don't need to read entire object */
-        ret = read_object(fd, s->aio_context, (char *)&inode,
+        ret = read_object(fd, s->bs, (char *)&inode,
                          vid_to_vdi_oid(vid),
                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
                          s->cache_flags);
@@ -2745,11 +2653,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,

        create = (offset == 0);
        if (load) {
-            ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
+            ret = read_object(fd, s->bs, (char *)data, vmstate_oid,
                              s->inode.nr_copies, data_len, offset,
                              s->cache_flags);
        } else {
-            ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
+            ret = write_object(fd, s->bs, (char *)data, vmstate_oid,
                               s->inode.nr_copies, data_len, offset, create,
                               s->cache_flags);
        }
@@ -2803,9 +2711,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
 static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
                                      int count)
 {
-    SheepdogAIOCB *acb;
+    SheepdogAIOCB acb;
    BDRVSheepdogState *s = bs->opaque;
-    int ret;
    QEMUIOVector discard_iov;
    struct iovec iov;
    uint32_t zero = 0;
@@ -2820,33 +2727,15 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
    iov.iov_len = sizeof(zero);
    discard_iov.iov = &iov;
    discard_iov.niov = 1;
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
-    acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
-                       count >> BDRV_SECTOR_BITS);
-    acb->aiocb_type = AIOCB_DISCARD_OBJ;
-    acb->aio_done_func = sd_finish_aiocb;
-
-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
+    if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
+        return -ENOTSUP;
    }
+    sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
+                 count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
+    sd_co_rw_vector(&acb);
+    sd_aio_complete(&acb);

-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-
-    return acb->ret;
+    return acb.ret;
 }

 static coroutine_fn int64_t
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -30,10 +30,14 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"
 #include "qemu/sockets.h"
 #include "qemu/uri.h"
+#include "qapi-visit.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"

 /* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
 * this block driver code.
@@ -74,8 +78,9 @@ typedef struct BDRVSSHState {
     */
    LIBSSH2_SFTP_ATTRIBUTES attrs;

+    InetSocketAddress *inet;
+
    /* Used to warn if 'flush' is not supported. */
-    char *hostport;
    bool unsafe_flush_warning;
 } BDRVSSHState;

@@ -89,7 +94,6 @@ static void ssh_state_init(BDRVSSHState *s)

 static void ssh_state_free(BDRVSSHState *s)
 {
-    g_free(s->hostport);
    if (s->sftp_handle) {
        libssh2_sftp_close(s->sftp_handle);
    }
@@ -193,6 +197,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
 {
    URI *uri = NULL;
    QueryParams *qp;
+    char *port_str;
    int i;

    uri = uri_parse(filename);
@@ -225,11 +230,11 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
        qdict_put(options, "user", qstring_from_str(uri->user));
    }

-    qdict_put(options, "host", qstring_from_str(uri->server));
+    qdict_put(options, "server.host", qstring_from_str(uri->server));

-    if (uri->port) {
-        qdict_put(options, "port", qint_from_int(uri->port));
-    }
+    port_str = g_strdup_printf("%d", uri->port ?: 22);
+    qdict_put(options, "server.port", qstring_from_str(port_str));
+    g_free(port_str);

    qdict_put(options, "path", qstring_from_str(uri->path));

@@ -254,15 +259,31 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    return -EINVAL;
 }

+static bool ssh_has_filename_options_conflict(QDict *options, Error **errp)
+{
+    const QDictEntry *qe;
+
+    for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) {
+        if (!strcmp(qe->key, "host") ||
+            !strcmp(qe->key, "port") ||
+            !strcmp(qe->key, "path") ||
+            !strcmp(qe->key, "user") ||
+            !strcmp(qe->key, "host_key_check") ||
+            strstart(qe->key, "server.", NULL))
+        {
+            error_setg(errp, "Option '%s' cannot be used with a file name",
+                       qe->key);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void ssh_parse_filename(const char *filename, QDict *options,
                               Error **errp)
 {
-    if (qdict_haskey(options, "user") ||
-        qdict_haskey(options, "host") ||
-        qdict_haskey(options, "port") ||
-        qdict_haskey(options, "path") ||
-        qdict_haskey(options, "host_key_check")) {
-        error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option");
+    if (ssh_has_filename_options_conflict(options, errp)) {
        return;
    }

@@ -540,14 +561,68 @@ static QemuOptsList ssh_runtime_opts = {
    },
 };

+static bool ssh_process_legacy_socket_options(QDict *output_opts,
+                                              QemuOpts *legacy_opts,
+                                              Error **errp)
+{
+    const char *host = qemu_opt_get(legacy_opts, "host");
+    const char *port = qemu_opt_get(legacy_opts, "port");
+
+    if (!host && port) {
+        error_setg(errp, "port may not be used without host");
+        return false;
+    }
+
+    if (host) {
+        qdict_put(output_opts, "server.host", qstring_from_str(host));
+        qdict_put(output_opts, "server.port",
+                  qstring_from_str(port ?: stringify(22)));
+    }
+
+    return true;
+}
+
+static InetSocketAddress *ssh_config(QDict *options, Error **errp)
+{
+    InetSocketAddress *inet = NULL;
+    QDict *addr = NULL;
+    QObject *crumpled_addr = NULL;
+    Visitor *iv = NULL;
+    Error *local_error = NULL;
+
+    qdict_extract_subqdict(options, &addr, "server.");
+    if (!qdict_size(addr)) {
+        error_setg(errp, "SSH server address missing");
+        goto out;
+    }
+
+    crumpled_addr = qdict_crumple(addr, errp);
+    if (!crumpled_addr) {
+        goto out;
+    }
+
+    iv = qobject_input_visitor_new(crumpled_addr, true);
+    visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
+    if (local_error) {
+        error_propagate(errp, local_error);
+        goto out;
+    }
+
+out:
+    QDECREF(addr);
+    qobject_decref(crumpled_addr);
+    visit_free(iv);
+    return inet;
+}
+
 static int connect_to_ssh(BDRVSSHState *s, QDict *options,
                          int ssh_flags, int creat_mode, Error **errp)
 {
    int r, ret;
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
-    const char *host, *user, *path, *host_key_check;
-    int port;
+    const char *user, *path, *host_key_check;
+    long port = 0;

    opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -557,15 +632,11 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        goto err;
    }

-    host = qemu_opt_get(opts, "host");
-    if (!host) {
+    if (!ssh_process_legacy_socket_options(options, opts, errp)) {
        ret = -EINVAL;
-        error_setg(errp, "No hostname was specified");
        goto err;
    }

-    port = qemu_opt_get_number(opts, "port", 22);
-
    path = qemu_opt_get(opts, "path");
    if (!path) {
        ret = -EINVAL;
@@ -588,12 +659,21 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
        host_key_check = "yes";
    }

-    /* Construct the host:port name for inet_connect. */
-    g_free(s->hostport);
-    s->hostport = g_strdup_printf("%s:%d", host, port);
+    /* Pop the config into our state object; exit if invalid */
+    s->inet = ssh_config(options, errp);
+    if (!s->inet) {
+        ret = -EINVAL;
+        goto err;
+    }
+
+    if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
+        error_setg(errp, "Use only numeric port value");
+        ret = -EINVAL;
+        goto err;
+    }

    /* Open the socket and connect. */
-    s->sock = inet_connect(s->hostport, errp);
+    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
    if (s->sock < 0) {
        ret = -EIO;
        goto err;
@@ -619,7 +699,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
    }

    /* Check the remote host's key against known_hosts. */
-    ret = check_host_key(s, host, port, host_key_check, errp);
+    ret = check_host_key(s, s->inet->host, port, host_key_check,
+                         errp);
    if (ret < 0) {
        goto err;
    }
@@ -830,7 +911,7 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
            rd_handler, wr_handler);

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, co);
+                       false, rd_handler, wr_handler, NULL, co);
 }

 static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
@@ -838,7 +919,7 @@ static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
 {
    DPRINTF("s->sock=%d", s->sock);
    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
 }

 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -1040,7 +1121,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 {
    if (!s->unsafe_flush_warning) {
        error_report("warning: ssh server %s does not support fsync",
-                     s->hostport);
+                     s->inet->host);
        if (what) {
            error_report("to support fsync, you need %s", what);
        }
--- a/block/stream.c
+++ b/block/stream.c
@@ -14,7 +14,7 @@
 #include "qemu/osdep.h"
 #include "trace.h"
 #include "block/block_int.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -37,6 +37,7 @@ typedef struct StreamBlockJob {
    BlockDriverState *base;
    BlockdevOnError on_error;
    char *backing_file_str;
+    int bs_flags;
 } StreamBlockJob;

 static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -81,6 +82,11 @@ static void stream_complete(BlockJob *job, void *opaque)
        bdrv_set_backing_hd(bs, base);
    }

+    /* Reopen the image back in read-only mode if necessary */
+    if (s->bs_flags != bdrv_get_flags(bs)) {
+        bdrv_reopen(bs, s->bs_flags, NULL);
+    }
+
    g_free(s->backing_file_str);
    block_job_completed(&s->common, data->ret);
    g_free(data);
@@ -212,26 +218,43 @@ static const BlockJobDriver stream_job_driver = {
    .instance_size = sizeof(StreamBlockJob),
    .job_type      = BLOCK_JOB_TYPE_STREAM,
    .set_speed     = stream_set_speed,
+    .start         = stream_run,
 };

 void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
-                  int64_t speed, BlockdevOnError on_error,
-                  BlockCompletionFunc *cb, void *opaque, Error **errp)
+                  int64_t speed, BlockdevOnError on_error, Error **errp)
 {
    StreamBlockJob *s;
+    BlockDriverState *iter;
+    int orig_bs_flags;

    s = block_job_create(job_id, &stream_job_driver, bs, speed,
-                         cb, opaque, errp);
+                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }

+    /* Make sure that the image is opened in read-write mode */
+    orig_bs_flags = bdrv_get_flags(bs);
+    if (!(orig_bs_flags & BDRV_O_RDWR)) {
+        if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
+            block_job_unref(&s->common);
+            return;
+        }
+    }
+
+    /* Block all intermediate nodes between bs and base, because they
+     * will disappear from the chain after this operation */
+    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
+        block_job_add_bdrv(&s->common, iter);
+    }
+
    s->base = base;
    s->backing_file_str = g_strdup(backing_file_str);
+    s->bs_flags = orig_bs_flags;

    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(stream_run, s);
-    trace_stream_start(bs, base, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co);
+    trace_stream_start(bs, base, s);
+    block_job_start(&s->common);
 }
--- a/block/trace-events
+++ b/block/trace-events
@@ -9,7 +9,6 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"

 # block/io.c
-bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
@@ -20,14 +19,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t c

 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
-stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+stream_start(void *bs, void *base, void *s) "bs %p base %p s %p"

 # block/commit.c
 commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
-commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
+commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p"

 # block/mirror.c
-mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_start(void *bs, void *s, void *opaque) "bs %p s %p opaque %p"
 mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
@@ -36,8 +35,6 @@ mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_n
 mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
 mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
 mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
-mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
-mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"

 # block/backup.c
 backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
@@ -52,11 +49,10 @@ qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
-block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"

-# block/raw-win32.c
-# block/raw-posix.c
+# block/file-win32.c
+# block/file-posix.c
 paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
 paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"

--- a/block/vdi.c
+++ b/block/vdi.c
@@ -361,6 +361,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    VdiHeader header;
    size_t bmap_size;
    int ret;
+    Error *local_err = NULL;

    logout("\n");

@@ -471,7 +472,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        error_free(s->migration_blocker);
+        goto fail_free_bmap;
+    }

    qemu_co_mutex_init(&s->write_lock);

--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -991,6 +991,17 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

+    /* Disable migration when VHDX images are used */
+    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        error_free(s->migration_blocker);
+        goto fail;
+    }
+
    if (flags & BDRV_O_RDWR) {
        ret = vhdx_update_headers(bs, s, false, NULL);
        if (ret < 0) {
@@ -1000,12 +1011,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,

    /* TODO: differencing files */

-    /* Disable migration when VHDX images are used */
-    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
    return 0;
 fail:
    vhdx_close(bs);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -941,6 +941,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
+    Error *local_err = NULL;

    buf = vmdk_read_desc(bs->file, 0, errp);
    if (!buf) {
@@ -976,7 +977,13 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        error_free(s->migration_blocker);
+        goto fail;
+    }
+
    g_free(buf);
    return 0;

--- a/block/vpc.c
+++ b/block/vpc.c
@@ -422,13 +422,18 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 #endif
    }

-    qemu_co_mutex_init(&s->lock);
-
    /* Disable migration when VHD images are used */
    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
+    ret = migrate_add_blocker(s->migration_blocker, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        error_free(s->migration_blocker);
+        goto fail;
+    }
+
+    qemu_co_mutex_init(&s->lock);

    return 0;

--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1185,22 +1185,26 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;

-    if (s->first_sectors_number == 0x40) {
-        init_mbr(s, cyls, heads, secs);
-    }
-
-    //    assert(is_consistent(s));
-    qemu_co_mutex_init(&s->lock);
-
    /* Disable migration when vvfat is used rw */
    if (s->qcow) {
        error_setg(&s->migration_blocker,
                   "The vvfat (rw) format used by node '%s' "
                   "does not support live migration",
                   bdrv_get_device_or_node_name(bs));
-        migrate_add_blocker(s->migration_blocker);
+        ret = migrate_add_blocker(s->migration_blocker, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_free(s->migration_blocker);
+            goto fail;
+        }
    }

+    if (s->first_sectors_number == 0x40) {
+        init_mbr(s, cyls, heads, secs);
+    }
+
+    qemu_co_mutex_init(&s->lock);
+
    ret = 0;
 fail:
    qemu_opts_del(opts);
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -175,7 +175,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, false, NULL);
+    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
    aio->is_aio_context_attached = false;
 }

@@ -184,7 +184,7 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
 {
    aio->is_aio_context_attached = true;
    aio_set_event_notifier(new_context, &aio->e, false,
-                           win32_aio_completion_cb);
+                           win32_aio_completion_cb, NULL);
 }

 QEMUWin32AIOState *win32_aio_init(void)
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -16,7 +16,6 @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/sysemu.h"
 #include "qmp-commands.h"
-#include "trace.h"
 #include "block/nbd.h"
 #include "io/channel-socket.h"

@@ -44,6 +43,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
        return TRUE;
    }

+    qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
    nbd_client_new(NULL, cioc,
                   nbd_server->tlscreds, NULL,
                   nbd_client_put);
@@ -111,6 +111,8 @@ void qmp_nbd_server_start(SocketAddress *addr,
    nbd_server = g_new0(NBDServerData, 1);
    nbd_server->watch = -1;
    nbd_server->listen_ioc = qio_channel_socket_new();
+    qio_channel_set_name(QIO_CHANNEL(nbd_server->listen_ioc),
+                         "nbd-listener");
    if (qio_channel_socket_listen_sync(
            nbd_server->listen_ioc, addr, errp) < 0) {
        goto error;
--- a/blockdev.c
+++ b/blockdev.c
@@ -48,7 +48,7 @@
 #include "sysemu/sysemu.h"
 #include "block/block_int.h"
 #include "qmp-commands.h"
-#include "trace.h"
+#include "block/trace.h"
 #include "sysemu/arch_init.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
@@ -1811,7 +1811,7 @@ typedef struct DriveBackupState {
    BlockJob *job;
 } DriveBackupState;

-static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
                            Error **errp);

 static void drive_backup_prepare(BlkActionState *common, Error **errp)
@@ -1835,23 +1835,26 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
    bdrv_drained_begin(bs);
    state->bs = bs;

-    do_drive_backup(backup, common->block_job_txn, &local_err);
+    state->job = do_drive_backup(backup, common->block_job_txn, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
+}

-    state->job = state->bs->job;
+static void drive_backup_commit(BlkActionState *common)
+{
+    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    assert(state->job); /* assigned by drive_backup_prepare() */
+    block_job_start(state->job); /* job was only created so far; run it now */
 }

 static void drive_backup_abort(BlkActionState *common)
 {
    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    BlockDriverState *bs = state->bs;

-    /* Only cancel if it's the job we started */
-    if (bs && bs->job && bs->job == state->job) {
-        block_job_cancel_sync(bs->job);
+    if (state->job) {
+        block_job_cancel_sync(state->job);
    }
 }

@@ -1872,8 +1875,8 @@ typedef struct BlockdevBackupState {
    AioContext *aio_context;
 } BlockdevBackupState;

-static void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
-                               Error **errp);
+static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+                                    Error **errp);

 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 {
@@ -1906,23 +1909,26 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
    state->bs = bs;
    bdrv_drained_begin(state->bs);

-    do_blockdev_backup(backup, common->block_job_txn, &local_err);
+    state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
+}

-    state->job = state->bs->job;
+static void blockdev_backup_commit(BlkActionState *common)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    assert(state->job); /* assigned by blockdev_backup_prepare() */
+    block_job_start(state->job); /* job was only created so far; run it now */
 }

 static void blockdev_backup_abort(BlkActionState *common)
 {
    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
-    BlockDriverState *bs = state->bs;

-    /* Only cancel if it's the job we started */
-    if (bs && bs->job && bs->job == state->job) {
-        block_job_cancel_sync(bs->job);
+    if (state->job) {
+        block_job_cancel_sync(state->job);
    }
 }

@@ -2072,12 +2078,14 @@ static const BlkActionOps actions[] = {
    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
        .instance_size = sizeof(DriveBackupState),
        .prepare = drive_backup_prepare,
+        .commit = drive_backup_commit,
        .abort = drive_backup_abort,
        .clean = drive_backup_clean,
    },
    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
        .instance_size = sizeof(BlockdevBackupState),
        .prepare = blockdev_backup_prepare,
+        .commit = blockdev_backup_commit,
        .abort = blockdev_backup_abort,
        .clean = blockdev_backup_clean,
    },
@@ -2905,39 +2913,15 @@ out:
    aio_context_release(aio_context);
 }

-static void block_job_cb(void *opaque, int ret)
-{
-    /* Note that this function may be executed from another AioContext besides
-     * the QEMU main loop.  If you need to access anything that assumes the
-     * QEMU global mutex, use a BH or introduce a mutex.
-     */
-
-    BlockDriverState *bs = opaque;
-    const char *msg = NULL;
-
-    trace_block_job_cb(bs, bs->job, ret);
-
-    assert(bs->job);
-
-    if (ret < 0) {
-        msg = strerror(-ret);
-    }
-
-    if (block_job_is_cancelled(bs->job)) {
-        block_job_event_cancelled(bs->job);
-    } else {
-        block_job_event_completed(bs->job, msg);
-    }
-}
-
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
                      bool has_base, const char *base,
+                      bool has_base_node, const char *base_node,
                      bool has_backing_file, const char *backing_file,
                      bool has_speed, int64_t speed,
                      bool has_on_error, BlockdevOnError on_error,
                      Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs, *iter;
    BlockDriverState *base_bs = NULL;
    AioContext *aio_context;
    Error *local_err = NULL;
@@ -2947,7 +2931,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
        on_error = BLOCKDEV_ON_ERROR_REPORT;
    }

-    bs = qmp_get_root_bs(device, errp);
+    bs = bdrv_lookup_bs(device, device, errp);
    if (!bs) {
        return;
    }
@@ -2955,7 +2939,9 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);

-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
+    if (has_base && has_base_node) {
+        error_setg(errp, "'base' and 'base-node' cannot be specified "
+                   "at the same time");
        goto out;
    }

@@ -2969,6 +2955,27 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
        base_name = base;
    }

+    if (has_base_node) {
+        base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+        if (!base_bs) {
+            goto out;
+        }
+        if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
+            error_setg(errp, "Node '%s' is not a backing image of '%s'",
+                       base_node, device);
+            goto out;
+        }
+        assert(bdrv_get_aio_context(base_bs) == aio_context);
+        base_name = base_bs->filename;
+    }
+
+    /* Check for op blockers in the whole chain between bs and base */
+    for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
+        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
+            goto out;
+        }
+    }
+
    /* if we are streaming the entire chain, the result will have no backing
     * file, and specifying one is therefore an error */
    if (base_bs == NULL && has_backing_file) {
@@ -2981,7 +2988,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
    base_name = has_backing_file ? backing_file : base_name;

    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
-                 has_speed ? speed : 0, on_error, block_job_cb, bs, &local_err);
+                 has_speed ? speed : 0, on_error, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
@@ -3001,6 +3008,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                      Error **errp)
 {
    BlockDriverState *bs;
+    BlockDriverState *iter;
    BlockDriverState *base_bs, *top_bs;
    AioContext *aio_context;
    Error *local_err = NULL;
@@ -3067,8 +3075,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,

    assert(bdrv_get_aio_context(base_bs) == aio_context);

-    if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
-        goto out;
+    for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) {
+        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+            goto out;
+        }
    }

    /* Do not allow attempts to commit an image into itself */
@@ -3083,12 +3093,17 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                             " but 'top' is the active layer");
            goto out;
        }
-        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
-                            on_error, block_job_cb, bs, &local_err, false);
+        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
+                            BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
+                            &local_err, false);
    } else {
+        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
+        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+            goto out;
+        }
        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
-                     on_error, block_job_cb, bs,
-                     has_backing_file ? backing_file : NULL, &local_err);
+                     on_error, has_backing_file ? backing_file : NULL,
+                     &local_err);
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -3099,11 +3114,13 @@ out:
    aio_context_release(aio_context);
 }

-static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
+static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+                                 Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *target_bs;
    BlockDriverState *source = NULL;
+    BlockJob *job = NULL;
    BdrvDirtyBitmap *bmap = NULL;
    AioContext *aio_context;
    QDict *options = NULL;
@@ -3132,7 +3149,7 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)

    bs = qmp_get_root_bs(backup->device, errp);
    if (!bs) {
-        return;
+        return NULL;
    }

    aio_context = bdrv_get_aio_context(bs);
@@ -3206,9 +3223,10 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)
        }
    }

-    backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
-                 bmap, backup->compress, backup->on_source_error,
-                 backup->on_target_error, block_job_cb, bs, txn, &local_err);
+    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+                            backup->sync, bmap, backup->compress,
+                            backup->on_source_error, backup->on_target_error,
+                            BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
    bdrv_unref(target_bs);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -3217,11 +3235,17 @@ static void do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error **errp)

 out:
    aio_context_release(aio_context);
+    return job;
 }

 void qmp_drive_backup(DriveBackup *arg, Error **errp)
 {
-    return do_drive_backup(arg, NULL, errp);
+    /* No transaction is passed here, so start the job once it is created. */
+    BlockJob *job = do_drive_backup(arg, NULL, errp);
+
+    if (job) {
+        block_job_start(job);
+    }
 }

 BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
@@ -3229,12 +3253,14 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
    return bdrv_named_nodes_list(errp);
 }

-void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
+BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+                             Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *target_bs;
    Error *local_err = NULL;
    AioContext *aio_context;
+    BlockJob *job = NULL;

    if (!backup->has_speed) {
        backup->speed = 0;
@@ -3254,7 +3280,7 @@ void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)

    bs = qmp_get_root_bs(backup->device, errp);
    if (!bs) {
-        return;
+        return NULL;
    }

    aio_context = bdrv_get_aio_context(bs);
@@ -3276,19 +3302,25 @@ void do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, Error **errp)
            goto out;
        }
    }
-    backup_start(backup->job_id, bs, target_bs, backup->speed, backup->sync,
-                 NULL, backup->compress, backup->on_source_error,
-                 backup->on_target_error, block_job_cb, bs, txn, &local_err);
+    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+                            backup->sync, NULL, backup->compress,
+                            backup->on_source_error, backup->on_target_error,
+                            BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
 out:
    aio_context_release(aio_context);
+    return job;
 }

 void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp)
 {
-    do_blockdev_backup(arg, NULL, errp);
+    /* No transaction is passed here, so start the job once it is created. */
+    BlockJob *job = do_blockdev_backup(arg, NULL, errp);
+    if (job) {
+        block_job_start(job);
+    }
 }

 /* Parameter check and block job starting for drive mirroring.
@@ -3357,8 +3389,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
    mirror_start(job_id, bs, target,
                 has_replaces ? replaces : NULL,
                 speed, granularity, buf_size, sync, backing_mode,
-                 on_source_error, on_target_error, unmap,
-                 block_job_cb, bs, errp);
+                 on_source_error, on_target_error, unmap, errp);
 }

 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3602,7 +3633,7 @@ void qmp_block_job_cancel(const char *device,
        force = false;
    }

-    if (job->user_paused && !force) {
+    if (block_job_user_paused(job) && !force) {
        error_setg(errp, "The block job for device '%s' is currently paused",
                   device);
        goto out;
@@ -3619,13 +3650,12 @@ void qmp_block_job_pause(const char *device, Error **errp)
    AioContext *aio_context;
    BlockJob *job = find_block_job(device, &aio_context, errp);

-    if (!job || job->user_paused) {
+    if (!job || block_job_user_paused(job)) {
        return;
    }

-    job->user_paused = true;
    trace_qmp_block_job_pause(job);
-    block_job_pause(job);
+    block_job_user_pause(job);
    aio_context_release(aio_context);
 }

@@ -3634,14 +3664,13 @@ void qmp_block_job_resume(const char *device, Error **errp)
    AioContext *aio_context;
    BlockJob *job = find_block_job(device, &aio_context, errp);

-    if (!job || !job->user_paused) {
+    if (!job || !block_job_user_paused(job)) {
        return;
    }

-    job->user_paused = false;
    trace_qmp_block_job_resume(job);
    block_job_iostatus_reset(job);
-    block_job_resume(job);
+    block_job_user_resume(job);
    aio_context_release(aio_context);
 }

@@ -3915,13 +3944,22 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
    BlockJob *job;

    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
-        AioContext *aio_context = blk_get_aio_context(job->blk);
+        BlockJobInfoList *elem;
+        AioContext *aio_context;

+        if (block_job_is_internal(job)) {
+            continue;
+        }
+        elem = g_new0(BlockJobInfoList, 1);
+        aio_context = blk_get_aio_context(job->blk);
        aio_context_acquire(aio_context);
-        elem->value = block_job_query(job);
+        elem->value = block_job_query(job, errp);
        aio_context_release(aio_context);
-
+        if (!elem->value) {
+            g_free(elem);
+            qapi_free_BlockJobInfoList(head);
+            return NULL;
+        }
        *p_next = elem;
        p_next = &elem->next;
    }
--- a/blockjob.c
+++ b/blockjob.c
@@ -25,9 +25,8 @@

 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "trace.h"
 #include "block/block.h"
-#include "block/blockjob.h"
+#include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
@@ -38,6 +37,9 @@
 #include "qemu/timer.h"
 #include "qapi-event.h"

+static void block_job_event_cancelled(BlockJob *job);
+static void block_job_event_completed(BlockJob *job, const char *msg);
+
 /* Transactional group of block jobs */
 struct BlockJobTxn {

@@ -66,7 +68,7 @@ BlockJob *block_job_get(const char *id)
    BlockJob *job;

    QLIST_FOREACH(job, &block_jobs, job_list) {
-        if (!strcmp(id, job->id)) {
+        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }
@@ -74,17 +76,6 @@ BlockJob *block_job_get(const char *id)
    return NULL;
 }

-/* Normally the job runs in its BlockBackend's AioContext.  The exception is
- * block_job_defer_to_main_loop() where it runs in the QEMU main loop.  Code
- * that supports both cases uses this helper function.
- */
-static AioContext *block_job_get_aio_context(BlockJob *job)
-{
-    return job->deferred_to_main_loop ?
-           qemu_get_aio_context() :
-           blk_get_aio_context(job->blk);
-}
-
 static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
 {
@@ -97,6 +88,17 @@ static void block_job_attached_aio_context(AioContext *new_context,
    block_job_resume(job);
 }

+static void block_job_drain(BlockJob *job)
+{
+    /* If job is !job->busy this kicks it into the next pause point. */
+    block_job_enter(job);
+    /* Drain the job's BlockBackend, then run the driver's optional hook. */
+    blk_drain(job->blk);
+    if (job->driver->drain) {
+        job->driver->drain(job);
+    }
+}
+
 static void block_job_detach_aio_context(void *opaque)
 {
    BlockJob *job = opaque;
@@ -106,31 +108,33 @@ static void block_job_detach_aio_context(void *opaque)

    block_job_pause(job);

-    if (!job->paused) {
-        /* If job is !job->busy this kicks it into the next pause point. */
-        block_job_enter(job);
-    }
    while (!job->paused && !job->completed) {
-        aio_poll(block_job_get_aio_context(job), true);
+        block_job_drain(job);
    }

    block_job_unref(job);
 }

+void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+{
+    job->nodes = g_slist_prepend(job->nodes, bs); /* walked on last unref */
+    bdrv_ref(bs);                        /* paired with bdrv_unref() there */
+    bdrv_op_block_all(bs, job->blocker); /* paired with op_unblock_all there */
+}
+
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockDriverState *bs, int64_t speed,
+                       BlockDriverState *bs, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    BlockBackend *blk;
    BlockJob *job;

-    assert(cb);
    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }

-    if (job_id == NULL) {
+    if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
        if (!*job_id) {
            error_setg(errp, "An explicit job ID is required for this node");
@@ -138,14 +142,21 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
        }
    }

-    if (!id_wellformed(job_id)) {
-        error_setg(errp, "Invalid job ID '%s'", job_id);
-        return NULL;
-    }
+    if (job_id) {
+        if (flags & BLOCK_JOB_INTERNAL) {
+            error_setg(errp, "Cannot specify job ID for internal block job");
+            return NULL;
+        }

-    if (block_job_get(job_id)) {
-        error_setg(errp, "Job ID '%s' already in use", job_id);
-        return NULL;
+        if (!id_wellformed(job_id)) {
+            error_setg(errp, "Invalid job ID '%s'", job_id);
+            return NULL;
+        }
+
+        if (block_job_get(job_id)) {
+            error_setg(errp, "Job ID '%s' already in use", job_id);
+            return NULL;
+        }
    }

    blk = blk_new();
@@ -154,7 +165,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    job = g_malloc0(driver->instance_size);
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
-    bdrv_op_block_all(bs, job->blocker);
+    block_job_add_bdrv(job, bs);
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
@@ -162,7 +173,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    job->blk           = blk;
    job->cb            = cb;
    job->opaque        = opaque;
-    job->busy          = true;
+    job->busy          = false;
+    job->paused        = true;
+    job->pause_count   = 1;
    job->refcnt        = 1;
    bs->job = job;

@@ -185,6 +198,28 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    return job;
 }

+bool block_job_is_internal(BlockJob *job)
+{
+    return !job->id; /* a job without an ID is an internal one */
+}
+
+static bool block_job_started(BlockJob *job)
+{
+    return job->co != NULL; /* job->co is assigned in block_job_start() */
+}
+
+void block_job_start(BlockJob *job)
+{
+    assert(job && !block_job_started(job) && job->paused &&
+           !job->busy && job->driver->start);
+    job->co = qemu_coroutine_create(job->driver->start, job);
+    if (--job->pause_count == 0) { /* drop the pause taken at creation */
+        job->paused = false;
+        job->busy = true;
+        qemu_coroutine_enter(job->co);
+    } /* else: another pause is still pending; coroutine not entered here */
+}
+
 void block_job_ref(BlockJob *job)
 {
    ++job->refcnt;
@@ -193,9 +228,15 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
    if (--job->refcnt == 0) {
+        GSList *l;
        BlockDriverState *bs = blk_bs(job->blk);
        bs->job = NULL;
-        bdrv_op_unblock_all(bs, job->blocker);
+        for (l = job->nodes; l; l = l->next) {
+            bs = l->data;
+            bdrv_op_unblock_all(bs, job->blocker);
+            bdrv_unref(bs);
+        }
+        g_slist_free(job->nodes);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
@@ -218,8 +259,29 @@ static void block_job_completed_single(BlockJob *job)
            job->driver->abort(job);
        }
    }
-    job->cb(job->opaque, job->ret);
+    if (job->driver->clean) {
+        job->driver->clean(job);
+    }
+
+    if (job->cb) {
+        job->cb(job->opaque, job->ret);
+    }
+
+    /* Emit events only if we actually started */
+    if (block_job_started(job)) {
+        if (block_job_is_cancelled(job)) {
+            block_job_event_cancelled(job);
+        } else {
+            const char *msg = NULL;
+            if (job->ret < 0) {
+                msg = strerror(-job->ret);
+            }
+            block_job_event_completed(job, msg);
+        }
+    }
+
    if (job->txn) {
+        QLIST_REMOVE(job, txn_list);
        block_job_txn_unref(job->txn);
    }
    block_job_unref(job);
@@ -321,7 +383,10 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)

 void block_job_complete(BlockJob *job, Error **errp)
 {
-    if (job->pause_count || job->cancelled || !job->driver->complete) {
+    /* Should not be reachable via external interface for internal jobs */
+    assert(job->id);
+    if (job->pause_count || job->cancelled ||
+        !block_job_started(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
@@ -335,13 +400,26 @@ void block_job_pause(BlockJob *job)
    job->pause_count++;
 }

+void block_job_user_pause(BlockJob *job)
+{
+    job->user_paused = true; /* reported by block_job_user_paused() */
+    block_job_pause(job);
+}
+
 static bool block_job_should_pause(BlockJob *job)
 {
    return job->pause_count > 0;
 }

+bool block_job_user_paused(BlockJob *job)
+{
+    return job && job->user_paused; /* a NULL job counts as not paused */
+}
+
 void coroutine_fn block_job_pause_point(BlockJob *job)
 {
+    assert(job && block_job_started(job));
+
    if (!block_job_should_pause(job)) {
        return;
    }
@@ -376,6 +454,14 @@ void block_job_resume(BlockJob *job)
    block_job_enter(job);
 }

+void block_job_user_resume(BlockJob *job)
+{
+    if (job && job->user_paused && job->pause_count > 0) {
+        job->user_paused = false;
+        block_job_resume(job);
+    } /* otherwise nothing to undo: NULL job or no user pause in effect */
+}
+
 void block_job_enter(BlockJob *job)
 {
    if (job->co && !job->busy) {
@@ -385,9 +471,13 @@ void block_job_enter(BlockJob *job)

 void block_job_cancel(BlockJob *job)
 {
-    job->cancelled = true;
-    block_job_iostatus_reset(job);
-    block_job_enter(job);
+    if (block_job_started(job)) {
+        job->cancelled = true;
+        block_job_iostatus_reset(job);
+        block_job_enter(job);
+    } else { /* never started: complete it directly with -ECANCELED */
+        block_job_completed(job, -ECANCELED);
+    }
 }

 bool block_job_is_cancelled(BlockJob *job)
@@ -413,14 +503,21 @@ static int block_job_finish_sync(BlockJob *job,
    assert(blk_bs(job->blk)->job == job);

    block_job_ref(job);
+
    finish(job, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        block_job_unref(job);
        return -EBUSY;
    }
+    /* block_job_drain() calls block_job_enter(), which should be enough to
+     * induce progress until the job completes or moves to the main thread.
+     */
+    while (!job->deferred_to_main_loop && !job->completed) {
+        block_job_drain(job);
+    }
    while (!job->completed) {
-        aio_poll(block_job_get_aio_context(job), true);
+        aio_poll(qemu_get_aio_context(), true);
    }
    ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
    block_job_unref(job);
@@ -494,9 +591,15 @@ void block_job_yield(BlockJob *job)
    block_job_pause_point(job);
 }

-BlockJobInfo *block_job_query(BlockJob *job)
+BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
 {
-    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    BlockJobInfo *info;
+
+    if (block_job_is_internal(job)) {
+        error_setg(errp, "Cannot query QEMU internal jobs");
+        return NULL;
+    }
+    info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
    info->device    = g_strdup(job->id);
    info->len       = job->len;
@@ -517,8 +620,12 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
    }
 }

-void block_job_event_cancelled(BlockJob *job)
+static void block_job_event_cancelled(BlockJob *job)
 {
+    if (block_job_is_internal(job)) {
+        return;
+    }
+
    qapi_event_send_block_job_cancelled(job->driver->job_type,
                                        job->id,
                                        job->len,
@@ -527,8 +634,12 @@ void block_job_event_cancelled(BlockJob *job)
                                        &error_abort);
 }

-void block_job_event_completed(BlockJob *job, const char *msg)
+static void block_job_event_completed(BlockJob *job, const char *msg)
 {
+    if (block_job_is_internal(job)) {
+        return;
+    }
+
    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
@@ -543,6 +654,10 @@ void block_job_event_ready(BlockJob *job)
 {
    job->ready = true;

+    if (block_job_is_internal(job)) {
+        return;
+    }
+
    qapi_event_send_block_job_ready(job->driver->job_type,
                                    job->id,
                                    job->len,
@@ -573,14 +688,15 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
    default:
        abort();
    }
-    qapi_event_send_block_job_error(job->id,
-                                    is_read ? IO_OPERATION_TYPE_READ :
-                                    IO_OPERATION_TYPE_WRITE,
-                                    action, &error_abort);
+    if (!block_job_is_internal(job)) {
+        qapi_event_send_block_job_error(job->id,
+                                        is_read ? IO_OPERATION_TYPE_READ :
+                                        IO_OPERATION_TYPE_WRITE,
+                                        action, &error_abort);
+    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* make the pause user visible, which will be resumed from QMP. */
-        job->user_paused = true;
-        block_job_pause(job);
+        block_job_user_pause(job);
        block_job_iostatus_set_err(job, error);
    }
    return action;
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -42,6 +42,11 @@ void mmap_unlock(void)
    }
 }

+bool have_mmap_lock(void)
+{   /* Report whether mmap_lock_count is currently positive. */
+    return mmap_lock_count > 0; /* the comparison already yields a bool */
+}
+
 /* Grab lock to make sure things are in a consistent state after fork().  */
 void mmap_fork_start(void)
 {
--- a/chardev/Makefile.objs
+++ b/chardev/Makefile.objs
@@ -0,0 +1,17 @@
+chardev-obj-y += char.o
+chardev-obj-$(CONFIG_WIN32) += char-console.o
+chardev-obj-$(CONFIG_POSIX) += char-fd.o
+chardev-obj-y += char-file.o
+chardev-obj-y += char-io.o
+chardev-obj-y += char-mux.o
+chardev-obj-y += char-null.o
+chardev-obj-$(CONFIG_POSIX) += char-parallel.o
+chardev-obj-y += char-pipe.o
+chardev-obj-$(CONFIG_POSIX) += char-pty.o
+chardev-obj-y += char-ringbuf.o
+chardev-obj-y += char-serial.o
+chardev-obj-y += char-socket.o
+chardev-obj-y += char-stdio.o
+chardev-obj-y += char-udp.o
+chardev-obj-$(CONFIG_WIN32) += char-win.o
+chardev-obj-$(CONFIG_WIN32) += char-win-stdio.o
--- a/chardev/char-console.c
+++ b/chardev/char-console.c
@@ -0,0 +1,53 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "char-win.h"
+
+static void qemu_chr_open_win_con(Chardev *chr,
+                                  ChardevBackend *backend, /* not used */
+                                  bool *be_opened,         /* not used */
+                                  Error **errp)            /* not used */
+{   /* Open the console chardev on the process's standard output handle. */
+    qemu_chr_open_win_file(chr, GetStdHandle(STD_OUTPUT_HANDLE));
+}
+
+static void char_console_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+    /* The console class only sets the open hook. */
+    cc->open = qemu_chr_open_win_con;
+}
+
+static const TypeInfo char_console_type_info = {
+    .name = TYPE_CHARDEV_CONSOLE,
+    .parent = TYPE_CHARDEV_WIN,
+    .class_init = char_console_class_init, /* installs the open hook */
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_console_type_info); /* console chardev */
+}
+
+type_init(register_types);
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -0,0 +1,170 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "io/channel-file.h"
+
+#include "char-fd.h"
+#include "char-io.h"
+
+/* Write len bytes from buf to ioc_out. Called with chr_write_lock held.  */
+static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    /* The result is whatever io_channel_send() reports. */
+    return io_channel_send(s->ioc_out, buf, len);
+}
+
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    FDChardev *s = FD_CHARDEV(opaque);
+    int len;
+    uint8_t buf[CHR_READ_BUF_LEN];
+    ssize_t ret;
+    /* Cap the read at max_size, as last computed by fd_chr_read_poll(). */
+    len = sizeof(buf);
+    if (len > s->max_size) {
+        len = s->max_size;
+    }
+    if (len == 0) {
+        return TRUE; /* nothing may be read now; keep the watch */
+    }
+
+    ret = qio_channel_read(
+        chan, (gchar *)buf, len, NULL);
+    if (ret == 0) { /* zero-length read: drop the watch, report CLOSED */
+        remove_fd_in_watch(chr);
+        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+        return FALSE;
+    }
+    if (ret > 0) { /* negative results are ignored here */
+        qemu_chr_be_write(chr, buf, ret);
+    }
+
+    return TRUE;
+}
+
+static int fd_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    FDChardev *fdchr = FD_CHARDEV(opaque);
+    int room = qemu_chr_be_can_write(chr);
+    fdchr->max_size = room; /* remembered for the next fd_chr_read() */
+    return room;
+}
+
+static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond)
+{
+    /* Watches requested through this hook go on the output channel. */
+    return qio_channel_create_watch(FD_CHARDEV(chr)->ioc_out, cond);
+}
+
+static void fd_chr_update_read_handler(Chardev *chr,
+                                       GMainContext *context)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    /* Replace any existing input watch with one attached to context. */
+    remove_fd_in_watch(chr);
+    if (s->ioc_in) { /* no watch is installed without an input channel */
+        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
+                                           fd_chr_read_poll,
+                                           fd_chr_read, chr,
+                                           context);
+    }
+}
+
+static void char_fd_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    FDChardev *s = FD_CHARDEV(obj);
+    /* Remove the input watch, then drop both channel references. */
+    remove_fd_in_watch(chr);
+    if (s->ioc_in) {
+        object_unref(OBJECT(s->ioc_in));
+    }
+    if (s->ioc_out) {
+        object_unref(OBJECT(s->ioc_out));
+    }
+    /* Finally report CHR_EVENT_CLOSED for this chardev. */
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+int qmp_chardev_open_file_source(char *src, int flags, Error **errp)
+{
+    int fd = -1;
+    /* Open src with flags and mode 0666; -1 is returned with errp set. */
+    TFR(fd = qemu_open(src, flags, 0666)); /* TFR: presumably EINTR retry */
+    if (fd == -1) {
+        error_setg_file_open(errp, errno, src);
+    }
+    return fd;
+}
+
+/* Open a character device on unix fds, wrapping each in a file channel. */
+void qemu_chr_open_fd(Chardev *chr,
+                      int fd_in, int fd_out)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    char *name;
+    /* Each channel is named after the chardev label. */
+    s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in));
+    name = g_strdup_printf("chardev-file-in-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name);
+    g_free(name);
+    s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out));
+    name = g_strdup_printf("chardev-file-out-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name);
+    g_free(name);
+    qemu_set_nonblock(fd_out); /* only fd_out is made non-blocking here */
+    s->chr = chr;
+}
+
+static void char_fd_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+    /* Hook up the fd-backed watch, write and read-handler callbacks. */
+    cc->chr_add_watch = fd_chr_add_watch;
+    cc->chr_write = fd_chr_write;
+    cc->chr_update_read_handler = fd_chr_update_read_handler;
+}
+
+static const TypeInfo char_fd_type_info = {
+    .name = TYPE_CHARDEV_FD,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(FDChardev),
+    .instance_finalize = char_fd_finalize,
+    .class_init = char_fd_class_init,
+    .abstract = true, /* used as a parent, e.g. by the POSIX file chardev */
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_fd_type_info); /* abstract fd chardev */
+}
+
+type_init(register_types);
--- a/chardev/char-fd.h
+++ b/chardev/char-fd.h
@@ -0,0 +1,44 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_FD_H
+#define CHAR_FD_H
+
+#include "io/channel.h"
+#include "sysemu/char.h"
+
+typedef struct FDChardev {
+    Chardev parent;
+    Chardev *chr;                 /* set to the owner by qemu_chr_open_fd() */
+    QIOChannel *ioc_in, *ioc_out; /* channels wrapping fd_in and fd_out */
+    int max_size;                 /* read cap, set by the read-poll hook */
+} FDChardev;
+
+#define TYPE_CHARDEV_FD "chardev-fd"
+
+#define FD_CHARDEV(obj) OBJECT_CHECK(FDChardev, (obj), TYPE_CHARDEV_FD)
+
+void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out);
+int qmp_chardev_open_file_source(char *src, int flags, Error **errp);
+
+#endif /* CHAR_FD_H */
--- a/chardev/char-file.c
+++ b/chardev/char-file.c
@@ -0,0 +1,139 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include "char-fd.h"
+#endif
+
+static void qmp_chardev_open_file(Chardev *chr,
+                                  ChardevBackend *backend,
+                                  bool *be_opened, /* not used */
+                                  Error **errp)
+{
+    ChardevFile *file = backend->u.file.data;
+#ifdef _WIN32
+    HANDLE out;
+    DWORD accessmode;
+    DWORD flags;
+    /* Windows: output only; a configured input file is rejected. */
+    if (file->has_in) {
+        error_setg(errp, "input file not supported");
+        return;
+    }
+
+    if (file->has_append && file->append) {
+        /* Append to file if it already exists. */
+        accessmode = FILE_GENERIC_WRITE & ~FILE_WRITE_DATA;
+        flags = OPEN_ALWAYS;
+    } else {
+        /* Truncate file if it already exists. */
+        accessmode = GENERIC_WRITE;
+        flags = CREATE_ALWAYS;
+    }
+
+    out = CreateFile(file->out, accessmode, FILE_SHARE_READ, NULL, flags,
+                     FILE_ATTRIBUTE_NORMAL, NULL);
+    if (out == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "open %s failed", file->out);
+        return;
+    }
+
+    qemu_chr_open_win_file(chr, out);
+#else
+    int flags, in = -1, out;
+    /* POSIX: the output file is always opened; the input file is optional. */
+    flags = O_WRONLY | O_CREAT | O_BINARY;
+    if (file->has_append && file->append) {
+        flags |= O_APPEND;
+    } else {
+        flags |= O_TRUNC;
+    }
+
+    out = qmp_chardev_open_file_source(file->out, flags, errp);
+    if (out < 0) {
+        return;
+    }
+
+    if (file->has_in) {
+        flags = O_RDONLY;
+        in = qmp_chardev_open_file_source(file->in, flags, errp);
+        if (in < 0) {
+            qemu_close(out); /* do not leak the already-open output fd */
+            return;
+        }
+    }
+
+    qemu_chr_open_fd(chr, in, out); /* in is still -1 without an input file */
+#endif
+}
+
+static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend,
+                                    Error **errp)
+{
+    const char *path = qemu_opt_get(opts, "path");
+    ChardevFile *file;
+    /* Fill a file backend from the "path" and "append" options. */
+    backend->type = CHARDEV_BACKEND_KIND_FILE;
+    if (path == NULL) {
+        error_setg(errp, "chardev: file: no filename given");
+        return;
+    }
+    file = backend->u.file.data = g_new0(ChardevFile, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevFile_base(file));
+    file->out = g_strdup(path); /* only the output path is set here */
+
+    file->has_append = true; /* append is always marked present */
+    file->append = qemu_opt_get_bool(opts, "append", false);
+}
+
+static void char_file_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+    /* The file chardev sets the option parser and the open hook. */
+    cc->parse = qemu_chr_parse_file_out;
+    cc->open = qmp_chardev_open_file;
+}
+
+static const TypeInfo char_file_type_info = {
+    .name = TYPE_CHARDEV_FILE,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN, /* matches the _WIN32 branch of open */
+#else
+    .parent = TYPE_CHARDEV_FD, /* open ends in qemu_chr_open_fd() */
+#endif
+    .class_init = char_file_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_file_type_info); /* file chardev */
+}
+
+type_init(register_types);
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -0,0 +1,192 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "char-io.h"
+
+typedef struct IOWatchPoll {
+    GSource parent; /* embedded source; recovered with container_of() */
+
+    QIOChannel *ioc; /* channel the child watch is created on */
+    GSource *src;    /* child watch; NULL while no watch is active */
+
+    IOCanReadHandler *fd_can_read; /* called from prepare with opaque */
+    GSourceFunc fd_read;           /* callback installed on src */
+    void *opaque;                  /* passed to fd_can_read and fd_read */
+    GMainContext *context;         /* context that src is attached to */
+} IOWatchPoll;
+
+static IOWatchPoll *io_watch_poll_from_source(GSource *source)
+{   /* Map the embedded parent GSource back to its IOWatchPoll. */
+    return container_of(source, IOWatchPoll, parent);
+}
+
+static gboolean io_watch_poll_prepare(GSource *source,
+                                      gint *timeout) /* not used */
+{
+    IOWatchPoll *iwp = io_watch_poll_from_source(source);
+    bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
+    bool was_active = iwp->src != NULL;
+    if (was_active == now_active) {
+        return FALSE; /* no state change: leave the child watch as it is */
+    }
+    /* Add the child watch when reading becomes possible, else drop it. */
+    if (now_active) {
+        iwp->src = qio_channel_create_watch(
+            iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
+        g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
+        g_source_attach(iwp->src, iwp->context);
+    } else {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    return FALSE; /* this wrapper source never reports itself ready */
+}
+
+static gboolean io_watch_poll_check(GSource *source)
+{   /* Always FALSE: reads are delivered through the child watch (src). */
+    return FALSE;
+}
+
+static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
+                                       gpointer user_data)
+{   /* prepare and check always return FALSE, so this should not run. */
+    abort();
+}
+
+static void io_watch_poll_finalize(GSource *source)
+{
+    /* Due to a glib bug, removing the last reference to a source
+     * inside a finalize callback causes recursive locking (and a
+     * deadlock).  This is not a problem inside other callbacks,
+     * including dispatch callbacks, so we call io_remove_watch_poll
+     * to remove this source.  At this point, iwp->src must
+     * be NULL, or we would leak it.
+     *
+     * This would be solved much more elegantly by child sources,
+     * but we support older glib versions that do not have them.
+     */
+    IOWatchPoll *iwp = io_watch_poll_from_source(source);
+    assert(iwp->src == NULL); /* nothing is released here, only checked */
+}
+
+static GSourceFuncs io_watch_poll_funcs = {
+    .prepare = io_watch_poll_prepare,   /* adds or drops the child watch */
+    .check = io_watch_poll_check,       /* always FALSE */
+    .dispatch = io_watch_poll_dispatch, /* aborts if ever called */
+    .finalize = io_watch_poll_finalize, /* asserts the child watch is gone */
+};
+
+guint io_add_watch_poll(Chardev *chr,
+                        QIOChannel *ioc,
+                        IOCanReadHandler *fd_can_read,
+                        QIOChannelFunc fd_read,
+                        gpointer user_data,
+                        GMainContext *context)
+{
+    IOWatchPoll *iwp;
+    guint tag; /* same type as g_source_attach() and our own return value */
+    char *name;
+    /* Create the poll source; its child watch (src) is added by prepare. */
+    iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
+                                       sizeof(IOWatchPoll));
+    iwp->fd_can_read = fd_can_read;
+    iwp->opaque = user_data;
+    iwp->ioc = ioc;
+    iwp->fd_read = (GSourceFunc) fd_read;
+    iwp->src = NULL;
+    iwp->context = context;
+    /* Name the source after the chardev label. */
+    name = g_strdup_printf("chardev-iowatch-%s", chr->label);
+    g_source_set_name((GSource *)iwp, name);
+    g_free(name);
+    /* Attach to context, drop our own reference and return the source id. */
+    tag = g_source_attach(&iwp->parent, context);
+    g_source_unref(&iwp->parent);
+    return tag;
+}
+
+static void io_remove_watch_poll(guint tag)
+{
+    GSource *source;
+    IOWatchPoll *iwp;
+    /* Destroy the poll source identified by tag and its child watch. */
+    g_return_if_fail(tag > 0);
+    /* NOTE(review): NULL = default context; the add path takes a context. */
+    source = g_main_context_find_source_by_id(NULL, tag);
+    g_return_if_fail(source != NULL);
+
+    iwp = io_watch_poll_from_source(source);
+    if (iwp->src) { /* clear src so the finalize assertion holds */
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    g_source_destroy(&iwp->parent);
+}
+
+void remove_fd_in_watch(Chardev *chr)
+{   /* Remove the chardev's input watch, if one is recorded. */
+    if (chr->fd_in_tag) {
+        io_remove_watch_poll(chr->fd_in_tag);
+        chr->fd_in_tag = 0; /* zero means no watch is installed */
+    }
+}
+
+int io_channel_send_full(QIOChannel *ioc,
+                         const void *buf, size_t len,
+                         int *fds, size_t nfds)
+{
+    size_t offset = 0;
+    /* Write until all len bytes are sent or the channel would block. */
+    while (offset < len) {
+        ssize_t ret = 0;
+        struct iovec iov = { .iov_base = (char *)buf + offset,
+                             .iov_len = len - offset };
+        /* NOTE(review): fds/nfds are passed again on every iteration. */
+        ret = qio_channel_writev_full(
+            ioc, &iov, 1,
+            fds, nfds, NULL);
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            if (offset) {
+                return offset; /* partial write: report the bytes sent */
+            }
+
+            errno = EAGAIN;
+            return -1;
+        } else if (ret < 0) {
+            errno = EINVAL; /* any other failure is reported as EINVAL */
+            return -1;
+        }
+
+        offset += ret;
+    }
+
+    return offset;
+}
+
+int io_channel_send(QIOChannel *ioc, const void *buf, size_t len)
+{   /* Same as io_channel_send_full() with no file descriptors attached. */
+    return io_channel_send_full(ioc, buf, len, NULL, 0);
+}
--- a/chardev/char-io.h
+++ b/chardev/char-io.h
@@ -0,0 +1,46 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_IO_H
+#define CHAR_IO_H
+
+#include "qemu-common.h"
+#include "io/channel.h"
+#include "sysemu/char.h"
+
+/* Can only be used for read */
+guint io_add_watch_poll(Chardev *chr,
+                        QIOChannel *ioc,
+                        IOCanReadHandler *fd_can_read,
+                        QIOChannelFunc fd_read,
+                        gpointer user_data,
+                        GMainContext *context);
+
+void remove_fd_in_watch(Chardev *chr);
+
+int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);
+
+int io_channel_send_full(QIOChannel *ioc, const void *buf, size_t len,
+                         int *fds, size_t nfds);
+
+#endif /* CHAR_IO_H */
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "sysemu/block-backend.h"
+#include "char-mux.h"
+
+/* MUX driver for serial I/O splitting */
+
+/* Write via d->chr, optionally timestamped. Called with chr_write_lock held. */
+static int mux_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+    int ret;
+    if (!d->timestamps) {
+        ret = qemu_chr_fe_write(&d->chr, buf, len);
+    } else {
+        int i;
+        /* Timestamp mode: write byte by byte, prefixing each new line. */
+        ret = 0;
+        for (i = 0; i < len; i++) {
+            if (d->linestart) {
+                char buf1[64];
+                int64_t ti;
+                int secs;
+                /* Stamp is the time elapsed since the first stamped line. */
+                ti = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+                if (d->timestamps_start == -1) {
+                    d->timestamps_start = ti;
+                }
+                ti -= d->timestamps_start;
+                secs = ti / 1000;
+                snprintf(buf1, sizeof(buf1),
+                         "[%02d:%02d:%02d.%03d] ",
+                         secs / 3600,
+                         (secs / 60) % 60,
+                         secs % 60,
+                         (int)(ti % 1000));
+                /* XXX this blocks entire thread. Rewrite to use
+                 * qemu_chr_fe_write and background I/O callbacks */
+                qemu_chr_fe_write_all(&d->chr,
+                                      (uint8_t *)buf1, strlen(buf1));
+                d->linestart = 0;
+            }
+            ret += qemu_chr_fe_write(&d->chr, buf + i, 1); /* unchecked */
+            if (buf[i] == '\n') {
+                d->linestart = 1; /* the next byte starts a new line */
+            }
+        }
+    }
+    return ret;
+}
+
+static const char * const mux_help[] = { /* '%' is expanded when printed */
+    "% h    print this help\n\r",
+    "% x    exit emulator\n\r",
+    "% s    save disk data back to file (if -snapshot)\n\r",
+    "% t    toggle console timestamps\n\r",
+    "% b    send break (magic sysrq)\n\r",
+    "% c    switch between console and monitor\n\r",
+    "% %  sends %\n\r",
+    NULL /* terminator checked by mux_print_help() */
+};
+
+int term_escape_char = 0x01; /* ctrl-a is used for escape */
+static void mux_print_help(Chardev *chr)
+{
+    int i, j;
+    char ebuf[15] = "Escape-Char";
+    char cbuf[50] = "\n\r";
+    /* Escape codes 1..25 are shown as C-a..C-y; others print their value. */
+    if (term_escape_char > 0 && term_escape_char < 26) {
+        snprintf(cbuf, sizeof(cbuf), "\n\r");
+        snprintf(ebuf, sizeof(ebuf), "C-%c", term_escape_char - 1 + 'a');
+    } else {
+        snprintf(cbuf, sizeof(cbuf),
+                 "\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r",
+                 term_escape_char);
+    }
+    /* XXX this blocks entire thread. Rewrite to use
+     * qemu_chr_fe_write and background I/O callbacks */
+    qemu_chr_write_all(chr, (uint8_t *)cbuf, strlen(cbuf));
+    for (i = 0; mux_help[i] != NULL; i++) {
+        for (j = 0; mux_help[i][j] != '\0'; j++) {
+            if (mux_help[i][j] == '%') { /* placeholder for the escape name */
+                qemu_chr_write_all(chr, (uint8_t *)ebuf, strlen(ebuf));
+            } else {
+                qemu_chr_write_all(chr, (uint8_t *)&mux_help[i][j], 1);
+            }
+        }
+    }
+}
+
+void mux_chr_send_event(MuxChardev *d, int mux_nr, int event)
+{
+    CharBackend *fe = d->backends[mux_nr];
+    /* Slots with no front end or no event handler are skipped. */
+    if (!fe || !fe->chr_event) {
+        return;
+    }
+    fe->chr_event(fe->opaque, event);
+}
+
+static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch)
+{   /* Returns 1 if ch must be forwarded, 0 if it was consumed here. */
+    if (d->term_got_escape) {
+        d->term_got_escape = 0;
+        if (ch == term_escape_char) {
+            goto send_char; /* doubled escape: forward one escape char */
+        }
+        switch (ch) { /* characters without a case are dropped */
+        case '?':
+        case 'h':
+            mux_print_help(chr);
+            break;
+        case 'x':
+            {
+                 const char *term =  "QEMU: Terminated\n\r";
+                 qemu_chr_write_all(chr, (uint8_t *)term, strlen(term));
+                 exit(0);
+                 break;
+            }
+        case 's':
+            blk_commit_all();
+            break;
+        case 'b':
+            qemu_chr_be_event(chr, CHR_EVENT_BREAK);
+            break;
+        case 'c':
+            assert(d->mux_cnt > 0); /* handler registered with first fe */
+            /* Switch to the next registered device */
+            mux_set_focus(chr, (d->focus + 1) % d->mux_cnt);
+            break;
+        case 't':
+            d->timestamps = !d->timestamps;
+            d->timestamps_start = -1; /* restart the elapsed-time base */
+            d->linestart = 0;
+            break;
+        }
+    } else if (ch == term_escape_char) {
+        d->term_got_escape = 1; /* the next byte is treated as a command */
+    } else {
+    send_char:
+        return 1;
+    }
+    return 0;
+}
+
+static void mux_chr_accept_input(Chardev *chr)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+    int m = d->focus;
+    CharBackend *be = d->backends[m];
+    /* Drain buffered bytes one at a time while the focused slot accepts. */
+    while (be && d->prod[m] != d->cons[m] &&
+           be->chr_can_read && be->chr_can_read(be->opaque)) {
+        be->chr_read(be->opaque,
+                     &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
+    }
+}
+
+static int mux_chr_can_read(void *opaque)
+{
+    MuxChardev *d = MUX_CHARDEV(opaque);
+    int m = d->focus;
+    CharBackend *be = d->backends[m];
+    /* One byte is accepted while the focused slot's buffer has room. */
+    if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE) {
+        return 1;
+    }
+    /* Buffer full: defer to the focused slot's handler, if it has one. */
+    if (be && be->chr_can_read) {
+        return be->chr_can_read(be->opaque);
+    }
+
+    return 0;
+}
+
+static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    Chardev *chr = CHARDEV(opaque);
+    MuxChardev *mux = MUX_CHARDEV(opaque);
+    int m = mux->focus;
+    CharBackend *fe = mux->backends[m];
+    int i;
+
+    mux_chr_accept_input(opaque);
+    /* Deliver directly when nothing is queued, otherwise buffer the byte. */
+    for (i = 0; i < size; i++) {
+        if (mux_proc_byte(chr, mux, buf[i])) {
+            if (mux->prod[m] == mux->cons[m] && fe && fe->chr_can_read &&
+                fe->chr_can_read(fe->opaque)) {
+                fe->chr_read(fe->opaque, &buf[i], 1);
+            } else {
+                mux->buffer[m][mux->prod[m]++ & MUX_BUFFER_MASK] = buf[i];
+            }
+        }
+    }
+}
+
+bool muxes_realized;
+
+static void mux_chr_event(void *opaque, int event)
+{
+    MuxChardev *d = MUX_CHARDEV(opaque);
+    int i;
+    /* Events arriving before muxes_realized is set are dropped. */
+    if (!muxes_realized) {
+        return;
+    }
+
+    /* Send the event to all registered listeners */
+    for (i = 0; i < d->mux_cnt; i++) {
+        mux_chr_send_event(d, i, event);
+    }
+}
+
+static GSource *mux_chr_add_watch(Chardev *s, GIOCondition cond)
+{
+    MuxChardev *d = MUX_CHARDEV(s);
+    Chardev *chr = qemu_chr_fe_get_driver(&d->chr);
+    ChardevClass *cc = CHARDEV_GET_CLASS(chr);
+    /* Delegate to the backing chardev's class hook; NULL if it has none. */
+    if (!cc->chr_add_watch) {
+        return NULL;
+    }
+
+    return cc->chr_add_watch(chr, cond);
+}
+
+static void char_mux_finalize(Object *obj)
+{
+    MuxChardev *d = MUX_CHARDEV(obj);
+    int i;
+    /* Clear the chr pointer of every attached slot, then deinit d->chr. */
+    for (i = 0; i < d->mux_cnt; i++) {
+        CharBackend *be = d->backends[i];
+        if (be) {
+            be->chr = NULL;
+        }
+    }
+    qemu_chr_fe_deinit(&d->chr);
+}
+
+void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+
+    /* Fix up the real driver with mux routines */
+    qemu_chr_fe_set_handlers(&d->chr,
+                             mux_chr_can_read,
+                             mux_chr_read,
+                             mux_chr_event,
+                             chr, /* opaque handed to the three callbacks */
+                             context, true);
+}
+
+void mux_set_focus(Chardev *chr, int focus)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+    /* focus must name one of the mux_cnt attached slots. */
+    assert(focus >= 0);
+    assert(focus < d->mux_cnt);
+    /* Tell the previously focused slot, if any, that it lost focus. */
+    if (d->focus != -1) {
+        mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
+    }
+
+    d->focus = focus;
+    mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
+}
+
+static void qemu_chr_open_mux(Chardev *chr,
+                              ChardevBackend *backend,
+                              bool *be_opened,
+                              Error **errp)
+{
+    ChardevMux *mux = backend->u.mux.data;
+    Chardev *drv;
+    MuxChardev *d = MUX_CHARDEV(chr);
+    /* Look up the backing chardev named in the mux options. */
+    drv = qemu_chr_find(mux->chardev);
+    if (drv == NULL) {
+        error_setg(errp, "mux: base chardev %s not found", mux->chardev);
+        return;
+    }
+
+    d->focus = -1; /* no slot has focus yet; see mux_set_focus() */
+    /* only default to opened state if we've realized the initial
+     * set of muxes
+     */
+    *be_opened = muxes_realized;
+    qemu_chr_fe_init(&d->chr, drv, errp);
+}
+
+static void qemu_chr_parse_mux(QemuOpts *opts, ChardevBackend *backend,
+                               Error **errp)
+{
+    const char *chardev = qemu_opt_get(opts, "chardev");
+    ChardevMux *mux;
+    /* The "chardev" option is mandatory; fail without touching backend. */
+    if (chardev == NULL) {
+        error_setg(errp, "chardev: mux: no chardev given");
+        return;
+    }
+    backend->type = CHARDEV_BACKEND_KIND_MUX;
+    mux = backend->u.mux.data = g_new0(ChardevMux, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevMux_base(mux));
+    mux->chardev = g_strdup(chardev); /* the backend gets its own copy */
+}
+
+static void char_mux_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+    /* Install the mux parse, open, write, accept-input and watch hooks. */
+    cc->parse = qemu_chr_parse_mux;
+    cc->open = qemu_chr_open_mux;
+    cc->chr_write = mux_chr_write;
+    cc->chr_accept_input = mux_chr_accept_input;
+    cc->chr_add_watch = mux_chr_add_watch;
+}
+
+static const TypeInfo char_mux_type_info = {
+    .name = TYPE_CHARDEV_MUX,
+    .parent = TYPE_CHARDEV,
+    .class_init = char_mux_class_init,
+    .instance_size = sizeof(MuxChardev),
+    .instance_finalize = char_mux_finalize,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_mux_type_info); /* mux chardev */
+}
+
+type_init(register_types);
--- a/chardev/char-mux.h
+++ b/chardev/char-mux.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_MUX_H
+#define CHAR_MUX_H
+
+#include "sysemu/char.h"
+
+/* Read by the mux open callback in char-mux.c to decide whether a newly
+ * opened mux reports itself as already opened. */
+extern bool muxes_realized;
+
+/* Capacity of the per-mux arrays in MuxChardev below. */
+#define MAX_MUX 4
+#define MUX_BUFFER_SIZE 32 /* Must be a power of 2.  */
+/* NOTE(review): presumably used to wrap prod[]/cons[] into buffer[];
+ * confirm against char-mux.c. */
+#define MUX_BUFFER_MASK (MUX_BUFFER_SIZE - 1)
+/* Instance state of a mux chardev (TYPE_CHARDEV_MUX). */
+typedef struct MuxChardev {
+    Chardev parent;
+    /* NOTE(review): presumably the front ends sharing this mux, one slot
+     * per attached front end; confirm against char-mux.c. */
+    CharBackend *backends[MAX_MUX];
+    /* The mux's own front end; the open callback attaches it to the base
+     * chardev with qemu_chr_fe_init(). */
+    CharBackend chr;
+    /* Set to -1 by the open callback and updated by mux_set_focus(). */
+    int focus;
+    /* NOTE(review): looks like the number of used backends[] slots. */
+    int mux_cnt;
+    int term_got_escape;
+    int max_size;
+    /* Intermediate input buffer catches escape sequences even if the
+       currently active device is not accepting any input - but only until it
+       is full as well. */
+    unsigned char buffer[MAX_MUX][MUX_BUFFER_SIZE];
+    /* NOTE(review): presumably per-mux producer/consumer positions for
+     * buffer[]; confirm against char-mux.c. */
+    int prod[MAX_MUX];
+    int cons[MAX_MUX];
+    int timestamps;
+
+    /* Protected by the Chardev chr_write_lock.  */
+    int linestart;
+    int64_t timestamps_start;
+} MuxChardev;
+
+/* Checked cast of a QOM object to MuxChardev. */
+#define MUX_CHARDEV(obj) OBJECT_CHECK(MuxChardev, (obj), TYPE_CHARDEV_MUX)
+/* Evaluates to the result of a dynamic cast of @chr to the mux type, so it
+ * can be used as a truth value (assumes the cast yields NULL for other
+ * types -- TODO confirm against the QOM API). */
+#define CHARDEV_IS_MUX(chr)                             \
+    object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_MUX)
+
+/* Entry points implemented in char-mux.c. */
+void mux_chr_set_handlers(Chardev *chr, GMainContext *context);
+/* Stores @focus as the mux's current focus and sends CHR_EVENT_MUX_IN to
+ * it; see char-mux.c for the full behaviour. */
+void mux_set_focus(Chardev *chr, int focus);
+void mux_chr_send_event(MuxChardev *d, int mux_nr, int event);
+
+#endif /* CHAR_MUX_H */
--- a/chardev/char-null.c
+++ b/chardev/char-null.c
@@ -0,0 +1,54 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+
+/*
+ * Open callback of the null chardev: there is nothing to open, so the only
+ * effect is to report through @be_opened that the backend is not opened.
+ */
+static void null_chr_open(Chardev *chr, ChardevBackend *backend,
+                          bool *be_opened, Error **errp)
+{
+    *be_opened = false;
+}
+
+/* Class initializer: the null chardev only provides an open hook. */
+static void char_null_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->open = null_chr_open;
+}
+
+/* QOM type description of the null chardev; it adds no state of its own,
+ * hence the plain Chardev instance size. */
+static const TypeInfo char_null_type_info = {
+    .parent        = TYPE_CHARDEV,
+    .name          = TYPE_CHARDEV_NULL,
+    .class_init    = char_null_class_init,
+    .instance_size = sizeof(Chardev),
+};
+
+/* Registers the null chardev type; hooked up through type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_null_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-parallel.c
+++ b/chardev/char-parallel.c
@@ -0,0 +1,316 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "qapi/error.h"
+#include <sys/ioctl.h>
+
+#ifdef CONFIG_BSD
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <dev/ppbus/ppi.h>
+#include <dev/ppbus/ppbconf.h>
+#elif defined(__DragonFly__)
+#include <dev/misc/ppi/ppi.h>
+#include <bus/ppbus/ppbconf.h>
+#endif
+#else
+#ifdef __linux__
+#include <linux/ppdev.h>
+#include <linux/parport.h>
+#endif
+#endif
+
+#include "char-fd.h"
+#include "char-parallel.h"
+
+#if defined(__linux__)
+
+/* Instance state of the Linux (ppdev) parallel port chardev. */
+typedef struct {
+    Chardev parent;
+    /* Descriptor of the claimed parallel port device. */
+    int fd;
+    /* Mode last programmed with PPSETMODE; starts as IEEE1284_MODE_COMPAT. */
+    int mode;
+} ParallelChardev;
+
+/* Checked cast of a QOM object to ParallelChardev. */
+#define PARALLEL_CHARDEV(obj) \
+    OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
+
+/*
+ * Make sure the port is in IEEE 1284 mode @mode.
+ *
+ * The mode is cached in s->mode so PPSETMODE is only issued when it
+ * actually changes.  Returns 1 on success (or no change needed) and 0 when
+ * the ioctl fails, in which case the cached mode is left alone.
+ */
+static int pp_hw_mode(ParallelChardev *s, uint16_t mode)
+{
+    int requested = mode;
+
+    if (s->mode == mode) {
+        return 1;
+    }
+    if (ioctl(s->fd, PPSETMODE, &requested) < 0) {
+        return 0;
+    }
+    s->mode = mode;
+    return 1;
+}
+
+/*
+ * Common body of the four EPP transfer commands: switch the port to @mode
+ * and move parg->count bytes with a single read() or write().
+ *
+ * Returns -EIO when the transfer is short or fails, 0 otherwise.  Note that
+ * 0 is also returned when the mode switch fails and no transfer is
+ * attempted at all.
+ */
+static int pp_epp_transfer(ParallelChardev *drv, uint16_t mode,
+                           struct ParallelIOArg *parg, bool is_write)
+{
+    int fd = drv->fd;
+    int n;
+
+    if (!pp_hw_mode(drv, mode)) {
+        return 0;
+    }
+    if (is_write) {
+        n = write(fd, parg->buffer, parg->count);
+    } else {
+        n = read(fd, parg->buffer, parg->count);
+    }
+    return n != parg->count ? -EIO : 0;
+}
+
+/*
+ * chr_ioctl hook of the Linux parallel port chardev.
+ *
+ * Translates the CHR_IOCTL_PP_* commands into ppdev ioctls or EPP
+ * transfers.  @arg points to a uint8_t for the data/control/status
+ * commands, to an int for CHR_IOCTL_PP_DATA_DIR and to a
+ * struct ParallelIOArg for the EPP commands.
+ *
+ * Returns 0 on success, -ENOTSUP for unknown commands or failed ioctls,
+ * and -EIO for incomplete EPP transfers.
+ */
+static int pp_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    ParallelChardev *pp = PARALLEL_CHARDEV(chr);
+    uint8_t *byte = arg;
+    uint8_t val;
+
+    switch (cmd) {
+    case CHR_IOCTL_PP_READ_DATA:
+        if (ioctl(pp->fd, PPRDATA, &val) < 0) {
+            return -ENOTSUP;
+        }
+        *byte = val;
+        return 0;
+    case CHR_IOCTL_PP_WRITE_DATA:
+        val = *byte;
+        if (ioctl(pp->fd, PPWDATA, &val) < 0) {
+            return -ENOTSUP;
+        }
+        return 0;
+    case CHR_IOCTL_PP_READ_CONTROL:
+        if (ioctl(pp->fd, PPRCONTROL, &val) < 0) {
+            return -ENOTSUP;
+        }
+        /* Linux gives only the lowest bits, and no way to know data
+           direction! For better compatibility set the fixed upper
+           bits. */
+        *byte = val | 0xc0;
+        return 0;
+    case CHR_IOCTL_PP_WRITE_CONTROL:
+        val = *byte;
+        if (ioctl(pp->fd, PPWCONTROL, &val) < 0) {
+            return -ENOTSUP;
+        }
+        return 0;
+    case CHR_IOCTL_PP_READ_STATUS:
+        if (ioctl(pp->fd, PPRSTATUS, &val) < 0) {
+            return -ENOTSUP;
+        }
+        *byte = val;
+        return 0;
+    case CHR_IOCTL_PP_DATA_DIR:
+        if (ioctl(pp->fd, PPDATADIR, (int *)arg) < 0) {
+            return -ENOTSUP;
+        }
+        return 0;
+    case CHR_IOCTL_PP_EPP_READ_ADDR:
+        return pp_epp_transfer(pp, IEEE1284_MODE_EPP | IEEE1284_ADDR,
+                               arg, false);
+    case CHR_IOCTL_PP_EPP_READ:
+        return pp_epp_transfer(pp, IEEE1284_MODE_EPP, arg, false);
+    case CHR_IOCTL_PP_EPP_WRITE_ADDR:
+        return pp_epp_transfer(pp, IEEE1284_MODE_EPP | IEEE1284_ADDR,
+                               arg, true);
+    case CHR_IOCTL_PP_EPP_WRITE:
+        return pp_epp_transfer(pp, IEEE1284_MODE_EPP, arg, true);
+    default:
+        return -ENOTSUP;
+    }
+}
+
+/*
+ * Finish opening the Linux parallel port chardev on an already opened @fd.
+ *
+ * Claims the port with PPCLAIM.  On success the descriptor is stored in the
+ * chardev and the cached mode is set to IEEE1284_MODE_COMPAT.  On failure
+ * @errp is set and @fd is closed here, so the caller must not reuse it.
+ * @be_opened is not modified.
+ */
+static void qemu_chr_open_pp_fd(Chardev *chr,
+                                int fd,
+                                bool *be_opened,
+                                Error **errp)
+{
+    ParallelChardev *pp = PARALLEL_CHARDEV(chr);
+
+    if (ioctl(fd, PPCLAIM) >= 0) {
+        pp->fd = fd;
+        pp->mode = IEEE1284_MODE_COMPAT;
+        return;
+    }
+
+    error_setg_errno(errp, errno, "not a parallel port");
+    close(fd);
+}
+#endif /* __linux__ */
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+
+/* Instance state of the FreeBSD/DragonFly (ppi) parallel port chardev. */
+typedef struct {
+    Chardev parent;
+    /* Descriptor of the opened parallel port device. */
+    int fd;
+} ParallelChardev;
+
+/* Checked cast of a QOM object to ParallelChardev. */
+#define PARALLEL_CHARDEV(obj)                                   \
+    OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
+
+/*
+ * chr_ioctl hook of the FreeBSD/DragonFly parallel port chardev.
+ *
+ * Every supported command moves one byte between *@arg (a uint8_t) and the
+ * ppi driver, so the command is first mapped to a ppi request and a
+ * direction and then carried out by a single ioctl().
+ *
+ * Returns 0 on success and -ENOTSUP for unknown commands or failed ioctls.
+ */
+static int pp_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    ParallelChardev *pp = PARALLEL_CHARDEV(chr);
+    uint8_t *byte = arg;
+    unsigned long request;
+    bool to_device;
+    uint8_t val;
+
+    switch (cmd) {
+    case CHR_IOCTL_PP_READ_DATA:
+        request = PPIGDATA;
+        to_device = false;
+        break;
+    case CHR_IOCTL_PP_WRITE_DATA:
+        request = PPISDATA;
+        to_device = true;
+        break;
+    case CHR_IOCTL_PP_READ_CONTROL:
+        request = PPIGCTRL;
+        to_device = false;
+        break;
+    case CHR_IOCTL_PP_WRITE_CONTROL:
+        request = PPISCTRL;
+        to_device = true;
+        break;
+    case CHR_IOCTL_PP_READ_STATUS:
+        request = PPIGSTATUS;
+        to_device = false;
+        break;
+    default:
+        return -ENOTSUP;
+    }
+
+    if (to_device) {
+        val = *byte;
+    }
+    if (ioctl(pp->fd, request, &val) < 0) {
+        return -ENOTSUP;
+    }
+    if (!to_device) {
+        /* Only hand the byte back once the ioctl has succeeded. */
+        *byte = val;
+    }
+    return 0;
+}
+
+/*
+ * Finish opening the FreeBSD/DragonFly parallel port chardev: remember the
+ * already opened @fd and report the backend as not opened via @be_opened.
+ * No ioctl is issued here, so this variant cannot fail.
+ */
+static void qemu_chr_open_pp_fd(Chardev *chr, int fd,
+                                bool *be_opened, Error **errp)
+{
+    ParallelChardev *pp = PARALLEL_CHARDEV(chr);
+
+    *be_opened = false;
+    pp->fd = fd;
+}
+#endif
+
+#ifdef HAVE_CHARDEV_PARPORT
+/*
+ * Open callback of the parallel port chardev: open the configured device
+ * node read/write and pass the descriptor to the host-specific
+ * qemu_chr_open_pp_fd().  If the device cannot be opened, the helper has
+ * already set @errp and nothing else is done.
+ */
+static void qmp_chardev_open_parallel(Chardev *chr,
+                                      ChardevBackend *backend,
+                                      bool *be_opened,
+                                      Error **errp)
+{
+    ChardevHostdev *cfg = backend->u.parallel.data;
+    int fd = qmp_chardev_open_file_source(cfg->device, O_RDWR, errp);
+
+    if (fd >= 0) {
+        qemu_chr_open_pp_fd(chr, fd, be_opened, errp);
+    }
+}
+
+/*
+ * Parse callback of the parallel port chardev.
+ *
+ * Reads the mandatory "path" option from @opts and stores a freshly
+ * allocated ChardevHostdev describing it in @backend.  When the option is
+ * missing, @errp is set and @backend is left untouched.
+ */
+static void qemu_chr_parse_parallel(QemuOpts *opts, ChardevBackend *backend,
+                                    Error **errp)
+{
+    const char *path = qemu_opt_get(opts, "path");
+    ChardevHostdev *cfg;
+
+    if (!path) {
+        error_setg(errp, "chardev: parallel: no device path given");
+        return;
+    }
+
+    cfg = g_new0(ChardevHostdev, 1);
+    backend->u.parallel.data = cfg;
+    backend->type = CHARDEV_BACKEND_KIND_PARALLEL;
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(cfg));
+    /* The option string belongs to @opts, so keep a private copy. */
+    cfg->device = g_strdup(path);
+}
+
+/*
+ * Class initializer of the parallel port chardev.  Both host variants name
+ * their ioctl hook pp_ioctl, so one conditional covers them.
+ */
+static void char_parallel_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->open = qmp_chardev_open_parallel;
+    klass->parse = qemu_chr_parse_parallel;
+#if defined(__linux__) || defined(__FreeBSD__) || \
+    defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+    klass->chr_ioctl = pp_ioctl;
+#endif
+}
+
+/*
+ * Instance finalizer of the parallel port chardev.
+ *
+ * Linux: put the port back into compatibility mode, release the claim
+ * taken with PPCLAIM, close the descriptor and emit CHR_EVENT_CLOSED.
+ * FreeBSD/DragonFly: nothing is released yet (see FIXME below).
+ *
+ * NOTE(review): drv->fd is only assigned once the open path succeeded.  If
+ * this finalizer can run on an instance whose open failed, the ioctl() and
+ * close() calls below act on whatever value fd was initialized with --
+ * confirm against the chardev creation code.
+ */
+static void char_parallel_finalize(Object *obj)
+{
+#if defined(__linux__)
+    Chardev *chr = CHARDEV(obj);
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+    int fd = drv->fd;
+
+    /* The return value is ignored: teardown continues either way. */
+    pp_hw_mode(drv, IEEE1284_MODE_COMPAT);
+    ioctl(fd, PPRELEASE);
+    close(fd);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+    defined(__DragonFly__)
+    /* FIXME: close fd? */
+#endif
+}
+
+/* QOM type description of the parallel port chardev. */
+static const TypeInfo char_parallel_type_info = {
+    .parent            = TYPE_CHARDEV,
+    .name              = TYPE_CHARDEV_PARALLEL,
+    .class_init        = char_parallel_class_init,
+    .instance_size     = sizeof(ParallelChardev),
+    .instance_finalize = char_parallel_finalize,
+};
+
+/* Registers the parallel chardev type; hooked up through type_init(). */
+static void register_types(void)
+{
+    type_register_static(&char_parallel_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-parallel.h
+++ b/chardev/char-parallel.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_PARALLEL_H
+#define CHAR_PARALLEL_H
+
+/* Defined on the hosts for which char-parallel.c has a parallel port
+ * implementation; the backend's open/parse/registration code there is
+ * compiled only when this macro is set. */
+#if defined(__linux__) || defined(__FreeBSD__) || \
+    defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#define HAVE_CHARDEV_PARPORT 1
+#endif
+
+#endif /* CHAR_PARALLEL_H */
--- a/chardev/char-pipe.c
+++ b/chardev/char-pipe.c
@@ -0,0 +1,191 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include "char-fd.h"
+#endif
+
+#ifdef _WIN32
+#define MAXCONNECT 1
+#define NTIMEOUT 5000
+
+/*
+ * Create the named pipe \\.\pipe\<filename> for a Windows pipe chardev and
+ * wait for a client to connect.
+ *
+ * The send/receive events and the pipe handle are stored in the WinChardev.
+ * On success the pipe poller is registered for @chr and 0 is returned.
+ * On failure @errp is set and -1 is returned.
+ *
+ * The temporary event used for the overlapped connect is released on every
+ * exit path.  The handles already stored in the WinChardev are not released
+ * here (NOTE(review): presumably left to the Windows chardev finalizer --
+ * confirm).
+ */
+static int win_chr_pipe_init(Chardev *chr, const char *filename,
+                             Error **errp)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+    OVERLAPPED ov;
+    int ret;
+    DWORD size;
+    char *openname;
+
+    /* Cleared up front so the fail path can tell if ov.hEvent exists. */
+    ZeroMemory(&ov, sizeof(ov));
+
+    s->fpipe = TRUE;
+
+    s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (!s->hsend) {
+        error_setg(errp, "Failed CreateEvent");
+        goto fail;
+    }
+    s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (!s->hrecv) {
+        error_setg(errp, "Failed CreateEvent");
+        goto fail;
+    }
+
+    openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
+    s->hcom = CreateNamedPipe(openname,
+                              PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
+                              PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
+                              PIPE_WAIT,
+                              MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
+    g_free(openname);
+    if (s->hcom == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
+        s->hcom = NULL;
+        goto fail;
+    }
+
+    ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+    ret = ConnectNamedPipe(s->hcom, &ov);
+    if (ret) {
+        /* This path used to leak ov.hEvent; the fail label closes it. */
+        error_setg(errp, "Failed ConnectNamedPipe");
+        goto fail;
+    }
+
+    /* Block until the overlapped connect has completed. */
+    ret = GetOverlappedResult(s->hcom, &ov, &size, TRUE);
+    if (!ret) {
+        error_setg(errp, "Failed GetOverlappedResult");
+        goto fail;
+    }
+
+    if (ov.hEvent) {
+        CloseHandle(ov.hEvent);
+        ov.hEvent = NULL;
+    }
+    qemu_add_polling_cb(win_chr_pipe_poll, chr);
+    return 0;
+
+ fail:
+    if (ov.hEvent) {
+        CloseHandle(ov.hEvent);
+        ov.hEvent = NULL;
+    }
+    return -1;
+}
+
+/*
+ * Open callback of the pipe chardev on Windows: create and connect the
+ * named pipe given by the "device" field of the backend options.  Failures
+ * are reported solely through @errp, so the helper's return value carries
+ * no additional information here.
+ */
+static void qemu_chr_open_pipe(Chardev *chr,
+                               ChardevBackend *backend,
+                               bool *be_opened,
+                               Error **errp)
+{
+    ChardevHostdev *cfg = backend->u.pipe.data;
+
+    win_chr_pipe_init(chr, cfg->device, errp);
+}
+
+#else
+
+/*
+ * Open callback of the pipe chardev on POSIX hosts.
+ *
+ * First tries the pair "<path>.in" / "<path>.out".  If either of the two
+ * cannot be opened, whatever was opened is closed again and "<path>" itself
+ * is opened once and used for both directions.  @errp is set only when that
+ * fallback fails too.
+ */
+static void qemu_chr_open_pipe(Chardev *chr,
+                               ChardevBackend *backend,
+                               bool *be_opened,
+                               Error **errp)
+{
+    ChardevHostdev *cfg = backend->u.pipe.data;
+    const char *path = cfg->device;
+    char *path_in = g_strdup_printf("%s.in", path);
+    char *path_out = g_strdup_printf("%s.out", path);
+    int fd_in, fd_out;
+
+    TFR(fd_in = qemu_open(path_in, O_RDWR | O_BINARY));
+    TFR(fd_out = qemu_open(path_out, O_RDWR | O_BINARY));
+    g_free(path_in);
+    g_free(path_out);
+
+    if (fd_in >= 0 && fd_out >= 0) {
+        qemu_chr_open_fd(chr, fd_in, fd_out);
+        return;
+    }
+
+    /* Half-open pair: drop it and fall back to the single file. */
+    if (fd_in >= 0) {
+        close(fd_in);
+    }
+    if (fd_out >= 0) {
+        close(fd_out);
+    }
+    TFR(fd_in = fd_out = qemu_open(path, O_RDWR | O_BINARY));
+    if (fd_in < 0) {
+        error_setg_file_open(errp, errno, path);
+        return;
+    }
+    qemu_chr_open_fd(chr, fd_in, fd_out);
+}
+
+#endif /* !_WIN32 */
+
+/*
+ * Parse callback of the pipe chardev.
+ *
+ * Reads the mandatory "path" option from @opts and stores a freshly
+ * allocated ChardevHostdev describing it in @backend.  When the option is
+ * missing, @errp is set and @backend is left untouched.
+ */
+static void qemu_chr_parse_pipe(QemuOpts *opts, ChardevBackend *backend,
+                                Error **errp)
+{
+    const char *path = qemu_opt_get(opts, "path");
+    ChardevHostdev *cfg;
+
+    if (!path) {
+        error_setg(errp, "chardev: pipe: no device path given");
+        return;
+    }
+
+    cfg = g_new0(ChardevHostdev, 1);
+    backend->u.pipe.data = cfg;
+    backend->type = CHARDEV_BACKEND_KIND_PIPE;
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(cfg));
+    /* The option string belongs to @opts, so keep a private copy. */
+    cfg->device = g_strdup(path);
+}
+
+/* Class initializer: the pipe chardev only overrides parsing and opening. */
+static void char_pipe_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->open = qemu_chr_open_pipe;
+    klass->parse = qemu_chr_parse_pipe;
+}
+
+/* QOM type description of the pipe chardev.  Its parent is the Windows
+ * chardev type on _WIN32 and the fd chardev type on every other host. */
+static const TypeInfo char_pipe_type_info = {
+    .class_init = char_pipe_class_init,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .name = TYPE_CHARDEV_PIPE,
+};
+
+/* Registers the pipe chardev type; hooked up through type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_pipe_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@@ -0,0 +1,300 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "io/channel-file.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+
+#include "char-io.h"
+
+#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)      \
+    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
+    || defined(__GLIBC__)
+
+/* Instance state of the pty chardev. */
+typedef struct {
+    Chardev parent;
+    /* Channel wrapping the pty master descriptor; created on open. */
+    QIOChannel *ioc;
+    /* Byte budget computed by pty_chr_read_poll(); caps the next read. */
+    int read_bytes;
+
+    /* Protected by the Chardev chr_write_lock.  */
+    int connected;
+    /* GLib source id of the pending (re-)connect poll timer, 0 if none. */
+    guint timer_tag;
+    /* GLib source id of the pending idle callback that delivers the
+     * generic open notification, 0 if none. */
+    guint open_tag;
+} PtyChardev;
+
+/* Checked cast of a QOM object to PtyChardev. */
+#define PTY_CHARDEV(obj) OBJECT_CHECK(PtyChardev, (obj), TYPE_CHARDEV_PTY)
+
+static void pty_chr_update_read_handler_locked(Chardev *chr);
+static void pty_chr_state(Chardev *chr, int connected);
+
+/*
+ * Timer callback armed by pty_chr_rearm_timer().
+ *
+ * Runs under chr_write_lock: forgets both source tags and, while the pty is
+ * still disconnected, polls it again.  Always returns FALSE, so the source
+ * that fired is one-shot.
+ */
+static gboolean pty_chr_timer(gpointer opaque)
+{
+    struct Chardev *chr = CHARDEV(opaque);
+    PtyChardev *pty = PTY_CHARDEV(opaque);
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    pty->timer_tag = 0;
+    pty->open_tag = 0;
+    if (pty->connected == 0) {
+        /* Still nobody there: poll again. */
+        pty_chr_update_read_handler_locked(chr);
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+
+    return FALSE;
+}
+
+/*
+ * (Re-)arm the connect poll timer so that pty_chr_timer() runs after @ms
+ * milliseconds; a timer that is still pending is cancelled first.  A value
+ * of exactly 1000 uses the seconds-granularity GLib timeout.  The new
+ * source is named after the chardev label.
+ *
+ * Called with chr_write_lock held.
+ */
+static void pty_chr_rearm_timer(Chardev *chr, int ms)
+{
+    PtyChardev *pty = PTY_CHARDEV(chr);
+    char *source_name;
+
+    if (pty->timer_tag != 0) {
+        g_source_remove(pty->timer_tag);
+        pty->timer_tag = 0;
+    }
+
+    if (ms != 1000) {
+        source_name = g_strdup_printf("pty-timer-ms-%s", chr->label);
+        pty->timer_tag = g_timeout_add(ms, pty_chr_timer, chr);
+    } else {
+        source_name = g_strdup_printf("pty-timer-secs-%s", chr->label);
+        pty->timer_tag = g_timeout_add_seconds(1, pty_chr_timer, chr);
+    }
+    g_source_set_name_by_id(pty->timer_tag, source_name);
+    g_free(source_name);
+}
+
+/*
+ * Probe the pty master with a zero-timeout poll and update the connection
+ * state: a reported G_IO_HUP means disconnected, anything else means
+ * connected.
+ *
+ * Called with chr_write_lock held.
+ */
+static void pty_chr_update_read_handler_locked(Chardev *chr)
+{
+    PtyChardev *pty = PTY_CHARDEV(chr);
+    QIOChannelFile *file_ioc = QIO_CHANNEL_FILE(pty->ioc);
+    GPollFD probe = {
+        .fd = file_ioc->fd,
+        .events = G_IO_OUT,
+        .revents = 0,
+    };
+    int rc;
+
+    /* Retry if the poll is interrupted by a signal. */
+    do {
+        rc = g_poll(&probe, 1, 0);
+    } while (rc == -1 && errno == EINTR);
+    assert(rc >= 0);
+
+    pty_chr_state(chr, (probe.revents & G_IO_HUP) ? 0 : 1);
+}
+
+/*
+ * chr_update_read_handler hook: take chr_write_lock and re-probe the pty's
+ * connection state.  @context is not used.
+ */
+static void pty_chr_update_read_handler(Chardev *chr, GMainContext *context)
+{
+    QemuMutex *lock = &chr->chr_write_lock;
+
+    qemu_mutex_lock(lock);
+    pty_chr_update_read_handler_locked(chr);
+    qemu_mutex_unlock(lock);
+}
+
+/*
+ * chr_write hook of the pty chardev.
+ *
+ * If the pty is currently marked disconnected, the guest writing is used as
+ * an occasion to probe for a (re-)connect.  Returns 0 without sending
+ * anything while still disconnected, otherwise the result of
+ * io_channel_send().
+ *
+ * Called with chr_write_lock held.
+ */
+static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    PtyChardev *pty = PTY_CHARDEV(chr);
+
+    if (!pty->connected) {
+        /* guest sends data, check for (re-)connect */
+        pty_chr_update_read_handler_locked(chr);
+    }
+
+    return pty->connected ? io_channel_send(pty->ioc, buf, len) : 0;
+}
+
+/*
+ * chr_add_watch hook: create a watch for @cond on the pty channel, or
+ * return NULL while the pty is not connected.
+ */
+static GSource *pty_chr_add_watch(Chardev *chr, GIOCondition cond)
+{
+    PtyChardev *pty = PTY_CHARDEV(chr);
+
+    return pty->connected ? qio_channel_create_watch(pty->ioc, cond) : NULL;
+}
+
+/*
+ * Poll callback of the read watch: ask how many bytes the front end can
+ * take, remember that budget for pty_chr_read() and return it.
+ */
+static int pty_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    PtyChardev *pty = PTY_CHARDEV(opaque);
+    int budget = qemu_chr_be_can_write(chr);
+
+    pty->read_bytes = budget;
+    return budget;
+}
+
+/*
+ * Read callback of the pty watch.
+ *
+ * Reads at most min(sizeof(buf), read_bytes) bytes from the pty and passes
+ * them to the front end.  A read that returns nothing or fails marks the
+ * pty disconnected and returns FALSE; every other path returns TRUE.
+ */
+static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    PtyChardev *pty = PTY_CHARDEV(opaque);
+    uint8_t buf[CHR_READ_BUF_LEN];
+    gsize want = sizeof(buf);
+    ssize_t got;
+
+    /* Never read more than the front end said it can accept. */
+    if (want > pty->read_bytes) {
+        want = pty->read_bytes;
+    }
+    if (want == 0) {
+        return TRUE;
+    }
+
+    got = qio_channel_read(pty->ioc, (char *)buf, want, NULL);
+    if (got > 0) {
+        pty_chr_state(chr, 1);
+        qemu_chr_be_write(chr, buf, got);
+        return TRUE;
+    }
+
+    pty_chr_state(chr, 0);
+    return FALSE;
+}
+
+/*
+ * Idle callback scheduled by pty_chr_state() when the pty becomes
+ * connected: clears the pending tag and delivers the generic open
+ * notification.  Returns FALSE, so it runs once per scheduling.
+ */
+static gboolean qemu_chr_be_generic_open_func(gpointer opaque)
+{
+    PtyChardev *pty = PTY_CHARDEV(opaque);
+    Chardev *chr = CHARDEV(opaque);
+
+    pty->open_tag = 0;
+    qemu_chr_be_generic_open(chr);
+
+    return FALSE;
+}
+
+/*
+ * Move the pty chardev into the connected (@connected != 0) or
+ * disconnected (@connected == 0) state.
+ *
+ * Connected: cancel the poll timer, schedule the open notification if this
+ * is a transition from disconnected, and make sure the read watch exists.
+ * Disconnected: cancel a pending open notification, remove the read watch
+ * and arm the poll timer.
+ *
+ * Called with chr_write_lock held.
+ */
+static void pty_chr_state(Chardev *chr, int connected)
+{
+    PtyChardev *pty = PTY_CHARDEV(chr);
+
+    if (connected) {
+        if (pty->timer_tag) {
+            g_source_remove(pty->timer_tag);
+            pty->timer_tag = 0;
+        }
+        if (!pty->connected) {
+            /* Fresh connection: announce it from an idle callback. */
+            g_assert(pty->open_tag == 0);
+            pty->connected = 1;
+            pty->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
+        }
+        if (!chr->fd_in_tag) {
+            chr->fd_in_tag = io_add_watch_poll(chr, pty->ioc,
+                                               pty_chr_read_poll,
+                                               pty_chr_read,
+                                               chr, NULL);
+        }
+        return;
+    }
+
+    if (pty->open_tag) {
+        g_source_remove(pty->open_tag);
+        pty->open_tag = 0;
+    }
+    remove_fd_in_watch(chr);
+    pty->connected = 0;
+    /* Idle guests are polled for a (re-)connect once per second.  A guest
+     * that writes to the device triggers an additional check right away,
+     * see char_pty_chr_write(). */
+    pty_chr_rearm_timer(chr, 1000);
+}
+
+/*
+ * Instance finalizer of the pty chardev: under chr_write_lock, switch to
+ * the disconnected state, drop the channel reference and cancel the poll
+ * timer; then emit CHR_EVENT_CLOSED after the lock has been released.
+ */
+static void char_pty_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    PtyChardev *s = PTY_CHARDEV(obj);
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    /* Removes the read watch and any pending open notification... */
+    pty_chr_state(chr, 0);
+    object_unref(OBJECT(s->ioc));
+    /* ...but it also re-arms the poll timer, which must not outlive us. */
+    if (s->timer_tag) {
+        g_source_remove(s->timer_tag);
+        s->timer_tag = 0;
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+/*
+ * Open callback of the pty chardev.
+ *
+ * Allocates a pty pair, keeps only the non-blocking master side wrapped in
+ * a QIOChannelFile, records "pty:<name>" as the chardev filename and
+ * reports the pty name to the user.  @be_opened is set to false; the open
+ * notification is scheduled later by pty_chr_state() once the pty is found
+ * to be connected.  On failure to allocate the pty, @errp is set.
+ */
+static void char_pty_open(Chardev *chr,
+                          ChardevBackend *backend,
+                          bool *be_opened,
+                          Error **errp)
+{
+    char pty_name[PATH_MAX];
+    char *ioc_name;
+    PtyChardev *pty;
+    int slave_fd;
+    int master_fd = qemu_openpty_raw(&slave_fd, pty_name);
+
+    if (master_fd < 0) {
+        error_setg_errno(errp, errno, "Failed to create PTY");
+        return;
+    }
+
+    /* Only the master side is used from here on. */
+    close(slave_fd);
+    qemu_set_nonblock(master_fd);
+
+    chr->filename = g_strdup_printf("pty:%s", pty_name);
+    error_report("char device redirected to %s (label %s)",
+                 pty_name, chr->label);
+
+    pty = PTY_CHARDEV(chr);
+    pty->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd));
+    ioc_name = g_strdup_printf("chardev-pty-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(pty->ioc), ioc_name);
+    g_free(ioc_name);
+
+    pty->timer_tag = 0;
+    *be_opened = false;
+}
+
+/* Class initializer: install the pty implementations of the chardev hooks. */
+static void char_pty_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->open = char_pty_open;
+
+    klass->chr_add_watch = pty_chr_add_watch;
+    klass->chr_update_read_handler = pty_chr_update_read_handler;
+    klass->chr_write = char_pty_chr_write;
+}
+
+/* QOM type description of the pty chardev. */
+static const TypeInfo char_pty_type_info = {
+    .parent            = TYPE_CHARDEV,
+    .name              = TYPE_CHARDEV_PTY,
+    .class_init        = char_pty_class_init,
+    .instance_size     = sizeof(PtyChardev),
+    .instance_finalize = char_pty_finalize,
+};
+
+/* Registers the pty chardev type; hooked up through type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_pty_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-ringbuf.c
+++ b/chardev/char-ringbuf.c
@@ -0,0 +1,249 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "qmp-commands.h"
+#include "qemu/base64.h"
+
+/* Ring buffer chardev */
+
+/*
+ * Instance state of a ring buffer chardev.
+ *
+ * @prod and @cons are free-running counters: they are masked with
+ * (@size - 1) only when used as an index into @cbuf, so @prod - @cons is
+ * the number of bytes currently stored.
+ */
+typedef struct {
+    Chardev parent;
+    size_t size;    /* capacity of cbuf; checked at open to be a power of 2 */
+    size_t prod;    /* producer counter, advanced by one per byte written */
+    size_t cons;    /* consumer counter, advanced on read and on overwrite */
+    uint8_t *cbuf;  /* storage, allocated at open and freed at finalize */
+} RingBufChardev;
+
+/* Convert an object pointer to RingBufChardev through OBJECT_CHECK(). */
+#define RINGBUF_CHARDEV(obj)                                    \
+    OBJECT_CHECK(RingBufChardev, (obj), TYPE_CHARDEV_RINGBUF)
+
+/* Return the number of bytes stored in the ring buffer and not yet read. */
+static size_t ringbuf_count(const Chardev *chr)
+{
+    const RingBufChardev *ring = RINGBUF_CHARDEV(chr);
+    size_t pending = ring->prod - ring->cons;
+
+    return pending;
+}
+
+/*
+ * Append @len bytes from @buf to the ring buffer.
+ *
+ * When the buffer is full the oldest bytes are dropped: the consumer
+ * counter is moved forward so that at most @size bytes stay readable.
+ *
+ * Returns @len, or -1 if @buf is NULL or @len is negative.
+ */
+static int ringbuf_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    RingBufChardev *ring = RINGBUF_CHARDEV(chr);
+    const uint8_t *src;
+    const uint8_t *end;
+
+    if (len < 0 || buf == NULL) {
+        return -1;
+    }
+
+    end = buf + len;
+    for (src = buf; src != end; src++) {
+        /* size is a power of two, so masking wraps the counter into cbuf */
+        size_t slot = ring->prod & (ring->size - 1);
+
+        ring->cbuf[slot] = *src;
+        ring->prod++;
+        if (ring->prod - ring->cons > ring->size) {
+            /* overran the reader: forget the oldest data */
+            ring->cons = ring->prod - ring->size;
+        }
+    }
+
+    return len;
+}
+
+/*
+ * Move up to @len bytes from the ring buffer into @buf while holding
+ * chr_write_lock.  Stops early when the buffer runs empty.
+ *
+ * Returns the number of bytes copied.
+ */
+static int ringbuf_chr_read(Chardev *chr, uint8_t *buf, int len)
+{
+    RingBufChardev *ring = RINGBUF_CHARDEV(chr);
+    int copied = 0;
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    while (copied < len && ring->cons != ring->prod) {
+        buf[copied] = ring->cbuf[ring->cons & (ring->size - 1)];
+        ring->cons++;
+        copied++;
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+
+    return copied;
+}
+
+/* Instance finalizer: release the buffer allocated by the open callback. */
+static void char_ringbuf_finalize(Object *obj)
+{
+    RingBufChardev *ring = RINGBUF_CHARDEV(obj);
+
+    g_free(ring->cbuf);
+}
+
+/*
+ * Open callback of the ring buffer chardev.
+ *
+ * Takes the buffer size from the backend options (65536 when no size is
+ * given), validates it, resets the producer/consumer counters and
+ * allocates a zero-filled buffer.
+ *
+ * On an invalid size an error is stored in @errp and no buffer is
+ * allocated.  @be_opened is not modified.
+ */
+static void qemu_chr_open_ringbuf(Chardev *chr,
+                                  ChardevBackend *backend,
+                                  bool *be_opened,
+                                  Error **errp)
+{
+    ChardevRingbuf *opts = backend->u.ringbuf.data;
+    RingBufChardev *d = RINGBUF_CHARDEV(chr);
+
+    d->size = opts->has_size ? opts->size : 65536;
+
+    /*
+     * The size must be power of 2.  Zero has to be rejected explicitly: it
+     * passes the bit test, but the read/write paths index cbuf with
+     * "counter & (size - 1)", which would be an unbounded index into an
+     * empty allocation.
+     */
+    if (d->size == 0 || (d->size & (d->size - 1))) {
+        error_setg(errp, "size of ringbuf chardev must be power of two");
+        return;
+    }
+
+    d->prod = 0;
+    d->cons = 0;
+    d->cbuf = g_malloc0(d->size);
+}
+
+/*
+ * QMP handler: write @data to the ring buffer chardev named @device.
+ *
+ * With @format == DATA_FORMAT_BASE64 (and @has_format set) @data is
+ * base64-decoded first; otherwise its bytes up to the terminating NUL are
+ * written as they are.
+ *
+ * Errors are reported through @errp when the device does not exist, is not
+ * a ring buffer, the base64 data cannot be decoded, or the write fails.
+ */
+void qmp_ringbuf_write(const char *device, const char *data,
+                       bool has_format, enum DataFormat format,
+                       Error **errp)
+{
+    Chardev *chr = qemu_chr_find(device);
+    uint8_t *decoded = NULL;
+    const uint8_t *payload;
+    gsize payload_len;
+    int ret;
+
+    if (!chr) {
+        error_setg(errp, "Device '%s' not found", device);
+        return;
+    }
+
+    if (!CHARDEV_IS_RINGBUF(chr)) {
+        error_setg(errp, "%s is not a ringbuf device", device);
+        return;
+    }
+
+    if (has_format && (format == DATA_FORMAT_BASE64)) {
+        decoded = qbase64_decode(data, -1, &payload_len, errp);
+        if (!decoded) {
+            return;
+        }
+        payload = decoded;
+    } else {
+        payload = (uint8_t *)data;
+        payload_len = strlen(data);
+    }
+
+    ret = ringbuf_chr_write(chr, payload, payload_len);
+
+    /* Only the decoded copy is ours to release; g_free(NULL) is a no-op. */
+    g_free(decoded);
+
+    if (ret < 0) {
+        error_setg(errp, "Failed to write to device %s", device);
+    }
+}
+
+/*
+ * QMP handler: read at most @size bytes from the ring buffer chardev named
+ * @device.
+ *
+ * The request is clamped to the number of bytes currently stored.  With
+ * @format == DATA_FORMAT_BASE64 (and @has_format set) the data is returned
+ * base64-encoded; otherwise the raw bytes are returned with a terminating
+ * NUL appended.
+ *
+ * Returns a newly allocated string, or NULL with @errp set when the device
+ * does not exist, is not a ring buffer, or @size is not positive.
+ */
+char *qmp_ringbuf_read(const char *device, int64_t size,
+                       bool has_format, enum DataFormat format,
+                       Error **errp)
+{
+    Chardev *chr = qemu_chr_find(device);
+    uint8_t *raw;
+    size_t available;
+    char *encoded;
+
+    if (!chr) {
+        error_setg(errp, "Device '%s' not found", device);
+        return NULL;
+    }
+
+    if (!CHARDEV_IS_RINGBUF(chr)) {
+        error_setg(errp, "%s is not a ringbuf device", device);
+        return NULL;
+    }
+
+    if (size <= 0) {
+        error_setg(errp, "size must be greater than zero");
+        return NULL;
+    }
+
+    available = ringbuf_count(chr);
+    if (size > available) {
+        size = available;
+    }
+
+    /* One spare byte for the NUL terminator of the non-base64 case. */
+    raw = g_malloc(size + 1);
+    ringbuf_chr_read(chr, raw, size);
+
+    if (!has_format || format != DATA_FORMAT_BASE64) {
+        /*
+         * FIXME should read only complete, valid UTF-8 characters up
+         * to @size bytes.  Invalid sequences should be replaced by a
+         * suitable replacement character.  Except when (and only
+         * when) ring buffer lost characters since last read, initial
+         * continuation characters should be dropped.
+         */
+        raw[size] = 0;
+        return (char *)raw;
+    }
+
+    encoded = g_base64_encode(raw, size);
+    g_free(raw);
+    return encoded;
+}
+
+/*
+ * Parse callback: translate the "size" option of @opts into a
+ * ChardevRingbuf attached to @backend.
+ *
+ * A missing or zero "size" leaves has_size unset.  @errp is not used.
+ */
+static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend,
+                                   Error **errp)
+{
+    /*
+     * Keep the full width of the parsed size: narrowing it to int would
+     * turn e.g. "4G" into 0 and silently ignore the option.
+     */
+    uint64_t val;
+    ChardevRingbuf *ringbuf;
+
+    backend->type = CHARDEV_BACKEND_KIND_RINGBUF;
+    ringbuf = backend->u.ringbuf.data = g_new0(ChardevRingbuf, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevRingbuf_base(ringbuf));
+
+    val = qemu_opt_get_size(opts, "size", 0);
+    if (val != 0) {
+        ringbuf->has_size = true;
+        ringbuf->size = val;
+    }
+}
+
+/* Class initializer: install the ring buffer chardev hooks. */
+static void char_ringbuf_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->chr_write = ringbuf_chr_write;
+    klass->open = qemu_chr_open_ringbuf;
+    klass->parse = qemu_chr_parse_ringbuf;
+}
+
+/*
+ * Type description of the ring buffer chardev: derives from TYPE_CHARDEV
+ * and uses RingBufChardev as instance structure.
+ */
+static const TypeInfo char_ringbuf_type_info = {
+    .name = TYPE_CHARDEV_RINGBUF,
+    .parent = TYPE_CHARDEV,
+    .class_init = char_ringbuf_class_init,
+    .instance_size = sizeof(RingBufChardev),
+    .instance_finalize = char_ringbuf_finalize,
+};
+
+/*
+ * Bug-compatibility: TYPE_CHARDEV_MEMORY is kept as a type that derives
+ * from the ring buffer chardev and overrides nothing.
+ */
+static const TypeInfo char_memory_type_info = {
+    .name = TYPE_CHARDEV_MEMORY,
+    .parent = TYPE_CHARDEV_RINGBUF,
+};
+
+/* Register the ringbuf and memory chardev types; passed to type_init(). */
+static void register_types(void)
+{
+    type_register_static(&char_ringbuf_type_info);
+    type_register_static(&char_memory_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-serial.c
+++ b/chardev/char-serial.c
@@ -0,0 +1,318 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "io/channel-file.h"
+#include "qapi/error.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include <sys/ioctl.h>
+#include <termios.h>
+#include "char-fd.h"
+#endif
+
+#include "char-serial.h"
+
+#ifdef _WIN32
+
+/*
+ * Windows open callback of the serial chardev: passes the device name from
+ * the backend options, together with @errp, to win_chr_init().
+ * @be_opened is not modified here.
+ */
+static void qmp_chardev_open_serial(Chardev *chr,
+                                    ChardevBackend *backend,
+                                    bool *be_opened,
+                                    Error **errp)
+{
+    ChardevHostdev *serial = backend->u.serial.data;
+
+    win_chr_init(chr, serial->device, errp);
+}
+
+#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)      \
+    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
+    || defined(__GLIBC__)
+
+/*
+ * Program the line settings of the terminal behind @fd.
+ *
+ * @speed:     requested rate; it is scaled by 10/11 and the first supported
+ *             rate that is not lower than the scaled value is selected.
+ *             Values beyond the highest supported rate select 115200.
+ * @parity:    'E' for even, 'O' for odd; anything else means no parity.
+ * @data_bits: 5, 6 or 7; anything else means 8.
+ * @stop_bits: 2 selects two stop bits, anything else one.
+ *
+ * Input translation, echo, canonical mode, signal generation and hardware
+ * flow control are switched off.  The results of tcgetattr() and
+ * tcsetattr() are not checked.
+ */
+static void tty_serial_init(int fd, int speed,
+                            int parity, int data_bits, int stop_bits)
+{
+    /*
+     * Zero-initialized so that a failing tcgetattr() (its result is not
+     * checked) cannot leave indeterminate stack contents to be modified and
+     * handed to tcsetattr() below.
+     */
+    struct termios tty = {0};
+    speed_t spd;
+
+#if 0
+    printf("tty_serial_init: speed=%d parity=%c data=%d stop=%d\n",
+           speed, parity, data_bits, stop_bits);
+#endif
+    tcgetattr(fd, &tty);
+
+#define check_speed(val) if (speed <= val) { spd = B##val; break; }
+    speed = speed * 10 / 11;
+    do {
+        check_speed(50);
+        check_speed(75);
+        check_speed(110);
+        check_speed(134);
+        check_speed(150);
+        check_speed(200);
+        check_speed(300);
+        check_speed(600);
+        check_speed(1200);
+        check_speed(1800);
+        check_speed(2400);
+        check_speed(4800);
+        check_speed(9600);
+        check_speed(19200);
+        check_speed(38400);
+        /* Non-Posix values follow. They may be unsupported on some systems. */
+        check_speed(57600);
+        check_speed(115200);
+#ifdef B230400
+        check_speed(230400);
+#endif
+#ifdef B460800
+        check_speed(460800);
+#endif
+#ifdef B500000
+        check_speed(500000);
+#endif
+#ifdef B576000
+        check_speed(576000);
+#endif
+#ifdef B921600
+        check_speed(921600);
+#endif
+#ifdef B1000000
+        check_speed(1000000);
+#endif
+#ifdef B1152000
+        check_speed(1152000);
+#endif
+#ifdef B1500000
+        check_speed(1500000);
+#endif
+#ifdef B2000000
+        check_speed(2000000);
+#endif
+#ifdef B2500000
+        check_speed(2500000);
+#endif
+#ifdef B3000000
+        check_speed(3000000);
+#endif
+#ifdef B3500000
+        check_speed(3500000);
+#endif
+#ifdef B4000000
+        check_speed(4000000);
+#endif
+        /* faster than anything in the table above */
+        spd = B115200;
+    } while (0);
+    /* the helper macro is only meaningful inside the loop above */
+#undef check_speed
+
+    cfsetispeed(&tty, spd);
+    cfsetospeed(&tty, spd);
+
+    tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+                     | INLCR | IGNCR | ICRNL | IXON);
+    tty.c_oflag |= OPOST;
+    tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN | ISIG);
+    tty.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
+    switch (data_bits) {
+    default:
+    case 8:
+        tty.c_cflag |= CS8;
+        break;
+    case 7:
+        tty.c_cflag |= CS7;
+        break;
+    case 6:
+        tty.c_cflag |= CS6;
+        break;
+    case 5:
+        tty.c_cflag |= CS5;
+        break;
+    }
+    switch (parity) {
+    default:
+    case 'N':
+        break;
+    case 'E':
+        tty.c_cflag |= PARENB;
+        break;
+    case 'O':
+        tty.c_cflag |= PARENB | PARODD;
+        break;
+    }
+    if (stop_bits == 2) {
+        tty.c_cflag |= CSTOPB;
+    }
+
+    tcsetattr(fd, TCSANOW, &tty);
+}
+
+/*
+ * chr_ioctl callback of the serial chardev, operating on the descriptor of
+ * the input channel.
+ *
+ * CHR_IOCTL_SERIAL_SET_PARAMS: @arg is a QEMUSerialSetParams; the line is
+ *     reprogrammed with tty_serial_init().
+ * CHR_IOCTL_SERIAL_SET_BREAK:  @arg is an int; a break is sent when it is
+ *     non-zero.
+ * CHR_IOCTL_SERIAL_GET_TIOCM:  @arg is an int that receives the host modem
+ *     lines translated to CHR_TIOCM_* bits.
+ * CHR_IOCTL_SERIAL_SET_TIOCM:  @arg is an int of CHR_TIOCM_* bits that are
+ *     translated to host bits and applied.
+ *
+ * Returns 0 for a handled command (results of the underlying calls are not
+ * checked) and -ENOTSUP for any other command.
+ */
+static int tty_serial_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in);
+
+    switch (cmd) {
+    case CHR_IOCTL_SERIAL_SET_PARAMS:
+        {
+            QEMUSerialSetParams *ssp = arg;
+            tty_serial_init(fioc->fd,
+                            ssp->speed, ssp->parity,
+                            ssp->data_bits, ssp->stop_bits);
+        }
+        break;
+    case CHR_IOCTL_SERIAL_SET_BREAK:
+        {
+            int enable = *(int *)arg;
+            if (enable) {
+                tcsendbreak(fioc->fd, 1);
+            }
+        }
+        break;
+    case CHR_IOCTL_SERIAL_GET_TIOCM:
+        {
+            /* stays 0 if the ioctl fails, so no line is reported as set */
+            int sarg = 0;
+            int *targ = (int *)arg;
+            ioctl(fioc->fd, TIOCMGET, &sarg);
+            *targ = 0;
+            if (sarg & TIOCM_CTS) {
+                *targ |= CHR_TIOCM_CTS;
+            }
+            if (sarg & TIOCM_CAR) {
+                *targ |= CHR_TIOCM_CAR;
+            }
+            if (sarg & TIOCM_DSR) {
+                *targ |= CHR_TIOCM_DSR;
+            }
+            if (sarg & TIOCM_RI) {
+                *targ |= CHR_TIOCM_RI;
+            }
+            if (sarg & TIOCM_DTR) {
+                *targ |= CHR_TIOCM_DTR;
+            }
+            if (sarg & TIOCM_RTS) {
+                *targ |= CHR_TIOCM_RTS;
+            }
+        }
+        break;
+    case CHR_IOCTL_SERIAL_SET_TIOCM:
+        {
+            int sarg = *(int *)arg;
+            int targ = 0;
+            ioctl(fioc->fd, TIOCMGET, &targ);
+            /*
+             * targ holds host TIOCM_* bits at this point, so the lines we
+             * are about to set must be cleared with the host constants,
+             * not with the CHR_TIOCM_* encoding used by the caller.
+             */
+            targ &= ~(TIOCM_CTS | TIOCM_CAR | TIOCM_DSR
+                     | TIOCM_RI | TIOCM_DTR | TIOCM_RTS);
+            if (sarg & CHR_TIOCM_CTS) {
+                targ |= TIOCM_CTS;
+            }
+            if (sarg & CHR_TIOCM_CAR) {
+                targ |= TIOCM_CAR;
+            }
+            if (sarg & CHR_TIOCM_DSR) {
+                targ |= TIOCM_DSR;
+            }
+            if (sarg & CHR_TIOCM_RI) {
+                targ |= TIOCM_RI;
+            }
+            if (sarg & CHR_TIOCM_DTR) {
+                targ |= TIOCM_DTR;
+            }
+            if (sarg & CHR_TIOCM_RTS) {
+                targ |= TIOCM_RTS;
+            }
+            ioctl(fioc->fd, TIOCMSET, &targ);
+        }
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+/*
+ * Open callback of the serial chardev on the platforms selected by the
+ * surrounding #elif.
+ *
+ * Opens the configured device read/write, makes the descriptor
+ * non-blocking, programs the line to 115200 baud, no parity, 8 data bits
+ * and 1 stop bit, and uses the descriptor for both directions of the fd
+ * chardev.  If the device cannot be opened the function returns right
+ * away; @errp has been handed to the open helper.  @be_opened is not
+ * modified here.
+ */
+static void qmp_chardev_open_serial(Chardev *chr,
+                                    ChardevBackend *backend,
+                                    bool *be_opened,
+                                    Error **errp)
+{
+    ChardevHostdev *cfg = backend->u.serial.data;
+    int tty_fd = qmp_chardev_open_file_source(cfg->device, O_RDWR, errp);
+
+    if (tty_fd < 0) {
+        return;
+    }
+
+    qemu_set_nonblock(tty_fd);
+    tty_serial_init(tty_fd, 115200, 'N', 8, 1);
+
+    /* same descriptor for input and output */
+    qemu_chr_open_fd(chr, tty_fd, tty_fd);
+}
+#endif /* __linux__ || __sun__ */
+
+#ifdef HAVE_CHARDEV_SERIAL
+/*
+ * Parse callback: build the ChardevHostdev for a serial chardev from
+ * @opts.  The "path" option is mandatory; without it an error is stored in
+ * @errp and @backend is left untouched.
+ */
+static void qemu_chr_parse_serial(QemuOpts *opts, ChardevBackend *backend,
+                                  Error **errp)
+{
+    const char *path = qemu_opt_get(opts, "path");
+    ChardevHostdev *hostdev;
+
+    if (!path) {
+        error_setg(errp, "chardev: serial/tty: no device path given");
+        return;
+    }
+
+    hostdev = g_new0(ChardevHostdev, 1);
+    backend->type = CHARDEV_BACKEND_KIND_SERIAL;
+    backend->u.serial.data = hostdev;
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(hostdev));
+    hostdev->device = g_strdup(path);
+}
+
+/*
+ * Class initializer: install the serial chardev hooks.  The ioctl hook is
+ * only installed on non-Windows builds.
+ */
+static void char_serial_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+#ifndef _WIN32
+    klass->chr_ioctl = tty_serial_ioctl;
+#endif
+    klass->open = qmp_chardev_open_serial;
+    klass->parse = qemu_chr_parse_serial;
+}
+
+
+/*
+ * Type description of the serial chardev.  The parent type is the Windows
+ * chardev on _WIN32 and the fd chardev everywhere else.
+ */
+static const TypeInfo char_serial_type_info = {
+    .name = TYPE_CHARDEV_SERIAL,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .class_init = char_serial_class_init,
+};
+
+/* Register the serial chardev type; passed to type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_serial_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-serial.h
+++ b/chardev/char-serial.h
@@ -0,0 +1,35 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_SERIAL_H
+#define CHAR_SERIAL_H
+
+/* Defined to 1 on the host platforms listed in the condition below. */
+#if defined(_WIN32) || defined(__linux__) || defined(__sun__)           \
+    || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) \
+    || defined(__DragonFly__) || defined(__GLIBC__)
+#define HAVE_CHARDEV_SERIAL 1
+#endif
+
+#endif
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
--- a/chardev/char-stdio.c
+++ b/chardev/char-stdio.c
@@ -0,0 +1,164 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#include "char-win-stdio.h"
+#else
+#include <termios.h>
+#include "char-fd.h"
+#endif
+
+#ifndef _WIN32
+/* init terminal so that we can grab keys */
+/* terminal attributes of fd 0 as read by tcgetattr() when stdio is opened */
+static struct termios oldtty;
+/* file status flags of fd 0 as read by fcntl(F_GETFL) when stdio is opened */
+static int old_fd0_flags;
+/* set once a stdio chardev has been opened; further opens are refused */
+static bool stdio_in_use;
+/* when false, ISIG is cleared in the terminal's local flags */
+static bool stdio_allow_signal;
+/* last echo setting requested; re-applied by the SIGCONT handler */
+static bool stdio_echo_state;
+
+/*
+ * Put back the terminal attributes and file status flags of fd 0 that were
+ * saved when the stdio chardev was opened.
+ *
+ * Does nothing unless a stdio chardev was actually opened: this function
+ * also runs from the instance finalizer of objects whose open callback
+ * bailed out early, in which case nothing has been saved and the
+ * zero-initialized statics must not be applied to the terminal.
+ */
+static void term_exit(void)
+{
+    if (!stdio_in_use) {
+        return;
+    }
+
+    tcsetattr(0, TCSANOW, &oldtty);
+    fcntl(0, F_SETFL, old_fd0_flags);
+}
+
+/*
+ * chr_set_echo callback: apply terminal settings to fd 0, starting from
+ * the attributes saved in oldtty.
+ *
+ * With @echo false, input translation, echo, canonical mode and extended
+ * input processing are switched off, 8-bit characters without parity are
+ * selected and reads return after a single byte.  With @echo true the
+ * saved attributes are used unchanged.  In both cases ISIG is cleared
+ * unless signals are allowed.  The requested state is remembered in
+ * stdio_echo_state.
+ *
+ * @chr is not used; the SIGCONT handler passes NULL.
+ */
+static void qemu_chr_set_echo_stdio(Chardev *chr, bool echo)
+{
+    struct termios attrs = oldtty;
+
+    stdio_echo_state = echo;
+
+    if (!echo) {
+        attrs.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+                           | INLCR | IGNCR | ICRNL | IXON);
+        attrs.c_oflag |= OPOST;
+        attrs.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+        attrs.c_cflag &= ~(CSIZE | PARENB);
+        attrs.c_cflag |= CS8;
+        attrs.c_cc[VMIN] = 1;
+        attrs.c_cc[VTIME] = 0;
+    }
+
+    if (!stdio_allow_signal) {
+        attrs.c_lflag &= ~ISIG;
+    }
+
+    tcsetattr(0, TCSANOW, &attrs);
+}
+
+/*
+ * Signal handler installed for SIGCONT by qemu_chr_open_stdio().
+ * @sig is not used.
+ */
+static void term_stdio_handler(int sig)
+{
+    /* restore echo after resume from suspend. */
+    qemu_chr_set_echo_stdio(NULL, stdio_echo_state);
+}
+
+/*
+ * Open callback of the stdio chardev (non-Windows).
+ *
+ * Fails with an error in @errp when QEMU is daemonized or when another
+ * stdio chardev is already in use.  Otherwise it saves the current state
+ * of fd 0, makes it non-blocking, arranges for the state to be restored at
+ * exit, installs the SIGCONT handler, attaches fd 0/fd 1 to the fd chardev
+ * and applies the no-echo terminal settings.
+ * @be_opened is not modified here.
+ */
+static void qemu_chr_open_stdio(Chardev *chr,
+                                ChardevBackend *backend,
+                                bool *be_opened,
+                                Error **errp)
+{
+    ChardevStdio *opts = backend->u.stdio.data;
+    struct sigaction act;
+
+    if (is_daemonized()) {
+        error_setg(errp, "cannot use stdio with -daemonize");
+        return;
+    }
+
+    if (stdio_in_use) {
+        error_setg(errp, "cannot use stdio by multiple character devices");
+        return;
+    }
+
+    stdio_in_use = true;
+    /* capture the original state before fd 0 is modified below */
+    old_fd0_flags = fcntl(0, F_GETFL);
+    tcgetattr(0, &oldtty);
+    qemu_set_nonblock(0);
+    atexit(term_exit);
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = term_stdio_handler;
+    sigaction(SIGCONT, &act, NULL);
+
+    /* fd 0 for input, fd 1 for output */
+    qemu_chr_open_fd(chr, 0, 1);
+
+    if (opts->has_signal) {
+        stdio_allow_signal = opts->signal;
+    }
+    qemu_chr_set_echo_stdio(chr, false);
+}
+#endif
+
+/*
+ * Parse callback: build the ChardevStdio for a stdio chardev from @opts.
+ * The "signal" option defaults to true and is always marked as present.
+ * @errp is not used.
+ */
+static void qemu_chr_parse_stdio(QemuOpts *opts, ChardevBackend *backend,
+                                 Error **errp)
+{
+    ChardevStdio *cfg = g_new0(ChardevStdio, 1);
+
+    backend->type = CHARDEV_BACKEND_KIND_STDIO;
+    backend->u.stdio.data = cfg;
+    qemu_chr_parse_common(opts, qapi_ChardevStdio_base(cfg));
+
+    cfg->signal = qemu_opt_get_bool(opts, "signal", true);
+    cfg->has_signal = true;
+}
+
+/*
+ * Class initializer: install the stdio chardev hooks.  The open and
+ * set-echo hooks are only installed on non-Windows builds.
+ */
+static void char_stdio_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+#ifndef _WIN32
+    klass->chr_set_echo = qemu_chr_set_echo_stdio;
+    klass->open = qemu_chr_open_stdio;
+#endif
+    klass->parse = qemu_chr_parse_stdio;
+}
+
+/*
+ * Instance finalizer: on non-Windows builds call term_exit(); on Windows
+ * this does nothing.  @obj is not used.
+ */
+static void char_stdio_finalize(Object *obj)
+{
+#ifndef _WIN32
+    term_exit();
+#endif
+}
+
+/*
+ * Type description of the stdio chardev.  The parent type is the Windows
+ * stdio chardev on _WIN32 and the fd chardev everywhere else.
+ */
+static const TypeInfo char_stdio_type_info = {
+    .name = TYPE_CHARDEV_STDIO,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN_STDIO,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .instance_finalize = char_stdio_finalize,
+    .class_init = char_stdio_class_init,
+};
+
+/* Register the stdio chardev type; passed to type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_stdio_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-udp.c
+++ b/chardev/char-udp.c
@@ -0,0 +1,233 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "io/channel-socket.h"
+#include "qapi/error.h"
+
+#include "char-io.h"
+
+/***********************************************************/
+/* UDP Net console */
+
+/* Instance state of a UDP chardev. */
+typedef struct {
+    Chardev parent;
+    QIOChannel *ioc;                /* datagram channel, set by the open callback */
+    uint8_t buf[CHR_READ_BUF_LEN];  /* data of the most recent channel read */
+    int bufcnt;                     /* number of valid bytes in buf */
+    int bufptr;                     /* index of the next byte of buf to deliver */
+    int max_size;                   /* last result of qemu_chr_be_can_write() */
+} UdpChardev;
+
+/* Convert an object pointer to UdpChardev through OBJECT_CHECK(). */
+#define UDP_CHARDEV(obj) OBJECT_CHECK(UdpChardev, (obj), TYPE_CHARDEV_UDP)
+
+/*
+ * chr_write callback: send @len bytes from @buf over the UDP channel and
+ * return the result of qio_channel_write().  No error object is requested.
+ *
+ * Called with chr_write_lock held.
+ */
+static int udp_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    UdpChardev *udp = UDP_CHARDEV(chr);
+    const char *payload = (const char *)buf;
+
+    return qio_channel_write(udp->ioc, payload, len, NULL);
+}
+
+/*
+ * Poll callback for the input watch: refresh max_size with the amount the
+ * front end can accept and return it.
+ *
+ * Bytes still pending in the receive buffer are delivered first, one at a
+ * time, for as long as the front end accepts data.
+ */
+static int udp_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    UdpChardev *udp = UDP_CHARDEV(opaque);
+
+    for (udp->max_size = qemu_chr_be_can_write(chr);
+         udp->max_size > 0 && udp->bufptr < udp->bufcnt;
+         udp->max_size = qemu_chr_be_can_write(chr)) {
+        /* stray byte left over from an earlier read */
+        qemu_chr_be_write(chr, &udp->buf[udp->bufptr], 1);
+        udp->bufptr++;
+    }
+
+    return udp->max_size;
+}
+
+/*
+ * Read callback for the input watch.
+ *
+ * Does nothing (and keeps the watch) while the front end cannot accept
+ * data.  Otherwise one read is issued on the channel into the receive
+ * buffer; on a result <= 0 the input watch is removed and FALSE is
+ * returned.  Received bytes are passed to the front end one at a time
+ * until it stops accepting data; whatever is left stays in the buffer.
+ */
+static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    UdpChardev *udp = UDP_CHARDEV(opaque);
+    ssize_t nread;
+
+    if (udp->max_size == 0) {
+        return TRUE;
+    }
+
+    nread = qio_channel_read(udp->ioc, (char *)udp->buf,
+                             sizeof(udp->buf), NULL);
+    if (nread <= 0) {
+        remove_fd_in_watch(chr);
+        return FALSE;
+    }
+
+    udp->bufcnt = nread;
+    udp->bufptr = 0;
+
+    while (udp->max_size > 0 && udp->bufptr < udp->bufcnt) {
+        qemu_chr_be_write(chr, &udp->buf[udp->bufptr], 1);
+        udp->bufptr++;
+        udp->max_size = qemu_chr_be_can_write(chr);
+    }
+
+    return TRUE;
+}
+
+/*
+ * chr_update_read_handler callback: drop the current input watch and, if a
+ * channel exists, install a new one on @context using the UDP poll and
+ * read callbacks.
+ */
+static void udp_chr_update_read_handler(Chardev *chr,
+                                        GMainContext *context)
+{
+    UdpChardev *udp = UDP_CHARDEV(chr);
+
+    remove_fd_in_watch(chr);
+    if (!udp->ioc) {
+        return;
+    }
+
+    chr->fd_in_tag = io_add_watch_poll(chr, udp->ioc,
+                                       udp_chr_read_poll,
+                                       udp_chr_read, chr,
+                                       context);
+}
+
+/*
+ * Instance finalizer: remove the input watch, drop the reference on the
+ * channel if there is one, and emit CHR_EVENT_CLOSED.
+ */
+static void char_udp_finalize(Object *obj)
+{
+    UdpChardev *udp = UDP_CHARDEV(obj);
+    Chardev *chr = CHARDEV(obj);
+
+    remove_fd_in_watch(chr);
+
+    if (udp->ioc != NULL) {
+        object_unref(OBJECT(udp->ioc));
+    }
+
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+/*
+ * Parse callback: build the ChardevUdp for a UDP chardev from @opts.
+ *
+ * Remote address: "host" (default "localhost") and "port" (mandatory; a
+ * missing or empty port stores an error in @errp), plus the optional
+ * "ipv4"/"ipv6" switches.
+ *
+ * Local address: only filled in when "localaddr" or "localport" is given
+ * and non-empty; the one that is missing defaults to "" and "0"
+ * respectively.
+ */
+static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,
+                               Error **errp)
+{
+    const char *host = qemu_opt_get(opts, "host");
+    const char *port = qemu_opt_get(opts, "port");
+    const char *localaddr = qemu_opt_get(opts, "localaddr");
+    const char *localport = qemu_opt_get(opts, "localport");
+    bool have_localport = localport && localport[0] != '\0';
+    bool have_localaddr = localaddr && localaddr[0] != '\0';
+    InetSocketAddress *inet;
+    SocketAddress *sockaddr;
+    ChardevUdp *udp;
+
+    backend->type = CHARDEV_BACKEND_KIND_UDP;
+
+    if (!host || host[0] == '\0') {
+        host = "localhost";
+    }
+    if (!port || port[0] == '\0') {
+        error_setg(errp, "chardev: udp: remote port not specified");
+        return;
+    }
+    if (!have_localport) {
+        localport = "0";
+    }
+    if (!have_localaddr) {
+        localaddr = "";
+    }
+
+    udp = g_new0(ChardevUdp, 1);
+    backend->u.udp.data = udp;
+    qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp));
+
+    /* remote end */
+    inet = g_new(InetSocketAddress, 1);
+    *inet = (InetSocketAddress) {
+        .host = g_strdup(host),
+        .port = g_strdup(port),
+        .has_ipv4 = qemu_opt_get(opts, "ipv4"),
+        .ipv4 = qemu_opt_get_bool(opts, "ipv4", 0),
+        .has_ipv6 = qemu_opt_get(opts, "ipv6"),
+        .ipv6 = qemu_opt_get_bool(opts, "ipv6", 0),
+    };
+    sockaddr = g_new0(SocketAddress, 1);
+    sockaddr->type = SOCKET_ADDRESS_KIND_INET;
+    sockaddr->u.inet.data = inet;
+    udp->remote = sockaddr;
+
+    if (!have_localport && !have_localaddr) {
+        return;
+    }
+
+    /* local end, only when the user asked for one */
+    inet = g_new(InetSocketAddress, 1);
+    *inet = (InetSocketAddress) {
+        .host = g_strdup(localaddr),
+        .port = g_strdup(localport),
+    };
+    sockaddr = g_new0(SocketAddress, 1);
+    sockaddr->type = SOCKET_ADDRESS_KIND_INET;
+    sockaddr->u.inet.data = inet;
+    udp->has_local = true;
+    udp->local = sockaddr;
+}
+
+/*
+ * Open callback of the UDP chardev.
+ *
+ * Creates a socket channel and sets it up synchronously as a datagram
+ * socket using the local and remote addresses from the backend options.
+ * On failure the channel is released and the function returns; @errp has
+ * been handed to the setup call.  On success the channel is named after
+ * the chardev label, stored in the UdpChardev, and *@be_opened is set to
+ * false.
+ */
+static void qmp_chardev_open_udp(Chardev *chr,
+                                 ChardevBackend *backend,
+                                 bool *be_opened,
+                                 Error **errp)
+{
+    ChardevUdp *cfg = backend->u.udp.data;
+    QIOChannelSocket *sock = qio_channel_socket_new();
+    char *ioc_name;
+    UdpChardev *udp = UDP_CHARDEV(chr);
+    int rc;
+
+    rc = qio_channel_socket_dgram_sync(sock, cfg->local, cfg->remote, errp);
+    if (rc < 0) {
+        object_unref(OBJECT(sock));
+        return;
+    }
+
+    ioc_name = g_strdup_printf("chardev-udp-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(sock), ioc_name);
+    g_free(ioc_name);
+
+    udp->ioc = QIO_CHANNEL(sock);
+    /* be isn't opened until we get a connection */
+    *be_opened = false;
+}
+
+/* Class initializer: install the UDP chardev hooks. */
+static void char_udp_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->chr_update_read_handler = udp_chr_update_read_handler;
+    klass->chr_write = udp_chr_write;
+    klass->open = qmp_chardev_open_udp;
+    klass->parse = qemu_chr_parse_udp;
+}
+
+/*
+ * Type description of the UDP chardev: derives from TYPE_CHARDEV and uses
+ * UdpChardev as instance structure.
+ */
+static const TypeInfo char_udp_type_info = {
+    .name = TYPE_CHARDEV_UDP,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(UdpChardev),
+    .instance_finalize = char_udp_finalize,
+    .class_init = char_udp_class_init,
+};
+
+/* Register the UDP chardev type; passed to type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_udp_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-win-stdio.c
+++ b/chardev/char-win-stdio.c
@@ -0,0 +1,266 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "char-win.h"
+#include "char-win-stdio.h"
+
+/*
+ * State of the Windows stdio chardev.
+ *
+ * When stdin is a console, hStdIn itself is registered as a main-loop wait
+ * object.  Otherwise a helper thread reads stdin one byte at a time and hands
+ * each byte to the main loop through win_stdio_buf and the two events.
+ */
+typedef struct {
+    Chardev parent;
+    /* standard input handle obtained from GetStdHandle(STD_INPUT_HANDLE) */
+    HANDLE  hStdIn;
+    /* set by the reader thread once win_stdio_buf holds a new byte */
+    HANDLE  hInputReadyEvent;
+    /* set by the main loop once it is done with win_stdio_buf */
+    HANDLE  hInputDoneEvent;
+    /* reader thread, only created when stdin is not a console */
+    HANDLE  hInputThread;
+    /* the single byte passed from the reader thread to the main loop */
+    uint8_t win_stdio_buf;
+} WinStdioChardev;
+
+/* Checked cast from a QOM object to WinStdioChardev. */
+#define WIN_STDIO_CHARDEV(obj)                                          \
+    OBJECT_CHECK(WinStdioChardev, (obj), TYPE_CHARDEV_WIN_STDIO)
+
+/*
+ * Wait-object callback used when stdin is a console.
+ *
+ * Reads up to four pending console input records and forwards the character
+ * of every key-down event carrying a non-zero ASCII code to the front end,
+ * once per repeat count, as long as the front end can accept data.  If the
+ * console read fails, the wait object is removed to avoid an error storm.
+ */
+static void win_stdio_wait_func(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(opaque);
+    INPUT_RECORD records[4];
+    DWORD nrecords;
+    DWORD idx;
+
+    if (!ReadConsoleInput(stdio->hStdIn, records, ARRAY_SIZE(records),
+                          &nrecords)) {
+        /* Avoid error storm */
+        qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
+        return;
+    }
+
+    for (idx = 0; idx < nrecords; idx++) {
+        KEY_EVENT_RECORD *key = &records[idx].Event.KeyEvent;
+        int rep;
+
+        /* only key presses are of interest */
+        if (records[idx].EventType != KEY_EVENT || !key->bKeyDown) {
+            continue;
+        }
+        /* keys without an ASCII translation are dropped */
+        if (key->uChar.AsciiChar == 0) {
+            continue;
+        }
+
+        for (rep = 0; rep < key->wRepeatCount; rep++) {
+            if (qemu_chr_be_can_write(chr)) {
+                uint8_t c = key->uChar.AsciiChar;
+                qemu_chr_be_write(chr, &c, 1);
+            }
+        }
+    }
+}
+
+/*
+ * Reader thread used when stdin is not a console.
+ *
+ * Blocks reading stdin one byte at a time into win_stdio_buf, signals
+ * hInputReadyEvent for each byte and waits on hInputDoneEvent before reading
+ * the next one.  Carriage returns are skipped.  The loop ends on any read,
+ * signal or wait failure, after which the ready event is removed from the
+ * wait objects.
+ */
+static DWORD WINAPI win_stdio_thread(LPVOID param)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(param);
+    DWORD nread;
+
+    for (;;) {
+        /* Wait for one byte; a failed read terminates the thread */
+        if (!ReadFile(stdio->hStdIn, &stdio->win_stdio_buf, 1,
+                      &nread, NULL)) {
+            break;
+        }
+
+        /*
+         * Nothing read: try again.  Some terminal emulators send \r\n for
+         * Enter; only the \n is passed on.
+         */
+        if (nread == 0 || stdio->win_stdio_buf == '\r') {
+            continue;
+        }
+
+        /* Signal the main thread and wait until the byte was eaten */
+        if (!SetEvent(stdio->hInputReadyEvent) ||
+            WaitForSingleObject(stdio->hInputDoneEvent, INFINITE)
+            != WAIT_OBJECT_0) {
+            break;
+        }
+    }
+
+    qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
+    return 0;
+}
+
+/*
+ * Wait-object callback for hInputReadyEvent: passes the byte stored by the
+ * reader thread to the front end when it can accept data, then signals
+ * hInputDoneEvent (whether or not the byte was delivered).
+ */
+static void win_stdio_thread_wait_func(void *opaque)
+{
+    WinStdioChardev *s = WIN_STDIO_CHARDEV(opaque);
+    Chardev *chr = CHARDEV(opaque);
+    int room = qemu_chr_be_can_write(chr);
+
+    if (room) {
+        qemu_chr_be_write(chr, &s->win_stdio_buf, 1);
+    }
+
+    /* let the reader thread fetch the next byte */
+    SetEvent(s->hInputDoneEvent);
+}
+
+/*
+ * Turn console echo on stdin on or off by setting or clearing
+ * ENABLE_ECHO_INPUT in the current console mode.  If the mode cannot be
+ * queried, the update is applied on top of an all-zero mode.
+ */
+static void qemu_chr_set_echo_win_stdio(Chardev *chr, bool echo)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
+    DWORD mode = 0;
+    DWORD new_mode;
+
+    GetConsoleMode(stdio->hStdIn, &mode);
+
+    new_mode = echo ? (mode | ENABLE_ECHO_INPUT)
+                    : (mode & ~ENABLE_ECHO_INPUT);
+    SetConsoleMode(stdio->hStdIn, new_mode);
+}
+
+/*
+ * ChardevClass::open implementation for the Windows stdio backend.
+ *
+ * If stdin is a console, its handle is registered directly as a wait object
+ * served by win_stdio_wait_func().  Otherwise two auto-reset events and a
+ * reader thread (win_stdio_thread) are created, and the "ready" event is
+ * registered as a wait object served by win_stdio_thread_wait_func().
+ * Finally the console mode is adjusted and echo is switched off.
+ *
+ * On failure @errp is set and everything acquired so far is released; the
+ * released handles are reset to NULL so that later cleanup of the object
+ * does not close them a second time.
+ */
+static void qemu_chr_open_stdio(Chardev *chr,
+                                ChardevBackend *backend,
+                                bool *be_opened,
+                                Error **errp)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
+    /*
+     * Start from zero: GetConsoleMode() leaves this untouched when stdin is
+     * not a console, and mode bits are OR-ed into it further down.
+     */
+    DWORD              dwMode = 0;
+    int                is_console = 0;
+
+    stdio->hStdIn = GetStdHandle(STD_INPUT_HANDLE);
+    if (stdio->hStdIn == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "cannot open stdio: invalid handle");
+        return;
+    }
+
+    is_console = GetConsoleMode(stdio->hStdIn, &dwMode) != 0;
+
+    if (is_console) {
+        if (qemu_add_wait_object(stdio->hStdIn,
+                                 win_stdio_wait_func, chr)) {
+            error_setg(errp, "qemu_add_wait_object: failed");
+            goto err1;
+        }
+    } else {
+        DWORD   dwId;
+
+        stdio->hInputReadyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+        stdio->hInputDoneEvent  = CreateEvent(NULL, FALSE, FALSE, NULL);
+        /* CreateEvent() reports failure with NULL, not INVALID_HANDLE_VALUE */
+        if (stdio->hInputReadyEvent == NULL
+            || stdio->hInputDoneEvent == NULL) {
+            error_setg(errp, "cannot create event");
+            goto err2;
+        }
+        if (qemu_add_wait_object(stdio->hInputReadyEvent,
+                                 win_stdio_thread_wait_func, chr)) {
+            error_setg(errp, "qemu_add_wait_object: failed");
+            goto err2;
+        }
+        stdio->hInputThread     = CreateThread(NULL, 0, win_stdio_thread,
+                                               chr, 0, &dwId);
+
+        /* CreateThread() also returns NULL on failure */
+        if (stdio->hInputThread == NULL) {
+            error_setg(errp, "cannot create stdio thread");
+            goto err3;
+        }
+    }
+
+    dwMode |= ENABLE_LINE_INPUT;
+
+    if (is_console) {
+        /* set the terminal in raw mode */
+        /* ENABLE_QUICK_EDIT_MODE | ENABLE_EXTENDED_FLAGS */
+        dwMode |= ENABLE_PROCESSED_INPUT;
+    }
+
+    SetConsoleMode(stdio->hStdIn, dwMode);
+
+    qemu_chr_set_echo_win_stdio(chr, false);
+
+    return;
+
+err3:
+    qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
+err2:
+    /* Either event may be missing here; forget each handle once closed */
+    if (stdio->hInputReadyEvent != NULL) {
+        CloseHandle(stdio->hInputReadyEvent);
+        stdio->hInputReadyEvent = NULL;
+    }
+    if (stdio->hInputDoneEvent != NULL) {
+        CloseHandle(stdio->hInputDoneEvent);
+        stdio->hInputDoneEvent = NULL;
+    }
+err1:
+    qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
+}
+
+/*
+ * Instance finalizer of the Windows stdio chardev.
+ *
+ * Handles that were never created are skipped: both NULL and
+ * INVALID_HANDLE_VALUE are treated as "no handle" (NOTE(review): assumes
+ * the instance memory starts out zeroed, so unused handles are NULL).
+ *
+ * Order matters: the reader thread is stopped before the events it waits on
+ * are closed, and the wait objects are unregistered before their handles go
+ * away so the main loop cannot call back into this object.
+ */
+static void char_win_stdio_finalize(Object *obj)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(obj);
+
+    if (stdio->hInputThread != NULL
+        && stdio->hInputThread != INVALID_HANDLE_VALUE) {
+        TerminateThread(stdio->hInputThread, 0);
+        /* terminating a thread does not release its handle */
+        CloseHandle(stdio->hInputThread);
+    }
+    if (stdio->hStdIn != NULL
+        && stdio->hStdIn != INVALID_HANDLE_VALUE) {
+        /* registered in console mode; stdin itself is not closed */
+        qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
+    }
+    if (stdio->hInputReadyEvent != NULL
+        && stdio->hInputReadyEvent != INVALID_HANDLE_VALUE) {
+        qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
+        CloseHandle(stdio->hInputReadyEvent);
+    }
+    if (stdio->hInputDoneEvent != NULL
+        && stdio->hInputDoneEvent != INVALID_HANDLE_VALUE) {
+        CloseHandle(stdio->hInputDoneEvent);
+    }
+}
+
+/*
+ * Write @len bytes from @buf to the process' standard output, retrying
+ * after partial writes.  Returns the number of bytes actually written,
+ * which is less than @len if WriteFile() fails part-way through.
+ */
+static int win_stdio_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+    DWORD written;
+    int remaining = len;
+
+    while (remaining > 0 &&
+           WriteFile(out, buf, remaining, &written, NULL)) {
+        buf += written;
+        remaining -= written;
+    }
+
+    return len - remaining;
+}
+
+/* Install the Windows stdio backend's callbacks into its ChardevClass. */
+static void char_win_stdio_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->chr_set_echo = qemu_chr_set_echo_win_stdio;
+    klass->chr_write = win_stdio_write;
+    klass->open = qemu_chr_open_stdio;
+}
+
+/* Type description of the Windows stdio chardev (declared abstract). */
+static const TypeInfo char_win_stdio_type_info = {
+    .parent = TYPE_CHARDEV,
+    .name = TYPE_CHARDEV_WIN_STDIO,
+    .abstract = true,
+    .class_init = char_win_stdio_class_init,
+    .instance_size = sizeof(WinStdioChardev),
+    .instance_finalize = char_win_stdio_finalize,
+};
+
+/* Register the type; hooked up through type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_win_stdio_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-win-stdio.h
+++ b/chardev/char-win-stdio.h
@@ -0,0 +1,29 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_WIN_STDIO_H
+#define CHAR_WIN_STDIO_H
+
+/* QOM type name under which the Windows stdio chardev is registered. */
+#define TYPE_CHARDEV_WIN_STDIO "chardev-win-stdio"
+
+#endif /* CHAR_WIN_STDIO_H */
--- a/chardev/char-win.c
+++ b/chardev/char-win.c
@@ -0,0 +1,265 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "char-win.h"
+
+/*
+ * Read the pending input (s->len bytes, capped at the size of the local
+ * buffer) from the device with an overlapped ReadFile(), waiting for
+ * completion if the read is still pending, and pass whatever was read to
+ * the front end.
+ *
+ * The byte count starts at zero so that a failed read forwards nothing
+ * instead of an indeterminate amount of stack data.
+ */
+static void win_chr_readfile(Chardev *chr)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+
+    int ret, err;
+    uint8_t buf[CHR_READ_BUF_LEN];
+    DWORD size = 0;
+    DWORD len = s->len;
+
+    /*
+     * Never ask for more than the buffer can hold; the remainder stays
+     * queued, and the poll callbacks re-query the queue length each time.
+     */
+    if (len > sizeof(buf)) {
+        len = sizeof(buf);
+    }
+
+    ZeroMemory(&s->orecv, sizeof(s->orecv));
+    s->orecv.hEvent = s->hrecv;
+    ret = ReadFile(s->hcom, buf, len, &size, &s->orecv);
+    if (!ret) {
+        err = GetLastError();
+        if (err == ERROR_IO_PENDING) {
+            ret = GetOverlappedResult(s->hcom, &s->orecv, &size, TRUE);
+        }
+    }
+
+    if (size > 0) {
+        qemu_chr_be_write(chr, buf, size);
+    }
+}
+
+/*
+ * Clamp the pending length to what the front end accepts (s->max_size) and,
+ * if anything is left to read, fetch it with win_chr_readfile().
+ */
+static void win_chr_read(Chardev *chr)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+
+    if (s->len > s->max_size) {
+        s->len = s->max_size;
+    }
+
+    if (s->len != 0) {
+        win_chr_readfile(chr);
+    }
+}
+
+/*
+ * Refresh s->max_size with the number of bytes the front end can currently
+ * accept and return that number.
+ */
+static int win_chr_read_poll(Chardev *chr)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+    int room = qemu_chr_be_can_write(chr);
+
+    s->max_size = room;
+    return s->max_size;
+}
+
+/*
+ * Polling callback for a communications device: if the input queue holds
+ * data, read and forward it.
+ *
+ * Returns 1 when input was pending and a read was attempted, 0 when the
+ * queue is empty or its state could not be queried.
+ */
+static int win_chr_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    WinChardev *s = WIN_CHARDEV(opaque);
+    COMSTAT status;
+    DWORD comerr;
+
+    /* On failure 'status' is not filled in, so it must not be inspected */
+    if (!ClearCommError(s->hcom, &comerr, &status)) {
+        return 0;
+    }
+    if (status.cbInQue > 0) {
+        s->len = status.cbInQue;
+        win_chr_read_poll(chr);
+        win_chr_read(chr);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Open @filename as an overlapped communications device for @chr.
+ *
+ * Creates the send/receive events, opens the device, applies the comm
+ * configuration (default configuration passed through CommConfigDialog()),
+ * sets the event mask and timeouts, clears any pending comm error and
+ * finally registers win_chr_poll() as polling callback.
+ *
+ * Returns 0 on success, -1 with @errp set on failure.  Nothing is released
+ * here on failure; handles created so far stay in the WinChardev (hcom is
+ * reset to NULL when the open itself failed).
+ */
+int win_chr_init(Chardev *chr, const char *filename, Error **errp)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+    COMMTIMEOUTS cto = { 0, 0, 0, 0, 0};
+    COMMCONFIG comcfg;
+    COMSTAT comstat;
+    DWORD size;
+    DWORD err;
+
+    /* manual-reset events used for overlapped writes and reads */
+    s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (s->hsend == NULL) {
+        error_setg(errp, "Failed CreateEvent");
+        return -1;
+    }
+    s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (s->hrecv == NULL) {
+        error_setg(errp, "Failed CreateEvent");
+        return -1;
+    }
+
+    s->hcom = CreateFile(filename, GENERIC_READ | GENERIC_WRITE, 0, NULL,
+                      OPEN_EXISTING, FILE_FLAG_OVERLAPPED, 0);
+    if (s->hcom == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "Failed CreateFile (%lu)", GetLastError());
+        s->hcom = NULL;
+        return -1;
+    }
+
+    if (!SetupComm(s->hcom, NRECVBUF, NSENDBUF)) {
+        error_setg(errp, "Failed SetupComm");
+        return -1;
+    }
+
+    /* start from the device defaults, then let the dialog adjust them */
+    ZeroMemory(&comcfg, sizeof(COMMCONFIG));
+    size = sizeof(COMMCONFIG);
+    GetDefaultCommConfig(filename, &comcfg, &size);
+    comcfg.dcb.DCBlength = sizeof(DCB);
+    CommConfigDialog(filename, NULL, &comcfg);
+
+    if (!SetCommState(s->hcom, &comcfg.dcb)) {
+        error_setg(errp, "Failed SetCommState");
+        return -1;
+    }
+
+    if (!SetCommMask(s->hcom, EV_ERR)) {
+        error_setg(errp, "Failed SetCommMask");
+        return -1;
+    }
+
+    cto.ReadIntervalTimeout = MAXDWORD;
+    if (!SetCommTimeouts(s->hcom, &cto)) {
+        error_setg(errp, "Failed SetCommTimeouts");
+        return -1;
+    }
+
+    if (!ClearCommError(s->hcom, &err, &comstat)) {
+        error_setg(errp, "Failed ClearCommError");
+        return -1;
+    }
+
+    qemu_add_polling_cb(win_chr_poll, chr);
+    return 0;
+}
+
+/*
+ * Polling callback for a pipe: if the pipe holds unread data, read and
+ * forward it.
+ *
+ * Returns 1 when data was pending and a read was attempted, 0 when the pipe
+ * is empty or could not be queried.
+ */
+int win_chr_pipe_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    WinChardev *s = WIN_CHARDEV(opaque);
+    DWORD size = 0;
+
+    /* On failure 'size' is not meaningful, so treat the pipe as empty */
+    if (!PeekNamedPipe(s->hcom, NULL, 0, NULL, &size, NULL)) {
+        return 0;
+    }
+    if (size > 0) {
+        s->len = size;
+        win_chr_read_poll(chr);
+        win_chr_read(chr);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Write @len1 bytes from @buf to the device, looping over partial writes.
+ * Uses overlapped I/O through s->osend when a send event exists, a plain
+ * WriteFile() otherwise.  Returns the number of bytes written, which is
+ * short if a write fails for any reason other than a pending overlapped
+ * operation that subsequently completes.
+ *
+ * Called with chr_write_lock held.
+ */
+static int win_chr_write(Chardev *chr, const uint8_t *buf, int len1)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+    DWORD remaining = len1;
+    DWORD written;
+    DWORD ok;
+
+    ZeroMemory(&s->osend, sizeof(s->osend));
+    s->osend.hEvent = s->hsend;
+
+    while (remaining > 0) {
+        OVERLAPPED *ov = s->hsend ? &s->osend : NULL;
+
+        ok = WriteFile(s->hcom, buf, remaining, &written, ov);
+        if (!ok) {
+            /* only a pending overlapped write can still succeed */
+            if (GetLastError() != ERROR_IO_PENDING ||
+                !GetOverlappedResult(s->hcom, &s->osend, &written, TRUE)) {
+                break;
+            }
+        }
+
+        buf += written;
+        remaining -= written;
+    }
+
+    return len1 - remaining;
+}
+
+/*
+ * Instance finalizer of the generic Windows chardev.
+ *
+ * Does nothing when skip_free is set.  Otherwise closes the send/receive
+ * events and the device handle if present, removes the polling callback
+ * matching the device kind (pipe or comm device) and emits
+ * CHR_EVENT_CLOSED.
+ */
+static void char_win_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    WinChardev *s = WIN_CHARDEV(chr);
+    HANDLE *slots[] = { &s->hsend, &s->hrecv, &s->hcom };
+    size_t k;
+
+    if (s->skip_free) {
+        return;
+    }
+
+    for (k = 0; k < sizeof(slots) / sizeof(slots[0]); k++) {
+        if (*slots[k]) {
+            CloseHandle(*slots[k]);
+        }
+    }
+
+    qemu_del_polling_cb(s->fpipe ? win_chr_pipe_poll : win_chr_poll, chr);
+
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+/*
+ * Attach an already opened output handle to @chr.  The chardev is flagged
+ * skip_free, so char_win_finalize() leaves the handle alone.
+ */
+void qemu_chr_open_win_file(Chardev *chr, HANDLE fd_out)
+{
+    WinChardev *win = WIN_CHARDEV(chr);
+
+    win->hcom = fd_out;
+    win->skip_free = true;
+}
+
+/* Install the generic Windows write callback into the ChardevClass. */
+static void char_win_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *klass = CHARDEV_CLASS(oc);
+
+    klass->chr_write = win_chr_write;
+}
+
+/* Abstract base type of the Windows chardevs built on WinChardev. */
+static const TypeInfo char_win_type_info = {
+    .parent = TYPE_CHARDEV,
+    .name = TYPE_CHARDEV_WIN,
+    .abstract = true,
+    .class_init = char_win_class_init,
+    .instance_size = sizeof(WinChardev),
+    .instance_finalize = char_win_finalize,
+};
+
+/* Register the type; hooked up through type_init() below. */
+static void register_types(void)
+{
+    type_register_static(&char_win_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-win.h
+++ b/chardev/char-win.h
@@ -0,0 +1,53 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_WIN_H
+#define CHAR_WIN_H
+
+#include "sysemu/char.h"
+
+/* State shared by the Windows chardevs that are built on a HANDLE. */
+typedef struct {
+    Chardev parent;
+    /* bytes the front end can accept, refreshed before each read */
+    int max_size;
+    /* device handle, plus the events used for overlapped reads and writes */
+    HANDLE hcom, hrecv, hsend;
+    /* overlapped state for reads */
+    OVERLAPPED orecv;
+    /* selects which polling callback is removed at finalize time:
+     * win_chr_pipe_poll when set, the comm-device poll otherwise */
+    BOOL fpipe;
+    /* number of input bytes found pending by the last poll */
+    DWORD len;
+
+    /* Protected by the Chardev chr_write_lock.  */
+    OVERLAPPED osend;
+    /* FIXME: file/console do not finalize */
+    bool skip_free;
+} WinChardev;
+
+/* Buffer sizes passed to SetupComm() by win_chr_init() */
+#define NSENDBUF 2048
+#define NRECVBUF 2048
+
+#define TYPE_CHARDEV_WIN "chardev-win"
+#define WIN_CHARDEV(obj) OBJECT_CHECK(WinChardev, (obj), TYPE_CHARDEV_WIN)
+
+/* Attach an existing output handle to @chr; it is not freed at finalize. */
+void qemu_chr_open_win_file(Chardev *chr, HANDLE fd_out);
+/* Open and configure @filename as comm device; 0 on success, -1 on error. */
+int win_chr_init(Chardev *chr, const char *filename, Error **errp);
+/* Polling callback for pipes; returns 1 if input was pending, else 0. */
+int win_chr_pipe_poll(void *opaque);
+
+#endif /* CHAR_WIN_H */
--- a/chardev/char.c
+++ b/chardev/char.c
--- a/197
+++ b/197
@@ -28,8 +28,6 @@ TMPB="qemu-conf"
 TMPC="${TMPDIR1}/${TMPB}.c"
 TMPO="${TMPDIR1}/${TMPB}.o"
 TMPCXX="${TMPDIR1}/${TMPB}.cxx"
-TMPL="${TMPDIR1}/${TMPB}.lo"
-TMPA="${TMPDIR1}/lib${TMPB}.la"
 TMPE="${TMPDIR1}/${TMPB}.exe"
 TMPMO="${TMPDIR1}/${TMPB}.mo"

@@ -230,6 +228,7 @@ vhost_net="no"
 vhost_scsi="no"
 vhost_vsock="no"
 kvm="no"
+hax="no"
 rdma=""
 gprof="no"
 debug_tcg="no"
@@ -312,6 +311,7 @@ gnutls_rnd=""
 nettle=""
 nettle_kdf="no"
 gcrypt=""
+gcrypt_hmac="no"
 gcrypt_kdf="no"
 vte=""
 virglrenderer=""
@@ -562,6 +562,7 @@ CYGWIN*)
 ;;
 MINGW32*)
  mingw32="yes"
+  hax="yes"
  audio_possible_drivers="dsound sdl"
  if check_include dsound.h; then
    audio_drv_list="dsound"
@@ -581,6 +582,8 @@ FreeBSD)
  audio_possible_drivers="oss sdl pa"
  # needed for kinfo_getvmmap(3) in libutil.h
  LIBS="-lutil $LIBS"
+  # needed for kinfo_getproc
+  libs_qga="-lutil $libs_qga"
  netmap=""  # enable netmap autodetect
  HOST_VARIANT_DIR="freebsd"
 ;;
@@ -609,6 +612,7 @@ OpenBSD)
 Darwin)
  bsd="yes"
  darwin="yes"
+  hax="yes"
  LDFLAGS_SHARED="-bundle -undefined dynamic_lookup"
  if [ "$cpu" = "x86_64" ] ; then
    QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
@@ -918,6 +922,10 @@ for opt do
  ;;
  --enable-kvm) kvm="yes"
  ;;
+  --disable-hax) hax="no"
+  ;;
+  --enable-hax) hax="yes"
+  ;;
  --disable-tcg-interpreter) tcg_interpreter="no"
  ;;
  --enable-tcg-interpreter) tcg_interpreter="yes"
@@ -1216,7 +1224,10 @@ case "$cpu" in
           cc_i386='$(CC) -m32'
           ;;
    x86_64)
-           CPU_CFLAGS="-m64"
+           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+           # If we truly care, we should simply detect this case at
+           # runtime and generate the fallback to serial emulation.
+           CPU_CFLAGS="-m64 -mcx16"
           LDFLAGS="-m64 $LDFLAGS"
           cc_i386='$(CC) -m32'
           ;;
@@ -1363,6 +1374,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  fdt             fdt device tree
  bluez           bluez stack connectivity
  kvm             KVM acceleration support
+  hax             HAX acceleration support
  rdma            RDMA-based migration support
  vde             support for vde network
  netmap          support for netmap network
@@ -1462,7 +1474,7 @@ fi

 gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
-gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
+gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
 gcc_flags="-Wno-initializer-overrides $gcc_flags"
 gcc_flags="-Wno-string-plus-int $gcc_flags"
@@ -2406,6 +2418,19 @@ EOF
        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
            gcrypt_kdf=yes
        fi
+
+        cat > $TMPC << EOF
+#include <gcrypt.h>
+int main(void) {
+  gcry_mac_hd_t handle;
+  gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5,
+                GCRY_MAC_FLAG_SECURE, NULL);
+  return 0;
+}
+EOF
+        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
+            gcrypt_hmac=yes
+        fi
    else
        if test "$gcrypt" = "yes"; then
            feature_not_found "gcrypt" "Install gcrypt devel"
@@ -2727,7 +2752,7 @@ if compile_prog "" "" ; then
 fi

 ##########################################
-# xfsctl() probe, used for raw-posix
+# xfsctl() probe, used for file-posix.c
 if test "$xfs" != "no" ; then
  cat > $TMPC << EOF
 #include <stddef.h>  /* NULL */
@@ -2914,25 +2939,41 @@ fi
 # curses probe
 if test "$curses" != "no" ; then
  if test "$mingw32" = "yes" ; then
-    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
+    curses_inc_list="$($pkg_config --cflags ncurses 2>/dev/null):"
+    curses_lib_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
  else
-    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses"
+    curses_inc_list="$($pkg_config --cflags ncursesw 2>/dev/null):-I/usr/include/ncursesw:"
+    curses_lib_list="$($pkg_config --libs ncursesw 2>/dev/null):-lncursesw:-lcursesw"
  fi
  curses_found=no
  cat > $TMPC << EOF
+#include <locale.h>
 #include <curses.h>
+#include <wchar.h>
 int main(void) {
  const char *s = curses_version();
+  wchar_t wch = L'w';
+  setlocale(LC_ALL, "");
  resize_term(0, 0);
+  addwstr(L"wide chars\n");
+  addnwstr(&wch, 1);
+  add_wch(WACS_DEGREE);
  return s != 0;
 }
 EOF
  IFS=:
-  for curses_lib in $curses_list; do
-    unset IFS
-    if compile_prog "" "$curses_lib" ; then
-      curses_found=yes
-      libs_softmmu="$curses_lib $libs_softmmu"
+  for curses_inc in $curses_inc_list; do
+    IFS=:
+    for curses_lib in $curses_lib_list; do
+      unset IFS
+      if compile_prog "$curses_inc" "$curses_lib" ; then
+        curses_found=yes
+        QEMU_CFLAGS="$curses_inc $QEMU_CFLAGS"
+        libs_softmmu="$curses_lib $libs_softmmu"
+        break
+      fi
+    done
+    if test "$curses_found" = yes ; then
      break
    fi
  done
@@ -3038,7 +3079,7 @@ fi

 # g_test_trap_subprocess added in 2.38. Used by some tests.
 glib_subprocess=yes
-if test "$mingw32" = "yes" || ! $pkg_config --atleast-version=2.38 glib-2.0; then
+if ! $pkg_config --atleast-version=2.38 glib-2.0; then
    glib_subprocess=no
 fi

@@ -4278,11 +4319,11 @@ if have_backend "ust"; then
 #include <lttng/tracepoint.h>
 int main(void) { return 0; }
 EOF
-  if compile_prog "" "" ; then
+  if compile_prog "" "-Wl,--no-as-needed -ldl" ; then
    if $pkg_config lttng-ust --exists; then
      lttng_ust_libs=$($pkg_config --libs lttng-ust)
    else
-      lttng_ust_libs="-llttng-ust"
+      lttng_ust_libs="-llttng-ust -ldl"
    fi
    if $pkg_config liburcu-bp --exists; then
      urcu_bp_libs=$($pkg_config --libs liburcu-bp)
@@ -4521,6 +4562,55 @@ if compile_prog "" "" ; then
    int128=yes
 fi

+#########################################
+# See if 128-bit atomic operations are supported.
+# The probe is only attempted when the __int128 check above succeeded;
+# otherwise atomic128 keeps its default of "no".
+
+atomic128=no
+if test "$int128" = "yes"; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  y = __atomic_load_16(&x, 0);
+  __atomic_store_16(&x, y, 0);
+  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    atomic128=yes
+  fi
+fi
+
+#########################################
+# See if 64-bit atomic operations are supported.
+# Note that without __atomic builtins, we can only
+# assume atomic loads/stores max at pointer size.
+
+# Start from a known value, like atomic128 above, so that an "atomic64"
+# inherited from the environment cannot enable CONFIG_ATOMIC64 when the
+# probe fails.
+atomic64=no
+cat > $TMPC << EOF
+#include <stdint.h>
+int main(void)
+{
+  uint64_t x = 0, y = 0;
+#ifdef __ATOMIC_RELAXED
+  y = __atomic_load_8(&x, 0);
+  __atomic_store_8(&x, y, 0);
+  __atomic_compare_exchange_8(&x, &y, x, 0, 0, 0);
+  __atomic_exchange_8(&x, y, 0);
+  __atomic_fetch_add_8(&x, y, 0);
+#else
+  typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
+  __sync_lock_test_and_set(&x, y);
+  __sync_val_compare_and_swap(&x, y, 0);
+  __sync_fetch_and_add(&x, y);
+#endif
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+  atomic64=yes
+fi
+
 ########################################
 # check if getauxval is available.

@@ -4605,6 +4695,33 @@ if compile_prog "" "" ; then
    have_rtnetlink=yes
 fi

+##########################################
+# check for usable AF_VSOCK environment
+# The probe needs both the AF_VSOCK constant from <sys/socket.h> and
+# struct sockaddr_vm from <linux/vm_sockets.h>.
+# NOTE(review): compile_prog presumably only builds the program, so the
+# getpeername()/ENOTCONN logic in main() would not be executed here - confirm.
+have_af_vsock=no
+cat > $TMPC << EOF
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#if !defined(AF_VSOCK)
+# error missing AF_VSOCK flag
+#endif
+#include <linux/vm_sockets.h>
+int main(void) {
+    int sock, ret;
+    struct sockaddr_vm svm;
+    socklen_t len = sizeof(svm);
+    sock = socket(AF_VSOCK, SOCK_STREAM, 0);
+    ret = getpeername(sock, (struct sockaddr *)&svm, &len);
+    if ((ret == -1) && (errno == ENOTCONN)) {
+        return 0;
+    }
+    return -1;
+}
+EOF
+if compile_prog "" "" ; then
+    have_af_vsock=yes
+fi
+
 #################################################
 # Sparc implicitly links with --relax, which is
 # incompatible with -r, so --no-relax should be
@@ -4620,8 +4737,14 @@ EOF
 if ! compile_object ""; then
  error_exit "Failed to compile object file for LD_REL_FLAGS test"
 fi
-if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
-  LD_REL_FLAGS="-Wl,--no-relax"
+for i in '-Wl,-r -Wl,--no-relax' -Wl,-r -r; do
+  if do_cc -nostdlib $i -o $TMPMO $TMPO; then
+    LD_REL_FLAGS=$i
+    break
+  fi
+done
+if test "$modules" = "yes" && test "$LD_REL_FLAGS" = ""; then
+  feature_not_found "modules" "Cannot find how to build relocatable objects"
 fi

 ##########################################
@@ -4941,6 +5064,7 @@ echo "Linux AIO support $linux_aio"
 echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
+echo "HAX support       $hax"
 echo "RDMA support      $rdma"
 echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
@@ -5277,6 +5401,9 @@ if test "$gnutls_rnd" = "yes" ; then
 fi
 if test "$gcrypt" = "yes" ; then
  echo "CONFIG_GCRYPT=y" >> $config_host_mak
+  if test "$gcrypt_hmac" = "yes" ; then
+    echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak
+  fi
  if test "$gcrypt_kdf" = "yes" ; then
    echo "CONFIG_GCRYPT_KDF=y" >> $config_host_mak
  fi
@@ -5483,6 +5610,14 @@ if test "$int128" = "yes" ; then
  echo "CONFIG_INT128=y" >> $config_host_mak
 fi

+if test "$atomic128" = "yes" ; then
+  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
+fi
+
+if test "$atomic64" = "yes" ; then
+  echo "CONFIG_ATOMIC64=y" >> $config_host_mak
+fi
+
 if test "$getauxval" = "yes" ; then
  echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
 fi
@@ -5580,6 +5715,10 @@ if test "$replication" = "yes" ; then
  echo "CONFIG_REPLICATION=y" >> $config_host_mak
 fi

+if test "$have_af_vsock" = "yes" ; then
+  echo "CONFIG_AF_VSOCK=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
@@ -5704,7 +5843,7 @@ target_name=$(echo $target | cut -d '-' -f 1)
 target_bigendian="no"

 case "$target_name" in
-  armeb|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
+  armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
  target_bigendian=yes
  ;;
 esac
@@ -5767,6 +5906,8 @@ case "$target_name" in
  ;;
  cris)
  ;;
+  hppa)
+  ;;
  lm32)
  ;;
  m68k)
@@ -5794,6 +5935,8 @@ case "$target_name" in
  ;;
  moxie)
  ;;
+  nios2)
+  ;;
  or32)
    TARGET_ARCH=openrisc
    TARGET_BASE_ARCH=openrisc
@@ -5909,6 +6052,15 @@ case "$target_name" in
      fi
    fi
 esac
+if test "$hax" = "yes" ; then
+  if test "$target_softmmu" = "yes" ; then
+    case "$target_name" in
+    i386|x86_64)
+      echo "CONFIG_HAX=y" >> $config_target_mak
+    ;;
+    esac
+  fi
+fi
 if test "$target_bigendian" = "yes" ; then
  echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
@@ -5966,6 +6118,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
  cris)
    disas_config "CRIS"
  ;;
+  hppa)
+    disas_config "HPPA"
+  ;;
  i386|x86_64|x32)
    disas_config "I386"
  ;;
@@ -5987,6 +6142,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
  moxie*)
    disas_config "MOXIE"
  ;;
+  nios2)
+    disas_config "NIOS2"
+  ;;
  or32)
    disas_config "OPENRISC"
  ;;
@@ -6057,7 +6215,7 @@ fi

 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests"
-DIRS="$DIRS fsdev"
+DIRS="$DIRS docs fsdev"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 DIRS="$DIRS qapi-generated"
@@ -6071,6 +6229,7 @@ FILES="$FILES roms/seabios/Makefile roms/vgabios/Makefile"
 FILES="$FILES pc-bios/qemu-icon.bmp"
 for bios_file in \
    $source_path/pc-bios/*.bin \
+    $source_path/pc-bios/*.lid \
    $source_path/pc-bios/*.aml \
    $source_path/pc-bios/*.rom \
    $source_path/pc-bios/*.dtb \
--- a/contrib/libvhost-user/Makefile.objs
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
+libvhost-user-obj-y = libvhost-user.o
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
+/*
+ * Vhost User library
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Victor Kaplansky <victork@redhat.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVHOST_USER_H
+#define LIBVHOST_USER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/vhost.h>
+#include "standard-headers/linux/virtio_ring.h"
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
+#define VHOST_MAX_NR_VIRTQUEUE 8
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_INPUT_GET_CONFIG = 20,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
+#if defined(_WIN32)
+# define VU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define VU_PACKED __attribute__((packed))
+#endif
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+        VhostUserLog log;
+    } payload;
+
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int fd_num;
+    uint8_t *data;
+} VU_PACKED VhostUserMsg;
+
+typedef struct VuDevRegion {
+    /* Guest Physical address. */
+    uint64_t gpa;
+    /* Memory region size. */
+    uint64_t size;
+    /* QEMU virtual address (userspace). */
+    uint64_t qva;
+    /* Starting offset in our mmaped space. */
+    uint64_t mmap_offset;
+    /* Start address of mmaped space. */
+    uint64_t mmap_addr;
+} VuDevRegion;
+
+typedef struct VuDev VuDev;
+
+typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
+typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
+typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
+                                  int *do_reply);
+typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
+
+typedef struct VuDevIface {
+    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
+    vu_get_features_cb get_features;
+    /* enable vhost implementation features */
+    vu_set_features_cb set_features;
+    /* get the protocol feature bitmask from the underlying vhost
+     * implementation */
+    vu_get_features_cb get_protocol_features;
+    /* enable protocol features in the underlying vhost implementation. */
+    vu_set_features_cb set_protocol_features;
+    /* process_msg is called for each vhost-user message received */
+    /* skip libvhost-user processing if return value != 0 */
+    vu_process_msg_cb process_msg;
+    /* tells when queues can be processed */
+    vu_queue_set_started_cb queue_set_started;
+} VuDevIface;
+
+typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
+
+typedef struct VuRing {
+    unsigned int num;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+    uint64_t log_guest_addr;
+    uint32_t flags;
+} VuRing;
+
+typedef struct VuVirtq {
+    VuRing vring;
+
+    /* Next head to pop */
+    uint16_t last_avail_idx;
+
+    /* Last avail_idx read from VQ. */
+    uint16_t shadow_avail_idx;
+
+    uint16_t used_idx;
+
+    /* Last used index value we have signalled on */
+    uint16_t signalled_used;
+
+    /* Presumably whether signalled_used is valid (TODO confirm) */
+    bool signalled_used_valid;
+
+    /* Notification enabled? */
+    bool notification;
+
+    int inuse;
+
+    vu_queue_handler_cb handler;
+
+    int call_fd;
+    int kick_fd;
+    int err_fd;
+    unsigned int enable;
+    bool started;
+} VuVirtq;
+
+enum VuWatchCondtion {
+    VU_WATCH_IN = 1 << 0,
+    VU_WATCH_OUT = 1 << 1,
+    VU_WATCH_PRI = 1 << 2,
+    VU_WATCH_ERR = 1 << 3,
+    VU_WATCH_HUP = 1 << 4,
+};
+
+typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
+typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
+typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
+                                 vu_watch_cb cb, void *data);
+typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+
+struct VuDev {
+    int sock;
+    uint32_t nregions;
+    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+    VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    uint8_t *log_table;
+    uint64_t features;
+    uint64_t protocol_features;
+    bool broken;
+
+    /* @set_watch: add or update the given fd to the watch set,
+     * call cb when condition is met */
+    vu_set_watch_cb set_watch;
+
+    /* @remove_watch: remove the given fd from the watch set */
+    vu_remove_watch_cb remove_watch;
+
+    /* @panic: encountered an unrecoverable error, you may try to
+     * re-initialize */
+    vu_panic_cb panic;
+    const VuDevIface *iface;
+};
+
+typedef struct VuVirtqElement {
+    unsigned int index;
+    unsigned int out_num;
+    unsigned int in_num;
+    struct iovec *in_sg;
+    struct iovec *out_sg;
+} VuVirtqElement;
+
+/**
+ * vu_init:
+ * @dev: a VuDev context
+ * @socket: the socket connected to vhost-user master
+ * @panic: a panic callback
+ * @set_watch: a set_watch callback
+ * @remove_watch: a remove_watch callback
+ * @iface: a VuDevIface structure with vhost-user device callbacks
+ *
+ * Initializes a VuDev vhost-user context.
+ **/
+void vu_init(VuDev *dev,
+             int socket,
+             vu_panic_cb panic,
+             vu_set_watch_cb set_watch,
+             vu_remove_watch_cb remove_watch,
+             const VuDevIface *iface);
+
+
+/**
+ * vu_deinit:
+ * @dev: a VuDev context
+ *
+ * Cleans up the VuDev context
+ */
+void vu_deinit(VuDev *dev);
+
+/**
+ * vu_dispatch:
+ * @dev: a VuDev context
+ *
+ * Process one vhost-user message.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_dispatch(VuDev *dev);
+
+/**
+ * vu_gpa_to_va:
+ * @dev: a VuDev context
+ * @guest_addr: guest address
+ *
+ * Translate a guest address to a pointer. Returns NULL on failure.
+ */
+void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
+
+/**
+ * vu_get_queue:
+ * @dev: a VuDev context
+ * @qidx: queue index
+ *
+ * Returns the queue number @qidx.
+ */
+VuVirtq *vu_get_queue(VuDev *dev, int qidx);
+
+/**
+ * vu_set_queue_handler:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @handler: the queue handler callback
+ *
+ * Set the queue handler. This function may be called several times
+ * for the same queue. If called with NULL @handler, the handler is
+ * removed.
+ */
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+                          vu_queue_handler_cb handler);
+
+
+/**
+ * vu_queue_set_notification:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @enable: state
+ *
+ * Set whether the queue notifies (via event index or interrupt)
+ */
+void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is enabled.
+ */
+bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_empty:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is empty.
+ */
+int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_notify:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ */
+void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_pop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @sz: the size of struct to return (must be >= VuVirtqElement)
+ *
+ * Returns: a VuVirtqElement filled from the queue or NULL.
+ */
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_rewind:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to push back
+ *
+ * Pretend that elements weren't popped from the virtqueue.  The next
+ * vu_queue_pop() will refetch the oldest element.
+ *
+ * Returns: true on success, false if @num is greater than the number of in use
+ * elements.
+ */
+bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_fill:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ * @idx: optional offset for the used ring index (0 in general)
+ *
+ * Fill the used ring with @elem element.
+ */
+void vu_queue_fill(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem,
+                   unsigned int len, unsigned int idx);
+
+/**
+ * vu_queue_push:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ *
+ * Helper that combines vu_queue_fill() with a vu_queue_flush().
+ */
+void vu_queue_push(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem, unsigned int len);
+
+/**
+ * vu_queue_flush:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to flush
+ *
+ * Mark the last number of elements as done (used.idx is updated by
+ * num elements).
+ */
+void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_get_avail_bytes:
+ * @vdev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: in bytes
+ * @out_bytes: out bytes
+ * @max_in_bytes: stop counting after max_in_bytes
+ * @max_out_bytes: stop counting after max_out_bytes
+ *
+ * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
+ */
+void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes,
+                              unsigned max_in_bytes, unsigned max_out_bytes);
+
+/**
+ * vu_queue_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: expected in bytes
+ * @out_bytes: expected out bytes
+ *
+ * Returns: true if in_bytes <= in_total && out_bytes <= out_total
+ */
+bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+
+#endif /* LIBVHOST_USER_H */
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
    }
    siglongjmp(cpu->jmp_env, 1);
 }
+
+void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
+{
+    cpu->exception_index = EXCP_ATOMIC;
+    cpu_loop_exit_restore(cpu, pc);
+}
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -18,7 +18,7 @@
 */
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "trace.h"
+#include "trace-root.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg.h"
@@ -143,23 +143,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
    uint8_t *tb_ptr = itb->tc_ptr;

    qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
-                           "Trace %p [" TARGET_FMT_lx "] %s\n",
-                           itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
+                           "Trace %p [%d: " TARGET_FMT_lx "] %s\n",
+                           itb->tc_ptr, cpu->cpu_index, itb->pc,
+                           lookup_symbol(itb->pc));

 #if defined(DEBUG_DISAS)
    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
        && qemu_log_in_addr_range(itb->pc)) {
+        qemu_log_lock();
 #if defined(TARGET_I386)
        log_cpu_state(cpu, CPU_DUMP_CCOP);
-#elif defined(TARGET_M68K)
-        /* ??? Should not modify env state for dumping.  */
-        cpu_m68k_flush_flags(env, env->cc_op);
-        env->cc_op = CC_OP_FLAGS;
-        env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4);
-        log_cpu_state(cpu, 0);
 #else
        log_cpu_state(cpu, 0);
 #endif
+        qemu_log_unlock();
    }
 #endif /* DEBUG_DISAS */

@@ -210,17 +207,53 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
    if (max_cycles > CF_COUNT_MASK)
        max_cycles = CF_COUNT_MASK;

+    tb_lock();
    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                     max_cycles | CF_NOCACHE
                         | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
    tb->orig_tb = orig_tb;
+    tb_unlock();
+
    /* execute the generated code */
    trace_exec_tb_nocache(tb, tb->pc);
    cpu_tb_exec(cpu, tb);
+
+    tb_lock();
+    tb_phys_invalidate(tb, -1);
+    tb_free(tb);
+    tb_unlock();
+}
+#endif
+
+static void cpu_exec_step(CPUState *cpu)
+{
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
+    TranslationBlock *tb;
+    target_ulong cs_base, pc;
+    uint32_t flags;
+
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+    tb = tb_gen_code(cpu, pc, cs_base, flags,
+                     1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
+    tb->orig_tb = NULL;
+    /* execute the generated code */
+    trace_exec_tb_nocache(tb, pc);
+    cpu_tb_exec(cpu, tb);
    tb_phys_invalidate(tb, -1);
    tb_free(tb);
 }
-#endif
+
+void cpu_exec_step_atomic(CPUState *cpu)
+{
+    start_exclusive();
+
+    /* Since we got here, we know that parallel_cpus must be true.  */
+    parallel_cpus = false;
+    cpu_exec_step(cpu);
+    parallel_cpus = true;
+
+    end_exclusive();
+}

 struct tb_desc {
    target_ulong pc;
@@ -475,8 +508,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu,
           True when it is, and we should restart on a new TB,
           and via longjmp via cpu_loop_exit.  */
        else {
-            replay_interrupt();
            if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
+                replay_interrupt();
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .7.50
 .8.50