Update VERSION for 1.7.2 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Allow mismatched virtio config-len
2014-07-21 17:42:15 -05:00 · 2014-07-15 19:28:02 -05:00 · 2014-07-15 19:28:02 -05:00 · 2014-07-15 19:28:02 -05:00 · 2014-07-15 19:28:02 -05:00 · 2014-07-15 19:28:02 -05:00
3478 changed files with 231866 additions and 560744 deletions
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -1,2 +0,0 @@
-((c-mode . ((c-file-style . "stroustrup")
-	    (indent-tabs-mode . nil))))
--- a/.gitignore
+++ b/.gitignore
@@ -1,75 +1,72 @@
-/config-devices.*
-/config-all-devices.*
-/config-all-disas.*
-/config-host.*
-/config-target.*
-/config.status
-/config-temp
-/trace/generated-tracers.h
-/trace/generated-tracers.c
-/trace/generated-tracers-dtrace.h
-/trace/generated-tracers.dtrace
-/trace/generated-events.h
-/trace/generated-events.c
-/trace/generated-helpers-wrappers.h
-/trace/generated-helpers.h
-/trace/generated-helpers.c
-/trace/generated-tcg-tracers.h
-/trace/generated-ust-provider.h
-/trace/generated-ust.c
-/ui/shader/texture-blit-frag.h
-/ui/shader/texture-blit-vert.h
+config-devices.*
+config-all-devices.*
+config-all-disas.*
+config-host.*
+config-target.*
+trace/generated-tracers.h
+trace/generated-tracers.c
+trace/generated-tracers-dtrace.h
+trace/generated-tracers.dtrace
+trace/generated-events.h
+trace/generated-events.c
+libcacard/trace/generated-tracers.c
 *-timestamp
-/*-softmmu
-/*-darwin-user
-/*-linux-user
-/*-bsd-user
-/ivshmem-client
-/ivshmem-server
-/libdis*
-/libuser
-/linux-headers/asm
-/qga/qapi-generated
-/qapi-generated
-/qapi-types.[ch]
-/qapi-visit.[ch]
-/qapi-event.[ch]
-/qmp-commands.h
-/qmp-introspect.[ch]
-/qmp-marshal.c
-/qemu-doc.html
-/qemu-tech.html
-/qemu-doc.info
-/qemu-tech.info
-/qemu-img
-/qemu-nbd
-/qemu-options.def
-/qemu-options.texi
-/qemu-img-cmds.texi
-/qemu-img-cmds.h
-/qemu-io
-/qemu-ga
-/qemu-bridge-helper
-/qemu-monitor.texi
-/qemu-monitor-info.texi
-/qmp-commands.txt
-/vscclient
-/fsdev/virtfs-proxy-helper
-*.[1-9]
+*-softmmu
+*-darwin-user
+*-linux-user
+*-bsd-user
+libdis*
+libuser
+linux-headers/asm
+qapi-generated
+qapi-types.[ch]
+qapi-visit.[ch]
+qmp-commands.h
+qmp-marshal.c
+qemu-doc.html
+qemu-tech.html
+qemu-doc.info
+qemu-tech.info
+qemu.1
+qemu.pod
+qemu-img.1
+qemu-img.pod
+qemu-img
+qemu-nbd
+qemu-nbd.8
+qemu-nbd.pod
+qemu-options.def
+qemu-options.texi
+qemu-img-cmds.texi
+qemu-img-cmds.h
+qemu-io
+qemu-ga
+qemu-bridge-helper
+qemu-monitor.texi
+vscclient
+qmp-commands.txt
+test-bitops
+test-coroutine
+test-int128
+test-opts-visitor
+test-qmp-input-visitor
+test-qmp-output-visitor
+test-string-input-visitor
+test-string-output-visitor
+test-visitor-serialization
+fsdev/virtfs-proxy-helper
+fsdev/virtfs-proxy-helper.1
+fsdev/virtfs-proxy-helper.pod
+.gdbinit
 *.a
 *.aux
 *.cp
 *.dvi
 *.exe
-*.msi
-*.dll
-*.so
-*.mo
 *.fn
 *.ky
 *.log
 *.pdf
-*.pod
 *.cps
 *.fns
 *.kys
@@ -79,31 +76,35 @@
 *.tp
 *.vr
 *.d
-!/scripts/qemu-guest-agent/fsfreeze-hook.d
+!scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
 *.lo
 *.la
 *.pc
 .libs
 .sdk
+*.swp
+*.orig
+.pc
 *.gcda
 *.gcno
-/pc-bios/bios-pq/status
-/pc-bios/vgabios-pq/status
-/pc-bios/optionrom/linuxboot.asm
-/pc-bios/optionrom/linuxboot.bin
-/pc-bios/optionrom/linuxboot.raw
-/pc-bios/optionrom/linuxboot.img
-/pc-bios/optionrom/multiboot.asm
-/pc-bios/optionrom/multiboot.bin
-/pc-bios/optionrom/multiboot.raw
-/pc-bios/optionrom/multiboot.img
-/pc-bios/optionrom/kvmvapic.asm
-/pc-bios/optionrom/kvmvapic.bin
-/pc-bios/optionrom/kvmvapic.raw
-/pc-bios/optionrom/kvmvapic.img
-/pc-bios/s390-ccw/s390-ccw.elf
-/pc-bios/s390-ccw/s390-ccw.img
+patches
+pc-bios/bios-pq/status
+pc-bios/vgabios-pq/status
+pc-bios/optionrom/linuxboot.asm
+pc-bios/optionrom/linuxboot.bin
+pc-bios/optionrom/linuxboot.raw
+pc-bios/optionrom/linuxboot.img
+pc-bios/optionrom/multiboot.asm
+pc-bios/optionrom/multiboot.bin
+pc-bios/optionrom/multiboot.raw
+pc-bios/optionrom/multiboot.img
+pc-bios/optionrom/kvmvapic.asm
+pc-bios/optionrom/kvmvapic.bin
+pc-bios/optionrom/kvmvapic.raw
+pc-bios/optionrom/kvmvapic.img
+pc-bios/s390-ccw/s390-ccw.elf
+pc-bios/s390-ccw/s390-ccw.img
 .stgit-*
 cscope.*
 tags
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,9 +13,6 @@
 [submodule "roms/openbios"]
 	path = roms/openbios
 	url = git://git.qemu-project.org/openbios.git
-[submodule "roms/openhackware"]
-	path = roms/openhackware
-	url = git://git.qemu-project.org/openhackware.git
 [submodule "roms/qemu-palcode"]
 	path = roms/qemu-palcode
 	url = git://github.com/rth7680/qemu-palcode.git
@@ -28,6 +25,3 @@
 [submodule "dtc"]
 	path = dtc
 	url = git://git.qemu-project.org/dtc.git
-[submodule "roms/u-boot"]
-	path = roms/u-boot
-	url = git://git.qemu-project.org/u-boot.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,124 +1,71 @@
-sudo: false
 language: c
 python:
  - "2.4"
 compiler:
  - gcc
  - clang
-cache: ccache
-addons:
-  apt:
-    packages:
-      - libaio-dev
-      - libattr1-dev
-      - libbrlapi-dev
-      - libcap-ng-dev
-      - libgnutls-dev
-      - libgtk-3-dev
-      - libiscsi-dev
-      - liblttng-ust-dev
-      - libncurses5-dev
-      - libnss3-dev
-      - libpixman-1-dev
-      - libpng12-dev
-      - librados-dev
-      - libsdl1.2-dev
-      - libseccomp-dev
-      - libspice-protocol-dev
-      - libspice-server-dev
-      - libssh2-1-dev
-      - liburcu-dev
-      - libusb-1.0-0-dev
-      - libvte-2.90-dev
-      - sparse
-      - uuid-dev
-
-notifications:
-  irc:
-    channels:
-      - "irc.oftc.net#qemu"
-    on_success: change
-    on_failure: always
 env:
  global:
    - TEST_CMD="make check"
    - EXTRA_CONFIG=""
+    # Development packages, EXTRA_PKGS saved for additional builds
+    - CORE_PKGS="libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev"
+    - NET_PKGS="libseccomp-dev libgnutls-dev libssh2-1-dev  libspice-server-dev libspice-protocol-dev libnss3-dev"
+    - GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
+    - EXTRA_PKGS=""
  matrix:
-    # Group major targets together with their linux-user counterparts
-    - TARGETS=alpha-softmmu,alpha-linux-user,cris-softmmu,cris-linux-user,m68k-softmmu,m68k-linux-user,microblaze-softmmu,microblazeel-softmmu,microblaze-linux-user,microblazeel-linux-user
-    - TARGETS=arm-softmmu,arm-linux-user,armeb-linux-user,aarch64-softmmu,aarch64-linux-user
-    - TARGETS=i386-softmmu,i386-linux-user,x86_64-softmmu,x86_64-linux-user
-    - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu,mips-linux-user,mips64-linux-user,mips64el-linux-user,mipsel-linux-user,mipsn32-linux-user,mipsn32el-linux-user
-    - TARGETS=or32-softmmu,or32-linux-user,ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,ppc-linux-user,ppc64-linux-user,ppc64abi32-linux-user,ppc64le-linux-user
-    - TARGETS=s390x-softmmu,s390x-linux-user,sh4-softmmu,sh4eb-softmmu,sh4-linux-user,sh4eb-linux-user,sparc-softmmu,sparc64-softmmu,sparc-linux-user,sparc32plus-linux-user,sparc64-linux-user,unicore32-softmmu,unicore32-linux-user
-    # Group remaining softmmu only targets into one build
-    - TARGETS=lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
-git:
-  # we want to do this ourselves
-  submodules: false
+  - TARGETS=alpha-softmmu,alpha-linux-user
+  - TARGETS=arm-softmmu,arm-linux-user
+  - TARGETS=cris-softmmu
+  - TARGETS=i386-softmmu,x86_64-softmmu
+  - TARGETS=lm32-softmmu
+  - TARGETS=m68k-softmmu
+  - TARGETS=microblaze-softmmu,microblazeel-softmmu
+  - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
+  - TARGETS=moxie-softmmu
+  - TARGETS=or32-softmmu,
+  - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu
+  - TARGETS=s390x-softmmu
+  - TARGETS=sh4-softmmu,sh4eb-softmmu
+  - TARGETS=sparc-softmmu,sparc64-softmmu
+  - TARGETS=unicore32-softmmu
+  - TARGETS=xtensa-softmmu,xtensaeb-softmmu
 before_install:
-  - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
  - git submodule update --init --recursive
-before_script:
-  - ./configure --target-list=${TARGETS} --enable-debug-tcg ${EXTRA_CONFIG}
-script:
-  - make -j2 && ${TEST_CMD}
+  - sudo apt-get update -qq
+  - sudo apt-get install -qq ${CORE_PKGS} ${NET_PKGS} ${GUI_PKGS} ${EXTRA_PKGS}
+script: "./configure --target-list=${TARGETS} ${EXTRA_CONFIG} && make && ${TEST_CMD}"
 matrix:
  # We manually include a number of additional builds for non-standard bits
  include:
    # Debug related options
-    - env: TARGETS=x86_64-softmmu
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-debug"
      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=alpha-softmmu
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
-           TEST_CMD=""
-      compiler: gcc
-    # Disable a few of the optional features
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--disable-linux-aio --disable-cap-ng --disable-attr --disable-brlapi --disable-uuid --disable-libusb"
      compiler: gcc
    # Currently configure doesn't force --disable-pie
-    - env: TARGETS=x86_64-softmmu
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
           EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
      compiler: gcc
-    # Sparse
-    - env: TARGETS=x86_64-softmmu
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+           EXTRA_PKGS="sparse"
           EXTRA_CONFIG="--enable-sparse"
      compiler: gcc
-    # Modules
-    - env: TARGETS=arm-softmmu,x86_64-softmmu
-           EXTRA_CONFIG="--enable-modules"
-      compiler: gcc
    # All the trace backends (apart from dtrace)
-    - env: TARGETS=i386-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=log"
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+           EXTRA_CONFIG="--enable-trace-backend=stderr"
      compiler: gcc
-    # We currently disable "make check" (until 41fc57e44ed regression fixed)
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=simple"
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+           EXTRA_CONFIG="--enable-trace-backend=simple"
+      compiler: gcc
+    - env: TARGETS=i386-softmmu,x86_64-softmmu
+           EXTRA_CONFIG="--enable-trace-backend=ftrace"
           TEST_CMD=""
      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=ftrace"
-           TEST_CMD=""
-      compiler: gcc
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--enable-trace-backends=ust"
-           TEST_CMD=""
-      compiler: gcc
-    # All the co-routine backends (apart from windows)
-    # We currently disable "make check"
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=gthread"
-           TEST_CMD=""
-      compiler: gcc
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=ucontext"
-      compiler: gcc
-    - env: TARGETS=x86_64-softmmu
-           EXTRA_CONFIG="--with-coroutine=sigaltstack"
-      compiler: gcc
+    # "make check" is disabled for the ftrace backend above, which needs more setting up.
+    # The ust backend below is currently broken on 12.04 due to mis-packaged liburcu and changed API, will be pulled.
+    #- env: TARGETS=i386-softmmu,x86_64-softmmu
+    #       EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
+    #       EXTRA_CONFIG="--enable-trace-backend=ust"
--- a/26
+++ b/26
@@ -84,29 +84,3 @@ and clarity it comes on a line by itself:
 Rationale: a consistent (except for functions...) bracing style reduces
 ambiguity and avoids needless churn when lines are added or removed.
 Furthermore, it is the QEMU coding style.
-
-5. Declarations
-
-Mixed declarations (interleaving statements and declarations within
-blocks) are generally not allowed; declarations should be at the beginning
-of blocks.
-
-Every now and then, an exception is made for declarations inside a
-#ifdef or #ifndef block: if the code looks nicer, such declarations can
-be placed at the top of the block even if there are statements above.
-On the other hand, however, it's often best to move that #ifdef/#ifndef
-block to a separate function altogether.
-
-6. Conditional statements
-
-When comparing a variable for (in)equality with a constant, list the
-constant on the right, as in:
-
-if (a == 1) {
-    /* Reads like: "If a equals 1" */
-    do_something();
-}
-
-Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
-Besides, good compilers already warn users when '==' is mis-typed as '=',
-even when the constant is on the right.
--- a/55
+++ b/55
@@ -157,58 +157,3 @@ painful. These are:
 * you may assume that integers are 2s complement representation
 * you may assume that right shift of a signed integer duplicates
   the sign bit (ie it is an arithmetic shift, not a logical shift)
-
-7. Error handling and reporting
-
-7.1 Reporting errors to the human user
-
-Do not use printf(), fprintf() or monitor_printf().  Instead, use
-error_report() or error_vreport() from error-report.h.  This ensures the
-error is reported in the right place (current monitor or stderr), and in
-a uniform format.
-
-Use error_printf() & friends to print additional information.
-
-error_report() prints the current location.  In certain common cases
-like command line parsing, the current location is tracked
-automatically.  To manipulate it manually, use the loc_*() from
-error-report.h.
-
-7.2 Propagating errors
-
-An error can't always be reported to the user right where it's detected,
-but often needs to be propagated up the call chain to a place that can
-handle it.  This can be done in various ways.
-
-The most flexible one is Error objects.  See error.h for usage
-information.
-
-Use the simplest suitable method to communicate success / failure to
-callers.  Stick to common methods: non-negative on success / -1 on
-error, non-negative / -errno, non-null / null, or Error objects.
-
-Example: when a function returns a non-null pointer on success, and it
-can fail only in one way (as far as the caller is concerned), returning
-null on failure is just fine, and certainly simpler and a lot easier on
-the eyes than propagating an Error object through an Error ** parameter.
-
-Example: when a function's callers need to report details on failure
-only the function really knows, use Error **, and set suitable errors.
-
-Do not report an error to the user when you're also returning an error
-for somebody else to handle.  Leave the reporting to the place that
-consumes the error returned.
-
-7.3 Handling errors
-
-Calling exit() is fine when handling configuration errors during
-startup.  It's problematic during normal operation.  In particular,
-monitor commands should never exit().
-
-Do not call exit() or abort() to handle an error that can be triggered
-by the guest (e.g., some unimplemented corner case in guest code
-translation or device emulation).  Guests should not be able to
-terminate QEMU.
-
-Note that &error_fatal is just another way to exit(1), and &error_abort
-is just another way to abort().
--- a/2
+++ b/2
@@ -11,7 +11,7 @@ option) any later version.

 As of July 2013, contributions under version 2 of the GNU General Public
 License (and no later version) are only accepted for the following files
-or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*.
+or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.

 3) The Tiny Code Generator (TCG) is released under the BSD license
   (see license headers in files).
--- a/866
+++ b/866
--- a/270
+++ b/270
@@ -3,11 +3,6 @@
 # Always point to the root of the build tree (needs GNU make).
 BUILD_DIR=$(CURDIR)

-# Before including a proper config-host.mak, assume we are in the source tree
-SRC_PATH=.
-
-UNCHECKED_GOALS := %clean TAGS cscope ctags
-
 # All following code might depend on configuration variables
 ifneq ($(wildcard config-host.mak),)
 # Put the all: rule here so that config-host.mak can contain dependencies.
@@ -43,45 +38,32 @@ config-host.mak: $(SRC_PATH)/configure
 	fi
 else
 config-host.mak:
-ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 	@echo "Please call configure before running make!"
 	@exit 1
 endif
 endif

 GENERATED_HEADERS = config-host.h qemu-options.def
-GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
-GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
-GENERATED_HEADERS += qmp-introspect.h
-GENERATED_SOURCES += qmp-introspect.c
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
+GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c

 GENERATED_HEADERS += trace/generated-events.h
 GENERATED_SOURCES += trace/generated-events.c

 GENERATED_HEADERS += trace/generated-tracers.h
-ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
+ifeq ($(TRACE_BACKEND),dtrace)
 GENERATED_HEADERS += trace/generated-tracers-dtrace.h
 endif
 GENERATED_SOURCES += trace/generated-tracers.c

-GENERATED_HEADERS += trace/generated-tcg-tracers.h
-
-GENERATED_HEADERS += trace/generated-helpers-wrappers.h
-GENERATED_HEADERS += trace/generated-helpers.h
-GENERATED_SOURCES += trace/generated-helpers.c
-
-ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
-GENERATED_HEADERS += trace/generated-ust-provider.h
-GENERATED_SOURCES += trace/generated-ust.c
-endif
-
 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
 Makefile: ;
 configure: ;

 .PHONY: all clean cscope distclean dvi html info install install-doc \
-	pdf recurse-all speed test dist msi
+	pdf recurse-all speed test dist

 $(call set-vpath, $(SRC_PATH))

@@ -90,11 +72,7 @@ LIBS+=-lz $(LIBS_TOOLS)
 HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
-DOCS+=qmp-commands.txt
-ifdef CONFIG_LINUX
-DOCS+=kvm_stat.1
-endif
+DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qmp-commands.txt
 ifdef CONFIG_VIRTFS
 DOCS+=fsdev/virtfs-proxy-helper.1
 endif
@@ -120,9 +98,8 @@ endif
 -include $(SUBDIR_DEVICES_MAK_DEP)

 %/config-devices.mak: default-configs/%.mak
-	$(call quiet-command, \
-            $(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $< $*-config-devices.mak.d $@ > $@.tmp, "  GEN   $@.tmp")
-	$(call quiet-command, if test -f $@; then \
+	$(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $@ $<, "  GEN   $@")
+	@if test -f $@; then \
 	  if cmp -s $@.old $@; then \
 	    mv $@.tmp $@; \
 	    cp -p $@ $@.old; \
@@ -138,36 +115,20 @@ endif
 	 else \
 	  mv $@.tmp $@; \
 	  cp -p $@ $@.old; \
-	 fi, "  GEN   $@");
+	 fi

 defconfig:
 	rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK)

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/Makefile.objs
-endif
-
-dummy := $(call unnest-vars,, \
-                stub-obj-y \
-                util-obj-y \
-                qga-obj-y \
-                ivshmem-client-obj-y \
-                ivshmem-server-obj-y \
-                qga-vss-dll-obj-y \
-                block-obj-y \
-                block-obj-m \
-                crypto-obj-y \
-                crypto-aes-obj-y \
-                qom-obj-y \
-                io-obj-y \
-                common-obj-y \
-                common-obj-m)
-
-ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile
 endif
+ifeq ($(CONFIG_SMARTCARD_NSS),y)
+include $(SRC_PATH)/libcacard/Makefile
+endif

-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
+all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -177,9 +138,6 @@ qemu-options.def: $(SRC_PATH)/qemu-options.hx
 SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))

-$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
-$(SOFTMMU_SUBDIR_RULES): $(crypto-obj-y)
-$(SOFTMMU_SUBDIR_RULES): $(io-obj-y)
 $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak

 subdir-%:
@@ -204,7 +162,7 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y)

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 romsubdir-%:
@@ -214,9 +172,11 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
+bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)
+
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"  RC    version.o")
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
+$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"  RC    version.lo")

 Makefile: $(version-obj-y) $(version-lobj-y)
@@ -225,22 +185,19 @@ Makefile: $(version-obj-y) $(version-lobj-y)
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y)
-
-block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL
-util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'
+libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o

 ######################################################################

 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o

-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o libqemuutil.a libqemustub.a
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap

 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
@@ -255,51 +212,23 @@ qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py

 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
-		"  GEN   $@")
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o qga/qapi-generated -p "qga-" < $<, "  GEN   $@")
 qga/qapi-generated/qga-qapi-visit.c qga/qapi-generated/qga-qapi-visit.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
-		"  GEN   $@")
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o qga/qapi-generated -p "qga-" < $<, "  GEN   $@")
 qga/qapi-generated/qga-qmp-commands.h qga/qapi-generated/qga-qmp-marshal.c :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
-		$(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
-		"  GEN   $@")
-
-qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
-               $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
-               $(SRC_PATH)/qapi/event.json $(SRC_PATH)/qapi/introspect.json \
-               $(SRC_PATH)/qapi/crypto.json $(SRC_PATH)/qapi/rocker.json \
-               $(SRC_PATH)/qapi/trace.json
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -o qga/qapi-generated -p "qga-" < $<, "  GEN   $@")

 qapi-types.c qapi-types.h :\
-$(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
-		$(gen-out-type) -o "." -b $<, \
-		"  GEN   $@")
+$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -b < $<, "  GEN   $@")
 qapi-visit.c qapi-visit.h :\
-$(qapi-modules) $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
-		$(gen-out-type) -o "." -b $<, \
-		"  GEN   $@")
-qapi-event.c qapi-event.h :\
-$(qapi-modules) $(SRC_PATH)/scripts/qapi-event.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-event.py \
-		$(gen-out-type) -o "." $<, \
-		"  GEN   $@")
+$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." -b < $<, "  GEN   $@")
 qmp-commands.h qmp-marshal.c :\
-$(qapi-modules) $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
-		$(gen-out-type) -o "." -m $<, \
-		"  GEN   $@")
-qmp-introspect.h qmp-introspect.c :\
-$(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-introspect.py \
-		$(gen-out-type) -o "." $<, \
-		"  GEN   $@")
+$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o "." < $<, "  GEN   $@")

 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
@@ -307,44 +236,16 @@ $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
 qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)

-ifdef QEMU_GA_MSI_ENABLED
-QEMU_GA_MSI=qemu-ga-$(ARCH).msi
-
-msi: $(QEMU_GA_MSI)
-
-$(QEMU_GA_MSI): qemu-ga.exe $(QGA_VSS_PROVIDER)
-
-$(QEMU_GA_MSI): config-host.mak
-
-$(QEMU_GA_MSI):  $(SRC_PATH)/qga/installer/qemu-ga.wxs
-	$(call quiet-command,QEMU_GA_VERSION="$(QEMU_GA_VERSION)" QEMU_GA_MANUFACTURER="$(QEMU_GA_MANUFACTURER)" QEMU_GA_DISTRO="$(QEMU_GA_DISTRO)" BUILD_DIR="$(BUILD_DIR)" \
-	wixl -o $@ $(QEMU_GA_MSI_ARCH) $(QEMU_GA_MSI_WITH_VSS) $(QEMU_GA_MSI_MINGW_DLL_PATH) $<, "  WIXL  $@")
-else
-msi:
-	@echo "MSI build not configured or dependency resolution failed (reconfigure with --enable-guest-agent-msi option)"
-endif
-
-ifneq ($(EXESUF),)
-.PHONY: qemu-ga
-qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
-endif
-
-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
-	$(call LINK, $^)
-ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
-	$(call LINK, $^)
-
 clean:
 # avoid old build problems by removing potentially incorrect old files
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
-	rm -f *.msi
-	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . -name '*.[oda]' -type f -exec rm -f {} +
+	find . -name '*.l[oa]' -type f -exec rm -f {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
 	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
-	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
 	@# May not be present in GENERATED_HEADERS
 	rm -f trace/generated-tracers-dtrace.dtrace*
 	rm -f trace/generated-tracers-dtrace.h*
@@ -365,9 +266,9 @@ qemu-%.tar.bz2:
 	$(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)"

 distclean: clean
-	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi qemu-monitor-info.texi
-	rm -f config-all-devices.mak config-all-disas.mak config.status
-	rm -f po/*.mo tests/qemu-iotests/common.env
+	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
+	rm -f config-all-devices.mak config-all-disas.mak
+	rm -f po/*.mo
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
 	rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
 	rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
@@ -380,8 +281,8 @@ distclean: clean
 	rm -rf $$d || exit 1 ; \
        done
 	rm -Rf .sdk
-	if test -f pixman/config.log; then $(MAKE) -C pixman distclean; fi
-	if test -f dtc/version_gen.h; then $(MAKE) $(DTC_MAKE_ARGS) clean; fi
+	if test -f pixman/config.log; then make -C pixman distclean; fi
+	if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi

 KEYMAPS=da     en-gb  et  fr     fr-ch  is  lt  modifiers  no  pt-br  sv \
 ar      de     en-us  fi  fr-be  hr     it  lv  nl         pl  ru     th \
@@ -389,10 +290,10 @@ common  de-ch  es     fo  fr-ca  hu     ja  mk  nl-be      pt  sl     tr \
 bepo    cz

 ifdef INSTALL_BLOBS
-BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
-vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
-acpi-dsdt.aml \
-ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
+BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
+vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
+acpi-dsdt.aml q35-acpi-dsdt.aml \
+ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \
 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
@@ -400,10 +301,10 @@ efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
 qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin kvmvapic.bin \
+s390-zipl.rom \
 s390-ccw.img \
 spapr-rtas.bin slof.bin \
-palcode-clipper \
-u-boot.e500
+palcode-clipper
 else
 BLOBS=
 endif
@@ -420,9 +321,6 @@ ifneq ($(TOOLS),)
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
 	$(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
 endif
-ifneq (,$(findstring qemu-ga,$(TOOLS)))
-	$(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
-endif
 endif
 ifdef CONFIG_VIRTFS
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
@@ -439,22 +337,21 @@ ifneq (,$(findstring qemu-ga,$(TOOLS)))
 endif
 endif

+install-confdir:
+	$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"

-install: all $(if $(BUILD_DOCS),install-doc) \
+install-sysconfig: install-datadir install-confdir
+	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
+
+install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
 install-datadir install-localstatedir
+	$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
 ifneq ($(TOOLS),)
-	$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
-endif
-ifneq ($(CONFIG_MODULES),)
-	$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
-	for s in $(modules-m:.mo=$(DSOSUF)); do \
-		t="$(DESTDIR)$(qemu_moddir)/$$(echo $$s | tr / -)"; \
-		$(INSTALL_LIB) $$s "$$t"; \
-		test -z "$(STRIP)" || $(STRIP) "$$t"; \
-	done
+	$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
 endif
 ifneq ($(HELPERS-y),)
-	$(call install-prog,$(HELPERS-y),$(DESTDIR)$(libexecdir))
+	$(INSTALL_DIR) "$(DESTDIR)$(libexecdir)"
+	$(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)"
 endif
 ifneq ($(BLOBS),)
 	set -e; for x in $(BLOBS); do \
@@ -468,45 +365,23 @@ endif
 	set -e; for x in $(KEYMAPS); do \
 		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
 	done
-	$(INSTALL_DATA) $(SRC_PATH)/trace-events "$(DESTDIR)$(qemu_datadir)/trace-events"
 	for d in $(TARGET_DIRS); do \
-	$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
+	$(MAKE) -C $$d $@ || exit 1 ; \
        done

 # various test targets
 test speed: all
 	$(MAKE) -C tests/tcg $@

-.PHONY: ctags
-ctags:
-	rm -f $@
-	find "$(SRC_PATH)" -name '*.[hc]' -exec ctags --append {} +
-
 .PHONY: TAGS
 TAGS:
 	rm -f $@
 	find "$(SRC_PATH)" -name '*.[hc]' -exec etags --append {} +

 cscope:
-	rm -f "$(SRC_PATH)"/cscope.*
-	find "$(SRC_PATH)/" -name "*.[chsS]" -print | sed 's,^\./,,' > "$(SRC_PATH)/cscope.files"
-	cscope -b -i"$(SRC_PATH)/cscope.files"
-
-# opengl shader programs
-ui/shader/%-vert.h: $(SRC_PATH)/ui/shader/%.vert $(SRC_PATH)/scripts/shaderinclude.pl
-	@mkdir -p $(dir $@)
-	$(call quiet-command,\
-		perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
-		"  VERT  $@")
-
-ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclude.pl
-	@mkdir -p $(dir $@)
-	$(call quiet-command,\
-		perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
-		"  FRAG  $@")
-
-ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
-	ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h
+	rm -f ./cscope.*
+	find "$(SRC_PATH)" -name "*.[chsS]" -print | sed 's,^\./,,' > ./cscope.files
+	cscope -b

 # documentation
 MAKEINFO=makeinfo
@@ -531,16 +406,13 @@ qemu-options.texi: $(SRC_PATH)/qemu-options.hx
 qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")

-qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx
-	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")
-
 qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@,"  GEN   $@")

 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")

-qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
+qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
 	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
@@ -564,18 +436,6 @@ qemu-nbd.8: qemu-nbd.texi
 	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
 	  "  GEN   $@")

-qemu-ga.8: qemu-ga.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
-	  $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
-	  "  GEN   $@")
-
-kvm_stat.1: scripts/kvm/kvm_stat.texi
-	$(call quiet-command, \
-	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< kvm_stat.pod && \
-	  $(POD2MAN) --section=1 --center=" " --release=" " kvm_stat.pod > $@, \
-	  "  GEN   $@")
-
 dvi: qemu-doc.dvi qemu-tech.dvi
 html: qemu-doc.html qemu-tech.html
 info: qemu-doc.info qemu-tech.info
@@ -583,8 +443,7 @@ pdf: qemu-doc.pdf qemu-tech.pdf

 qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi \
-	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
-	qemu-monitor-info.texi
+	qemu-monitor.texi qemu-img-cmds.texi

 ifdef CONFIG_WIN32

@@ -609,7 +468,7 @@ installer: $(INSTALLER)
 INSTDIR=/tmp/qemu-nsis

 $(INSTALLER): $(SRC_PATH)/qemu.nsi
-	$(MAKE) install prefix=${INSTDIR}
+	make install prefix=${INSTDIR}
 ifdef SIGNCODE
 	(cd ${INSTDIR}; \
         for i in *.exe; do \
@@ -634,7 +493,6 @@ endif # SIGNCODE
                $(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
                -DSRCDIR="$(SRC_PATH)" \
                -DOUTFILE="$(INSTALLER)" \
-                -DDISPLAYVERSION="$(VERSION)" \
                $(SRC_PATH)/qemu.nsi
 	rm -r ${INSTDIR}
 ifdef SIGNCODE
@@ -644,7 +502,7 @@ endif # CONFIG_WIN

 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
-ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 Makefile: $(GENERATED_HEADERS)
 endif

--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,38 +1,39 @@
 #######################################################################
 # Common libraries for tools and emulators
 stub-obj-y = stubs/
-util-obj-y = util/ qobject/ qapi/
-util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o
+util-obj-y = util/ qobject/ qapi/ trace/

 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

 block-obj-y = async.o thread-pool.o
-block-obj-y += nbd/
-block-obj-y += block.o blockjob.o
+block-obj-y += nbd.o block.o blockjob.o
 block-obj-y += main-loop.o iohandler.o qemu-timer.o
 block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
+block-obj-y += qapi-types.o qapi-visit.o
 block-obj-y += qemu-io-cmds.o

-block-obj-m = block/
+block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+block-obj-y += qemu-coroutine-sleep.o
+block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o

-#######################################################################
-# crypto-obj-y is code used by both qemu system emulation and qemu-img
+ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
+# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
+# only pull in the actual virtio-9p device if we also enabled virtio.
+CONFIG_REALLY_VIRTFS=y
+endif

-crypto-obj-y = crypto/
-crypto-aes-obj-y = crypto/
+######################################################################
+# smartcard

-#######################################################################
-# qom-obj-y is code used by both qemu system emulation and qemu-img
-
-qom-obj-y = qom/
-
-#######################################################################
-# io-obj-y is code used by both qemu system emulation and qemu-img
-
-io-obj-y = io/
+libcacard-y += libcacard/cac.o libcacard/event.o
+libcacard-y += libcacard/vcard.o libcacard/vreader.o
+libcacard-y += libcacard/vcard_emul_nss.o
+libcacard-y += libcacard/vcard_emul_type.o
+libcacard-y += libcacard/card_7816.o
+libcacard-y += libcacard/vcardt.o

 ######################################################################
 # Target independent part of system emulation. The long term path is to
@@ -40,35 +41,33 @@ io-obj-y = io/
 # single QEMU executable should support all CPUs and machines.

 ifeq ($(CONFIG_SOFTMMU),y)
-common-obj-y = blockdev.o blockdev-nbd.o block/
-common-obj-y += iothread.o
+common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
 common-obj-y += net/
+common-obj-y += readline.o
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o

 common-obj-$(CONFIG_LINUX) += fsdev/

-common-obj-y += migration/
+common-obj-y += migration.o migration-tcp.o
+common-obj-$(CONFIG_RDMA) += migration-rdma.o
 common-obj-y += qemu-char.o #aio.o
-common-obj-y += page_cache.o
-common-obj-y += qjson.o
+common-obj-y += block-migration.o
+common-obj-y += page_cache.o xbzrle.o
+
+common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

 common-obj-y += audio/
 common-obj-y += hw/
-common-obj-y += accel.o
-
-common-obj-y += replay/

 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
-bt-host.o-cflags := $(BLUEZ_CFLAGS)

 common-obj-y += dma-helpers.o
 common-obj-y += vl.o
-vl.o-cflags := $(GPROF_CFLAGS) $(SDL_CFLAGS)
 common-obj-y += tpm.o

 common-obj-$(CONFIG_SLIRP) += slirp/
@@ -77,18 +76,23 @@ common-obj-y += backends/

 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o

-common-obj-$(CONFIG_FDT) += device_tree.o
+common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)

 ######################################################################
 # qapi

 common-obj-y += qmp-marshal.o
-common-obj-y += qmp-introspect.o
 common-obj-y += qmp.o hmp.o
 endif

+######################################################################
+# some qapi visitors are used by both system and user emulation:
+
+common-obj-y += qapi-visit.o qapi-types.o
+
 #######################################################################
 # Target-independent parts used in system and user emulation
+common-obj-y += qemu-log.o
 common-obj-y += tcg-runtime.o
 common-obj-y += hw/
 common-obj-y += qom/
@@ -99,20 +103,25 @@ common-obj-y += disas/
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
 version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo

-######################################################################
-# tracing
-util-obj-y +=  trace/
-target-obj-y += trace/
-
 ######################################################################
 # guest agent

 # FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
 # by libqemuutil.a.  These should be moved to a separate .json schema.
-qga-obj-y = qga/
+qga-obj-y = qga/ qapi-types.o qapi-visit.o
 qga-vss-dll-obj-y = qga/

-######################################################################
-# contrib
-ivshmem-client-obj-y = contrib/ivshmem-client/
-ivshmem-server-obj-y = contrib/ivshmem-server/
+vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
+
+vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
+
+QEMU_CFLAGS+=$(GLIB_CFLAGS)
+
+nested-vars += \
+	stub-obj-y \
+	util-obj-y \
+	qga-obj-y \
+	qga-vss-dll-obj-y \
+	block-obj-y \
+	common-obj-y
+dummy := $(call unnest-vars)
--- a/Makefile.target
+++ b/Makefile.target
@@ -1,13 +1,11 @@
 # -*- Mode: makefile -*-

-BUILD_DIR?=$(CURDIR)/..
-
 include ../config-host.mak
 include config-target.mak
 include config-devices.mak
 include $(SRC_PATH)/rules.mak

-$(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
+$(call set-vpath, $(SRC_PATH))
 ifdef CONFIG_LINUX
 QEMU_CFLAGS += -I../linux-headers
 endif
@@ -18,29 +16,26 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/include
 ifdef CONFIG_USER_ONLY
 # user emulator name
 QEMU_PROG=qemu-$(TARGET_NAME)
-QEMU_PROG_BUILD = $(QEMU_PROG)
 else
 # system emulator name
-QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
 ifneq (,$(findstring -mwindows,$(libs_softmmu)))
 # Terminate program name with a 'w' because the linker builds a windows executable.
 QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF)
-$(QEMU_PROG): $(QEMU_PROGW)
-	$(call quiet-command,$(OBJCOPY) --subsystem console $(QEMU_PROGW) $(QEMU_PROG),"  GEN   $(TARGET_DIR)$(QEMU_PROG)")
-QEMU_PROG_BUILD = $(QEMU_PROGW)
-else
-QEMU_PROG_BUILD = $(QEMU_PROG)
-endif
+endif # windows executable
+QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
 endif

-PROGS=$(QEMU_PROG) $(QEMU_PROGW)
+PROGS=$(QEMU_PROG)
+ifdef QEMU_PROGW
+PROGS+=$(QEMU_PROGW)
+endif
 STPFILES=

 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

 ifdef CONFIG_TRACE_SYSTEMTAP
-stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp $(QEMU_PROG)-simpletrace.stp
+stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp

 ifdef CONFIG_USER_ONLY
 TARGET_TYPE=user
@@ -51,7 +46,7 @@ endif
 $(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
 	$(call quiet-command,$(TRACETOOL) \
 		--format=stap \
-		--backends=$(TRACE_BACKENDS) \
+		--backend=$(TRACE_BACKEND) \
 		--binary=$(bindir)/$(QEMU_PROG) \
 		--target-name=$(TARGET_NAME) \
 		--target-type=$(TARGET_TYPE) \
@@ -60,19 +55,12 @@ $(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
 $(QEMU_PROG).stp: $(SRC_PATH)/trace-events
 	$(call quiet-command,$(TRACETOOL) \
 		--format=stap \
-		--backends=$(TRACE_BACKENDS) \
+		--backend=$(TRACE_BACKEND) \
 		--binary=$(realpath .)/$(QEMU_PROG) \
 		--target-name=$(TARGET_NAME) \
 		--target-type=$(TARGET_TYPE) \
 		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG).stp")

-$(QEMU_PROG)-simpletrace.stp: $(SRC_PATH)/trace-events
-	$(call quiet-command,$(TRACETOOL) \
-		--format=simpletrace-stap \
-		--backends=$(TRACE_BACKENDS) \
-		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
-		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG)-simpletrace.stp")
-
 else
 stap:
 endif
@@ -85,11 +73,8 @@ all: $(PROGS) stap
 #########################################################
 # cpu emulator library
 obj-y = exec.o translate-all.o cpu-exec.o
-obj-y += translate-common.o
-obj-y += cpu-exec-common.o
-obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
+obj-y += tcg/tcg.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tci.o
-obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target-$(TARGET_BASE_ARCH)/
@@ -97,12 +82,6 @@ obj-y += disas.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal32.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal64.o
-obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal128.o
-
 #########################################################
 # Linux user emulator target

@@ -120,8 +99,7 @@ endif #CONFIG_LINUX_USER

 ifdef CONFIG_BSD_USER

-QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR) \
-			 -I$(SRC_PATH)/bsd-user/$(HOST_VARIANT_DIR)
+QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR)

 obj-y += bsd-user/
 obj-y += gdbstub.o user-exec.o
@@ -131,21 +109,19 @@ endif #CONFIG_BSD_USER
 #########################################################
 # System emulator target
 ifdef CONFIG_SOFTMMU
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
-obj-y += qtest.o bootdevice.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += qtest.o
 obj-y += hw/
+obj-$(CONFIG_FDT) += device_tree.o
 obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o cputlb.o
+obj-y += memory.o savevm.o cputlb.o
 obj-y += memory_mapping.o
 obj-y += dump.o
-obj-y += migration/ram.o migration/savevm.o
-LIBS := $(libs_softmmu) $(LIBS)
+LIBS+=$(libs_softmmu)

 # xen support
-obj-$(CONFIG_XEN) += xen-common.o
-obj-$(CONFIG_XEN_I386) += xen-hvm.o xen-mapcache.o
-obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
-obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
+obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
+obj-$(call lnot,$(CONFIG_XEN)) += xen-stub.o

 # Hardware support
 ifeq ($(TARGET_NAME), sparc64)
@@ -154,48 +130,36 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h
+main.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
+
+GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h

 endif # CONFIG_SOFTMMU

 # Workaround for http://gcc.gnu.org/PR55489, see configure.
 %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)

-dummy := $(call unnest-vars,,obj-y)
-all-obj-y := $(obj-y)
+nested-vars += obj-y

-target-obj-y :=
-block-obj-y :=
-common-obj-y :=
+# This resolves all nested paths, so it must come last
 include $(SRC_PATH)/Makefile.objs
-dummy := $(call unnest-vars,,target-obj-y)
-target-obj-y-save := $(target-obj-y)
-dummy := $(call unnest-vars,.., \
-               block-obj-y \
-               block-obj-m \
-               crypto-obj-y \
-               crypto-aes-obj-y \
-               qom-obj-y \
-               io-obj-y \
-               common-obj-y \
-               common-obj-m)
-target-obj-y := $(target-obj-y-save)
-all-obj-y += $(common-obj-y)
-all-obj-y += $(target-obj-y)
-all-obj-y += $(qom-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
-all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

-$(QEMU_PROG_BUILD): config-devices.mak
+all-obj-y = $(obj-y)
+all-obj-y += $(addprefix ../, $(common-obj-y))

-# build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
-	$(call LINK, $(filter-out %.mak, $^))
-ifdef CONFIG_DARWIN
-	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"  REZ   $(TARGET_DIR)$@")
-	$(call quiet-command,SetFile -a C $@,"  SETFILE $(TARGET_DIR)$@")
+ifndef CONFIG_HAIKU
+LIBS+=-lm
+endif
+
+ifdef QEMU_PROGW
+# The linker builds a windows executable. Make also a console executable.
+$(QEMU_PROGW): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
+	$(call LINK,$^)
+$(QEMU_PROG): $(QEMU_PROGW)
+	$(call quiet-command,$(OBJCOPY) --subsystem console $(QEMU_PROGW) $(QEMU_PROG),"  GEN   $(TARGET_DIR)$(QEMU_PROG)")
+else
+$(QEMU_PROG): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
+	$(call LINK,$^)
 endif

 gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
@@ -204,9 +168,6 @@ gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 hmp-commands.h: $(SRC_PATH)/hmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")

-hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx
-	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
-
 qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")

@@ -220,12 +181,14 @@ endif

 install: all
 ifneq ($(PROGS),)
-	$(call install-prog,$(PROGS),$(DESTDIR)$(bindir))
+	$(INSTALL) -m 755 $(PROGS) "$(DESTDIR)$(bindir)"
+ifneq ($(STRIP),)
+	$(STRIP) $(patsubst %,"$(DESTDIR)$(bindir)/%",$(PROGS))
+endif
 endif
 ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
 	$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
-	$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
 endif

 GENERATED_HEADERS += config-target.h
--- a/108
+++ b/108
@@ -1,107 +1,3 @@
-         QEMU README
-         ===========
+Read the documentation in qemu-doc.html or on http://wiki.qemu-project.org

-QEMU is a generic and open source machine & userspace emulator and
-virtualizer.
-
-QEMU is capable of emulating a complete machine in software without any
-need for hardware virtualization support. By using dynamic translation,
-it achieves very good performance. QEMU can also integrate with the Xen
-and KVM hypervisors to provide emulated hardware while allowing the
-hypervisor to manage the CPU. With hypervisor support, QEMU can achieve
-near native performance for CPUs. When QEMU emulates CPUs directly it is
-capable of running operating systems made for one machine (e.g. an ARMv7
-board) on a different machine (e.g. an x86_64 PC board).
-
-QEMU is also capable of providing userspace API virtualization for Linux
-and BSD kernel interfaces. This allows binaries compiled against one
-architecture ABI (e.g. the Linux PPC64 ABI) to be run on a host using a
-different architecture ABI (e.g. the Linux x86_64 ABI). This does not
-involve any hardware emulation, simply CPU and syscall emulation.
-
-QEMU aims to fit into a variety of use cases. It can be invoked directly
-by users wishing to have full control over its behaviour and settings.
-It also aims to facilitate integration into higher level management
-layers, by providing a stable command line interface and monitor API.
-It is commonly invoked indirectly via the libvirt library when using
-open source applications such as oVirt, OpenStack and virt-manager.
-
-QEMU as a whole is released under the GNU General Public License,
-version 2. For full licensing details, consult the LICENSE file.
-
-
-Building
-========
-
-QEMU is multi-platform software intended to be buildable on all modern
-Linux platforms, OS-X, Win32 (via the Mingw64 toolchain) and a variety
-of other UNIX targets. The simple steps to build QEMU are:
-
-  mkdir build
-  cd build
-  ../configure
-  make
-
-Complete details of the process for building and configuring QEMU for
-all supported host platforms can be found in the qemu-tech.html file.
-Additional information can also be found online via the QEMU website:
-
-  http://qemu-project.org/Hosts/Linux
-  http://qemu-project.org/Hosts/W32
-
-
-Submitting patches
-==================
-
-The QEMU source code is maintained under the GIT version control system.
-
-   git clone git://git.qemu-project.org/qemu.git
-
-When submitting patches, the preferred approach is to use 'git
-format-patch' and/or 'git send-email' to format & send the mail to the
-qemu-devel@nongnu.org mailing list. All patches submitted must contain
-a 'Signed-off-by' line from the author. Patches should follow the
-guidelines set out in the HACKING and CODING_STYLE files.
-
-Additional information on submitting patches can be found online via
-the QEMU website
-
-  http://qemu-project.org/Contribute/SubmitAPatch
-  http://qemu-project.org/Contribute/TrivialPatches
-
-
-Bug reporting
-=============
-
-The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs
-found when running code built from QEMU git or upstream released sources
-should be reported via:
-
-  https://bugs.launchpad.net/qemu/
-
-If using QEMU via an operating system vendor pre-built binary package, it
-is preferable to report bugs to the vendor's own bug tracker first. If
-the bug is also known to affect latest upstream code, it can also be
-reported via launchpad.
-
-For additional information on bug reporting consult:
-
-  http://qemu-project.org/Contribute/ReportABug
-
-
-Contact
-=======
-
-The QEMU community can be contacted in a number of ways, with the two
-main methods being email and IRC
-
- - qemu-devel@nongnu.org
-   http://lists.nongnu.org/mailman/listinfo/qemu-devel
- - #qemu on irc.oftc.net
-
-Information on additional methods of contacting the community can be
-found online via the QEMU website:
-
-  http://qemu-project.org/Contribute/StartHere
-
-- End
+- QEMU team
--- a/2
+++ b/2
@@ -1 +1 @@
-2.5.50
+1.7.2
--- a/accel.c
+++ b/accel.c
@@ -1,158 +0,0 @@
-/*
- * QEMU System Emulator, accelerator interfaces
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "sysemu/accel.h"
-#include "hw/boards.h"
-#include "qemu-common.h"
-#include "sysemu/arch_init.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/kvm.h"
-#include "sysemu/qtest.h"
-#include "hw/xen/xen.h"
-#include "qom/object.h"
-#include "hw/boards.h"
-
-int tcg_tb_size;
-static bool tcg_allowed = true;
-
-static int tcg_init(MachineState *ms)
-{
-    tcg_exec_init(tcg_tb_size * 1024 * 1024);
-    return 0;
-}
-
-static const TypeInfo accel_type = {
-    .name = TYPE_ACCEL,
-    .parent = TYPE_OBJECT,
-    .class_size = sizeof(AccelClass),
-    .instance_size = sizeof(AccelState),
-};
-
-/* Lookup AccelClass from opt_name. Returns NULL if not found */
-static AccelClass *accel_find(const char *opt_name)
-{
-    char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name);
-    AccelClass *ac = ACCEL_CLASS(object_class_by_name(class_name));
-    g_free(class_name);
-    return ac;
-}
-
-static int accel_init_machine(AccelClass *acc, MachineState *ms)
-{
-    ObjectClass *oc = OBJECT_CLASS(acc);
-    const char *cname = object_class_get_name(oc);
-    AccelState *accel = ACCEL(object_new(cname));
-    int ret;
-    ms->accelerator = accel;
-    *(acc->allowed) = true;
-    ret = acc->init_machine(ms);
-    if (ret < 0) {
-        ms->accelerator = NULL;
-        *(acc->allowed) = false;
-        object_unref(OBJECT(accel));
-    }
-    return ret;
-}
-
-int configure_accelerator(MachineState *ms)
-{
-    const char *p;
-    char buf[10];
-    int ret;
-    bool accel_initialised = false;
-    bool init_failed = false;
-    AccelClass *acc = NULL;
-
-    p = qemu_opt_get(qemu_get_machine_opts(), "accel");
-    if (p == NULL) {
-        /* Use the default "accelerator", tcg */
-        p = "tcg";
-    }
-
-    while (!accel_initialised && *p != '\0') {
-        if (*p == ':') {
-            p++;
-        }
-        p = get_opt_name(buf, sizeof(buf), p, ':');
-        acc = accel_find(buf);
-        if (!acc) {
-            fprintf(stderr, "\"%s\" accelerator not found.\n", buf);
-            continue;
-        }
-        if (acc->available && !acc->available()) {
-            printf("%s not supported for this target\n",
-                   acc->name);
-            continue;
-        }
-        ret = accel_init_machine(acc, ms);
-        if (ret < 0) {
-            init_failed = true;
-            fprintf(stderr, "failed to initialize %s: %s\n",
-                    acc->name,
-                    strerror(-ret));
-        } else {
-            accel_initialised = true;
-        }
-    }
-
-    if (!accel_initialised) {
-        if (!init_failed) {
-            fprintf(stderr, "No accelerator found!\n");
-        }
-        exit(1);
-    }
-
-    if (init_failed) {
-        fprintf(stderr, "Back to %s accelerator.\n", acc->name);
-    }
-
-    return !accel_initialised;
-}
-
-
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
-{
-    AccelClass *ac = ACCEL_CLASS(oc);
-    ac->name = "tcg";
-    ac->init_machine = tcg_init;
-    ac->allowed = &tcg_allowed;
-}
-
-#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg")
-
-static const TypeInfo tcg_accel_type = {
-    .name = TYPE_TCG_ACCEL,
-    .parent = TYPE_ACCEL,
-    .class_init = tcg_accel_class_init,
-};
-
-static void register_accel_types(void)
-{
-    type_register_static(&accel_type);
-    type_register_static(&tcg_accel_type);
-}
-
-type_init(register_accel_types);
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -13,14 +13,10 @@
 * GNU GPL, version 2 or (at your option) any later version.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
-#ifdef CONFIG_EPOLL
-#include <sys/epoll.h>
-#endif

 struct AioHandler
 {
@@ -28,167 +24,11 @@ struct AioHandler
    IOHandler *io_read;
    IOHandler *io_write;
    int deleted;
+    int pollfds_idx;
    void *opaque;
-    bool is_external;
    QLIST_ENTRY(AioHandler) node;
 };

-#ifdef CONFIG_EPOLL
-
-/* The fd number threashold to switch to epoll */
-#define EPOLL_ENABLE_THRESHOLD 64
-
-static void aio_epoll_disable(AioContext *ctx)
-{
-    ctx->epoll_available = false;
-    if (!ctx->epoll_enabled) {
-        return;
-    }
-    ctx->epoll_enabled = false;
-    close(ctx->epollfd);
-}
-
-static inline int epoll_events_from_pfd(int pfd_events)
-{
-    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
-           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
-           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
-           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
-}
-
-static bool aio_epoll_try_enable(AioContext *ctx)
-{
-    AioHandler *node;
-    struct epoll_event event;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        int r;
-        if (node->deleted || !node->pfd.events) {
-            continue;
-        }
-        event.events = epoll_events_from_pfd(node->pfd.events);
-        event.data.ptr = node;
-        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
-        if (r) {
-            return false;
-        }
-    }
-    ctx->epoll_enabled = true;
-    return true;
-}
-
-static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
-{
-    struct epoll_event event;
-    int r;
-
-    if (!ctx->epoll_enabled) {
-        return;
-    }
-    if (!node->pfd.events) {
-        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
-        if (r) {
-            aio_epoll_disable(ctx);
-        }
-    } else {
-        event.data.ptr = node;
-        event.events = epoll_events_from_pfd(node->pfd.events);
-        if (is_new) {
-            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
-            if (r) {
-                aio_epoll_disable(ctx);
-            }
-        } else {
-            r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
-            if (r) {
-                aio_epoll_disable(ctx);
-            }
-        }
-    }
-}
-
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
-                     unsigned npfd, int64_t timeout)
-{
-    AioHandler *node;
-    int i, ret = 0;
-    struct epoll_event events[128];
-
-    assert(npfd == 1);
-    assert(pfds[0].fd == ctx->epollfd);
-    if (timeout > 0) {
-        ret = qemu_poll_ns(pfds, npfd, timeout);
-    }
-    if (timeout <= 0 || ret > 0) {
-        ret = epoll_wait(ctx->epollfd, events,
-                         sizeof(events) / sizeof(events[0]),
-                         timeout);
-        if (ret <= 0) {
-            goto out;
-        }
-        for (i = 0; i < ret; i++) {
-            int ev = events[i].events;
-            node = events[i].data.ptr;
-            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
-                (ev & EPOLLOUT ? G_IO_OUT : 0) |
-                (ev & EPOLLHUP ? G_IO_HUP : 0) |
-                (ev & EPOLLERR ? G_IO_ERR : 0);
-        }
-    }
-out:
-    return ret;
-}
-
-static bool aio_epoll_enabled(AioContext *ctx)
-{
-    /* Fall back to ppoll when external clients are disabled. */
-    return !aio_external_disabled(ctx) && ctx->epoll_enabled;
-}
-
-static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
-                                 unsigned npfd, int64_t timeout)
-{
-    if (!ctx->epoll_available) {
-        return false;
-    }
-    if (aio_epoll_enabled(ctx)) {
-        return true;
-    }
-    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
-        if (aio_epoll_try_enable(ctx)) {
-            return true;
-        } else {
-            aio_epoll_disable(ctx);
-        }
-    }
-    return false;
-}
-
-#else
-
-static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
-{
-}
-
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
-                     unsigned npfd, int64_t timeout)
-{
-    assert(false);
-}
-
-static bool aio_epoll_enabled(AioContext *ctx)
-{
-    return false;
-}
-
-static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
-                          unsigned npfd, int64_t timeout)
-{
-    return false;
-}
-
-#endif
-
 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 {
    AioHandler *node;
@@ -204,14 +44,11 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)

 void aio_set_fd_handler(AioContext *ctx,
                        int fd,
-                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque)
 {
    AioHandler *node;
-    bool is_new = false;
-    bool deleted = false;

    node = find_aio_handler(ctx, fd);

@@ -230,48 +67,37 @@ void aio_set_fd_handler(AioContext *ctx,
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
-                deleted = true;
+                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
+            node = g_malloc0(sizeof(AioHandler));
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
-            is_new = true;
        }
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->opaque = opaque;
-        node->is_external = is_external;
+        node->pollfds_idx = -1;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
    }

-    aio_epoll_update(ctx, node, is_new);
    aio_notify(ctx);
-    if (deleted) {
-        g_free(node);
-    }
 }

 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
-                            bool is_external,
                            EventNotifierHandler *io_read)
 {
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       is_external, (IOHandler *)io_read, NULL, notifier);
-}
-
-bool aio_prepare(AioContext *ctx)
-{
-    return false;
+                       (IOHandler *)io_read, NULL, notifier);
 }

 bool aio_pending(AioContext *ctx)
@@ -293,22 +119,13 @@ bool aio_pending(AioContext *ctx)
    return false;
 }

-bool aio_dispatch(AioContext *ctx)
+static bool aio_dispatch(AioContext *ctx)
 {
    AioHandler *node;
    bool progress = false;

    /*
-     * If there are callbacks left that have been queued, we need to call them.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for aio_poll loops).
-     */
-    if (aio_bh_poll(ctx)) {
-        progress = true;
-    }
-
-    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
@@ -355,141 +172,76 @@ bool aio_dispatch(AioContext *ctx)
    return progress;
 }

-/* These thread-local variables are used only in a small part of aio_poll
- * around the call to the poll() system call.  In particular they are not
- * used while aio_poll is performing callbacks, which makes it much easier
- * to think about reentrancy!
- *
- * Stack-allocated arrays would be perfect but they have size limitations;
- * heap allocation is expensive enough that we want to reuse arrays across
- * calls to aio_poll().  And because poll() has to be called without holding
- * any lock, the arrays cannot be stored in AioContext.  Thread-local data
- * has none of the disadvantages of these three options.
- */
-static __thread GPollFD *pollfds;
-static __thread AioHandler **nodes;
-static __thread unsigned npfd, nalloc;
-static __thread Notifier pollfds_cleanup_notifier;
-
-static void pollfds_cleanup(Notifier *n, void *unused)
-{
-    g_assert(npfd == 0);
-    g_free(pollfds);
-    g_free(nodes);
-    nalloc = 0;
-}
-
-static void add_pollfd(AioHandler *node)
-{
-    if (npfd == nalloc) {
-        if (nalloc == 0) {
-            pollfds_cleanup_notifier.notify = pollfds_cleanup;
-            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
-            nalloc = 8;
-        } else {
-            g_assert(nalloc <= INT_MAX);
-            nalloc *= 2;
-        }
-        pollfds = g_renew(GPollFD, pollfds, nalloc);
-        nodes = g_renew(AioHandler *, nodes, nalloc);
-    }
-    nodes[npfd] = node;
-    pollfds[npfd] = (GPollFD) {
-        .fd = node->pfd.fd,
-        .events = node->pfd.events,
-    };
-    npfd++;
-}
-
 bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    int i, ret;
+    int ret;
    bool progress;
-    int64_t timeout;

-    aio_context_acquire(ctx);
    progress = false;

-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns,
-     * so disable the optimization now.
+    /*
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
     */
-    if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    if (aio_dispatch(ctx)) {
+        progress = true;
+    }
+
+    if (progress && !blocking) {
+        return true;
    }

    ctx->walking_handlers++;

-    assert(npfd == 0);
+    g_array_set_size(ctx->pollfds, 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->pfd.events
-            && !aio_epoll_enabled(ctx)
-            && aio_node_check(ctx, node->is_external)) {
-            add_pollfd(node);
+        node->pollfds_idx = -1;
+        if (!node->deleted && node->pfd.events) {
+            GPollFD pfd = {
+                .fd = node->pfd.fd,
+                .events = node->pfd.events,
+            };
+            node->pollfds_idx = ctx->pollfds->len;
+            g_array_append_val(ctx->pollfds, pfd);
        }
    }

-    timeout = blocking ? aio_compute_timeout(ctx) : 0;
+    ctx->walking_handlers--;
+
+    /* early return if we only have the aio_notify() fd */
+    if (ctx->pollfds->len == 1) {
+        return progress;
+    }

    /* wait until next event */
-    if (timeout) {
-        aio_context_release(ctx);
-    }
-    if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
-        AioHandler epoll_handler;
-
-        epoll_handler.pfd.fd = ctx->epollfd;
-        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
-        npfd = 0;
-        add_pollfd(&epoll_handler);
-        ret = aio_epoll(ctx, pollfds, npfd, timeout);
-    } else  {
-        ret = qemu_poll_ns(pollfds, npfd, timeout);
-    }
-    if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
-    }
-    if (timeout) {
-        aio_context_acquire(ctx);
-    }
-
-    aio_notify_accept(ctx);
+    ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
+                         ctx->pollfds->len,
+                         blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
-        for (i = 0; i < npfd; i++) {
-            nodes[i]->pfd.revents = pollfds[i].revents;
+        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+            if (node->pollfds_idx != -1) {
+                GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
+                                              node->pollfds_idx);
+                node->pfd.revents = pfd->revents;
+            }
        }
    }

-    npfd = 0;
-    ctx->walking_handlers--;
-
    /* Run dispatch even if there were no readable fds to run timers */
    if (aio_dispatch(ctx)) {
        progress = true;
    }

-    aio_context_release(ctx);
-
    return progress;
 }
-
-void aio_context_setup(AioContext *ctx, Error **errp)
-{
-#ifdef CONFIG_EPOLL
-    assert(!ctx->epollfd);
-    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
-    if (ctx->epollfd == -1) {
-        ctx->epoll_available = false;
-    } else {
-        ctx->epoll_available = true;
-    }
-#endif
-}
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -15,7 +15,6 @@
 * GNU GPL, version 2 or (at your option) any later version.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block.h"
 #include "qemu/queue.h"
@@ -23,86 +22,14 @@

 struct AioHandler {
    EventNotifier *e;
-    IOHandler *io_read;
-    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
-    void *opaque;
-    bool is_external;
    QLIST_ENTRY(AioHandler) node;
 };

-void aio_set_fd_handler(AioContext *ctx,
-                        int fd,
-                        bool is_external,
-                        IOHandler *io_read,
-                        IOHandler *io_write,
-                        void *opaque)
-{
-    /* fd is a SOCKET in our case */
-    AioHandler *node;
-
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.fd == fd && !node->deleted) {
-            break;
-        }
-    }
-
-    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            /* If the lock is held, just mark the node as deleted */
-            if (ctx->walking_handlers) {
-                node->deleted = 1;
-                node->pfd.revents = 0;
-            } else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
-        }
-    } else {
-        HANDLE event;
-
-        if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
-            node->pfd.fd = fd;
-            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
-        }
-
-        node->pfd.events = 0;
-        if (node->io_read) {
-            node->pfd.events |= G_IO_IN;
-        }
-        if (node->io_write) {
-            node->pfd.events |= G_IO_OUT;
-        }
-
-        node->e = &ctx->notifier;
-
-        /* Update handler with latest information */
-        node->opaque = opaque;
-        node->io_read = io_read;
-        node->io_write = io_write;
-        node->is_external = is_external;
-
-        event = event_notifier_get_handle(&ctx->notifier);
-        WSAEventSelect(node->pfd.fd, event,
-                       FD_READ | FD_ACCEPT | FD_CLOSE |
-                       FD_CONNECT | FD_WRITE | FD_OOB);
-    }
-
-    aio_notify(ctx);
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
-                            bool is_external,
                            EventNotifierHandler *io_notify)
 {
    AioHandler *node;
@@ -134,11 +61,10 @@ void aio_set_event_notifier(AioContext *ctx,
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
-            node = g_new0(AioHandler, 1);
+            node = g_malloc0(sizeof(AioHandler));
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
-            node->is_external = is_external;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
@@ -150,43 +76,6 @@ void aio_set_event_notifier(AioContext *ctx,
    aio_notify(ctx);
 }

-bool aio_prepare(AioContext *ctx)
-{
-    static struct timeval tv0;
-    AioHandler *node;
-    bool have_select_revents = false;
-    fd_set rfds, wfds;
-
-    /* fill fd sets */
-    FD_ZERO(&rfds);
-    FD_ZERO(&wfds);
-    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->io_read) {
-            FD_SET ((SOCKET)node->pfd.fd, &rfds);
-        }
-        if (node->io_write) {
-            FD_SET ((SOCKET)node->pfd.fd, &wfds);
-        }
-    }
-
-    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-            node->pfd.revents = 0;
-            if (FD_ISSET(node->pfd.fd, &rfds)) {
-                node->pfd.revents |= G_IO_IN;
-                have_select_revents = true;
-            }
-
-            if (FD_ISSET(node->pfd.fd, &wfds)) {
-                node->pfd.revents |= G_IO_OUT;
-                have_select_revents = true;
-            }
-        }
-    }
-
-    return have_select_revents;
-}
-
 bool aio_pending(AioContext *ctx)
 {
    AioHandler *node;
@@ -195,37 +84,47 @@ bool aio_pending(AioContext *ctx)
        if (node->pfd.revents && node->io_notify) {
            return true;
        }
-
-        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
-            return true;
-        }
-        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
-            return true;
-        }
    }

    return false;
 }

-static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
+bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
-    bool progress = false;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool progress;
+    int count;
+    int timeout;
+
+    progress = false;

    /*
-     * We have to walk very carefully in case aio_set_fd_handler is
+     * If there are callbacks left that have been queued, we need to call them.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /* Run timers */
+    progress |= timerlistgroup_run_timers(&ctx->tlg);
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
-        int revents = node->pfd.revents;

        ctx->walking_handlers++;

-        if (!node->deleted &&
-            (revents || event_notifier_get_handle(node->e) == event) &&
-            node->io_notify) {
+        if (node->pfd.revents && node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

@@ -235,28 +134,6 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
            }
        }

-        if (!node->deleted &&
-            (node->io_read || node->io_write)) {
-            node->pfd.revents = 0;
-            if ((revents & G_IO_IN) && node->io_read) {
-                node->io_read(node->opaque);
-                progress = true;
-            }
-            if ((revents & G_IO_OUT) && node->io_write) {
-                node->io_write(node->opaque);
-                progress = true;
-            }
-
-            /* if the next select() will return an event, we have progressed */
-            if (event == event_notifier_get_handle(&ctx->notifier)) {
-                WSANETWORKEVENTS ev;
-                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
-                if (ev.lNetworkEvents) {
-                    progress = true;
-                }
-            }
-        }
-
        tmp = node;
        node = QLIST_NEXT(node, node);

@@ -268,109 +145,84 @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
        }
    }

-    return progress;
-}
-
-bool aio_dispatch(AioContext *ctx)
-{
-    bool progress;
-
-    progress = aio_bh_poll(ctx);
-    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
-    progress |= timerlistgroup_run_timers(&ctx->tlg);
-    return progress;
-}
-
-bool aio_poll(AioContext *ctx, bool blocking)
-{
-    AioHandler *node;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    bool progress, have_select_revents, first;
-    int count;
-    int timeout;
-
-    aio_context_acquire(ctx);
-    progress = false;
-
-    /* aio_notify can avoid the expensive event_notifier_set if
-     * everything (file descriptors, bottom halves, timers) will
-     * be re-evaluated before the next blocking poll().  This is
-     * already true when aio_poll is called with blocking == false;
-     * if blocking == true, it is only true after poll() returns,
-     * so disable the optimization now.
-     */
-    if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+    if (progress && !blocking) {
+        return true;
    }

-    have_select_revents = aio_prepare(ctx);
-
    ctx->walking_handlers++;

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_notify
-            && aio_node_check(ctx, node->is_external)) {
+        if (!node->deleted && node->io_notify) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    ctx->walking_handlers--;
-    first = true;

-    /* ctx->notifier is always registered.  */
-    assert(count > 0);
+    /* early return if we only have the aio_notify() fd */
+    if (count == 1) {
+        return progress;
+    }

-    /* Multiple iterations, all of them non-blocking except the first,
-     * may be necessary to process all pending events.  After the first
-     * WaitForMultipleObjects call ctx->notify_me will be decremented.
-     */
-    do {
-        HANDLE event;
+    /* wait until next event */
+    while (count > 0) {
        int ret;

-        timeout = blocking && !have_select_revents
-            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
-        if (timeout) {
-            aio_context_release(ctx);
-        }
+        timeout = blocking ?
+            qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
-        if (blocking) {
-            assert(first);
-            atomic_sub(&ctx->notify_me, 2);
-        }
-        if (timeout) {
-            aio_context_acquire(ctx);
-        }
-
-        if (first) {
-            aio_notify_accept(ctx);
-            progress |= aio_bh_poll(ctx);
-            first = false;
-        }

        /* if we have any signaled events, dispatch event */
-        event = NULL;
-        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
-            event = events[ret - WAIT_OBJECT_0];
-            events[ret - WAIT_OBJECT_0] = events[--count];
-        } else if (!have_select_revents) {
+        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
            break;
        }

-        have_select_revents = false;
        blocking = false;

-        progress |= aio_dispatch_handlers(ctx, event);
-    } while (count > 0);
+        /* we have to walk very carefully in case
+         * qemu_aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;

-    progress |= timerlistgroup_run_timers(&ctx->tlg);
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
+                node->io_notify) {
+                node->io_notify(node->e);
+
+                /* aio_notify() does not count as progress */
+                if (node->e != &ctx->notifier) {
+                    progress = true;
+                }
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+
+        /* Try again, but only call each handler once.  */
+        events[ret - WAIT_OBJECT_0] = events[--count];
+    }
+
+    if (blocking) {
+        /* Run the timers a second time. We do this because otherwise aio_wait
+         * will not note progress - and will stop a drain early - if we have
+         * a timer that was not ready to run entering g_poll but is ready
+         * after g_poll. This will only do anything if a timer has expired.
+         */
+        progress |= timerlistgroup_run_timers(&ctx->tlg);
+    }

-    aio_context_release(ctx);
    return progress;
 }
-
-void aio_context_setup(AioContext *ctx, Error **errp)
-{
-}
--- a/arch_init.c
+++ b/arch_init.c
--- a/async.c
+++ b/async.c
@@ -22,12 +22,10 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/aio.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"
-#include "qemu/atomic.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -45,12 +43,10 @@ struct QEMUBH {
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
    QEMUBH *bh;
-    bh = g_new(QEMUBH, 1);
-    *bh = (QEMUBH){
-        .ctx = ctx,
-        .cb = cb,
-        .opaque = opaque,
-    };
+    bh = g_malloc0(sizeof(QEMUBH));
+    bh->ctx = ctx;
+    bh->cb = cb;
+    bh->opaque = opaque;
    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
@@ -60,11 +56,6 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
    return bh;
 }

-void aio_bh_call(QEMUBH *bh)
-{
-    bh->cb(bh->opaque);
-}
-
 /* Multiple occurrences of aio_bh_poll cannot be called concurrently */
 int aio_bh_poll(AioContext *ctx)
 {
@@ -78,19 +69,16 @@ int aio_bh_poll(AioContext *ctx)
        /* Make sure that fetching bh happens before accessing its members */
        smp_read_barrier_depends();
        next = bh->next;
-        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
-         * implicit memory barrier ensures that the callback sees all writes
-         * done by the scheduling thread.  It also ensures that the scheduling
-         * thread sees the zero before bh->cb has run, and thus will call
-         * aio_notify again if necessary.
-         */
-        if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
-            /* Idle BHs and the notify BH don't count as progress */
-            if (!bh->idle && bh != ctx->notify_dummy_bh) {
+        if (!bh->deleted && bh->scheduled) {
+            bh->scheduled = 0;
+            /* Paired with write barrier in bh schedule to ensure reading for
+             * idle & callbacks coming after bh's scheduling.
+             */
+            smp_rmb();
+            if (!bh->idle)
                ret = 1;
-            }
            bh->idle = 0;
-            aio_bh_call(bh);
+            bh->cb(bh->opaque);
        }
    }

@@ -117,28 +105,33 @@ int aio_bh_poll(AioContext *ctx)

 void qemu_bh_schedule_idle(QEMUBH *bh)
 {
+    if (bh->scheduled)
+        return;
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
-    atomic_mb_set(&bh->scheduled, 1);
+    smp_wmb();
+    bh->scheduled = 1;
 }

 void qemu_bh_schedule(QEMUBH *bh)
 {
    AioContext *ctx;

+    if (bh->scheduled)
+        return;
    ctx = bh->ctx;
    bh->idle = 0;
-    /* The memory barrier implicit in atomic_xchg makes sure that:
+    /* Make sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
-    if (atomic_xchg(&bh->scheduled, 1) == 0) {
-        aio_notify(ctx);
-    }
+    smp_mb();
+    bh->scheduled = 1;
+    aio_notify(ctx);
 }


@@ -158,50 +151,39 @@ void qemu_bh_delete(QEMUBH *bh)
    bh->deleted = 1;
 }

-int64_t
-aio_compute_timeout(AioContext *ctx)
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
 {
-    int64_t deadline;
-    int timeout = -1;
+    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;
+    int deadline;

+    /* We assume there is no timeout already supplied */
+    *timeout = -1;
    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (!bh->deleted && bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
-                timeout = 10000000;
+                *timeout = 10;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
-                return 0;
+                *timeout = 0;
+                return true;
            }
        }
    }

-    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
+    deadline = qemu_timeout_ns_to_ms(timerlistgroup_deadline_ns(&ctx->tlg));
    if (deadline == 0) {
-        return 0;
-    } else {
-        return qemu_soonest_timeout(timeout, deadline);
-    }
-}
-
-static gboolean
-aio_ctx_prepare(GSource *source, gint    *timeout)
-{
-    AioContext *ctx = (AioContext *) source;
-
-    atomic_or(&ctx->notify_me, 1);
-
-    /* We assume there is no timeout already supplied */
-    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
-
-    if (aio_prepare(ctx)) {
        *timeout = 0;
+        return true;
+    } else {
+        *timeout = qemu_soonest_timeout(*timeout, deadline);
    }

-    return *timeout == 0;
+    return false;
 }

 static gboolean
@@ -210,9 +192,6 @@ aio_ctx_check(GSource *source)
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

-    atomic_and(&ctx->notify_me, ~1);
-    aio_notify_accept(ctx);
-
    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (!bh->deleted && bh->scheduled) {
            return true;
@@ -229,7 +208,7 @@ aio_ctx_dispatch(GSource     *source,
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
-    aio_dispatch(ctx);
+    aio_poll(ctx, false);
    return true;
 }

@@ -238,25 +217,11 @@ aio_ctx_finalize(GSource     *source)
 {
    AioContext *ctx = (AioContext *) source;

-    qemu_bh_delete(ctx->notify_dummy_bh);
    thread_pool_free(ctx->thread_pool);
-
-    qemu_mutex_lock(&ctx->bh_lock);
-    while (ctx->first_bh) {
-        QEMUBH *next = ctx->first_bh->next;
-
-        /* qemu_bh_delete() must have been called on BHs in this AioContext */
-        assert(ctx->first_bh->deleted);
-
-        g_free(ctx->first_bh);
-        ctx->first_bh = next;
-    }
-    qemu_mutex_unlock(&ctx->bh_lock);
-
-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
    event_notifier_cleanup(&ctx->notifier);
-    rfifolock_destroy(&ctx->lock);
    qemu_mutex_destroy(&ctx->bh_lock);
+    g_array_free(ctx->pollfds, TRUE);
    timerlistgroup_deinit(&ctx->tlg);
 }

@@ -283,21 +248,7 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)

 void aio_notify(AioContext *ctx)
 {
-    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
-     */
-    smp_mb();
-    if (ctx->notify_me) {
-        event_notifier_set(&ctx->notifier);
-        atomic_mb_set(&ctx->notified, true);
-    }
-}
-
-void aio_notify_accept(AioContext *ctx)
-{
-    if (atomic_xchg(&ctx->notified, false)) {
-        event_notifier_test_and_clear(&ctx->notifier);
-    }
+    event_notifier_set(&ctx->notifier);
 }

 static void aio_timerlist_notify(void *opaque)
@@ -305,56 +256,20 @@ static void aio_timerlist_notify(void *opaque)
    aio_notify(opaque);
 }

-static void aio_rfifolock_cb(void *opaque)
+AioContext *aio_context_new(void)
 {
-    AioContext *ctx = opaque;
-
-    /* Kick owner thread in case they are blocked in aio_poll() */
-    qemu_bh_schedule(ctx->notify_dummy_bh);
-}
-
-static void notify_dummy_bh(void *opaque)
-{
-    /* Do nothing, we were invoked just to force the event loop to iterate */
-}
-
-static void event_notifier_dummy_cb(EventNotifier *e)
-{
-}
-
-AioContext *aio_context_new(Error **errp)
-{
-    int ret;
    AioContext *ctx;
-    Error *local_err = NULL;
-
    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
-    aio_context_setup(ctx, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-    ret = event_notifier_init(&ctx->notifier, false);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
-        goto fail;
-    }
-    g_source_set_can_recurse(&ctx->source, true);
-    aio_set_event_notifier(ctx, &ctx->notifier,
-                           false,
-                           (EventNotifierHandler *)
-                           event_notifier_dummy_cb);
+    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
    ctx->thread_pool = NULL;
    qemu_mutex_init(&ctx->bh_lock);
-    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
+    event_notifier_init(&ctx->notifier, false);
+    aio_set_event_notifier(ctx, &ctx->notifier, 
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
-
    return ctx;
-fail:
-    g_source_destroy(&ctx->source);
-    return NULL;
 }

 void aio_context_ref(AioContext *ctx)
@@ -366,13 +281,3 @@ void aio_context_unref(AioContext *ctx)
 {
    g_source_unref(&ctx->source);
 }
-
-void aio_context_acquire(AioContext *ctx)
-{
-    rfifolock_lock(&ctx->lock);
-}
-
-void aio_context_release(AioContext *ctx)
-{
-    rfifolock_unlock(&ctx->lock);
-}
--- a/audio/Makefile.objs
+++ b/audio/Makefile.objs
@@ -5,9 +5,13 @@ common-obj-$(CONFIG_SPICE) += spiceaudio.o
 common-obj-$(CONFIG_COREAUDIO) += coreaudio.o
 common-obj-$(CONFIG_ALSA) += alsaaudio.o
 common-obj-$(CONFIG_DSOUND) += dsoundaudio.o
+common-obj-$(CONFIG_FMOD) += fmodaudio.o
+common-obj-$(CONFIG_ESD) += esdaudio.o
 common-obj-$(CONFIG_PA) += paaudio.o
+common-obj-$(CONFIG_WINWAVE) += winwaveaudio.o
 common-obj-$(CONFIG_AUDIO_PT_INT) += audio_pt_int.o
 common-obj-$(CONFIG_AUDIO_WIN_INT) += audio_win_int.o
 common-obj-y += wavcapture.o

-sdlaudio.o-cflags := $(SDL_CFLAGS)
+$(obj)/audio.o $(obj)/fmodaudio.o: QEMU_CFLAGS += $(FMOD_CFLAGS)
+$(obj)/sdlaudio.o: QEMU_CFLAGS += $(SDL_CFLAGS)
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -21,12 +21,10 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include <alsa/asoundlib.h>
 #include "qemu-common.h"
 #include "qemu/main-loop.h"
 #include "audio.h"
-#include "trace.h"

 #if QEMU_GNUC_PREREQ(4, 3)
 #pragma GCC diagnostic ignored "-Waddress"
@@ -35,28 +33,9 @@
 #define AUDIO_CAP "alsa"
 #include "audio_int.h"

-typedef struct ALSAConf {
-    int size_in_usec_in;
-    int size_in_usec_out;
-    const char *pcm_name_in;
-    const char *pcm_name_out;
-    unsigned int buffer_size_in;
-    unsigned int period_size_in;
-    unsigned int buffer_size_out;
-    unsigned int period_size_out;
-    unsigned int threshold;
-
-    int buffer_size_in_overridden;
-    int period_size_in_overridden;
-
-    int buffer_size_out_overridden;
-    int period_size_out_overridden;
-} ALSAConf;
-
 struct pollhlp {
    snd_pcm_t *handle;
    struct pollfd *pfds;
-    ALSAConf *conf;
    int count;
    int mask;
 };
@@ -77,6 +56,30 @@ typedef struct ALSAVoiceIn {
    struct pollhlp pollhlp;
 } ALSAVoiceIn;

+static struct {
+    int size_in_usec_in;
+    int size_in_usec_out;
+    const char *pcm_name_in;
+    const char *pcm_name_out;
+    unsigned int buffer_size_in;
+    unsigned int period_size_in;
+    unsigned int buffer_size_out;
+    unsigned int period_size_out;
+    unsigned int threshold;
+
+    int buffer_size_in_overridden;
+    int period_size_in_overridden;
+
+    int buffer_size_out_overridden;
+    int period_size_out_overridden;
+    int verbose;
+} conf = {
+    .buffer_size_out = 4096,
+    .period_size_out = 1024,
+    .pcm_name_out = "default",
+    .pcm_name_in = "default",
+};
+
 struct alsa_params_req {
    int freq;
    snd_pcm_format_t fmt;
@@ -202,7 +205,9 @@ static void alsa_poll_handler (void *opaque)
    }

    if (!(revents & hlp->mask)) {
-        trace_alsa_revents(revents);
+        if (conf.verbose) {
+            dolog ("revents = %d\n", revents);
+        }
        return;
    }

@@ -261,14 +266,31 @@ static int alsa_poll_helper (snd_pcm_t *handle, struct pollhlp *hlp, int mask)

    for (i = 0; i < count; ++i) {
        if (pfds[i].events & POLLIN) {
-            qemu_set_fd_handler (pfds[i].fd, alsa_poll_handler, NULL, hlp);
+            err = qemu_set_fd_handler (pfds[i].fd, alsa_poll_handler,
+                                       NULL, hlp);
        }
        if (pfds[i].events & POLLOUT) {
-            trace_alsa_pollout(i, pfds[i].fd);
-            qemu_set_fd_handler (pfds[i].fd, NULL, alsa_poll_handler, hlp);
+            if (conf.verbose) {
+                dolog ("POLLOUT %d %d\n", i, pfds[i].fd);
+            }
+            err = qemu_set_fd_handler (pfds[i].fd, NULL,
+                                       alsa_poll_handler, hlp);
+        }
+        if (conf.verbose) {
+            dolog ("Set handler events=%#x index=%d fd=%d err=%d\n",
+                   pfds[i].events, i, pfds[i].fd, err);
        }
-        trace_alsa_set_handler(pfds[i].events, i, pfds[i].fd, err);

+        if (err) {
+            dolog ("Failed to set handler events=%#x index=%d fd=%d err=%d\n",
+                   pfds[i].events, i, pfds[i].fd, err);
+
+            while (i--) {
+                qemu_set_fd_handler (pfds[i].fd, NULL, NULL, NULL);
+            }
+            g_free (pfds);
+            return -1;
+        }
    }
    hlp->pfds = pfds;
    hlp->count = count;
@@ -454,15 +476,14 @@ static void alsa_set_threshold (snd_pcm_t *handle, snd_pcm_uframes_t threshold)
 }

 static int alsa_open (int in, struct alsa_params_req *req,
-                      struct alsa_params_obt *obt, snd_pcm_t **handlep,
-                      ALSAConf *conf)
+                      struct alsa_params_obt *obt, snd_pcm_t **handlep)
 {
    snd_pcm_t *handle;
    snd_pcm_hw_params_t *hw_params;
    int err;
    int size_in_usec;
    unsigned int freq, nchannels;
-    const char *pcm_name = in ? conf->pcm_name_in : conf->pcm_name_out;
+    const char *pcm_name = in ? conf.pcm_name_in : conf.pcm_name_out;
    snd_pcm_uframes_t obt_buffer_size;
    const char *typ = in ? "ADC" : "DAC";
    snd_pcm_format_t obtfmt;
@@ -501,7 +522,7 @@ static int alsa_open (int in, struct alsa_params_req *req,
    }

    err = snd_pcm_hw_params_set_format (handle, hw_params, req->fmt);
-    if (err < 0) {
+    if (err < 0 && conf.verbose) {
        alsa_logerr2 (err, typ, "Failed to set format %d\n", req->fmt);
    }

@@ -633,7 +654,7 @@ static int alsa_open (int in, struct alsa_params_req *req,
        goto err;
    }

-    if (!in && conf->threshold) {
+    if (!in && conf.threshold) {
        snd_pcm_uframes_t threshold;
        int bytes_per_sec;

@@ -655,7 +676,7 @@ static int alsa_open (int in, struct alsa_params_req *req,
            break;
        }

-        threshold = (conf->threshold * bytes_per_sec) / 1000;
+        threshold = (conf.threshold * bytes_per_sec) / 1000;
        alsa_set_threshold (handle, threshold);
    }

@@ -665,9 +686,10 @@ static int alsa_open (int in, struct alsa_params_req *req,

    *handlep = handle;

-    if (obtfmt != req->fmt ||
+    if (conf.verbose &&
+        (obtfmt != req->fmt ||
         obt->nchannels != req->nchannels ||
-         obt->freq != req->freq) {
+         obt->freq != req->freq)) {
        dolog ("Audio parameters for %s\n", typ);
        alsa_dump_info (req, obt, obtfmt);
    }
@@ -721,7 +743,9 @@ static void alsa_write_pending (ALSAVoiceOut *alsa)
            if (written <= 0) {
                switch (written) {
                case 0:
-                    trace_alsa_wrote_zero(len);
+                    if (conf.verbose) {
+                        dolog ("Failed to write %d frames (wrote zero)\n", len);
+                    }
                    return;

                case -EPIPE:
@@ -730,7 +754,9 @@ static void alsa_write_pending (ALSAVoiceOut *alsa)
                                     len);
                        return;
                    }
-                    trace_alsa_xrun_out();
+                    if (conf.verbose) {
+                        dolog ("Recovering from playback xrun\n");
+                    }
                    continue;

                case -ESTRPIPE:
@@ -741,7 +767,9 @@ static void alsa_write_pending (ALSAVoiceOut *alsa)
                                     len);
                        return;
                    }
-                    trace_alsa_resume_out();
+                    if (conf.verbose) {
+                        dolog ("Resuming suspended output stream\n");
+                    }
                    continue;

                case -EAGAIN:
@@ -787,31 +815,31 @@ static void alsa_fini_out (HWVoiceOut *hw)
    ldebug ("alsa_fini\n");
    alsa_anal_close (&alsa->handle, &alsa->pollhlp);

-    g_free(alsa->pcm_buf);
-    alsa->pcm_buf = NULL;
+    if (alsa->pcm_buf) {
+        g_free (alsa->pcm_buf);
+        alsa->pcm_buf = NULL;
+    }
 }

-static int alsa_init_out(HWVoiceOut *hw, struct audsettings *as,
-                         void *drv_opaque)
+static int alsa_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    ALSAVoiceOut *alsa = (ALSAVoiceOut *) hw;
    struct alsa_params_req req;
    struct alsa_params_obt obt;
    snd_pcm_t *handle;
    struct audsettings obt_as;
-    ALSAConf *conf = drv_opaque;

    req.fmt = aud_to_alsafmt (as->fmt, as->endianness);
    req.freq = as->freq;
    req.nchannels = as->nchannels;
-    req.period_size = conf->period_size_out;
-    req.buffer_size = conf->buffer_size_out;
-    req.size_in_usec = conf->size_in_usec_out;
+    req.period_size = conf.period_size_out;
+    req.buffer_size = conf.buffer_size_out;
+    req.size_in_usec = conf.size_in_usec_out;
    req.override_mask =
-        (conf->period_size_out_overridden ? 1 : 0) |
-        (conf->buffer_size_out_overridden ? 2 : 0);
+        (conf.period_size_out_overridden ? 1 : 0) |
+        (conf.buffer_size_out_overridden ? 2 : 0);

-    if (alsa_open (0, &req, &obt, &handle, conf)) {
+    if (alsa_open (0, &req, &obt, &handle)) {
        return -1;
    }

@@ -832,7 +860,6 @@ static int alsa_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    alsa->handle = handle;
-    alsa->pollhlp.conf = conf;
    return 0;
 }

@@ -903,26 +930,25 @@ static int alsa_ctl_out (HWVoiceOut *hw, int cmd, ...)
    return -1;
 }

-static int alsa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+static int alsa_init_in (HWVoiceIn *hw, struct audsettings *as)
 {
    ALSAVoiceIn *alsa = (ALSAVoiceIn *) hw;
    struct alsa_params_req req;
    struct alsa_params_obt obt;
    snd_pcm_t *handle;
    struct audsettings obt_as;
-    ALSAConf *conf = drv_opaque;

    req.fmt = aud_to_alsafmt (as->fmt, as->endianness);
    req.freq = as->freq;
    req.nchannels = as->nchannels;
-    req.period_size = conf->period_size_in;
-    req.buffer_size = conf->buffer_size_in;
-    req.size_in_usec = conf->size_in_usec_in;
+    req.period_size = conf.period_size_in;
+    req.buffer_size = conf.buffer_size_in;
+    req.size_in_usec = conf.size_in_usec_in;
    req.override_mask =
-        (conf->period_size_in_overridden ? 1 : 0) |
-        (conf->buffer_size_in_overridden ? 2 : 0);
+        (conf.period_size_in_overridden ? 1 : 0) |
+        (conf.buffer_size_in_overridden ? 2 : 0);

-    if (alsa_open (1, &req, &obt, &handle, conf)) {
+    if (alsa_open (1, &req, &obt, &handle)) {
        return -1;
    }

@@ -943,7 +969,6 @@ static int alsa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
    }

    alsa->handle = handle;
-    alsa->pollhlp.conf = conf;
    return 0;
 }

@@ -953,8 +978,10 @@ static void alsa_fini_in (HWVoiceIn *hw)

    alsa_anal_close (&alsa->handle, &alsa->pollhlp);

-    g_free(alsa->pcm_buf);
-    alsa->pcm_buf = NULL;
+    if (alsa->pcm_buf) {
+        g_free (alsa->pcm_buf);
+        alsa->pcm_buf = NULL;
+    }
 }

 static int alsa_run_in (HWVoiceIn *hw)
@@ -999,10 +1026,14 @@ static int alsa_run_in (HWVoiceIn *hw)
                dolog ("Failed to resume suspended input stream\n");
                return 0;
            }
-            trace_alsa_resume_in();
+            if (conf.verbose) {
+                dolog ("Resuming suspended input stream\n");
+            }
            break;
        default:
-            trace_alsa_no_frames(state);
+            if (conf.verbose) {
+                dolog ("No frames available and ALSA state is %d\n", state);
+            }
            return 0;
        }
    }
@@ -1037,7 +1068,9 @@ static int alsa_run_in (HWVoiceIn *hw)
            if (nread <= 0) {
                switch (nread) {
                case 0:
-                    trace_alsa_read_zero(len);
+                    if (conf.verbose) {
+                        dolog ("Failed to read %ld frames (read zero)\n", len);
+                    }
                    goto exit;

                case -EPIPE:
@@ -1045,7 +1078,9 @@ static int alsa_run_in (HWVoiceIn *hw)
                        alsa_logerr (nread, "Failed to read %ld frames\n", len);
                        goto exit;
                    }
-                    trace_alsa_xrun_in();
+                    if (conf.verbose) {
+                        dolog ("Recovering from capture xrun\n");
+                    }
                    continue;

                case -EAGAIN:
@@ -1117,85 +1152,82 @@ static int alsa_ctl_in (HWVoiceIn *hw, int cmd, ...)
    return -1;
 }

-static ALSAConf glob_conf = {
-    .buffer_size_out = 4096,
-    .period_size_out = 1024,
-    .pcm_name_out = "default",
-    .pcm_name_in = "default",
-};
-
 static void *alsa_audio_init (void)
 {
-    ALSAConf *conf = g_malloc(sizeof(ALSAConf));
-    *conf = glob_conf;
-    return conf;
+    return &conf;  /* no allocation: the driver handle is the file-scope conf object */
 }

 static void alsa_audio_fini (void *opaque)
 {
-    g_free(opaque);
+    (void) opaque;  /* nothing to free: alsa_audio_init() returns the address of a static object */
 }

 static struct audio_option alsa_options[] = {
    {
        .name        = "DAC_SIZE_IN_USEC",
        .tag         = AUD_OPT_BOOL,
-        .valp        = &glob_conf.size_in_usec_out,
+        .valp        = &conf.size_in_usec_out,
        .descr       = "DAC period/buffer size in microseconds (otherwise in frames)"
    },
    {
        .name        = "DAC_PERIOD_SIZE",
        .tag         = AUD_OPT_INT,
-        .valp        = &glob_conf.period_size_out,
+        .valp        = &conf.period_size_out,
        .descr       = "DAC period size (0 to go with system default)",
-        .overriddenp = &glob_conf.period_size_out_overridden
+        .overriddenp = &conf.period_size_out_overridden
    },
    {
        .name        = "DAC_BUFFER_SIZE",
        .tag         = AUD_OPT_INT,
-        .valp        = &glob_conf.buffer_size_out,
+        .valp        = &conf.buffer_size_out,
        .descr       = "DAC buffer size (0 to go with system default)",
-        .overriddenp = &glob_conf.buffer_size_out_overridden
+        .overriddenp = &conf.buffer_size_out_overridden
    },
    {
        .name        = "ADC_SIZE_IN_USEC",
        .tag         = AUD_OPT_BOOL,
-        .valp        = &glob_conf.size_in_usec_in,
+        .valp        = &conf.size_in_usec_in,
        .descr       =
        "ADC period/buffer size in microseconds (otherwise in frames)"
    },
    {
        .name        = "ADC_PERIOD_SIZE",
        .tag         = AUD_OPT_INT,
-        .valp        = &glob_conf.period_size_in,
+        .valp        = &conf.period_size_in,
        .descr       = "ADC period size (0 to go with system default)",
-        .overriddenp = &glob_conf.period_size_in_overridden
+        .overriddenp = &conf.period_size_in_overridden
    },
    {
        .name        = "ADC_BUFFER_SIZE",
        .tag         = AUD_OPT_INT,
-        .valp        = &glob_conf.buffer_size_in,
+        .valp        = &conf.buffer_size_in,
        .descr       = "ADC buffer size (0 to go with system default)",
-        .overriddenp = &glob_conf.buffer_size_in_overridden
+        .overriddenp = &conf.buffer_size_in_overridden
    },
    {
        .name        = "THRESHOLD",
        .tag         = AUD_OPT_INT,
-        .valp        = &glob_conf.threshold,
+        .valp        = &conf.threshold,
        .descr       = "(undocumented)"
    },
    {
        .name        = "DAC_DEV",
        .tag         = AUD_OPT_STR,
-        .valp        = &glob_conf.pcm_name_out,
+        .valp        = &conf.pcm_name_out,
        .descr       = "DAC device name (for instance dmix)"
    },
    {
        .name        = "ADC_DEV",
        .tag         = AUD_OPT_STR,
-        .valp        = &glob_conf.pcm_name_in,
+        .valp        = &conf.pcm_name_in,
        .descr       = "ADC device name"
    },
+    {
+        .name        = "VERBOSE",
+        .tag         = AUD_OPT_BOOL,
+        .valp        = &conf.verbose,
+        .descr       = "Behave in a more verbose way"
+    },
    { /* End of list */ }
 };

--- a/audio/audio.c
+++ b/audio/audio.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "audio.h"
 #include "monitor/monitor.h"
@@ -31,6 +30,7 @@
 #define AUDIO_CAP "audio"
 #include "audio_int.h"

+/* #define DEBUG_PLIVE */
 /* #define DEBUG_LIVE */
 /* #define DEBUG_OUT */
 /* #define DEBUG_CAPTURE */
@@ -66,6 +66,8 @@ static struct {
        int hertz;
        int64_t ticks;
    } period;
+    int plive;
+    int log_to_monitor;
    int try_poll_in;
    int try_poll_out;
 } conf = {
@@ -93,7 +95,9 @@ static struct {
        }
    },

-    .period = { .hertz = 100 },
+    .period = { .hertz = 250 },
+    .plive = 0,
+    .log_to_monitor = 0,
    .try_poll_in = 1,
    .try_poll_out = 1,
 };
@@ -327,11 +331,20 @@ static const char *audio_get_conf_str (const char *key,

 void AUD_vlog (const char *cap, const char *fmt, va_list ap)
 {
-    if (cap) {
-        fprintf(stderr, "%s: ", cap);
-    }
+    if (conf.log_to_monitor) {  /* LOG_TO_MONITOR option: print to default_mon instead of stderr */
+        if (cap) {
+            monitor_printf(default_mon, "%s: ", cap);  /* optional "cap: " prefix */
+        }

-    vfprintf(stderr, fmt, ap);
+        monitor_vprintf(default_mon, fmt, ap);
+    }
+    else {  /* default destination: stderr */
+        if (cap) {
+            fprintf (stderr, "%s: ", cap);  /* optional "cap: " prefix */
+        }
+
+        vfprintf (stderr, fmt, ap);
+    }
 }

 void AUD_log (const char *cap, const char *fmt, ...)
@@ -1441,6 +1454,9 @@ static void audio_run_out (AudioState *s)
            while (sw) {
                sw1 = sw->entries.le_next;
                if (!sw->active && !sw->callback.fn) {
+#ifdef DEBUG_PLIVE
+                    dolog ("Finishing with old voice\n");
+#endif
                    audio_close_out (sw);
                }
                sw = sw1;
@@ -1632,6 +1648,18 @@ static struct audio_option audio_options[] = {
        .valp  = &conf.period.hertz,
        .descr = "Timer period in HZ (0 - use lowest possible)"
    },
+    {
+        .name  = "PLIVE",
+        .tag   = AUD_OPT_BOOL,
+        .valp  = &conf.plive,
+        .descr = "(undocumented)"
+    },
+    {
+        .name  = "LOG_TO_MONITOR",
+        .tag   = AUD_OPT_BOOL,
+        .valp  = &conf.log_to_monitor,
+        .descr = "Print logging messages to monitor instead of stderr"
+    },
    { /* End of list */ }
 };

@@ -1784,7 +1812,8 @@ static const VMStateDescription vmstate_audio = {
    .name = "audio",
    .version_id = 1,
    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField []) {
        VMSTATE_END_OF_LIST()
    }
 };
@@ -1807,6 +1836,9 @@ static void audio_init (void)
    atexit (audio_atexit);

    s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
+    if (!s->ts) {
+        hw_error("Could not create audio timer\n");
+    }

    audio_process_options ("AUDIO", audio_options);

@@ -1857,8 +1889,12 @@ static void audio_init (void)

    if (!done) {
        done = !audio_driver_init (s, &no_audio_driver);
-        assert(done);
-        dolog("warning: Using timer based audio emulation\n");
+        if (!done) {
+            hw_error("Could not initialize audio subsystem\n");
+        }
+        else {
+            dolog ("warning: Using timer based audio emulation\n");
+        }
    }

    if (conf.period.hertz <= 0) {
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -24,6 +24,7 @@
 #ifndef QEMU_AUDIO_H
 #define QEMU_AUDIO_H

+#include "config-host.h"
 #include "qemu/queue.h"

 typedef void (*audio_callback_fn) (void *opaque, int avail);
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -156,13 +156,13 @@ struct audio_driver {
 };

 struct audio_pcm_ops {
-    int  (*init_out)(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque);
+    int  (*init_out)(HWVoiceOut *hw, struct audsettings *as);
    void (*fini_out)(HWVoiceOut *hw);
    int  (*run_out) (HWVoiceOut *hw, int live);
    int  (*write)   (SWVoiceOut *sw, void *buf, int size);
    int  (*ctl_out) (HWVoiceOut *hw, int cmd, ...);

-    int  (*init_in) (HWVoiceIn *hw, struct audsettings *as, void *drv_opaque);
+    int  (*init_in) (HWVoiceIn *hw, struct audsettings *as);
    void (*fini_in) (HWVoiceIn *hw);
    int  (*run_in)  (HWVoiceIn *hw);
    int  (*read)    (SWVoiceIn *sw, void *buf, int size);
@@ -206,11 +206,14 @@ extern struct audio_driver no_audio_driver;
 extern struct audio_driver oss_audio_driver;
 extern struct audio_driver sdl_audio_driver;
 extern struct audio_driver wav_audio_driver;
+extern struct audio_driver fmod_audio_driver;
 extern struct audio_driver alsa_audio_driver;
 extern struct audio_driver coreaudio_audio_driver;
 extern struct audio_driver dsound_audio_driver;
+extern struct audio_driver esd_audio_driver;
 extern struct audio_driver pa_audio_driver;
 extern struct audio_driver spice_audio_driver;
+extern struct audio_driver winwave_audio_driver;
 extern const struct mixeng_volume nominal_volume;

 void audio_pcm_init_info (struct audio_pcm_info *info, struct audsettings *as);
--- a/audio/audio_pt_int.c
+++ b/audio/audio_pt_int.c
@@ -1,4 +1,3 @@
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "audio.h"

--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -71,7 +71,10 @@ static void glue (audio_init_nb_voices_, TYPE) (struct audio_driver *drv)

 static void glue (audio_pcm_hw_free_resources_, TYPE) (HW *hw)
 {
-    g_free (HWBUF);
+    if (HWBUF) {
+        g_free (HWBUF);
+    }
+
    HWBUF = NULL;
 }

@@ -89,7 +92,9 @@ static int glue (audio_pcm_hw_alloc_resources_, TYPE) (HW *hw)

 static void glue (audio_pcm_sw_free_resources_, TYPE) (SW *sw)
 {
-    g_free (sw->buf);
+    if (sw->buf) {
+        g_free (sw->buf);
+    }

    if (sw->rate) {
        st_rate_stop (sw->rate);
@@ -167,8 +172,10 @@ static int glue (audio_pcm_sw_init_, TYPE) (
 static void glue (audio_pcm_sw_fini_, TYPE) (SW *sw)
 {
    glue (audio_pcm_sw_free_resources_, TYPE) (sw);
-    g_free (sw->name);
-    sw->name = NULL;
+    if (sw->name) {
+        g_free (sw->name);
+        sw->name = NULL;
+    }
 }

 static void glue (audio_pcm_hw_add_sw_, TYPE) (HW *hw, SW *sw)
@@ -191,9 +198,9 @@ static void glue (audio_pcm_hw_gc_, TYPE) (HW **hwp)
        audio_detach_capture (hw);
 #endif
        QLIST_REMOVE (hw, entries);
-        glue (hw->pcm_ops->fini_, TYPE) (hw);
        glue (s->nb_hw_voices_, TYPE) += 1;
        glue (audio_pcm_hw_free_resources_ ,TYPE) (hw);
+        glue (hw->pcm_ops->fini_, TYPE) (hw);
        g_free (hw);
        *hwp = NULL;
    }
@@ -262,7 +269,7 @@ static HW *glue (audio_pcm_hw_add_new_, TYPE) (struct audsettings *as)
 #ifdef DAC
    QLIST_INIT (&hw->cap_head);
 #endif
-    if (glue (hw->pcm_ops->init_, TYPE) (hw, as, s->drv_opaque)) {
+    if (glue (hw->pcm_ops->init_, TYPE) (hw, as)) {
        goto err0;
    }

@@ -398,6 +405,10 @@ SW *glue (AUD_open_, TYPE) (
    )
 {
    AudioState *s = &glob_audio_state;
+#ifdef DAC
+    int live = 0;
+    SW *old_sw = NULL;
+#endif

    if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) {
        dolog ("card=%p name=%p callback_fn=%p as=%p\n",
@@ -422,6 +433,29 @@ SW *glue (AUD_open_, TYPE) (
        return sw;
    }

+#ifdef DAC
+    if (conf.plive && sw && (!sw->active && !sw->empty)) {
+        live = sw->total_hw_samples_mixed;
+
+#ifdef DEBUG_PLIVE
+        dolog ("Replacing voice %s with %d live samples\n", SW_NAME (sw), live);
+        dolog ("Old %s freq %d, bits %d, channels %d\n",
+               SW_NAME (sw), sw->info.freq, sw->info.bits, sw->info.nchannels);
+        dolog ("New %s freq %d, bits %d, channels %d\n",
+               name,
+               as->freq,
+               (as->fmt == AUD_FMT_S16 || as->fmt == AUD_FMT_U16) ? 16 : 8,
+               as->nchannels);
+#endif
+
+        if (live) {
+            old_sw = sw;
+            old_sw->callback.fn = NULL;
+            sw = NULL;
+        }
+    }
+#endif
+
    if (!glue (conf.fixed_, TYPE).enabled && sw) {
        glue (AUD_close_, TYPE) (card, sw);
        sw = NULL;
@@ -454,6 +488,20 @@ SW *glue (AUD_open_, TYPE) (
    sw->callback.fn = callback_fn;
    sw->callback.opaque = callback_opaque;

+#ifdef DAC
+    if (live) {
+        int mixed =
+            (live << old_sw->info.shift)
+            * old_sw->info.bytes_per_second
+            / sw->info.bytes_per_second;
+
+#ifdef DEBUG_PLIVE
+        dolog ("Silence will be mixed %d\n", mixed);
+#endif
+        sw->total_hw_samples_mixed += mixed;
+    }
+#endif
+
 #ifdef DEBUG_AUDIO
    dolog ("%s\n", name);
    audio_pcm_print_info ("hw", &sw->hw->info);
--- a/audio/audio_win_int.c
+++ b/audio/audio_win_int.c
@@ -1,6 +1,5 @@
 /* public domain */

-#include "qemu/osdep.h"
 #include "qemu-common.h"

 #define AUDIO_CAP "win-int"
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -22,8 +22,8 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include <CoreAudio/CoreAudio.h>
+#include <string.h>             /* strerror */
 #include <pthread.h>            /* pthread_X */

 #include "qemu-common.h"
@@ -32,250 +32,28 @@
 #define AUDIO_CAP "coreaudio"
 #include "audio_int.h"

-#ifndef MAC_OS_X_VERSION_10_6
-#define MAC_OS_X_VERSION_10_6 1060
-#endif
-
-static int isAtexit;
-
-typedef struct {
+static struct {  /* coreaudio settings; static so this `conf` stays private to the file */
     int buffer_frames;
     int nbuffers;
-} CoreaudioConf;
+    int isAtexit;  /* set to 1 by coreaudio_atexit() */
+} conf = {
+    .buffer_frames = 512,  /* clamped to the device's frame range in coreaudio_init_out */
+    .nbuffers = 4,
+    .isAtexit = 0
+};

 typedef struct coreaudioVoiceOut {
    HWVoiceOut hw;
    pthread_mutex_t mutex;
+    int isAtexit;
    AudioDeviceID outputDeviceID;
    UInt32 audioDevicePropertyBufferFrameSize;
    AudioStreamBasicDescription outputStreamBasicDescription;
-    AudioDeviceIOProcID ioprocid;
    int live;
    int decr;
    int rpos;
 } coreaudioVoiceOut;

-#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
-/* The APIs used here only become available from 10.6 */
-
-static OSStatus coreaudio_get_voice(AudioDeviceID *id)
-{
-    UInt32 size = sizeof(*id);
-    AudioObjectPropertyAddress addr = {
-        kAudioHardwarePropertyDefaultOutputDevice,
-        kAudioObjectPropertyScopeGlobal,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectGetPropertyData(kAudioObjectSystemObject,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      &size,
-                                      id);
-}
-
-static OSStatus coreaudio_get_framesizerange(AudioDeviceID id,
-                                             AudioValueRange *framerange)
-{
-    UInt32 size = sizeof(*framerange);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyBufferFrameSizeRange,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectGetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      &size,
-                                      framerange);
-}
-
-static OSStatus coreaudio_get_framesize(AudioDeviceID id, UInt32 *framesize)
-{
-    UInt32 size = sizeof(*framesize);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyBufferFrameSize,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectGetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      &size,
-                                      framesize);
-}
-
-static OSStatus coreaudio_set_framesize(AudioDeviceID id, UInt32 *framesize)
-{
-    UInt32 size = sizeof(*framesize);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyBufferFrameSize,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectSetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      size,
-                                      framesize);
-}
-
-static OSStatus coreaudio_get_streamformat(AudioDeviceID id,
-                                           AudioStreamBasicDescription *d)
-{
-    UInt32 size = sizeof(*d);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyStreamFormat,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectGetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      &size,
-                                      d);
-}
-
-static OSStatus coreaudio_set_streamformat(AudioDeviceID id,
-                                           AudioStreamBasicDescription *d)
-{
-    UInt32 size = sizeof(*d);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyStreamFormat,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectSetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      size,
-                                      d);
-}
-
-static OSStatus coreaudio_get_isrunning(AudioDeviceID id, UInt32 *result)
-{
-    UInt32 size = sizeof(*result);
-    AudioObjectPropertyAddress addr = {
-        kAudioDevicePropertyDeviceIsRunning,
-        kAudioDevicePropertyScopeOutput,
-        kAudioObjectPropertyElementMaster
-    };
-
-    return AudioObjectGetPropertyData(id,
-                                      &addr,
-                                      0,
-                                      NULL,
-                                      &size,
-                                      result);
-}
-#else
-/* Legacy versions of functions using deprecated APIs */
-
-static OSStatus coreaudio_get_voice(AudioDeviceID *id)
-{
-    UInt32 size = sizeof(*id);
-
-    return AudioHardwareGetProperty(
-        kAudioHardwarePropertyDefaultOutputDevice,
-        &size,
-        id);
-}
-
-static OSStatus coreaudio_get_framesizerange(AudioDeviceID id,
-                                             AudioValueRange *framerange)
-{
-    UInt32 size = sizeof(*framerange);
-
-    return AudioDeviceGetProperty(
-        id,
-        0,
-        0,
-        kAudioDevicePropertyBufferFrameSizeRange,
-        &size,
-        framerange);
-}
-
-static OSStatus coreaudio_get_framesize(AudioDeviceID id, UInt32 *framesize)
-{
-    UInt32 size = sizeof(*framesize);
-
-    return AudioDeviceGetProperty(
-        id,
-        0,
-        false,
-        kAudioDevicePropertyBufferFrameSize,
-        &size,
-        framesize);
-}
-
-static OSStatus coreaudio_set_framesize(AudioDeviceID id, UInt32 *framesize)
-{
-    UInt32 size = sizeof(*framesize);
-
-    return AudioDeviceSetProperty(
-        id,
-        NULL,
-        0,
-        false,
-        kAudioDevicePropertyBufferFrameSize,
-        size,
-        framesize);
-}
-
-static OSStatus coreaudio_get_streamformat(AudioDeviceID id,
-                                           AudioStreamBasicDescription *d)
-{
-    UInt32 size = sizeof(*d);
-
-    return AudioDeviceGetProperty(
-        id,
-        0,
-        false,
-        kAudioDevicePropertyStreamFormat,
-        &size,
-        d);
-}
-
-static OSStatus coreaudio_set_streamformat(AudioDeviceID id,
-                                           AudioStreamBasicDescription *d)
-{
-    UInt32 size = sizeof(*d);
-
-    return AudioDeviceSetProperty(
-        id,
-        0,
-        0,
-        0,
-        kAudioDevicePropertyStreamFormat,
-        size,
-        d);
-}
-
-static OSStatus coreaudio_get_isrunning(AudioDeviceID id, UInt32 *result)
-{
-    UInt32 size = sizeof(*result);
-
-    return AudioDeviceGetProperty(
-        id,
-        0,
-        0,
-        kAudioDevicePropertyDeviceIsRunning,
-        &size,
-        result);
-}
-#endif
-
 static void coreaudio_logstatus (OSStatus status)
 {
    const char *str = "BUG";
@@ -370,7 +148,10 @@ static inline UInt32 isPlaying (AudioDeviceID outputDeviceID)
 {
    OSStatus status;
    UInt32 result = 0;
-    status = coreaudio_get_isrunning(outputDeviceID, &result);
+    UInt32 propertySize = sizeof(outputDeviceID);
+    status = AudioDeviceGetProperty(
+        outputDeviceID, 0, 0,
+        kAudioDevicePropertyDeviceIsRunning, &propertySize, &result);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr(status,
                         "Could not determine whether Device is playing\n");
@@ -380,7 +161,7 @@ static inline UInt32 isPlaying (AudioDeviceID outputDeviceID)

 static void coreaudio_atexit (void)
 {
-    isAtexit = 1;
+    conf.isAtexit = 1;
 }

 static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name)
@@ -506,15 +287,14 @@ static int coreaudio_write (SWVoiceOut *sw, void *buf, int len)
    return audio_pcm_sw_write (sw, buf, len);
 }

-static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
-                              void *drv_opaque)
+static int coreaudio_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    OSStatus status;
    coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw;
+    UInt32 propertySize;
    int err;
    const char *typ = "playback";
    AudioValueRange frameRange;
-    CoreaudioConf *conf = drv_opaque;

    /* create mutex */
    err = pthread_mutex_init(&core->mutex, NULL);
@@ -525,7 +305,12 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,

    audio_pcm_init_info (&hw->info, as);

-    status = coreaudio_get_voice(&core->outputDeviceID);
+    /* open default output device */
+    propertySize = sizeof(core->outputDeviceID);
+    status = AudioHardwareGetProperty(
+        kAudioHardwarePropertyDefaultOutputDevice,
+        &propertySize,
+        &core->outputDeviceID);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ,
                           "Could not get default output Device\n");
@@ -537,29 +322,42 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    /* get minimum and maximum buffer frame sizes */
-    status = coreaudio_get_framesizerange(core->outputDeviceID,
-                                          &frameRange);
+    propertySize = sizeof(frameRange);
+    status = AudioDeviceGetProperty(
+        core->outputDeviceID,
+        0,
+        0,
+        kAudioDevicePropertyBufferFrameSizeRange,
+        &propertySize,
+        &frameRange);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ,
                           "Could not get device buffer frame range\n");
        return -1;
    }

-    if (frameRange.mMinimum > conf->buffer_frames) {
+    if (frameRange.mMinimum > conf.buffer_frames) {
        core->audioDevicePropertyBufferFrameSize = (UInt32) frameRange.mMinimum;
        dolog ("warning: Upsizing Buffer Frames to %f\n", frameRange.mMinimum);
    }
-    else if (frameRange.mMaximum < conf->buffer_frames) {
+    else if (frameRange.mMaximum < conf.buffer_frames) {
        core->audioDevicePropertyBufferFrameSize = (UInt32) frameRange.mMaximum;
        dolog ("warning: Downsizing Buffer Frames to %f\n", frameRange.mMaximum);
    }
    else {
-        core->audioDevicePropertyBufferFrameSize = conf->buffer_frames;
+        core->audioDevicePropertyBufferFrameSize = conf.buffer_frames;
    }

    /* set Buffer Frame Size */
-    status = coreaudio_set_framesize(core->outputDeviceID,
-                                     &core->audioDevicePropertyBufferFrameSize);
+    propertySize = sizeof(core->audioDevicePropertyBufferFrameSize);
+    status = AudioDeviceSetProperty(
+        core->outputDeviceID,
+        NULL,
+        0,
+        false,
+        kAudioDevicePropertyBufferFrameSize,
+        propertySize,
+        &core->audioDevicePropertyBufferFrameSize);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ,
                           "Could not set device buffer frame size %" PRIu32 "\n",
@@ -568,18 +366,30 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    /* get Buffer Frame Size */
-    status = coreaudio_get_framesize(core->outputDeviceID,
-                                     &core->audioDevicePropertyBufferFrameSize);
+    propertySize = sizeof(core->audioDevicePropertyBufferFrameSize);
+    status = AudioDeviceGetProperty(
+        core->outputDeviceID,
+        0,
+        false,
+        kAudioDevicePropertyBufferFrameSize,
+        &propertySize,
+        &core->audioDevicePropertyBufferFrameSize);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ,
                           "Could not get device buffer frame size\n");
        return -1;
    }
-    hw->samples = conf->nbuffers * core->audioDevicePropertyBufferFrameSize;
+    hw->samples = conf.nbuffers * core->audioDevicePropertyBufferFrameSize;

    /* get StreamFormat */
-    status = coreaudio_get_streamformat(core->outputDeviceID,
-                                        &core->outputStreamBasicDescription);
+    propertySize = sizeof(core->outputStreamBasicDescription);
+    status = AudioDeviceGetProperty(
+        core->outputDeviceID,
+        0,
+        false,
+        kAudioDevicePropertyStreamFormat,
+        &propertySize,
+        &core->outputStreamBasicDescription);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ,
                           "Could not get Device Stream properties\n");
@@ -589,8 +399,15 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,

    /* set Samplerate */
    core->outputStreamBasicDescription.mSampleRate = (Float64) as->freq;
-    status = coreaudio_set_streamformat(core->outputDeviceID,
-                                        &core->outputStreamBasicDescription);
+    propertySize = sizeof(core->outputStreamBasicDescription);
+    status = AudioDeviceSetProperty(
+        core->outputDeviceID,
+        0,
+        0,
+        0,
+        kAudioDevicePropertyStreamFormat,
+        propertySize,
+        &core->outputStreamBasicDescription);
    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ, "Could not set samplerate %d\n",
                           as->freq);
@@ -599,12 +416,8 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    /* set Callback */
-    core->ioprocid = NULL;
-    status = AudioDeviceCreateIOProcID(core->outputDeviceID,
-                                       audioDeviceIOProc,
-                                       hw,
-                                       &core->ioprocid);
-    if (status != kAudioHardwareNoError || core->ioprocid == NULL) {
+    status = AudioDeviceAddIOProc(core->outputDeviceID, audioDeviceIOProc, hw);
+    if (status != kAudioHardwareNoError) {
        coreaudio_logerr2 (status, typ, "Could not set IOProc\n");
        core->outputDeviceID = kAudioDeviceUnknown;
        return -1;
@@ -612,10 +425,10 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,

    /* start Playback */
    if (!isPlaying(core->outputDeviceID)) {
-        status = AudioDeviceStart(core->outputDeviceID, core->ioprocid);
+        status = AudioDeviceStart(core->outputDeviceID, audioDeviceIOProc);
        if (status != kAudioHardwareNoError) {
            coreaudio_logerr2 (status, typ, "Could not start playback\n");
-            AudioDeviceDestroyIOProcID(core->outputDeviceID, core->ioprocid);
+            AudioDeviceRemoveIOProc(core->outputDeviceID, audioDeviceIOProc);
            core->outputDeviceID = kAudioDeviceUnknown;
            return -1;
        }
@@ -630,18 +443,18 @@ static void coreaudio_fini_out (HWVoiceOut *hw)
    int err;
    coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw;

-    if (!isAtexit) {
+    if (!conf.isAtexit) {
        /* stop playback */
        if (isPlaying(core->outputDeviceID)) {
-            status = AudioDeviceStop(core->outputDeviceID, core->ioprocid);
+            status = AudioDeviceStop(core->outputDeviceID, audioDeviceIOProc);
            if (status != kAudioHardwareNoError) {
                coreaudio_logerr (status, "Could not stop playback\n");
            }
        }

        /* remove callback */
-        status = AudioDeviceDestroyIOProcID(core->outputDeviceID,
-                                            core->ioprocid);
+        status = AudioDeviceRemoveIOProc(core->outputDeviceID,
+                                         audioDeviceIOProc);
        if (status != kAudioHardwareNoError) {
            coreaudio_logerr (status, "Could not remove IOProc\n");
        }
@@ -664,7 +477,7 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
    case VOICE_ENABLE:
        /* start playback */
        if (!isPlaying(core->outputDeviceID)) {
-            status = AudioDeviceStart(core->outputDeviceID, core->ioprocid);
+            status = AudioDeviceStart(core->outputDeviceID, audioDeviceIOProc);
            if (status != kAudioHardwareNoError) {
                coreaudio_logerr (status, "Could not resume playback\n");
            }
@@ -673,10 +486,9 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)

    case VOICE_DISABLE:
        /* stop playback */
-        if (!isAtexit) {
+        if (!conf.isAtexit) {
            if (isPlaying(core->outputDeviceID)) {
-                status = AudioDeviceStop(core->outputDeviceID,
-                                         core->ioprocid);
+                status = AudioDeviceStop(core->outputDeviceID, audioDeviceIOProc);
                if (status != kAudioHardwareNoError) {
                    coreaudio_logerr (status, "Could not pause playback\n");
                }
@@ -687,36 +499,28 @@ static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
    return 0;
 }

-static CoreaudioConf glob_conf = {
-    .buffer_frames = 512,
-    .nbuffers = 4,
-};
-
 static void *coreaudio_audio_init (void)
 {
-    CoreaudioConf *conf = g_malloc(sizeof(CoreaudioConf));
-    *conf = glob_conf;
-
    atexit(coreaudio_atexit);
-    return conf;
+    return &coreaudio_audio_init;
 }

 static void coreaudio_audio_fini (void *opaque)
 {
-    g_free(opaque);
+    (void) opaque;
 }

 static struct audio_option coreaudio_options[] = {
    {
        .name  = "BUFFER_SIZE",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.buffer_frames,
+        .valp  = &conf.buffer_frames,
        .descr = "Size of the buffer in frames"
    },
    {
        .name  = "BUFFER_COUNT",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.nbuffers,
+        .valp  = &conf.nbuffers,
        .descr = "Number of buffers"
    },
    { /* End of list */ }
--- a/audio/dsound_template.h
+++ b/audio/dsound_template.h
@@ -67,11 +67,11 @@ static int glue (dsound_lock_, TYPE) (
    LPVOID *p2p,
    DWORD *blen1p,
    DWORD *blen2p,
-    int entire,
-    dsound *s
+    int entire
    )
 {
    HRESULT hr;
+    int i;
    LPVOID p1 = NULL, p2 = NULL;
    DWORD blen1 = 0, blen2 = 0;
    DWORD flag;
@@ -81,18 +81,37 @@ static int glue (dsound_lock_, TYPE) (
 #else
    flag = entire ? DSBLOCK_ENTIREBUFFER : 0;
 #endif
-    hr = glue(IFACE, _Lock)(buf, pos, len, &p1, &blen1, &p2, &blen2, flag);
+    for (i = 0; i < conf.lock_retries; ++i) {
+        hr = glue (IFACE, _Lock) (
+            buf,
+            pos,
+            len,
+            &p1,
+            &blen1,
+            &p2,
+            &blen2,
+            flag
+            );

-    if (FAILED (hr)) {
+        if (FAILED (hr)) {
 #ifndef DSBTYPE_IN
-        if (hr == DSERR_BUFFERLOST) {
-            if (glue (dsound_restore_, TYPE) (buf, s)) {
-                dsound_logerr (hr, "Could not lock " NAME "\n");
+            if (hr == DSERR_BUFFERLOST) {
+                if (glue (dsound_restore_, TYPE) (buf)) {
+                    dsound_logerr (hr, "Could not lock " NAME "\n");
+                    goto fail;
+                }
+                continue;
            }
+#endif
+            dsound_logerr (hr, "Could not lock " NAME "\n");
            goto fail;
        }
-#endif
-        dsound_logerr (hr, "Could not lock " NAME "\n");
+
+        break;
+    }
+
+    if (i == conf.lock_retries) {
+        dolog ("%d attempts to lock " NAME " failed\n", i);
        goto fail;
    }

@@ -155,19 +174,16 @@ static void dsound_fini_out (HWVoiceOut *hw)
 }

 #ifdef DSBTYPE_IN
-static int dsound_init_in(HWVoiceIn *hw, struct audsettings *as,
-                          void *drv_opaque)
+static int dsound_init_in (HWVoiceIn *hw, struct audsettings *as)
 #else
-static int dsound_init_out(HWVoiceOut *hw, struct audsettings *as,
-                           void *drv_opaque)
+static int dsound_init_out (HWVoiceOut *hw, struct audsettings *as)
 #endif
 {
    int err;
    HRESULT hr;
-    dsound *s = drv_opaque;
+    dsound *s = &glob_dsound;
    WAVEFORMATEX wfx;
    struct audsettings obt_as;
-    DSoundConf *conf = &s->conf;
 #ifdef DSBTYPE_IN
    const char *typ = "ADC";
    DSoundVoiceIn *ds = (DSoundVoiceIn *) hw;
@@ -194,7 +210,7 @@ static int dsound_init_out(HWVoiceOut *hw, struct audsettings *as,
    bd.dwSize = sizeof (bd);
    bd.lpwfxFormat = &wfx;
 #ifdef DSBTYPE_IN
-    bd.dwBufferBytes = conf->bufsize_in;
+    bd.dwBufferBytes = conf.bufsize_in;
    hr = IDirectSoundCapture_CreateCaptureBuffer (
        s->dsound_capture,
        &bd,
@@ -203,7 +219,7 @@ static int dsound_init_out(HWVoiceOut *hw, struct audsettings *as,
        );
 #else
    bd.dwFlags = DSBCAPS_STICKYFOCUS | DSBCAPS_GETCURRENTPOSITION2;
-    bd.dwBufferBytes = conf->bufsize_out;
+    bd.dwBufferBytes = conf.bufsize_out;
    hr = IDirectSound_CreateSoundBuffer (
        s->dsound,
        &bd,
@@ -253,7 +269,6 @@ static int dsound_init_out(HWVoiceOut *hw, struct audsettings *as,
            );
    }
    hw->samples = bc.dwBufferBytes >> hw->info.shift;
-    ds->s = s;

 #ifdef DEBUG_DSOUND
    dolog ("caps %ld, desc %ld\n",
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@@ -26,7 +26,6 @@
 * SEAL 1.07 by Carlos 'pel' Hasan was used as documentation
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "audio.h"

@@ -42,25 +41,42 @@

 /* #define DEBUG_DSOUND */

-typedef struct {
+static struct {
+    int lock_retries;
+    int restore_retries;
+    int getstatus_retries;
+    int set_primary;
    int bufsize_in;
    int bufsize_out;
+    struct audsettings settings;
    int latency_millis;
-} DSoundConf;
+} conf = {
+    .lock_retries       = 1,
+    .restore_retries    = 1,
+    .getstatus_retries  = 1,
+    .set_primary        = 0,
+    .bufsize_in         = 16384,
+    .bufsize_out        = 16384,
+    .settings.freq      = 44100,
+    .settings.nchannels = 2,
+    .settings.fmt       = AUD_FMT_S16,
+    .latency_millis     = 10
+};

 typedef struct {
    LPDIRECTSOUND dsound;
    LPDIRECTSOUNDCAPTURE dsound_capture;
+    LPDIRECTSOUNDBUFFER dsound_primary_buffer;
    struct audsettings settings;
-    DSoundConf conf;
 } dsound;

+static dsound glob_dsound;
+
 typedef struct {
    HWVoiceOut hw;
    LPDIRECTSOUNDBUFFER dsound_buffer;
    DWORD old_pos;
    int first_time;
-    dsound *s;
 #ifdef DEBUG_DSOUND
    DWORD old_ppos;
    DWORD played;
@@ -72,7 +88,6 @@ typedef struct {
    HWVoiceIn hw;
    int first_time;
    LPDIRECTSOUNDCAPTUREBUFFER dsound_capture_buffer;
-    dsound *s;
 } DSoundVoiceIn;

 static void dsound_log_hresult (HRESULT hr)
@@ -266,17 +281,29 @@ static void print_wave_format (WAVEFORMATEX *wfx)
 }
 #endif

-static int dsound_restore_out (LPDIRECTSOUNDBUFFER dsb, dsound *s)
+static int dsound_restore_out (LPDIRECTSOUNDBUFFER dsb)
 {
    HRESULT hr;
+    int i;

-    hr = IDirectSoundBuffer_Restore (dsb);
+    for (i = 0; i < conf.restore_retries; ++i) {
+        hr = IDirectSoundBuffer_Restore (dsb);

-    if (hr != DS_OK) {
-        dsound_logerr (hr, "Could not restore playback buffer\n");
-        return -1;
+        switch (hr) {
+        case DS_OK:
+            return 0;
+
+        case DSERR_BUFFERLOST:
+            continue;
+
+        default:
+            dsound_logerr (hr, "Could not restore playback buffer\n");
+            return -1;
+        }
    }
-    return 0;
+
+    dolog ("%d attempts to restore playback buffer failed\n", i);
+    return -1;
 }

 #include "dsound_template.h"
@@ -284,20 +311,25 @@ static int dsound_restore_out (LPDIRECTSOUNDBUFFER dsb, dsound *s)
 #include "dsound_template.h"
 #undef DSBTYPE_IN

-static int dsound_get_status_out (LPDIRECTSOUNDBUFFER dsb, DWORD *statusp,
-                                  dsound *s)
+static int dsound_get_status_out (LPDIRECTSOUNDBUFFER dsb, DWORD *statusp)
 {
    HRESULT hr;
+    int i;

-    hr = IDirectSoundBuffer_GetStatus (dsb, statusp);
-    if (FAILED (hr)) {
-        dsound_logerr (hr, "Could not get playback buffer status\n");
-        return -1;
-    }
+    for (i = 0; i < conf.getstatus_retries; ++i) {
+        hr = IDirectSoundBuffer_GetStatus (dsb, statusp);
+        if (FAILED (hr)) {
+            dsound_logerr (hr, "Could not get playback buffer status\n");
+            return -1;
+        }

-    if (*statusp & DSERR_BUFFERLOST) {
-        dsound_restore_out(dsb, s);
-        return -1;
+        if (*statusp & DSERR_BUFFERLOST) {
+            if (dsound_restore_out (dsb)) {
+                return -1;
+            }
+            continue;
+        }
+        break;
    }

    return 0;
@@ -344,8 +376,7 @@ static void dsound_write_sample (HWVoiceOut *hw, uint8_t *dst, int dst_len)
    hw->rpos = pos % hw->samples;
 }

-static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb,
-                                 dsound *s)
+static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb)
 {
    int err;
    LPVOID p1, p2;
@@ -358,8 +389,7 @@ static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb,
        hw->samples << hw->info.shift,
        &p1, &p2,
        &blen1, &blen2,
-        1,
-        s
+        1
        );
    if (err) {
        return;
@@ -385,9 +415,25 @@ static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb,
    dsound_unlock_out (dsb, p1, p2, blen1, blen2);
 }

-static int dsound_open (dsound *s)
+static void dsound_close (dsound *s)
 {
    HRESULT hr;
+
+    if (s->dsound_primary_buffer) {
+        hr = IDirectSoundBuffer_Release (s->dsound_primary_buffer);
+        if (FAILED (hr)) {
+            dsound_logerr (hr, "Could not release primary buffer\n");
+        }
+        s->dsound_primary_buffer = NULL;
+    }
+}
+
+static int dsound_open (dsound *s)
+{
+    int err;
+    HRESULT hr;
+    WAVEFORMATEX wfx;
+    DSBUFFERDESC dsbd;
    HWND hwnd;

    hwnd = GetForegroundWindow ();
@@ -403,7 +449,63 @@ static int dsound_open (dsound *s)
        return -1;
    }

+    if (!conf.set_primary) {
+        return 0;
+    }
+
+    err = waveformat_from_audio_settings (&wfx, &conf.settings);
+    if (err) {
+        return -1;
+    }
+
+    memset (&dsbd, 0, sizeof (dsbd));
+    dsbd.dwSize = sizeof (dsbd);
+    dsbd.dwFlags = DSBCAPS_PRIMARYBUFFER;
+    dsbd.dwBufferBytes = 0;
+    dsbd.lpwfxFormat = NULL;
+
+    hr = IDirectSound_CreateSoundBuffer (
+        s->dsound,
+        &dsbd,
+        &s->dsound_primary_buffer,
+        NULL
+        );
+    if (FAILED (hr)) {
+        dsound_logerr (hr, "Could not create primary playback buffer\n");
+        return -1;
+    }
+
+    hr = IDirectSoundBuffer_SetFormat (s->dsound_primary_buffer, &wfx);
+    if (FAILED (hr)) {
+        dsound_logerr (hr, "Could not set primary playback buffer format\n");
+    }
+
+    hr = IDirectSoundBuffer_GetFormat (
+        s->dsound_primary_buffer,
+        &wfx,
+        sizeof (wfx),
+        NULL
+        );
+    if (FAILED (hr)) {
+        dsound_logerr (hr, "Could not get primary playback buffer format\n");
+        goto fail0;
+    }
+
+#ifdef DEBUG_DSOUND
+    dolog ("Primary\n");
+    print_wave_format (&wfx);
+#endif
+
+    err = waveformat_to_audio_settings (&wfx, &s->settings);
+    if (err) {
+        goto fail0;
+    }
+
    return 0;
+
+ fail0:
+    dsound_close (s);
+    return -1;
 }

 static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)
@@ -412,7 +514,6 @@ static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)
    DWORD status;
    DSoundVoiceOut *ds = (DSoundVoiceOut *) hw;
    LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer;
-    dsound *s = ds->s;

    if (!dsb) {
        dolog ("Attempt to control voice without a buffer\n");
@@ -421,7 +522,7 @@ static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)

    switch (cmd) {
    case VOICE_ENABLE:
-        if (dsound_get_status_out (dsb, &status, s)) {
+        if (dsound_get_status_out (dsb, &status)) {
            return -1;
        }

@@ -430,7 +531,7 @@ static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)
            return 0;
        }

-        dsound_clear_sample (hw, dsb, s);
+        dsound_clear_sample (hw, dsb);

        hr = IDirectSoundBuffer_Play (dsb, 0, 0, DSBPLAY_LOOPING);
        if (FAILED (hr)) {
@@ -440,7 +541,7 @@ static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)
        break;

    case VOICE_DISABLE:
-        if (dsound_get_status_out (dsb, &status, s)) {
+        if (dsound_get_status_out (dsb, &status)) {
            return -1;
        }

@@ -477,8 +578,6 @@ static int dsound_run_out (HWVoiceOut *hw, int live)
    DWORD wpos, ppos, old_pos;
    LPVOID p1, p2;
    int bufsize;
-    dsound *s = ds->s;
-    DSoundConf *conf = &s->conf;

    if (!dsb) {
        dolog ("Attempt to run empty with playback buffer\n");
@@ -501,14 +600,14 @@ static int dsound_run_out (HWVoiceOut *hw, int live)
    len = live << hwshift;

    if (ds->first_time) {
-        if (conf->latency_millis) {
+        if (conf.latency_millis) {
            DWORD cur_blat;

            cur_blat = audio_ring_dist (wpos, ppos, bufsize);
            ds->first_time = 0;
            old_pos = wpos;
            old_pos +=
-                millis_to_bytes (&hw->info, conf->latency_millis) - cur_blat;
+                millis_to_bytes (&hw->info, conf.latency_millis) - cur_blat;
            old_pos %= bufsize;
            old_pos &= ~hw->info.align;
        }
@@ -564,8 +663,7 @@ static int dsound_run_out (HWVoiceOut *hw, int live)
        len,
        &p1, &p2,
        &blen1, &blen2,
-        0,
-        s
+        0
        );
    if (err) {
        return 0;
@@ -668,7 +766,6 @@ static int dsound_run_in (HWVoiceIn *hw)
    DWORD cpos, rpos;
    LPVOID p1, p2;
    int hwshift;
-    dsound *s = ds->s;

    if (!dscb) {
        dolog ("Attempt to run without capture buffer\n");
@@ -723,8 +820,7 @@ static int dsound_run_in (HWVoiceIn *hw)
        &p2,
        &blen1,
        &blen2,
-        0,
-        s
+        0
        );
    if (err) {
        return 0;
@@ -747,19 +843,12 @@ static int dsound_run_in (HWVoiceIn *hw)
    return decr;
 }

-static DSoundConf glob_conf = {
-    .bufsize_in         = 16384,
-    .bufsize_out        = 16384,
-    .latency_millis     = 10
-};
-
 static void dsound_audio_fini (void *opaque)
 {
    HRESULT hr;
    dsound *s = opaque;

    if (!s->dsound) {
-        g_free(s);
        return;
    }

@@ -770,7 +859,6 @@ static void dsound_audio_fini (void *opaque)
    s->dsound = NULL;

    if (!s->dsound_capture) {
-        g_free(s);
        return;
    }

@@ -779,21 +867,17 @@ static void dsound_audio_fini (void *opaque)
        dsound_logerr (hr, "Could not release DirectSoundCapture\n");
    }
    s->dsound_capture = NULL;
-
-    g_free(s);
 }

 static void *dsound_audio_init (void)
 {
    int err;
    HRESULT hr;
-    dsound *s = g_malloc0(sizeof(dsound));
+    dsound *s = &glob_dsound;

-    s->conf = glob_conf;
    hr = CoInitialize (NULL);
    if (FAILED (hr)) {
        dsound_logerr (hr, "Could not initialize COM\n");
-        g_free(s);
        return NULL;
    }

@@ -806,7 +890,6 @@ static void *dsound_audio_init (void)
        );
    if (FAILED (hr)) {
        dsound_logerr (hr, "Could not create DirectSound instance\n");
-        g_free(s);
        return NULL;
    }

@@ -818,7 +901,7 @@ static void *dsound_audio_init (void)
        if (FAILED (hr)) {
            dsound_logerr (hr, "Could not release DirectSound\n");
        }
-        g_free(s);
+        s->dsound = NULL;
        return NULL;
    }

@@ -855,22 +938,64 @@ static void *dsound_audio_init (void)
 }

 static struct audio_option dsound_options[] = {
+    {
+        .name  = "LOCK_RETRIES",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.lock_retries,
+        .descr = "Number of times to attempt locking the buffer"
+    },
+    {
+        .name  = "RESTOURE_RETRIES",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.restore_retries,
+        .descr = "Number of times to attempt restoring the buffer"
+    },
+    {
+        .name  = "GETSTATUS_RETRIES",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.getstatus_retries,
+        .descr = "Number of times to attempt getting status of the buffer"
+    },
+    {
+        .name  = "SET_PRIMARY",
+        .tag   = AUD_OPT_BOOL,
+        .valp  = &conf.set_primary,
+        .descr = "Set the parameters of primary buffer"
+    },
    {
        .name  = "LATENCY_MILLIS",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.latency_millis,
+        .valp  = &conf.latency_millis,
        .descr = "(undocumented)"
    },
+    {
+        .name  = "PRIMARY_FREQ",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.settings.freq,
+        .descr = "Primary buffer frequency"
+    },
+    {
+        .name  = "PRIMARY_CHANNELS",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.settings.nchannels,
+        .descr = "Primary buffer number of channels (1 - mono, 2 - stereo)"
+    },
+    {
+        .name  = "PRIMARY_FMT",
+        .tag   = AUD_OPT_FMT,
+        .valp  = &conf.settings.fmt,
+        .descr = "Primary buffer format"
+    },
    {
        .name  = "BUFSIZE_OUT",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.bufsize_out,
+        .valp  = &conf.bufsize_out,
        .descr = "(undocumented)"
    },
    {
        .name  = "BUFSIZE_IN",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.bufsize_in,
+        .valp  = &conf.bufsize_in,
        .descr = "(undocumented)"
    },
    { /* End of list */ }
--- a/audio/esdaudio.c
+++ b/audio/esdaudio.c
@@ -0,0 +1,557 @@
+/*
+ * QEMU ESD audio driver
+ *
+ * Copyright (c) 2006 Frederick Reeve (brushed up by malc)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <esd.h>
+#include "qemu-common.h"
+#include "audio.h"
+
+#define AUDIO_CAP "esd"
+#include "audio_int.h"
+#include "audio_pt_int.h"
+
+/*
+ * Per-voice playback state of the ESD backend.
+ *
+ * hw has to stay the first member: the driver callbacks cast the
+ * HWVoiceOut pointer they are given directly to ESDVoiceOut.  The fields
+ * below hw are exchanged between the audio callbacks and the worker
+ * thread (qesd_thread_out) while holding the pt lock.
+ */
+typedef struct {
+    HWVoiceOut hw;
+    int done;           /* set to 1 by qesd_fini_out; worker exits on it */
+    int live;           /* samples handed to the worker by qesd_run_out */
+    int decr;           /* samples the worker consumed, not yet reported */
+    int rpos;           /* worker's read position, copied into hw->rpos */
+    void *pcm_buf;      /* hw->samples frames; clip target written to fd */
+    int fd;             /* descriptor returned by esd_play_stream */
+    struct audio_pt pt; /* lock/wait/thread helper for this voice */
+} ESDVoiceOut;
+
+/*
+ * Per-voice capture state of the ESD backend; the capture counterpart of
+ * ESDVoiceOut.  The fields below hw are read and updated by the worker
+ * thread (qesd_thread_in) under the pt lock.
+ * NOTE(review): hw is presumably required to be the first member, as for
+ * ESDVoiceOut - confirm against the capture callbacks.
+ */
+typedef struct {
+    HWVoiceIn hw;
+    int done;           /* worker thread exits once this is non-zero */
+    int dead;           /* samples the worker may fill; it wakes above threshold */
+    int incr;           /* samples the worker accounted for, accumulated */
+    int wpos;           /* worker's write position after its last pass */
+    void *pcm_buf;      /* raw bytes read from fd land here */
+    int fd;             /* descriptor the worker read()s from */
+    struct audio_pt pt; /* lock/wait/thread helper for this voice */
+} ESDVoiceIn;
+
+/*
+ * Driver-wide configuration.  dac_host and adc_host have no initializer,
+ * so they start out as NULL.
+ */
+static struct {
+    int samples;    /* becomes hw->samples of a playback voice */
+    int divisor;    /* worker threshold is hw->samples / divisor; 0 -> 0 */
+    char *dac_host; /* host argument passed to esd_play_stream */
+    char *adc_host; /* presumably the capture host - TODO confirm */
+} conf = {
+    .samples = 1024,
+    .divisor = 2,
+};
+
+/*
+ * Log a printf-style message under the AUDIO_CAP tag, then a second
+ * "Reason:" line carrying the strerror() text for err.
+ */
+static void GCC_FMT_ATTR (2, 3) qesd_logerr (int err, const char *fmt, ...)
+{
+    const char *reason;
+    va_list args;
+
+    va_start (args, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, args);
+    va_end (args);
+
+    reason = strerror (err);
+    AUD_log (AUDIO_CAP, "Reason: %s\n", reason);
+}
+
+/* playback */
+/*
+ * Worker thread of a playback voice; arg is the ESDVoiceOut.
+ *
+ * Each pass: wait under the pt lock until esd->live exceeds the threshold
+ * (or esd->done is set), drop the lock, clip samples from hw->mix_buf into
+ * esd->pcm_buf and write() them to esd->fd, then re-take the lock and
+ * publish rpos/live/decr.  Always returns NULL.
+ */
+static void *qesd_thread_out (void *arg)
+{
+    ESDVoiceOut *esd = arg;
+    HWVoiceOut *hw = &esd->hw;
+    int threshold;
+
+    /* a divisor of 0 gives threshold 0, i.e. any pending sample wakes us */
+    threshold = conf.divisor ? hw->samples / conf.divisor : 0;
+
+    if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+        return NULL;
+    }
+
+    for (;;) {
+        int decr, to_mix, rpos;
+
+        /* lock is held here: sleep until there is enough work or we must quit */
+        for (;;) {
+            if (esd->done) {
+                goto exit;
+            }
+
+            if (esd->live > threshold) {
+                break;
+            }
+
+            if (audio_pt_wait (&esd->pt, AUDIO_FUNC)) {
+                goto exit;
+            }
+        }
+
+        /* snapshot the work under the lock, then do the I/O without it */
+        decr = to_mix = esd->live;
+        rpos = hw->rpos;
+
+        if (audio_pt_unlock (&esd->pt, AUDIO_FUNC)) {
+            return NULL;
+        }
+
+        while (to_mix) {
+            ssize_t written;
+            /* never run past the end of the ring buffer in one chunk */
+            int chunk = audio_MIN (to_mix, hw->samples - rpos);
+            struct st_sample *src = hw->mix_buf + rpos;
+
+            hw->clip (esd->pcm_buf, src, chunk);
+
+            /* retried for as long as write() fails with EINTR or EAGAIN */
+        again:
+            written = write (esd->fd, esd->pcm_buf, chunk << hw->info.shift);
+            if (written == -1) {
+                if (errno == EINTR || errno == EAGAIN) {
+                    goto again;
+                }
+                /* the lock is not held at this point, so a plain return */
+                qesd_logerr (errno, "write failed\n");
+                return NULL;
+            }
+
+            if (written != chunk << hw->info.shift) {
+                /* short write: account for whole samples only and stop */
+                int wsamples = written >> hw->info.shift;
+                int wbytes = wsamples << hw->info.shift;
+                if (wbytes != written) {
+                    dolog ("warning: Misaligned write %d (requested %zd), "
+                           "alignment %d\n",
+                           wbytes, written, hw->info.align + 1);
+                }
+                to_mix -= wsamples;
+                rpos = (rpos + wsamples) % hw->samples;
+                /*
+                 * NOTE(review): to_mix is still non-zero here, yet the
+                 * full decr is applied to live/decr below while rpos only
+                 * advanced by what was written - confirm this is intended.
+                 */
+                break;
+            }
+
+            rpos = (rpos + chunk) % hw->samples;
+            to_mix -= chunk;
+        }
+
+        if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+            return NULL;
+        }
+
+        /* publish the result; qesd_run_out picks these up */
+        esd->rpos = rpos;
+        esd->live -= decr;
+        esd->decr += decr;
+    }
+
+ exit:
+    audio_pt_unlock (&esd->pt, AUDIO_FUNC);
+    return NULL;
+}
+
+/*
+ * Playback run callback.  Under the pt lock: report up to esd->decr
+ * samples as consumed, store the remaining live count for the worker,
+ * copy the worker's read position into hw->rpos and wake the worker when
+ * samples remain.  Returns the number of samples reported as consumed,
+ * or 0 when the lock cannot be taken.
+ */
+static int qesd_run_out (HWVoiceOut *hw, int live)
+{
+    ESDVoiceOut *esd = (ESDVoiceOut *) hw;
+    int consumed;
+
+    if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+        return 0;
+    }
+
+    consumed = audio_MIN (live, esd->decr);
+    esd->decr -= consumed;
+    esd->live = live - consumed;
+    hw->rpos = esd->rpos;
+
+    if (esd->live <= 0) {
+        /* nothing left for the worker: plain unlock, no wake-up */
+        audio_pt_unlock (&esd->pt, AUDIO_FUNC);
+        return consumed;
+    }
+
+    audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
+    return consumed;
+}
+
+/* Playback write callback: forwards to audio_pcm_sw_write, returns its result. */
+static int qesd_write (SWVoiceOut *sw, void *buf, int len)
+{
+    int written = audio_pcm_sw_write (sw, buf, len);
+
+    return written;
+}
+
+/*
+ * Set up a playback voice: derive the ESD stream format from as, size and
+ * allocate the PCM buffer, open the ESD playback stream and start the
+ * worker thread.
+ *
+ * 8 bit requests are played as unsigned 8 bit; everything else (including
+ * 32 bit and unknown formats, each with a log message) as signed 16 bit.
+ *
+ * Returns 0 on success, -1 on failure with the buffer (and stream, if it
+ * was opened) released again.
+ */
+static int qesd_init_out (HWVoiceOut *hw, struct audsettings *as)
+{
+    ESDVoiceOut *esd = (ESDVoiceOut *) hw;
+    struct audsettings obt_as = *as;
+    int esdfmt = ESD_STREAM | ESD_PLAY;
+    int eight_bit = 0;
+
+    if (as->nchannels == 2) {
+        esdfmt |= ESD_STEREO;
+    }
+    else {
+        esdfmt |= ESD_MONO;
+    }
+
+    switch (as->fmt) {
+    case AUD_FMT_S8:
+    case AUD_FMT_U8:
+        eight_bit = 1;
+        break;
+
+    case AUD_FMT_S16:
+    case AUD_FMT_U16:
+        break;
+
+    case AUD_FMT_S32:
+    case AUD_FMT_U32:
+        dolog ("Will use 16 instead of 32 bit samples\n");
+        break;
+
+    default:
+        dolog ("Internal logic error: Bad audio format %d\n", as->fmt);
+        break;
+    }
+
+    if (eight_bit) {
+        esdfmt |= ESD_BITS8;
+        obt_as.fmt = AUD_FMT_U8;
+    }
+    else {
+        esdfmt |= ESD_BITS16;
+        obt_as.fmt = AUD_FMT_S16;
+    }
+    obt_as.endianness = AUDIO_HOST_ENDIANNESS;
+
+    audio_pcm_init_info (&hw->info, &obt_as);
+
+    hw->samples = conf.samples;
+    esd->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
+    if (!esd->pcm_buf) {
+        dolog ("Could not allocate buffer (%d bytes)\n",
+               hw->samples << hw->info.shift);
+        return -1;
+    }
+
+    esd->fd = esd_play_stream (esdfmt, as->freq, conf.dac_host, NULL);
+    if (esd->fd >= 0) {
+        if (!audio_pt_init (&esd->pt, qesd_thread_out, esd, AUDIO_CAP,
+                            AUDIO_FUNC)) {
+            return 0;
+        }
+
+        /* thread could not be started: give the stream back */
+        if (close (esd->fd)) {
+            qesd_logerr (errno, "%s: close on esd socket(%d) failed\n",
+                         AUDIO_FUNC, esd->fd);
+        }
+        esd->fd = -1;
+    }
+    else {
+        qesd_logerr (errno, "esd_play_stream failed\n");
+    }
+
+    /* common failure tail: drop the PCM buffer */
+    g_free (esd->pcm_buf);
+    esd->pcm_buf = NULL;
+    return -1;
+}
+
+/*
+ * Tear down a playback voice: ask the worker thread to stop and join it,
+ * close the ESD stream if one is open, destroy the audio_pt object and
+ * free the PCM buffer.
+ */
+static void qesd_fini_out (HWVoiceOut *hw)
+{
+    ESDVoiceOut *esd = (ESDVoiceOut *) hw;
+    void *thread_ret;
+
+    /* flag the worker under the lock, wake it, then wait for it to exit */
+    audio_pt_lock (&esd->pt, AUDIO_FUNC);
+    esd->done = 1;
+    audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
+    audio_pt_join (&esd->pt, &thread_ret, AUDIO_FUNC);
+
+    if (esd->fd >= 0) {
+        int rc = close (esd->fd);
+
+        if (rc) {
+            qesd_logerr (errno, "failed to close esd socket\n");
+        }
+        esd->fd = -1;
+    }
+
+    audio_pt_fini (&esd->pt, AUDIO_FUNC);
+
+    g_free (esd->pcm_buf);
+    esd->pcm_buf = NULL;
+}
+
+/* Playback control callback: every command is ignored; always returns 0. */
+static int qesd_ctl_out (HWVoiceOut *hw, int cmd, ...)
+{
+    (void) cmd;
+    (void) hw;
+
+    return 0;
+}
+
+/* capture */
+/*
+ * Body of the capture worker thread.
+ *
+ * arg is the ESDVoiceIn being serviced.  The thread repeatedly waits until
+ * esd->dead (free space reported by qesd_run_in) exceeds a threshold, reads
+ * that many samples from the ESD socket, converts them into hw->conv_buf
+ * and then publishes the new write position and sample count in
+ * esd->wpos / esd->incr.  Always returns NULL.
+ */
+static void *qesd_thread_in (void *arg)
+{
+    ESDVoiceIn *esd = arg;
+    HWVoiceIn *hw = &esd->hw;
+    int threshold;
+
+    /* A zero divisor disables the threshold instead of dividing by zero. */
+    threshold = conf.divisor ? hw->samples / conf.divisor : 0;
+
+    if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+        return NULL;
+    }
+
+    for (;;) {
+        int incr, to_grab, wpos;
+
+        /* Wait for either a shutdown request or enough free space. */
+        for (;;) {
+            if (esd->done) {
+                goto exit;
+            }
+
+            if (esd->dead > threshold) {
+                break;
+            }
+
+            if (audio_pt_wait (&esd->pt, AUDIO_FUNC)) {
+                goto exit;
+            }
+        }
+
+        /* Snapshot the shared state before unlocking for the blocking I/O. */
+        incr = to_grab = esd->dead;
+        wpos = hw->wpos;
+
+        if (audio_pt_unlock (&esd->pt, AUDIO_FUNC)) {
+            return NULL;
+        }
+
+        while (to_grab) {
+            ssize_t nread;
+            /* Never read past the end of the ring in a single call. */
+            int chunk = audio_MIN (to_grab, hw->samples - wpos);
+            void *buf = advance (esd->pcm_buf, wpos);
+
+        again:
+            nread = read (esd->fd, buf, chunk << hw->info.shift);
+            if (nread == -1) {
+                /* Retried immediately, without sleeping or re-locking. */
+                if (errno == EINTR || errno == EAGAIN) {
+                    goto again;
+                }
+                qesd_logerr (errno, "read failed\n");
+                return NULL;
+            }
+
+            /*
+             * Short read (including nread == 0).
+             * NOTE(review): the samples that were read are not passed to
+             * hw->conv on this path, yet wpos is advanced over them, and
+             * incr below still holds the full amount requested - confirm
+             * this accounting is intended.
+             * NOTE(review): the log text says "write" although this is the
+             * read path.
+             */
+            if (nread != chunk << hw->info.shift) {
+                int rsamples = nread >> hw->info.shift;
+                int rbytes = rsamples << hw->info.shift;
+                if (rbytes != nread) {
+                    dolog ("warning: Misaligned write %d (requested %zd), "
+                           "alignment %d\n",
+                           rbytes, nread, hw->info.align + 1);
+                }
+                to_grab -= rsamples;
+                wpos = (wpos + rsamples) % hw->samples;
+                break;
+            }
+
+            /* Full chunk: convert it into the ring at wpos and advance. */
+            hw->conv (hw->conv_buf + wpos, buf, nread >> hw->info.shift);
+            wpos = (wpos + chunk) % hw->samples;
+            to_grab -= chunk;
+        }
+
+        if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+            return NULL;
+        }
+
+        /* Publish the results; qesd_run_in consumes esd->wpos and esd->incr. */
+        esd->wpos = wpos;
+        esd->dead -= incr;
+        esd->incr += incr;
+    }
+
+ exit:
+    audio_pt_unlock (&esd->pt, AUDIO_FUNC);
+    return NULL;
+}
+
+/*
+ * Main-loop side of capture.
+ *
+ * Takes over the samples the worker thread has accumulated in esd->incr
+ * (bounded by the free space in the ring), records the remaining free space
+ * in esd->dead and adopts the thread's write position.  The thread is
+ * signalled only when there is still free space left.
+ *
+ * Returns the number of samples taken over, or 0 if the lock failed.
+ */
+static int qesd_run_in (HWVoiceIn *hw)
+{
+    ESDVoiceIn *esd = (ESDVoiceIn *) hw;
+    int room, taken;
+
+    if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
+        return 0;
+    }
+
+    room = hw->samples - audio_pcm_hw_get_live_in (hw);
+    taken = audio_MIN (room, esd->incr);
+
+    esd->incr -= taken;
+    esd->dead = room - taken;
+    hw->wpos = esd->wpos;
+
+    if (esd->dead <= 0) {
+        audio_pt_unlock (&esd->pt, AUDIO_FUNC);
+    }
+    else {
+        audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
+    }
+
+    return taken;
+}
+
+/* Software-voice read hook: delegates to audio_pcm_sw_read unchanged. */
+static int qesd_read (SWVoiceIn *sw, void *buf, int len)
+{
+    int nread = audio_pcm_sw_read (sw, buf, len);
+
+    return nread;
+}
+
+/*
+ * Set up a capture voice backed by an ESD recording stream.
+ *
+ * hw: voice to initialise (really an ESDVoiceIn).
+ * as: settings requested by the caller.
+ *
+ * The obtained format is U8 for 8-bit requests and S16 for everything else,
+ * always in host endianness.  On success the staging buffer is allocated,
+ * the stream is open and the capture thread has been started.
+ *
+ * Returns 0 on success and -1 on failure; on failure the socket is closed
+ * again and the staging buffer released.
+ */
+static int qesd_init_in (HWVoiceIn *hw, struct audsettings *as)
+{
+    ESDVoiceIn *esd = (ESDVoiceIn *) hw;
+    struct audsettings obt_as = *as;
+    int esdfmt = ESD_STREAM | ESD_RECORD;
+
+    esdfmt |= (as->nchannels == 2) ? ESD_STEREO : ESD_MONO;
+    switch (as->fmt) {
+    case AUD_FMT_S8:
+    case AUD_FMT_U8:
+        esdfmt |= ESD_BITS8;
+        obt_as.fmt = AUD_FMT_U8;
+        break;
+
+    case AUD_FMT_S16:
+    case AUD_FMT_U16:
+        esdfmt |= ESD_BITS16;
+        obt_as.fmt = AUD_FMT_S16;
+        break;
+
+    case AUD_FMT_S32:
+    case AUD_FMT_U32:
+        dolog ("Will use 16 instead of 32 bit samples\n");
+        esdfmt |= ESD_BITS16;
+        obt_as.fmt = AUD_FMT_S16;
+        break;
+
+    default:
+        /*
+         * Report an unexpected format the same way qesd_init_out does
+         * instead of silently opening a stream whose width was never
+         * selected, and fall back to 16-bit samples.
+         */
+        dolog ("Internal logic error: Bad audio format %d\n", as->fmt);
+        esdfmt |= ESD_BITS16;
+        obt_as.fmt = AUD_FMT_S16;
+        break;
+    }
+    obt_as.endianness = AUDIO_HOST_ENDIANNESS;
+
+    audio_pcm_init_info (&hw->info, &obt_as);
+
+    /* One staging slot of (1 << shift) bytes per sample in the ring. */
+    hw->samples = conf.samples;
+    esd->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
+    if (!esd->pcm_buf) {
+        dolog ("Could not allocate buffer (%d bytes)\n",
+               hw->samples << hw->info.shift);
+        return -1;
+    }
+
+    esd->fd = esd_record_stream (esdfmt, as->freq, conf.adc_host, NULL);
+    if (esd->fd < 0) {
+        qesd_logerr (errno, "esd_record_stream failed\n");
+        goto fail1;
+    }
+
+    if (audio_pt_init (&esd->pt, qesd_thread_in, esd, AUDIO_CAP, AUDIO_FUNC)) {
+        goto fail2;
+    }
+
+    return 0;
+
+    /* Unwind in reverse order of acquisition. */
+ fail2:
+    if (close (esd->fd)) {
+        qesd_logerr (errno, "%s: close on esd socket(%d) failed\n",
+                     AUDIO_FUNC, esd->fd);
+    }
+    esd->fd = -1;
+
+ fail1:
+    g_free (esd->pcm_buf);
+    esd->pcm_buf = NULL;
+    return -1;
+}
+
+/*
+ * Tear down a capture voice.
+ *
+ * Sets esd->done under the voice lock so that qesd_thread_in leaves its
+ * loop, signals and joins the thread, and only then closes the ESD socket
+ * and releases the PCM staging buffer.
+ */
+static void qesd_fini_in (HWVoiceIn *hw)
+{
+    ESDVoiceIn *esd = (ESDVoiceIn *) hw;
+    void *thread_ret;
+
+    /* Raise the termination flag, then wake and wait for the thread. */
+    audio_pt_lock (&esd->pt, AUDIO_FUNC);
+    esd->done = 1;
+    audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
+    audio_pt_join (&esd->pt, &thread_ret, AUDIO_FUNC);
+
+    /* A negative descriptor means there is no socket to close. */
+    if (esd->fd >= 0) {
+        if (close (esd->fd) != 0) {
+            qesd_logerr (errno, "failed to close esd socket\n");
+        }
+        esd->fd = -1;
+    }
+
+    audio_pt_fini (&esd->pt, AUDIO_FUNC);
+
+    g_free (esd->pcm_buf);
+    esd->pcm_buf = NULL;
+}
+
+/*
+ * Voice control hook for capture.  No command needs any action here, so
+ * both arguments are ignored and 0 is returned unconditionally.
+ */
+static int qesd_ctl_in (HWVoiceIn *hw, int cmd, ...)
+{
+    (void) cmd;
+    (void) hw;
+
+    return 0;
+}
+
+/* common */
+/* Driver init hook: hands back the address of the file-local conf. */
+static void *qesd_audio_init (void)
+{
+    void *opaque = &conf;
+
+    return opaque;
+}
+
+/* Driver teardown hook: nothing to release, only a debug trace. */
+static void qesd_audio_fini (void *opaque)
+{
+    ldebug ("esd_fini");
+    (void) opaque;
+}
+
+/*
+ * User-settable options of the ESD driver.  Each entry binds an option
+ * name to the conf field that stores its value; the empty entry terminates
+ * the list.
+ */
+struct audio_option qesd_options[] = {
+    {
+        .name  = "SAMPLES",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.samples,
+        .descr = "buffer size in samples"
+    },
+    {
+        /* Zero disables the wake-up threshold (see qesd_thread_in). */
+        .name  = "DIVISOR",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.divisor,
+        .descr = "threshold divisor"
+    },
+    {
+        .name  = "DAC_HOST",
+        .tag   = AUD_OPT_STR,
+        .valp  = &conf.dac_host,
+        .descr = "playback host"
+    },
+    {
+        .name  = "ADC_HOST",
+        .tag   = AUD_OPT_STR,
+        .valp  = &conf.adc_host,
+        .descr = "capture host"
+    },
+    { /* End of list */ }
+};
+
+/* PCM callback table: playback hooks first, then the capture hooks. */
+static struct audio_pcm_ops qesd_pcm_ops = {
+    .init_out = qesd_init_out,
+    .fini_out = qesd_fini_out,
+    .run_out  = qesd_run_out,
+    .write    = qesd_write,
+    .ctl_out  = qesd_ctl_out,
+
+    .init_in  = qesd_init_in,
+    .fini_in  = qesd_fini_in,
+    .run_in   = qesd_run_in,
+    .read     = qesd_read,
+    .ctl_in   = qesd_ctl_in,
+};
+
+/*
+ * Driver descriptor for the "esd" backend.  It is not eligible as a
+ * default driver (can_be_default is 0) and places no driver-side limit on
+ * the number of voices.
+ */
+struct audio_driver esd_audio_driver = {
+    .name           = "esd",
+    .descr          = "http://en.wikipedia.org/wiki/Esound",
+    .options        = qesd_options,
+    .init           = qesd_audio_init,
+    .fini           = qesd_audio_fini,
+    .pcm_ops        = &qesd_pcm_ops,
+    .can_be_default = 0,
+    .max_voices_out = INT_MAX,
+    .max_voices_in  = INT_MAX,
+    /* Per-voice allocation sizes for the driver-specific voice structs. */
+    .voice_size_out = sizeof (ESDVoiceOut),
+    .voice_size_in  = sizeof (ESDVoiceIn)
+};
--- a/audio/fmodaudio.c
+++ b/audio/fmodaudio.c
@@ -0,0 +1,685 @@
+/*
+ * QEMU FMOD audio driver
+ *
+ * Copyright (c) 2004-2005 Vassili Karpov (malc)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <fmod.h>
+#include <fmod_errors.h>
+#include "qemu-common.h"
+#include "audio.h"
+
+#define AUDIO_CAP "fmod"
+#include "audio_int.h"
+
+/* Per-voice playback state. */
+typedef struct FMODVoiceOut {
+    /* Kept first: the callbacks cast HWVoiceOut * to FMODVoiceOut *. */
+    HWVoiceOut hw;
+    /* Sample position in fmod_sample where fmod_run_out writes next. */
+    unsigned int old_pos;
+    /* FMOD sample used as the output ring buffer. */
+    FSOUND_SAMPLE *fmod_sample;
+    /* Channel returned by FSOUND_PlaySoundEx in fmod_init_out. */
+    int channel;
+} FMODVoiceOut;
+
+/* Per-voice capture state. */
+typedef struct FMODVoiceIn {
+    /* Kept first: the callbacks cast HWVoiceIn * to FMODVoiceIn *. */
+    HWVoiceIn hw;
+    /* FMOD sample that FSOUND_Record_StartSample records into. */
+    FSOUND_SAMPLE *fmod_sample;
+} FMODVoiceIn;
+
+/* Driver-wide configuration; most fields are exposed via fmod_options. */
+static struct {
+    const char *drvname;    /* output driver name looked up in drvtab */
+    int nb_samples;         /* length of every FMOD sample, in samples */
+    int freq;               /* mix rate passed to FSOUND_Init */
+    int nb_channels;        /* channel count passed to FSOUND_Init */
+    int bufsize;            /* passed to FSOUND_SetBufferSize when non-zero */
+    int broken_adc;         /* set by fmod_audio_init; fmod_init_in then fails */
+} conf = {
+    .nb_samples  = 2048 * 2,
+    .freq        = 44100,
+    .nb_channels = 2,
+};
+
+/*
+ * Log a printf-style message, followed by a "Reason:" line carrying the
+ * text for the error code reported by FSOUND_GetError.
+ */
+static void GCC_FMT_ATTR (1, 2) fmod_logerr (const char *fmt, ...)
+{
+    va_list args;
+    const char *reason;
+
+    va_start (args, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, args);
+    va_end (args);
+
+    reason = FMOD_ErrorString (FSOUND_GetError ());
+    AUD_log (AUDIO_CAP, "Reason: %s\n", reason);
+}
+
+/*
+ * Like fmod_logerr, but for initialisation failures: a leading line names
+ * the part (typ, e.g. "DAC" or "ADC") that could not be initialised, then
+ * the formatted message and the "Reason:" line follow.
+ */
+static void GCC_FMT_ATTR (2, 3) fmod_logerr2 (const char *typ,
+                                              const char *fmt, ...)
+{
+    va_list args;
+    const char *reason;
+
+    AUD_log (AUDIO_CAP, "Could not initialize %s\n", typ);
+
+    va_start (args, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, args);
+    va_end (args);
+
+    reason = FMOD_ErrorString (FSOUND_GetError ());
+    AUD_log (AUDIO_CAP, "Reason: %s\n", reason);
+}
+
+/* Software-voice write hook: delegates to audio_pcm_sw_write unchanged. */
+static int fmod_write (SWVoiceOut *sw, void *buf, int len)
+{
+    int written = audio_pcm_sw_write (sw, buf, len);
+
+    return written;
+}
+
+/*
+ * Clear the whole FMOD sample of a playback voice.
+ *
+ * Locks the full sample (offset 0, hw->samples samples), clears it with
+ * audio_pcm_info_clear_buf and unlocks it again.  If the lock comes back
+ * misaligned or with a length other than the full buffer, nothing is
+ * cleared, but the sample is still unlocked.
+ */
+static void fmod_clear_sample (FMODVoiceOut *fmd)
+{
+    HWVoiceOut *hw = &fmd->hw;
+    const int nbytes = hw->samples << hw->info.shift;
+    void *ptr1 = 0, *ptr2 = 0;
+    unsigned int bytes1 = 0, bytes2 = 0;
+
+    if (!FSOUND_Sample_Lock (fmd->fmod_sample, 0, nbytes,
+                             &ptr1, &ptr2, &bytes1, &bytes2)) {
+        fmod_logerr ("Failed to lock sample\n");
+        return;
+    }
+
+    if ((bytes1 & hw->info.align) || (bytes2 & hw->info.align)) {
+        dolog ("Lock returned misaligned length %d, %d, alignment %d\n",
+               bytes1, bytes2, hw->info.align + 1);
+    }
+    else if (bytes1 + bytes2 != (unsigned int) nbytes) {
+        dolog ("Lock returned incomplete length %d, %d\n",
+               bytes1 + bytes2, nbytes);
+    }
+    else {
+        audio_pcm_info_clear_buf (&hw->info, ptr1, hw->samples);
+    }
+
+    /* Reached on success and on both sanity-check failures. */
+    if (!FSOUND_Sample_Unlock (fmd->fmod_sample, ptr1, ptr2, bytes1, bytes2)) {
+        fmod_logerr ("Failed to unlock sample\n");
+    }
+}
+
+/*
+ * Clip dst_len samples from the mix ring buffer into dst.
+ *
+ * Reading starts at hw->rpos.  When the request runs past the end of the
+ * ring it is split into a head part (up to the end) and a tail part (from
+ * the start of the ring).  hw->rpos is advanced accordingly.
+ */
+static void fmod_write_sample (HWVoiceOut *hw, uint8_t *dst, int dst_len)
+{
+    int head = dst_len;
+    int tail = 0;
+    int end = hw->rpos + dst_len;
+
+    if (end > hw->samples) {
+        head = hw->samples - hw->rpos;
+        tail = dst_len - head;
+        end = tail;
+    }
+
+    if (head) {
+        hw->clip (dst, hw->mix_buf + hw->rpos, head);
+    }
+
+    if (tail) {
+        /* The tail lands right behind the head bytes in dst. */
+        hw->clip (advance (dst, head << hw->info.shift), hw->mix_buf, tail);
+    }
+
+    hw->rpos = end % hw->samples;
+}
+
+/*
+ * Unlock a region previously locked with FSOUND_Sample_Lock.
+ * Returns 0 on success; logs the FMOD error and returns -1 on failure.
+ */
+static int fmod_unlock_sample (FSOUND_SAMPLE *sample, void *p1, void *p2,
+                               unsigned int blen1, unsigned int blen2)
+{
+    if (FSOUND_Sample_Unlock (sample, p1, p2, blen1, blen2)) {
+        return 0;
+    }
+
+    fmod_logerr ("Failed to unlock sample\n");
+    return -1;
+}
+
+/*
+ * Lock len samples of an FMOD sample, starting at sample position pos.
+ *
+ * pos and len are given in samples and converted to bytes with
+ * info->shift.  On success *p1/*blen1 and *p2/*blen2 describe the locked
+ * regions (lengths in bytes) and 0 is returned; the caller must release
+ * them with fmod_unlock_sample.
+ *
+ * Returns -1 if the lock fails, or if FMOD hands back a length that is
+ * not a multiple of the frame size; in the latter case the sample is
+ * unlocked again and the outputs are poisoned.  A NULL region pointer with
+ * a non-zero length is reported and its length forced to 0.
+ */
+static int fmod_lock_sample (
+    FSOUND_SAMPLE *sample,
+    struct audio_pcm_info *info,
+    int pos,
+    int len,
+    void **p1,
+    void **p2,
+    unsigned int *blen1,
+    unsigned int *blen2
+    )
+{
+    int status;
+
+    status = FSOUND_Sample_Lock (
+        sample,
+        pos << info->shift,
+        len << info->shift,
+        p1,
+        p2,
+        blen1,
+        blen2
+        );
+
+    if (!status) {
+        fmod_logerr ("Failed to lock sample\n");
+        return -1;
+    }
+
+    if ((*blen1 & info->align) || (*blen2 & info->align)) {
+        dolog ("Lock returned misaligned length %d, %d, alignment %d\n",
+               *blen1, *blen2, info->align + 1);
+
+        fmod_unlock_sample (sample, *p1, *p2, *blen1, *blen2);
+
+        /*
+         * Poison the outputs so that accidental use is conspicuous.  An
+         * explicit cast replaces the former `NULL - 1', which performed
+         * arithmetic on a null pointer constant.
+         */
+        *p1 = (void *) -1;
+        *p2 = (void *) -1;
+        *blen1 = ~0U;
+        *blen2 = ~0U;
+        return -1;
+    }
+
+    if (!*p1 && *blen1) {
+        dolog ("warning: !p1 && blen1=%d\n", *blen1);
+        *blen1 = 0;
+    }
+
+    /*
+     * Test the returned region pointer (*p2), not the out-parameter p2
+     * itself, so that this check can actually fire.
+     */
+    if (!*p2 && *blen2) {
+        dolog ("warning: !p2 && blen2=%d\n", *blen2);
+        *blen2 = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * Move up to live samples from the mix buffer into the FMOD sample.
+ *
+ * The amount is limited so that writing, which starts at fmd->old_pos,
+ * does not run past the channel position reported by
+ * FSOUND_GetCurrentPosition.  Returns the number of samples written, or 0
+ * when nothing was done.
+ */
+static int fmod_run_out (HWVoiceOut *hw, int live)
+{
+    FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
+    int decr;
+    void *p1 = 0, *p2 = 0;
+    unsigned int blen1 = 0, blen2 = 0;
+    unsigned int len1 = 0, len2 = 0;
+
+    /*
+     * NOTE(review): as written, no samples are ever transferred unless
+     * hw->pending_disable is set - confirm that this condition is intended.
+     */
+    if (!hw->pending_disable) {
+        return 0;
+    }
+
+    decr = live;
+
+    if (fmd->channel >= 0) {
+        int len = decr;
+        int old_pos = fmd->old_pos;
+        int ppos = FSOUND_GetCurrentPosition (fmd->channel);
+
+        /* Nothing to do if the position did not move or is reported as 0. */
+        if (ppos == old_pos || !ppos) {
+            return 0;
+        }
+
+        /* Clamp len to the gap between old_pos and ppos, with wrap-around. */
+        if ((old_pos < ppos) && ((old_pos + len) > ppos)) {
+            len = ppos - old_pos;
+        }
+        else {
+            if ((old_pos > ppos) && ((old_pos + len) > (ppos + hw->samples))) {
+                len = hw->samples - old_pos + ppos;
+            }
+        }
+        decr = len;
+
+        if (audio_bug (AUDIO_FUNC, decr < 0)) {
+            dolog ("decr=%d live=%d ppos=%d old_pos=%d len=%d\n",
+                   decr, live, ppos, old_pos, len);
+            return 0;
+        }
+    }
+
+
+    if (!decr) {
+        return 0;
+    }
+
+    if (fmod_lock_sample (fmd->fmod_sample, &fmd->hw.info,
+                          fmd->old_pos, decr,
+                          &p1, &p2,
+                          &blen1, &blen2)) {
+        return 0;
+    }
+
+    /* The lock reports byte lengths; convert them back to samples. */
+    len1 = blen1 >> hw->info.shift;
+    len2 = blen2 >> hw->info.shift;
+    ldebug ("%p %p %d %d %d %d\n", p1, p2, len1, len2, blen1, blen2);
+    decr = len1 + len2;
+
+    if (p1 && len1) {
+        fmod_write_sample (hw, p1, len1);
+    }
+
+    if (p2 && len2) {
+        fmod_write_sample (hw, p2, len2);
+    }
+
+    fmod_unlock_sample (fmd->fmod_sample, p1, p2, blen1, blen2);
+
+    fmd->old_pos = (fmd->old_pos + decr) % hw->samples;
+    return decr;
+}
+
+/*
+ * Translate a QEMU sample format into an FMOD sample mode.
+ *
+ * The result always contains FSOUND_LOOP_NORMAL, plus the signedness and
+ * width flags for fmt and either FSOUND_STEREO or FSOUND_MONO depending
+ * on stereo.  Formats other than 8/16 bit are reported as an internal
+ * error and mapped to FSOUND_8BITS (or abort when DEBUG_FMOD is defined).
+ */
+static int aud_to_fmodfmt (audfmt_e fmt, int stereo)
+{
+    int sample_flags;
+
+    switch (fmt) {
+    case AUD_FMT_S8:
+        sample_flags = FSOUND_SIGNED | FSOUND_8BITS;
+        break;
+
+    case AUD_FMT_U8:
+        sample_flags = FSOUND_UNSIGNED | FSOUND_8BITS;
+        break;
+
+    case AUD_FMT_S16:
+        sample_flags = FSOUND_SIGNED | FSOUND_16BITS;
+        break;
+
+    case AUD_FMT_U16:
+        sample_flags = FSOUND_UNSIGNED | FSOUND_16BITS;
+        break;
+
+    default:
+        dolog ("Internal logic error: Bad audio format %d\n", fmt);
+#ifdef DEBUG_FMOD
+        abort ();
+#endif
+        sample_flags = FSOUND_8BITS;
+        break;
+    }
+
+    return FSOUND_LOOP_NORMAL | sample_flags
+        | (stereo ? FSOUND_STEREO : FSOUND_MONO);
+}
+
+/*
+ * Release the FMOD resources of a playback voice.  Does nothing when the
+ * voice holds no sample; otherwise frees the sample and then stops the
+ * channel if one was assigned.
+ */
+static void fmod_fini_out (HWVoiceOut *hw)
+{
+    FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
+
+    if (!fmd->fmod_sample) {
+        return;
+    }
+
+    FSOUND_Sample_Free (fmd->fmod_sample);
+    fmd->fmod_sample = 0;
+
+    if (fmd->channel >= 0) {
+        FSOUND_StopSound (fmd->channel);
+    }
+}
+
+/*
+ * Set up a playback voice.
+ *
+ * Allocates an FMOD sample of conf.nb_samples samples in the format
+ * derived from as, starts it on a free channel and records that channel.
+ * fmod_ctl_out later pauses and resumes the channel.
+ *
+ * Returns 0 on success.  Returns -1 if the sample cannot be allocated or
+ * played; in that case the voice holds no sample afterwards.
+ */
+static int fmod_init_out (HWVoiceOut *hw, struct audsettings *as)
+{
+    int mode, channel;
+    FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
+    struct audsettings obt_as = *as;
+
+    mode = aud_to_fmodfmt (as->fmt, as->nchannels == 2 ? 1 : 0);
+    fmd->fmod_sample = FSOUND_Sample_Alloc (
+        FSOUND_FREE,            /* index */
+        conf.nb_samples,        /* length */
+        mode,                   /* mode */
+        as->freq,               /* freq */
+        255,                    /* volume */
+        128,                    /* pan */
+        255                     /* priority */
+        );
+
+    if (!fmd->fmod_sample) {
+        fmod_logerr2 ("DAC", "Failed to allocate FMOD sample\n");
+        return -1;
+    }
+
+    channel = FSOUND_PlaySoundEx (FSOUND_FREE, fmd->fmod_sample, 0, 1);
+    if (channel < 0) {
+        fmod_logerr2 ("DAC", "Failed to start playing sound\n");
+        FSOUND_Sample_Free (fmd->fmod_sample);
+        /* Do not leave a dangling pointer that fmod_fini_out would free. */
+        fmd->fmod_sample = NULL;
+        return -1;
+    }
+    fmd->channel = channel;
+
+    /* FMOD always operates on little endian frames? */
+    obt_as.endianness = 0;
+    audio_pcm_init_info (&hw->info, &obt_as);
+    hw->samples = conf.nb_samples;
+    return 0;
+}
+
+/*
+ * Voice control hook for playback.
+ *
+ * VOICE_ENABLE clears the sample and un-pauses the channel; VOICE_DISABLE
+ * pauses it.  Other commands are ignored.  Failures are only logged; the
+ * function always returns 0.
+ */
+static int fmod_ctl_out (HWVoiceOut *hw, int cmd, ...)
+{
+    FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
+
+    if (cmd == VOICE_ENABLE) {
+        fmod_clear_sample (fmd);
+        if (!FSOUND_SetPaused (fmd->channel, 0)) {
+            fmod_logerr ("Failed to resume channel %d\n", fmd->channel);
+        }
+    }
+    else if (cmd == VOICE_DISABLE) {
+        if (!FSOUND_SetPaused (fmd->channel, 1)) {
+            fmod_logerr ("Failed to pause channel %d\n", fmd->channel);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Set up a capture voice.
+ *
+ * Fails immediately when conf.broken_adc is set.  Otherwise allocates an
+ * FMOD sample of conf.nb_samples samples in the format derived from as.
+ * Recording itself is started by fmod_ctl_in.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int fmod_init_in (HWVoiceIn *hw, struct audsettings *as)
+{
+    FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
+    struct audsettings obt_as = *as;
+    int stereo, mode;
+
+    if (conf.broken_adc) {
+        return -1;
+    }
+
+    stereo = (as->nchannels == 2);
+    mode = aud_to_fmodfmt (as->fmt, stereo);
+
+    /* Arguments: index, length, mode, freq, volume, pan, priority. */
+    fmd->fmod_sample = FSOUND_Sample_Alloc (FSOUND_FREE, conf.nb_samples,
+                                            mode, as->freq, 255, 128, 255);
+    if (!fmd->fmod_sample) {
+        fmod_logerr2 ("ADC", "Failed to allocate FMOD sample\n");
+        return -1;
+    }
+
+    /* FMOD always operates on little endian frames? */
+    obt_as.endianness = 0;
+    hw->samples = conf.nb_samples;
+    audio_pcm_init_info (&hw->info, &obt_as);
+
+    return 0;
+}
+
+/*
+ * Release the FMOD resources of a capture voice.  Does nothing when the
+ * voice holds no sample; otherwise stops recording and frees the sample.
+ */
+static void fmod_fini_in (HWVoiceIn *hw)
+{
+    FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
+
+    if (!fmd->fmod_sample) {
+        return;
+    }
+
+    FSOUND_Record_Stop ();
+    FSOUND_Sample_Free (fmd->fmod_sample);
+    fmd->fmod_sample = 0;
+}
+
+/*
+ * Pull newly recorded samples out of the FMOD sample.
+ *
+ * The amount is the ring distance between the recording position reported
+ * by FSOUND_Record_GetPosition and hw->wpos, capped by the free space in
+ * the conversion buffer.  The samples are converted into hw->conv_buf and
+ * hw->wpos is advanced.
+ *
+ * Returns the number of samples captured, or 0 when there is no room, no
+ * new data, or an FMOD call failed.
+ */
+static int fmod_run_in (HWVoiceIn *hw)
+{
+    FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
+    const int shift = hw->info.shift;
+    void *ptr1, *ptr2;
+    unsigned int bytes1 = 0, bytes2 = 0;
+    unsigned int nsamp1, nsamp2, grabbed;
+    int room, rec_pos, want;
+
+    room = hw->samples - audio_pcm_hw_get_live_in (hw);
+    if (!room) {
+        return 0;
+    }
+
+    rec_pos = FSOUND_Record_GetPosition ();
+    if (rec_pos < 0) {
+        fmod_logerr ("Could not get recording position\n");
+        return 0;
+    }
+
+    want = audio_ring_dist (rec_pos,  hw->wpos, hw->samples);
+    if (!want) {
+        return 0;
+    }
+    want = audio_MIN (want, room);
+
+    if (fmod_lock_sample (fmd->fmod_sample, &hw->info, hw->wpos, want,
+                          &ptr1, &ptr2, &bytes1, &bytes2)) {
+        return 0;
+    }
+
+    /* The lock reports byte lengths; convert them back to samples. */
+    nsamp1 = bytes1 >> shift;
+    nsamp2 = bytes2 >> shift;
+
+    /* First region continues at wpos, second one wraps to the start. */
+    if (ptr1 && bytes1) {
+        hw->conv (hw->conv_buf + hw->wpos, ptr1, nsamp1);
+    }
+    if (ptr2 && nsamp2) {
+        hw->conv (hw->conv_buf, ptr2, nsamp2);
+    }
+
+    fmod_unlock_sample (fmd->fmod_sample, ptr1, ptr2, bytes1, bytes2);
+
+    grabbed = nsamp1 + nsamp2;
+    hw->wpos = (hw->wpos + grabbed) % hw->samples;
+    return grabbed;
+}
+
+/*
+ * Maps the user-visible driver name (the DRV option) to the FMOD output
+ * type handed to FSOUND_SetOutput.  Which entries exist depends on the
+ * host platform the file is compiled for.
+ */
+static struct {
+    const char *name;
+    int type;
+} drvtab[] = {
+    { .name = "none",   .type = FSOUND_OUTPUT_NOSOUND },
+#ifdef _WIN32
+    { .name = "winmm",  .type = FSOUND_OUTPUT_WINMM   },
+    { .name = "dsound", .type = FSOUND_OUTPUT_DSOUND  },
+    { .name = "a3d",    .type = FSOUND_OUTPUT_A3D     },
+    { .name = "asio",   .type = FSOUND_OUTPUT_ASIO    },
+#endif
+#ifdef __linux__
+    { .name = "oss",    .type = FSOUND_OUTPUT_OSS     },
+    { .name = "alsa",   .type = FSOUND_OUTPUT_ALSA    },
+    { .name = "esd",    .type = FSOUND_OUTPUT_ESD     },
+#endif
+#ifdef __APPLE__
+    { .name = "mac",    .type = FSOUND_OUTPUT_MAC     },
+#endif
+    /* Console outputs are compiled out. */
+#if 0
+    { .name = "xbox",   .type = FSOUND_OUTPUT_XBOX    },
+    { .name = "ps2",    .type = FSOUND_OUTPUT_PS2     },
+    { .name = "gcube",  .type = FSOUND_OUTPUT_GC      },
+#endif
+    { .name = "none-realtime", .type = FSOUND_OUTPUT_NOSOUND_NONREALTIME }
+};
+
+/*
+ * Driver init hook.
+ *
+ * Checks the FMOD library version, selects the output driver named by
+ * conf.drvname (if any), applies conf.bufsize (if non-zero) and initialises
+ * FMOD with conf.freq / conf.nb_channels.
+ *
+ * An unknown driver name is reported together with the list of valid names
+ * and initialisation then continues without selecting an output.  A failed
+ * FSOUND_SetBufferSize is only logged.
+ *
+ * Returns the address of conf on success, NULL on failure.
+ */
+static void *fmod_audio_init (void)
+{
+    const char *drv = conf.drvname;
+    int output_type = -1;
+    double ver;
+    size_t i;
+
+    ver = FSOUND_GetVersion ();
+    if (ver < FMOD_VERSION) {
+        dolog ("Wrong FMOD version %f, need at least %f\n", ver, FMOD_VERSION);
+        return NULL;
+    }
+
+#ifdef __linux__
+    if (ver < 3.75) {
+        dolog ("FMOD before 3.75 has bug preventing ADC from working\n"
+               "ADC will be disabled.\n");
+        conf.broken_adc = 1;
+    }
+#endif
+
+    if (drv) {
+        /* i ends up at ARRAY_SIZE (drvtab) when no entry matches. */
+        for (i = 0; i < ARRAY_SIZE (drvtab); i++) {
+            if (!strcmp (drv, drvtab[i].name)) {
+                break;
+            }
+        }
+
+        if (i < ARRAY_SIZE (drvtab)) {
+            output_type = drvtab[i].type;
+        }
+        else {
+            dolog ("Unknown FMOD driver `%s'\n", drv);
+            dolog ("Valid drivers:\n");
+            for (i = 0; i < ARRAY_SIZE (drvtab); i++) {
+                dolog ("  %s\n", drvtab[i].name);
+            }
+        }
+    }
+
+    if (output_type != -1) {
+        if (!FSOUND_SetOutput (output_type)) {
+            fmod_logerr ("FSOUND_SetOutput(%d) failed\n", output_type);
+            return NULL;
+        }
+    }
+
+    if (conf.bufsize) {
+        if (!FSOUND_SetBufferSize (conf.bufsize)) {
+            fmod_logerr ("FSOUND_SetBufferSize (%d) failed\n", conf.bufsize);
+        }
+    }
+
+    if (!FSOUND_Init (conf.freq, conf.nb_channels, 0)) {
+        fmod_logerr ("FSOUND_Init failed\n");
+        return NULL;
+    }
+
+    return &conf;
+}
+
+/* Software-voice read hook: delegates to audio_pcm_sw_read unchanged. */
+static int fmod_read (SWVoiceIn *sw, void *buf, int size)
+{
+    int nread = audio_pcm_sw_read (sw, buf, size);
+
+    return nread;
+}
+
+/*
+ * Voice control hook for capture.
+ *
+ * VOICE_ENABLE starts recording into the voice's sample; VOICE_DISABLE
+ * stops recording.  Other commands are ignored.  Failures are only logged;
+ * the function always returns 0.
+ */
+static int fmod_ctl_in (HWVoiceIn *hw, int cmd, ...)
+{
+    FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
+
+    if (cmd == VOICE_ENABLE) {
+        if (!FSOUND_Record_StartSample (fmd->fmod_sample, 1)) {
+            fmod_logerr ("Failed to start recording\n");
+        }
+    }
+    else if (cmd == VOICE_DISABLE) {
+        if (!FSOUND_Record_Stop ()) {
+            fmod_logerr ("Failed to stop recording\n");
+        }
+    }
+
+    return 0;
+}
+
+/* Driver teardown hook: closes FMOD; the opaque pointer is not needed. */
+static void fmod_audio_fini (void *opaque)
+{
+    FSOUND_Close ();
+    (void) opaque;
+}
+
+/*
+ * User-settable options of the FMOD driver.  Each entry binds an option
+ * name to the conf field that stores its value; the empty entry terminates
+ * the list.
+ */
+static struct audio_option fmod_options[] = {
+    {
+        /* Matched against the names in drvtab by fmod_audio_init. */
+        .name  = "DRV",
+        .tag   = AUD_OPT_STR,
+        .valp  = &conf.drvname,
+        .descr = "FMOD driver"
+    },
+    {
+        .name  = "FREQ",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.freq,
+        .descr = "Default frequency"
+    },
+    {
+        .name  = "SAMPLES",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.nb_samples,
+        .descr = "Buffer size in samples"
+    },
+    {
+        .name  = "CHANNELS",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.nb_channels,
+        .descr = "Number of default channels (1 - mono, 2 - stereo)"
+    },
+    {
+        /* Passed to FSOUND_SetBufferSize by fmod_audio_init when non-zero. */
+        .name  = "BUFSIZE",
+        .tag   = AUD_OPT_INT,
+        .valp  = &conf.bufsize,
+        .descr = "(undocumented)"
+    },
+    { /* End of list */ }
+};
+
+/* PCM callback table: playback hooks first, then the capture hooks. */
+static struct audio_pcm_ops fmod_pcm_ops = {
+    .init_out = fmod_init_out,
+    .fini_out = fmod_fini_out,
+    .run_out  = fmod_run_out,
+    .write    = fmod_write,
+    .ctl_out  = fmod_ctl_out,
+
+    .init_in  = fmod_init_in,
+    .fini_in  = fmod_fini_in,
+    .run_in   = fmod_run_in,
+    .read     = fmod_read,
+    .ctl_in   = fmod_ctl_in
+};
+
+/*
+ * Driver descriptor for the "fmod" backend.  It is eligible as a default
+ * driver (can_be_default is 1) and places no driver-side limit on the
+ * number of voices.
+ */
+struct audio_driver fmod_audio_driver = {
+    .name           = "fmod",
+    .descr          = "FMOD 3.xx http://www.fmod.org",
+    .options        = fmod_options,
+    .init           = fmod_audio_init,
+    .fini           = fmod_audio_fini,
+    .pcm_ops        = &fmod_pcm_ops,
+    .can_be_default = 1,
+    .max_voices_out = INT_MAX,
+    .max_voices_in  = INT_MAX,
+    /* Per-voice allocation sizes for the driver-specific voice structs. */
+    .voice_size_out = sizeof (FMODVoiceOut),
+    .voice_size_in  = sizeof (FMODVoiceIn)
+};
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -22,7 +22,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "audio.h"

--- a/audio/noaudio.c
+++ b/audio/noaudio.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "audio.h"
 #include "qemu/timer.h"
@@ -64,7 +63,7 @@ static int no_write (SWVoiceOut *sw, void *buf, int len)
    return audio_pcm_sw_write (sw, buf, len);
 }

-static int no_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque)
+static int no_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    audio_pcm_init_info (&hw->info, as);
    hw->samples = 1024;
@@ -83,7 +82,7 @@ static int no_ctl_out (HWVoiceOut *hw, int cmd, ...)
    return 0;
 }

-static int no_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+static int no_init_in (HWVoiceIn *hw, struct audsettings *as)
 {
    audio_pcm_init_info (&hw->info, as);
    hw->samples = 1024;
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -21,15 +21,15 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
+#include <stdlib.h>
 #include <sys/mman.h>
+#include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/soundcard.h>
 #include "qemu-common.h"
 #include "qemu/main-loop.h"
 #include "qemu/host-utils.h"
 #include "audio.h"
-#include "trace.h"

 #define AUDIO_CAP "oss"
 #include "audio_int.h"
@@ -38,16 +38,6 @@
 #define USE_DSP_POLICY
 #endif

-typedef struct OSSConf {
-    int try_mmap;
-    int nfrags;
-    int fragsize;
-    const char *devpath_out;
-    const char *devpath_in;
-    int exclusive;
-    int policy;
-} OSSConf;
-
 typedef struct OSSVoiceOut {
    HWVoiceOut hw;
    void *pcm_buf;
@@ -57,7 +47,6 @@ typedef struct OSSVoiceOut {
    int fragsize;
    int mmapped;
    int pending;
-    OSSConf *conf;
 } OSSVoiceOut;

 typedef struct OSSVoiceIn {
@@ -66,9 +55,28 @@ typedef struct OSSVoiceIn {
    int fd;
    int nfrags;
    int fragsize;
-    OSSConf *conf;
 } OSSVoiceIn;

+static struct {
+    int try_mmap;
+    int nfrags;
+    int fragsize;
+    const char *devpath_out;
+    const char *devpath_in;
+    int debug;
+    int exclusive;
+    int policy;
+} conf = {
+    .try_mmap = 0,
+    .nfrags = 4,
+    .fragsize = 4096,
+    .devpath_out = "/dev/dsp",
+    .devpath_in = "/dev/dsp",
+    .debug = 0,
+    .exclusive = 0,
+    .policy = 5
+};
+
 struct oss_params {
    int freq;
    audfmt_e fmt;
@@ -130,18 +138,18 @@ static void oss_helper_poll_in (void *opaque)
    audio_run ("oss_poll_in");
 }

-static void oss_poll_out (HWVoiceOut *hw)
+static int oss_poll_out (HWVoiceOut *hw)
 {
    OSSVoiceOut *oss = (OSSVoiceOut *) hw;

-    qemu_set_fd_handler (oss->fd, NULL, oss_helper_poll_out, NULL);
+    return qemu_set_fd_handler (oss->fd, NULL, oss_helper_poll_out, NULL);
 }

-static void oss_poll_in (HWVoiceIn *hw)
+static int oss_poll_in (HWVoiceIn *hw)
 {
    OSSVoiceIn *oss = (OSSVoiceIn *) hw;

-    qemu_set_fd_handler (oss->fd, oss_helper_poll_in, NULL, NULL);
+    return qemu_set_fd_handler (oss->fd, oss_helper_poll_in, NULL, NULL);
 }

 static int oss_write (SWVoiceOut *sw, void *buf, int len)
@@ -264,18 +272,18 @@ static int oss_get_version (int fd, int *version, const char *typ)
 #endif

 static int oss_open (int in, struct oss_params *req,
-                     struct oss_params *obt, int *pfd, OSSConf* conf)
+                     struct oss_params *obt, int *pfd)
 {
    int fd;
-    int oflags = conf->exclusive ? O_EXCL : 0;
+    int oflags = conf.exclusive ? O_EXCL : 0;
    audio_buf_info abinfo;
    int fmt, freq, nchannels;
    int setfragment = 1;
-    const char *dspname = in ? conf->devpath_in : conf->devpath_out;
+    const char *dspname = in ? conf.devpath_in : conf.devpath_out;
    const char *typ = in ? "ADC" : "DAC";

    /* Kludge needed to have working mmap on Linux */
-    oflags |= conf->try_mmap ? O_RDWR : (in ? O_RDONLY : O_WRONLY);
+    oflags |= conf.try_mmap ? O_RDWR : (in ? O_RDONLY : O_WRONLY);

    fd = open (dspname, oflags | O_NONBLOCK);
    if (-1 == fd) {
@@ -309,18 +317,20 @@ static int oss_open (int in, struct oss_params *req,
    }

 #ifdef USE_DSP_POLICY
-    if (conf->policy >= 0) {
+    if (conf.policy >= 0) {
        int version;

        if (!oss_get_version (fd, &version, typ)) {
-            trace_oss_version(version);
+            if (conf.debug) {
+                dolog ("OSS version = %#x\n", version);
+            }

            if (version >= 0x040000) {
-                int policy = conf->policy;
+                int policy = conf.policy;
                if (ioctl (fd, SNDCTL_DSP_POLICY, &policy)) {
                    oss_logerr2 (errno, typ,
                                 "Failed to set timing policy to %d\n",
-                                 conf->policy);
+                                 conf.policy);
                    goto err;
                }
                setfragment = 0;
@@ -448,12 +458,19 @@ static int oss_run_out (HWVoiceOut *hw, int live)
        }

        if (abinfo.bytes > bufsize) {
-            trace_oss_invalid_available_size(abinfo.bytes, bufsize);
+            if (conf.debug) {
+                dolog ("warning: Invalid available size, size=%d bufsize=%d\n"
+                       "please report your OS/audio hw to av1474@comtv.ru\n",
+                       abinfo.bytes, bufsize);
+            }
            abinfo.bytes = bufsize;
        }

        if (abinfo.bytes < 0) {
-            trace_oss_invalid_available_size(abinfo.bytes, bufsize);
+            if (conf.debug) {
+                dolog ("warning: Invalid available size, size=%d bufsize=%d\n",
+                       abinfo.bytes, bufsize);
+            }
            return 0;
        }

@@ -493,8 +510,7 @@ static void oss_fini_out (HWVoiceOut *hw)
    }
 }

-static int oss_init_out(HWVoiceOut *hw, struct audsettings *as,
-                        void *drv_opaque)
+static int oss_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    OSSVoiceOut *oss = (OSSVoiceOut *) hw;
    struct oss_params req, obt;
@@ -503,17 +519,16 @@ static int oss_init_out(HWVoiceOut *hw, struct audsettings *as,
    int fd;
    audfmt_e effective_fmt;
    struct audsettings obt_as;
-    OSSConf *conf = drv_opaque;

    oss->fd = -1;

    req.fmt = aud_to_ossfmt (as->fmt, as->endianness);
    req.freq = as->freq;
    req.nchannels = as->nchannels;
-    req.fragsize = conf->fragsize;
-    req.nfrags = conf->nfrags;
+    req.fragsize = conf.fragsize;
+    req.nfrags = conf.nfrags;

-    if (oss_open (0, &req, &obt, &fd, conf)) {
+    if (oss_open (0, &req, &obt, &fd)) {
        return -1;
    }

@@ -540,7 +555,7 @@ static int oss_init_out(HWVoiceOut *hw, struct audsettings *as,
    hw->samples = (obt.nfrags * obt.fragsize) >> hw->info.shift;

    oss->mmapped = 0;
-    if (conf->try_mmap) {
+    if (conf.try_mmap) {
        oss->pcm_buf = mmap (
            NULL,
            hw->samples << hw->info.shift,
@@ -600,7 +615,6 @@ static int oss_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    oss->fd = fd;
-    oss->conf = conf;
    return 0;
 }

@@ -620,8 +634,7 @@ static int oss_ctl_out (HWVoiceOut *hw, int cmd, ...)
            va_end (ap);

            ldebug ("enabling voice\n");
-            if (poll_mode) {
-                oss_poll_out (hw);
+            if (poll_mode && oss_poll_out (hw)) {
                poll_mode = 0;
            }
            hw->poll_mode = poll_mode;
@@ -663,7 +676,7 @@ static int oss_ctl_out (HWVoiceOut *hw, int cmd, ...)
    return 0;
 }

-static int oss_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+static int oss_init_in (HWVoiceIn *hw, struct audsettings *as)
 {
    OSSVoiceIn *oss = (OSSVoiceIn *) hw;
    struct oss_params req, obt;
@@ -672,16 +685,15 @@ static int oss_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
    int fd;
    audfmt_e effective_fmt;
    struct audsettings obt_as;
-    OSSConf *conf = drv_opaque;

    oss->fd = -1;

    req.fmt = aud_to_ossfmt (as->fmt, as->endianness);
    req.freq = as->freq;
    req.nchannels = as->nchannels;
-    req.fragsize = conf->fragsize;
-    req.nfrags = conf->nfrags;
-    if (oss_open (1, &req, &obt, &fd, conf)) {
+    req.fragsize = conf.fragsize;
+    req.nfrags = conf.nfrags;
+    if (oss_open (1, &req, &obt, &fd)) {
        return -1;
    }

@@ -715,7 +727,6 @@ static int oss_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
    }

    oss->fd = fd;
-    oss->conf = conf;
    return 0;
 }

@@ -725,8 +736,10 @@ static void oss_fini_in (HWVoiceIn *hw)

    oss_anal_close (&oss->fd);

-    g_free(oss->pcm_buf);
-    oss->pcm_buf = NULL;
+    if (oss->pcm_buf) {
+        g_free (oss->pcm_buf);
+        oss->pcm_buf = NULL;
+    }
 }

 static int oss_run_in (HWVoiceIn *hw)
@@ -817,8 +830,7 @@ static int oss_ctl_in (HWVoiceIn *hw, int cmd, ...)
            poll_mode = va_arg (ap, int);
            va_end (ap);

-            if (poll_mode) {
-                oss_poll_in (hw);
+            if (poll_mode && oss_poll_in (hw)) {
                poll_mode = 0;
            }
            hw->poll_mode = poll_mode;
@@ -835,79 +847,71 @@ static int oss_ctl_in (HWVoiceIn *hw, int cmd, ...)
    return 0;
 }

-static OSSConf glob_conf = {
-    .try_mmap = 0,
-    .nfrags = 4,
-    .fragsize = 4096,
-    .devpath_out = "/dev/dsp",
-    .devpath_in = "/dev/dsp",
-    .exclusive = 0,
-    .policy = 5
-};
-
 static void *oss_audio_init (void)
 {
-    OSSConf *conf = g_malloc(sizeof(OSSConf));
-    *conf = glob_conf;
-
-    if (access(conf->devpath_in, R_OK | W_OK) < 0 ||
-        access(conf->devpath_out, R_OK | W_OK) < 0) {
-        g_free(conf);
+    if (access(conf.devpath_in, R_OK | W_OK) < 0 ||
+        access(conf.devpath_out, R_OK | W_OK) < 0) {
        return NULL;
    }
-    return conf;
+    return &conf;
 }

 static void oss_audio_fini (void *opaque)
 {
-    g_free(opaque);
+    (void) opaque;
 }

 static struct audio_option oss_options[] = {
    {
        .name  = "FRAGSIZE",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.fragsize,
+        .valp  = &conf.fragsize,
        .descr = "Fragment size in bytes"
    },
    {
        .name  = "NFRAGS",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.nfrags,
+        .valp  = &conf.nfrags,
        .descr = "Number of fragments"
    },
    {
        .name  = "MMAP",
        .tag   = AUD_OPT_BOOL,
-        .valp  = &glob_conf.try_mmap,
+        .valp  = &conf.try_mmap,
        .descr = "Try using memory mapped access"
    },
    {
        .name  = "DAC_DEV",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.devpath_out,
+        .valp  = &conf.devpath_out,
        .descr = "Path to DAC device"
    },
    {
        .name  = "ADC_DEV",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.devpath_in,
+        .valp  = &conf.devpath_in,
        .descr = "Path to ADC device"
    },
    {
        .name  = "EXCLUSIVE",
        .tag   = AUD_OPT_BOOL,
-        .valp  = &glob_conf.exclusive,
+        .valp  = &conf.exclusive,
        .descr = "Open device in exclusive mode (vmix wont work)"
    },
 #ifdef USE_DSP_POLICY
    {
        .name  = "POLICY",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.policy,
+        .valp  = &conf.policy,
        .descr = "Set the timing policy of the device, -1 to use fragment mode",
    },
 #endif
+    {
+        .name  = "DEBUG",
+        .tag   = AUD_OPT_BOOL,
+        .valp  = &conf.debug,
+        .descr = "Turn on some debugging messages"
+    },
    { /* End of list */ }
 };

--- a/audio/paaudio.c
+++ b/audio/paaudio.c
@@ -1,5 +1,4 @@
 /* public domain */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "audio.h"

@@ -9,19 +8,6 @@
 #include "audio_int.h"
 #include "audio_pt_int.h"

-typedef struct {
-    int samples;
-    char *server;
-    char *sink;
-    char *source;
-} PAConf;
-
-typedef struct {
-    PAConf conf;
-    pa_threaded_mainloop *mainloop;
-    pa_context *context;
-} paaudio;
-
 typedef struct {
    HWVoiceOut hw;
    int done;
@@ -31,7 +17,6 @@ typedef struct {
    pa_stream *stream;
    void *pcm_buf;
    struct audio_pt pt;
-    paaudio *g;
 } PAVoiceOut;

 typedef struct {
@@ -45,10 +30,20 @@ typedef struct {
    struct audio_pt pt;
    const void *read_data;
    size_t read_index, read_length;
-    paaudio *g;
 } PAVoiceIn;

-static void qpa_audio_fini(void *opaque);
+/*
+ * State of the PulseAudio backend: the user-settable options plus the
+ * connection objects created by qpa_audio_init().
+ */
+typedef struct {
+    int samples;                     /* voice buffer size in samples ("SAMPLES" option) */
+    char *server;                    /* server address given to pa_context_new/connect ("SERVER") */
+    char *sink;                      /* playback device name ("SINK" option) */
+    char *source;                    /* capture device name ("SOURCE" option) */
+    pa_threaded_mainloop *mainloop;  /* created in qpa_audio_init, freed in qpa_audio_fini */
+    pa_context *context;             /* created in qpa_audio_init, unref'd in qpa_audio_fini */
+} paaudio;
+
+/* The single backend instance; only the buffer size has a non-zero default. */
+static paaudio glob_paaudio = {
+    .samples = 4096,
+};

 static void GCC_FMT_ATTR (2, 3) qpa_logerr (int err, const char *fmt, ...)
 {
@@ -111,7 +106,7 @@ static inline int PA_STREAM_IS_GOOD(pa_stream_state_t x)

 static int qpa_simple_read (PAVoiceIn *p, void *data, size_t length, int *rerror)
 {
-    paaudio *g = p->g;
+    paaudio *g = &glob_paaudio;

    pa_threaded_mainloop_lock (g->mainloop);

@@ -165,7 +160,7 @@ unlock_and_fail:

 static int qpa_simple_write (PAVoiceOut *p, const void *data, size_t length, int *rerror)
 {
-    paaudio *g = p->g;
+    paaudio *g = &glob_paaudio;

    pa_threaded_mainloop_lock (g->mainloop);

@@ -227,7 +222,7 @@ static void *qpa_thread_out (void *arg)
            }
        }

-        decr = to_mix = audio_MIN (pa->live, pa->g->conf.samples >> 2);
+        decr = to_mix = audio_MIN (pa->live, glob_paaudio.samples >> 2);
        rpos = pa->rpos;

        if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
@@ -319,7 +314,7 @@ static void *qpa_thread_in (void *arg)
            }
        }

-        incr = to_grab = audio_MIN (pa->dead, pa->g->conf.samples >> 2);
+        incr = to_grab = audio_MIN (pa->dead, glob_paaudio.samples >> 2);
        wpos = pa->wpos;

        if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
@@ -435,7 +430,7 @@ static audfmt_e pa_to_audfmt (pa_sample_format_t fmt, int *endianness)

 static void context_state_cb (pa_context *c, void *userdata)
 {
-    paaudio *g = userdata;
+    paaudio *g = &glob_paaudio;

    switch (pa_context_get_state(c)) {
    case PA_CONTEXT_READY:
@@ -454,7 +449,7 @@ static void context_state_cb (pa_context *c, void *userdata)

 static void stream_state_cb (pa_stream *s, void * userdata)
 {
-    paaudio *g = userdata;
+    paaudio *g = &glob_paaudio;

    switch (pa_stream_get_state (s)) {

@@ -472,21 +467,23 @@ static void stream_state_cb (pa_stream *s, void * userdata)

 static void stream_request_cb (pa_stream *s, size_t length, void *userdata)
 {
-    paaudio *g = userdata;
+    paaudio *g = &glob_paaudio;

    pa_threaded_mainloop_signal (g->mainloop, 0);
 }

 static pa_stream *qpa_simple_new (
-        paaudio *g,
+        const char *server,
        const char *name,
        pa_stream_direction_t dir,
        const char *dev,
+        const char *stream_name,
        const pa_sample_spec *ss,
        const pa_channel_map *map,
        const pa_buffer_attr *attr,
        int *rerror)
 {
+    paaudio *g = &glob_paaudio;
    int r;
    pa_stream *stream;

@@ -537,36 +534,35 @@ fail:
    return NULL;
 }

-static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
-                        void *drv_opaque)
+static int qpa_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    int error;
-    pa_sample_spec ss;
-    pa_buffer_attr ba;
+    static pa_sample_spec ss;
+    static pa_buffer_attr ba;
    struct audsettings obt_as = *as;
    PAVoiceOut *pa = (PAVoiceOut *) hw;
-    paaudio *g = pa->g = drv_opaque;

    ss.format = audfmt_to_pa (as->fmt, as->endianness);
    ss.channels = as->nchannels;
    ss.rate = as->freq;

    /*
-     * qemu audio tick runs at 100 Hz (by default), so processing
-     * data chunks worth 10 ms of sound should be a good fit.
+     * qemu audio tick runs at 250 Hz (by default), so processing
+     * data chunks worth 4 ms of sound should be a good fit.
     */
-    ba.tlength = pa_usec_to_bytes (10 * 1000, &ss);
-    ba.minreq = pa_usec_to_bytes (5 * 1000, &ss);
+    ba.tlength = pa_usec_to_bytes (4 * 1000, &ss);
+    ba.minreq = pa_usec_to_bytes (2 * 1000, &ss);
    ba.maxlength = -1;
    ba.prebuf = -1;

    obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);

    pa->stream = qpa_simple_new (
-        g,
+        glob_paaudio.server,
        "qemu",
        PA_STREAM_PLAYBACK,
-        g->conf.sink,
+        glob_paaudio.sink,
+        "pcm.playback",
        &ss,
        NULL,                   /* channel map */
        &ba,                    /* buffering attributes */
@@ -578,7 +574,7 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
    }

    audio_pcm_init_info (&hw->info, &obt_as);
-    hw->samples = g->conf.samples;
+    hw->samples = glob_paaudio.samples;
    pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
    pa->rpos = hw->rpos;
    if (!pa->pcm_buf) {
@@ -605,13 +601,12 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
    return -1;
 }

-static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+static int qpa_init_in (HWVoiceIn *hw, struct audsettings *as)
 {
    int error;
-    pa_sample_spec ss;
+    static pa_sample_spec ss;
    struct audsettings obt_as = *as;
    PAVoiceIn *pa = (PAVoiceIn *) hw;
-    paaudio *g = pa->g = drv_opaque;

    ss.format = audfmt_to_pa (as->fmt, as->endianness);
    ss.channels = as->nchannels;
@@ -620,10 +615,11 @@ static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
    obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);

    pa->stream = qpa_simple_new (
-        g,
+        glob_paaudio.server,
        "qemu",
        PA_STREAM_RECORD,
-        g->conf.source,
+        glob_paaudio.source,
+        "pcm.capture",
        &ss,
        NULL,                   /* channel map */
        NULL,                   /* buffering attributes */
@@ -635,7 +631,7 @@ static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
    }

    audio_pcm_init_info (&hw->info, &obt_as);
-    hw->samples = g->conf.samples;
+    hw->samples = glob_paaudio.samples;
    pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
    pa->wpos = hw->wpos;
    if (!pa->pcm_buf) {
@@ -707,7 +703,7 @@ static int qpa_ctl_out (HWVoiceOut *hw, int cmd, ...)
    PAVoiceOut *pa = (PAVoiceOut *) hw;
    pa_operation *op;
    pa_cvolume v;
-    paaudio *g = pa->g;
+    paaudio *g = &glob_paaudio;

 #ifdef PA_CHECK_VERSION    /* macro is present in 0.9.16+ */
    pa_cvolume_init (&v);  /* function is present in 0.9.13+ */
@@ -759,7 +755,7 @@ static int qpa_ctl_in (HWVoiceIn *hw, int cmd, ...)
    PAVoiceIn *pa = (PAVoiceIn *) hw;
    pa_operation *op;
    pa_cvolume v;
-    paaudio *g = pa->g;
+    paaudio *g = &glob_paaudio;

 #ifdef PA_CHECK_VERSION
    pa_cvolume_init (&v);
@@ -809,31 +805,23 @@ static int qpa_ctl_in (HWVoiceIn *hw, int cmd, ...)
 }

 /* common */
-static PAConf glob_conf = {
-    .samples = 4096,
-};
-
 static void *qpa_audio_init (void)
 {
-    paaudio *g = g_malloc(sizeof(paaudio));
-    g->conf = glob_conf;
-    g->mainloop = NULL;
-    g->context = NULL;
+    paaudio *g = &glob_paaudio;

    g->mainloop = pa_threaded_mainloop_new ();
    if (!g->mainloop) {
        goto fail;
    }

-    g->context = pa_context_new (pa_threaded_mainloop_get_api (g->mainloop),
-                                 g->conf.server);
+    g->context = pa_context_new (pa_threaded_mainloop_get_api (g->mainloop), glob_paaudio.server);
    if (!g->context) {
        goto fail;
    }

    pa_context_set_state_callback (g->context, context_state_cb, g);

-    if (pa_context_connect (g->context, g->conf.server, 0, NULL) < 0) {
+    if (pa_context_connect (g->context, glob_paaudio.server, 0, NULL) < 0) {
        qpa_logerr (pa_context_errno (g->context),
                    "pa_context_connect() failed\n");
        goto fail;
@@ -866,13 +854,12 @@ static void *qpa_audio_init (void)

    pa_threaded_mainloop_unlock (g->mainloop);

-    return g;
+    return &glob_paaudio;

 unlock_and_fail:
    pa_threaded_mainloop_unlock (g->mainloop);
 fail:
    AUD_log (AUDIO_CAP, "Failed to initialize PA context");
-    qpa_audio_fini(g);
    return NULL;
 }

@@ -887,38 +874,39 @@ static void qpa_audio_fini (void *opaque)
    if (g->context) {
        pa_context_disconnect (g->context);
        pa_context_unref (g->context);
+        g->context = NULL;
    }

    if (g->mainloop) {
        pa_threaded_mainloop_free (g->mainloop);
    }

-    g_free(g);
+    g->mainloop = NULL;
 }

 struct audio_option qpa_options[] = {
    {
        .name  = "SAMPLES",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.samples,
+        .valp  = &glob_paaudio.samples,
        .descr = "buffer size in samples"
    },
    {
        .name  = "SERVER",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.server,
+        .valp  = &glob_paaudio.server,
        .descr = "server address"
    },
    {
        .name  = "SINK",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.sink,
+        .valp  = &glob_paaudio.sink,
        .descr = "sink device name"
    },
    {
        .name  = "SOURCE",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.source,
+        .valp  = &glob_paaudio.source,
        .descr = "source device name"
    },
    { /* End of list */ }
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include <SDL.h>
 #include <SDL_thread.h>
 #include "qemu-common.h"
@@ -56,7 +55,6 @@ static struct SDLAudioState {
    SDL_mutex *mutex;
    SDL_sem *sem;
    int initialized;
-    bool driver_created;
 } glob_sdl;
 typedef struct SDLAudioState SDLAudioState;

@@ -334,8 +332,7 @@ static void sdl_fini_out (HWVoiceOut *hw)
    sdl_close (&glob_sdl);
 }

-static int sdl_init_out(HWVoiceOut *hw, struct audsettings *as,
-                        void *drv_opaque)
+static int sdl_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    SDLVoiceOut *sdl = (SDLVoiceOut *) hw;
    SDLAudioState *s = &glob_sdl;
@@ -395,10 +392,6 @@ static int sdl_ctl_out (HWVoiceOut *hw, int cmd, ...)
 static void *sdl_audio_init (void)
 {
    SDLAudioState *s = &glob_sdl;
-    if (s->driver_created) {
-        sdl_logerr("Can't create multiple sdl backends\n");
-        return NULL;
-    }

    if (SDL_InitSubSystem (SDL_INIT_AUDIO)) {
        sdl_logerr ("SDL failed to initialize audio subsystem\n");
@@ -420,7 +413,6 @@ static void *sdl_audio_init (void)
        return NULL;
    }

-    s->driver_created = true;
    return s;
 }

@@ -431,7 +423,6 @@ static void sdl_audio_fini (void *opaque)
    SDL_DestroySemaphore (s->sem);
    SDL_DestroyMutex (s->mutex);
    SDL_QuitSubSystem (SDL_INIT_AUDIO);
-    s->driver_created = false;
 }

 static struct audio_option sdl_options[] = {
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -17,9 +17,7 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

-#include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "ui/qemu-spice.h"

@@ -27,17 +25,8 @@
 #include "audio.h"
 #include "audio_int.h"

-#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
-#define LINE_OUT_SAMPLES (480 * 4)
-#else
-#define LINE_OUT_SAMPLES (256 * 4)
-#endif
-
-#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
-#define LINE_IN_SAMPLES (480 * 4)
-#else
-#define LINE_IN_SAMPLES (256 * 4)
-#endif
+#define LINE_IN_SAMPLES 1024
+#define LINE_OUT_SAMPLES 1024

 typedef struct SpiceRateCtl {
    int64_t               start_ticks;
@@ -107,7 +96,7 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)
    bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
    samples = (bytes - rate->bytes_sent) >> info->shift;
    if (samples < 0 || samples > 65536) {
-        error_report("Resetting rate control (%" PRId64 " samples)", samples);
+        fprintf (stderr, "Resetting rate control (%" PRId64 " samples)\n", samples);
        rate_start (rate);
        samples = 0;
    }
@@ -117,17 +106,12 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate)

 /* playback */

-static int line_out_init(HWVoiceOut *hw, struct audsettings *as,
-                         void *drv_opaque)
+static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
 {
    SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
    struct audsettings settings;

-#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
-    settings.freq       = spice_server_get_best_playback_rate(NULL);
-#else
    settings.freq       = SPICE_INTERFACE_PLAYBACK_FREQ;
-#endif
    settings.nchannels  = SPICE_INTERFACE_PLAYBACK_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -138,9 +122,6 @@ static int line_out_init(HWVoiceOut *hw, struct audsettings *as,

    out->sin.base.sif = &playback_sif.base;
    qemu_spice_add_interface (&out->sin.base);
-#if SPICE_INTERFACE_PLAYBACK_MAJOR > 1 || SPICE_INTERFACE_PLAYBACK_MINOR >= 3
-    spice_server_set_playback_rate(&out->sin, settings.freq);
-#endif
    return 0;
 }

@@ -246,16 +227,12 @@ static int line_out_ctl (HWVoiceOut *hw, int cmd, ...)

 /* record */

-static int line_in_init(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
+static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
 {
    SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
    struct audsettings settings;

-#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
-    settings.freq       = spice_server_get_best_record_rate(NULL);
-#else
    settings.freq       = SPICE_INTERFACE_RECORD_FREQ;
-#endif
    settings.nchannels  = SPICE_INTERFACE_RECORD_CHAN;
    settings.fmt        = AUD_FMT_S16;
    settings.endianness = AUDIO_HOST_ENDIANNESS;
@@ -266,9 +243,6 @@ static int line_in_init(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)

    in->sin.base.sif = &record_sif.base;
    qemu_spice_add_interface (&in->sin.base);
-#if SPICE_INTERFACE_RECORD_MAJOR > 2 || SPICE_INTERFACE_RECORD_MINOR >= 3
-    spice_server_set_record_rate(&in->sin, settings.freq);
-#endif
    return 0;
 }

--- a/audio/wavaudio.c
+++ b/audio/wavaudio.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "qemu/timer.h"
 #include "audio.h"
@@ -37,10 +36,15 @@ typedef struct WAVVoiceOut {
    int total_samples;
 } WAVVoiceOut;

-typedef struct {
+static struct {
    struct audsettings settings;
    const char *wav_path;
-} WAVConf;
+} conf = {
+    .settings.freq      = 44100,
+    .settings.nchannels = 2,
+    .settings.fmt       = AUD_FMT_S16,
+    .wav_path           = "qemu.wav"
+};

 static int wav_run_out (HWVoiceOut *hw, int live)
 {
@@ -101,8 +105,7 @@ static void le_store (uint8_t *buf, uint32_t val, int len)
    }
 }

-static int wav_init_out(HWVoiceOut *hw, struct audsettings *as,
-                        void *drv_opaque)
+static int wav_init_out (HWVoiceOut *hw, struct audsettings *as)
 {
    WAVVoiceOut *wav = (WAVVoiceOut *) hw;
    int bits16 = 0, stereo = 0;
@@ -112,8 +115,9 @@ static int wav_init_out(HWVoiceOut *hw, struct audsettings *as,
        0x02, 0x00, 0x44, 0xac, 0x00, 0x00, 0x10, 0xb1, 0x02, 0x00, 0x04,
        0x00, 0x10, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00
    };
-    WAVConf *conf = drv_opaque;
-    struct audsettings wav_as = conf->settings;
+    struct audsettings wav_as = conf.settings;
+
+    (void) as;

    stereo = wav_as.nchannels == 2;
    switch (wav_as.fmt) {
@@ -151,10 +155,10 @@ static int wav_init_out(HWVoiceOut *hw, struct audsettings *as,
    le_store (hdr + 28, hw->info.freq << (bits16 + stereo), 4);
    le_store (hdr + 32, 1 << (bits16 + stereo), 2);

-    wav->f = fopen (conf->wav_path, "wb");
+    wav->f = fopen (conf.wav_path, "wb");
    if (!wav->f) {
        dolog ("Failed to open wave file `%s'\nReason: %s\n",
-               conf->wav_path, strerror (errno));
+               conf.wav_path, strerror (errno));
        g_free (wav->pcm_buf);
        wav->pcm_buf = NULL;
        return -1;
@@ -222,49 +226,40 @@ static int wav_ctl_out (HWVoiceOut *hw, int cmd, ...)
    return 0;
 }

-static WAVConf glob_conf = {
-    .settings.freq      = 44100,
-    .settings.nchannels = 2,
-    .settings.fmt       = AUD_FMT_S16,
-    .wav_path           = "qemu.wav"
-};
-
 static void *wav_audio_init (void)
 {
-    WAVConf *conf = g_malloc(sizeof(WAVConf));
-    *conf = glob_conf;
-    return conf;
+    return &conf;
 }

 static void wav_audio_fini (void *opaque)
 {
+    (void) opaque;
    ldebug ("wav_fini");
-    g_free(opaque);
 }

 static struct audio_option wav_options[] = {
    {
        .name  = "FREQUENCY",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.settings.freq,
+        .valp  = &conf.settings.freq,
        .descr = "Frequency"
    },
    {
        .name  = "FORMAT",
        .tag   = AUD_OPT_FMT,
-        .valp  = &glob_conf.settings.fmt,
+        .valp  = &conf.settings.fmt,
        .descr = "Format"
    },
    {
        .name  = "DAC_FIXED_CHANNELS",
        .tag   = AUD_OPT_INT,
-        .valp  = &glob_conf.settings.nchannels,
+        .valp  = &conf.settings.nchannels,
        .descr = "Number of channels (1 - mono, 2 - stereo)"
    },
    {
        .name  = "PATH",
        .tag   = AUD_OPT_STR,
-        .valp  = &glob_conf.wav_path,
+        .valp  = &conf.wav_path,
        .descr = "Path to wave file"
    },
    { /* End of list */ }
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -1,7 +1,5 @@
-#include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "monitor/monitor.h"
-#include "qemu/error-report.h"
 #include "audio.h"

 typedef struct {
@@ -65,7 +63,8 @@ static void wav_destroy (void *opaque)
        }
    doclose:
        if (fclose (wav->f)) {
-            error_report("wav_destroy: fclose failed: %s", strerror(errno));
+            fprintf (stderr, "wav_destroy: fclose failed: %s",
+                     strerror (errno));
        }
    }

--- a/audio/winwaveaudio.c
+++ b/audio/winwaveaudio.c
@@ -0,0 +1,717 @@
+/* public domain */
+
+#include "qemu-common.h"
+#include "sysemu/sysemu.h"
+#include "audio.h"
+
+#define AUDIO_CAP "winwave"
+#include "audio_int.h"
+
+#include <windows.h>
+#include <mmsystem.h>
+
+#include "audio_win_int.h"
+
+/*
+ * Driver-wide tunables shared by every winwave voice in this file.
+ * The initializer supplies the defaults.
+ */
+static struct {
+    int dac_headers;    /* number of WAVEHDR buffers allocated per output voice */
+    int dac_samples;    /* samples carried by each output WAVEHDR buffer */
+    int adc_headers;    /* presumably the capture-side header count; its use is not visible here */
+    int adc_samples;    /* samples credited to a capture voice per completed buffer */
+} conf = {
+    .dac_headers = 4,
+    .dac_samples = 1024,
+    .adc_headers = 4,
+    .adc_samples = 1024
+};
+
+/*
+ * Per-voice playback state.  HWVoiceOut is the first member because the
+ * functions below cast an HWVoiceOut pointer to a WaveVoiceOut pointer.
+ */
+typedef struct {
+    HWVoiceOut hw;
+    HWAVEOUT hwo;       /* device handle filled in by waveOutOpen */
+    WAVEHDR *hdrs;      /* array of conf.dac_headers headers */
+    HANDLE event;       /* created on VOICE_ENABLE when poll mode is requested */
+    void *pcm_buf;      /* backing store sliced evenly across the headers */
+    int avail;          /* samples free for mixing; updated under crit_sect */
+    int pending;        /* samples clipped into pcm_buf but not yet submitted */
+    int curhdr;         /* index of the next header passed to waveOutWrite */
+    int paused;         /* set once waveOutReset succeeded in VOICE_DISABLE */
+    CRITICAL_SECTION crit_sect; /* guards avail against the device callback */
+} WaveVoiceOut;
+
+/*
+ * Per-voice capture state.  Most of the code using it lies outside this
+ * view, so only the fields touched by visible code are described firmly.
+ */
+typedef struct {
+    HWVoiceIn hw;
+    HWAVEIN hwi;        /* device handle, released with waveInClose */
+    WAVEHDR *hdrs;      /* presumably the capture header array -- confirm */
+    HANDLE event;       /* signalled from the WIM_DATA callback in poll mode */
+    void *pcm_buf;      /* presumably the capture backing store -- confirm */
+    int curhdr;
+    int paused;
+    int rpos;
+    int avail;          /* grows by conf.adc_samples per completed buffer */
+    CRITICAL_SECTION crit_sect; /* guards avail against the device callback */
+} WaveVoiceIn;
+
+/*
+ * Log a human-readable explanation of a multimedia API result code.
+ *
+ * Known codes are printed as "Reason: <text>"; anything else is printed
+ * with its numeric value so it can be looked up.
+ */
+static void winwave_log_mmresult (MMRESULT mr)
+{
+    const char *str = "BUG";
+
+    switch (mr) {
+    case MMSYSERR_NOERROR:
+        str = "Success";
+        break;
+
+    case MMSYSERR_INVALHANDLE:
+        str = "Specified device handle is invalid";
+        break;
+
+    case MMSYSERR_BADDEVICEID:
+        str = "Specified device id is out of range";
+        break;
+
+    case MMSYSERR_NODRIVER:
+        str = "No device driver is present";
+        break;
+
+    case MMSYSERR_NOMEM:
+        str = "Unable to allocate or lock memory";
+        break;
+
+    case WAVERR_SYNC:
+        /* The Win32 flag is WAVE_ALLOWSYNC; there is no WINWAVE_ALLOWSYNC. */
+        str = "Device is synchronous but waveOutOpen was called "
+            "without using the WAVE_ALLOWSYNC flag";
+        break;
+
+    case WAVERR_UNPREPARED:
+        str = "The data block pointed to by the pwh parameter "
+            "hasn't been prepared";
+        break;
+
+    case WAVERR_STILLPLAYING:
+        str = "There are still buffers in the queue";
+        break;
+
+    default:
+        dolog ("Reason: Unknown (MMRESULT %#x)\n", mr);
+        return;
+    }
+
+    dolog ("Reason: %s\n", str);
+}
+
+/*
+ * Report a failed multimedia call: the caller's printf-style description,
+ * then " failed", then the decoded MMRESULT on its own line.
+ */
+static void GCC_FMT_ATTR (2, 3) winwave_logerr (MMRESULT mr,
+                                                const char *fmt, ...)
+{
+    va_list args;
+
+    /* What was being attempted, as described by the caller. */
+    va_start (args, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, args);
+    va_end (args);
+
+    /* Fixed suffix, followed by the reason for the failure. */
+    AUD_log (NULL, " failed\n");
+    winwave_log_mmresult (mr);
+}
+
+/*
+ * Close the voice's wave-out device, logging any failure, and clear the
+ * stored handle.
+ */
+static void winwave_anal_close_out (WaveVoiceOut *wave)
+{
+    MMRESULT status = waveOutClose (wave->hwo);
+
+    if (status != MMSYSERR_NOERROR) {
+        winwave_logerr (status, "waveOutClose");
+    }
+
+    /* The handle is dropped whether or not the close succeeded. */
+    wave->hwo = NULL;
+}
+
+/*
+ * Wave-out device callback registered through waveOutOpen.
+ *
+ * dwInstance carries the WaveVoiceOut that owns the device.  On WOM_DONE,
+ * dwParam1 is the finished WAVEHDR: its buffer is handed back to the
+ * mixer by growing wave->avail, and in poll mode the voice's event is
+ * signalled.  WOM_OPEN and WOM_CLOSE are ignored; other messages are
+ * logged.
+ */
+static void CALLBACK winwave_callback_out (
+    HWAVEOUT hwo,
+    UINT msg,
+    DWORD_PTR dwInstance,
+    DWORD_PTR dwParam1,
+    DWORD_PTR dwParam2
+    )
+{
+    WaveVoiceOut *voice = (WaveVoiceOut *) dwInstance;
+    WAVEHDR *hdr;
+
+    if (msg == WOM_CLOSE || msg == WOM_OPEN) {
+        return;
+    }
+
+    if (msg != WOM_DONE) {
+        dolog ("unknown wave out callback msg %x\n", msg);
+        return;
+    }
+
+    /* dwUser marks a header as already accounted for; count it once. */
+    hdr = (WAVEHDR *) dwParam1;
+    if (hdr->dwUser) {
+        return;
+    }
+    hdr->dwUser = 1;
+
+    EnterCriticalSection (&voice->crit_sect);
+    voice->avail += conf.dac_samples;
+    LeaveCriticalSection (&voice->crit_sect);
+
+    if (voice->hw.poll_mode && !SetEvent (voice->event)) {
+        dolog ("DAC SetEvent failed %lx\n", GetLastError ());
+    }
+}
+
+/*
+ * Set up a playback voice: open the default wave-out device with the
+ * requested settings, allocate conf.dac_headers headers backed by one
+ * contiguous PCM buffer, and prepare every header for waveOutWrite.
+ *
+ * Returns 0 on success and -1 on failure.  On failure everything acquired
+ * so far is released again, including prepared headers and the critical
+ * section, so nothing is left behind for a voice that never came up.
+ */
+static int winwave_init_out (HWVoiceOut *hw, struct audsettings *as)
+{
+    int i;
+    int err;
+    MMRESULT mr;
+    WAVEFORMATEX wfx;
+    WaveVoiceOut *wave;
+
+    wave = (WaveVoiceOut *) hw;
+
+    InitializeCriticalSection (&wave->crit_sect);
+
+    err = waveformat_from_audio_settings (&wfx, as);
+    if (err) {
+        goto err0;
+    }
+
+    mr = waveOutOpen (&wave->hwo, WAVE_MAPPER, &wfx,
+                      (DWORD_PTR) winwave_callback_out,
+                      (DWORD_PTR) wave, CALLBACK_FUNCTION);
+    if (mr != MMSYSERR_NOERROR) {
+        winwave_logerr (mr, "waveOutOpen");
+        goto err1;
+    }
+
+    wave->hdrs = audio_calloc (AUDIO_FUNC, conf.dac_headers,
+                               sizeof (*wave->hdrs));
+    if (!wave->hdrs) {
+        goto err2;
+    }
+
+    audio_pcm_init_info (&hw->info, as);
+    hw->samples = conf.dac_samples * conf.dac_headers;
+    /* Nothing has been queued yet, so the whole buffer is free. */
+    wave->avail = hw->samples;
+
+    wave->pcm_buf = audio_calloc (AUDIO_FUNC, conf.dac_samples,
+                                  conf.dac_headers << hw->info.shift);
+    if (!wave->pcm_buf) {
+        goto err3;
+    }
+
+    /* Give each header its own equally sized slice of pcm_buf. */
+    for (i = 0; i < conf.dac_headers; ++i) {
+        WAVEHDR *h = &wave->hdrs[i];
+
+        h->dwUser = 0;
+        h->dwBufferLength = conf.dac_samples << hw->info.shift;
+        h->lpData = advance (wave->pcm_buf, i * h->dwBufferLength);
+        h->dwFlags = 0;
+
+        mr = waveOutPrepareHeader (wave->hwo, h, sizeof (*h));
+        if (mr != MMSYSERR_NOERROR) {
+            winwave_logerr (mr, "waveOutPrepareHeader(%d)", i);
+            goto err4;
+        }
+    }
+
+    return 0;
+
+ err4:
+    /* Headers 0 .. i-1 were prepared; undo that before freeing their data. */
+    while (i-- > 0) {
+        mr = waveOutUnprepareHeader (wave->hwo, &wave->hdrs[i],
+                                     sizeof (wave->hdrs[i]));
+        if (mr != MMSYSERR_NOERROR) {
+            winwave_logerr (mr, "waveOutUnprepareHeader(%d)", i);
+        }
+    }
+    g_free (wave->pcm_buf);
+    wave->pcm_buf = NULL;
+ err3:
+    g_free (wave->hdrs);
+    wave->hdrs = NULL;
+ err2:
+    winwave_anal_close_out (wave);
+ err1:
+ err0:
+    /* Every failure path arrives here with the critical section live. */
+    DeleteCriticalSection (&wave->crit_sect);
+    return -1;
+}
+
+/*
+ * Write entry point of the driver: forwards unchanged to
+ * audio_pcm_sw_write() and returns its result.
+ */
+static int winwave_write (SWVoiceOut *sw, void *buf, int len)
+{
+    return audio_pcm_sw_write (sw, buf, len);
+}
+
+/*
+ * Move up to `live` mixed samples into the PCM buffer and hand every
+ * completely filled header to the device.
+ *
+ * Returns the number of samples consumed from the mixer.
+ */
+static int winwave_run_out (HWVoiceOut *hw, int live)
+{
+    WaveVoiceOut *voice = (WaveVoiceOut *) hw;
+    int mixed;
+
+    /* avail is also updated by the device callback, hence the lock. */
+    EnterCriticalSection (&voice->crit_sect);
+    mixed = audio_MIN (live, voice->avail);
+    mixed = audio_pcm_hw_clip_out (hw, voice->pcm_buf, mixed, voice->pending);
+    voice->pending += mixed;
+    voice->avail -= mixed;
+    LeaveCriticalSection (&voice->crit_sect);
+
+    /* In poll mode, clear the event before queueing at least one header. */
+    if (hw->poll_mode && voice->pending >= conf.dac_samples
+        && !ResetEvent (voice->event)) {
+        dolog ("DAC ResetEvent failed %lx\n", GetLastError ());
+    }
+
+    /* Submit one header for each full header's worth of pending samples. */
+    while (voice->pending >= conf.dac_samples) {
+        WAVEHDR *hdr = &voice->hdrs[voice->curhdr];
+        MMRESULT status;
+
+        hdr->dwUser = 0;
+        status = waveOutWrite (voice->hwo, hdr, sizeof (*hdr));
+        if (status != MMSYSERR_NOERROR) {
+            winwave_logerr (status, "waveOutWrite(%d)", voice->curhdr);
+            break;
+        }
+
+        voice->curhdr = (voice->curhdr + 1) % conf.dac_headers;
+        voice->pending -= conf.dac_samples;
+    }
+
+    return mixed;
+}
+
+/*
+ * Wait-object handler registered for a voice's event: runs the audio
+ * engine via audio_run().  The opaque argument is not used.
+ */
+static void winwave_poll (void *opaque)
+{
+    (void) opaque;
+    audio_run ("winwave_poll");
+}
+
+/*
+ * Tear down a playback voice: stop the device, unprepare every header,
+ * close the device, drop the poll event, and free the buffers and the
+ * critical section.  Individual failures are logged and teardown goes on.
+ *
+ * NOTE(review): assumes this runs exactly once for each successful
+ * winwave_init_out() -- confirm against the audio core.
+ */
+static void winwave_fini_out (HWVoiceOut *hw)
+{
+    int i;
+    MMRESULT mr;
+    WaveVoiceOut *wave = (WaveVoiceOut *) hw;
+
+    /* Stop playback so that no header is still queued when unprepared. */
+    mr = waveOutReset (wave->hwo);
+    if (mr != MMSYSERR_NOERROR) {
+        winwave_logerr (mr, "waveOutReset");
+    }
+
+    for (i = 0; i < conf.dac_headers; ++i) {
+        mr = waveOutUnprepareHeader (wave->hwo, &wave->hdrs[i],
+                                     sizeof (wave->hdrs[i]));
+        if (mr != MMSYSERR_NOERROR) {
+            winwave_logerr (mr, "waveOutUnprepareHeader(%d)", i);
+        }
+    }
+
+    winwave_anal_close_out (wave);
+
+    if (wave->event) {
+        qemu_del_wait_object (wave->event, winwave_poll, wave);
+        if (!CloseHandle (wave->event)) {
+            dolog ("DAC CloseHandle failed %lx\n", GetLastError ());
+        }
+        wave->event = NULL;
+    }
+
+    g_free (wave->pcm_buf);
+    wave->pcm_buf = NULL;
+
+    g_free (wave->hdrs);
+    wave->hdrs = NULL;
+
+    /* The device is closed, so the callback no longer takes this lock. */
+    DeleteCriticalSection (&wave->crit_sect);
+}
+
+/*
+ * Control entry point of a playback voice.
+ *
+ * VOICE_ENABLE takes one extra int argument, the requested poll mode.
+ * The voice's event is created on first request and registered as a wait
+ * object; hw->poll_mode records whether that registration succeeded.
+ * VOICE_DISABLE resets the device (once) and unregisters the wait object.
+ *
+ * Returns 0 for the two commands above and -1 for anything else.
+ */
+static int winwave_ctl_out (HWVoiceOut *hw, int cmd, ...)
+{
+    WaveVoiceOut *voice = (WaveVoiceOut *) hw;
+
+    if (cmd == VOICE_ENABLE) {
+        va_list args;
+        int want_poll;
+
+        va_start (args, cmd);
+        want_poll = va_arg (args, int);
+        va_end (args);
+
+        if (want_poll && !voice->event) {
+            voice->event = CreateEvent (NULL, TRUE, TRUE, NULL);
+            if (!voice->event) {
+                dolog ("DAC CreateEvent: %lx, poll mode will be disabled\n",
+                       GetLastError ());
+            }
+        }
+
+        /*
+         * NOTE(review): an event left over from an earlier enable turns
+         * poll mode on here even when want_poll is 0 -- confirm intended.
+         */
+        if (!voice->event) {
+            hw->poll_mode = 0;
+        }
+        else {
+            int rc = qemu_add_wait_object (voice->event, winwave_poll, voice);
+
+            hw->poll_mode = (rc == 0);
+        }
+        voice->paused = 0;
+        return 0;
+    }
+
+    if (cmd == VOICE_DISABLE) {
+        if (!voice->paused) {
+            MMRESULT status = waveOutReset (voice->hwo);
+
+            if (status == MMSYSERR_NOERROR) {
+                voice->paused = 1;
+            }
+            else {
+                winwave_logerr (status, "waveOutReset");
+            }
+        }
+        if (voice->event) {
+            qemu_del_wait_object (voice->event, winwave_poll, voice);
+        }
+        return 0;
+    }
+
+    return -1;
+}
+
+/*
+ * Close the capture device of @wave.  A failing waveInClose is logged;
+ * the stored handle is cleared either way.
+ */
+static void winwave_anal_close_in (WaveVoiceIn *wave)
+{
+    MMRESULT rc = waveInClose (wave->hwi);
+
+    if (rc != MMSYSERR_NOERROR) {
+        winwave_logerr (rc, "waveInClose");
+    }
+    wave->hwi = NULL;
+}
+
+/*
+ * Wave-in callback (the function handed to waveInOpen by winwave_init_in).
+ *
+ * dwInstance carries the WaveVoiceIn and, for WIM_DATA, dwParam1 carries
+ * the WAVEHDR concerned.  The header's dwUser field acts as an "already
+ * accounted" flag: the first WIM_DATA for a header sets it, credits
+ * conf.adc_samples to wave->avail under the critical section and, in poll
+ * mode, signals the voice's event.  WIM_OPEN and WIM_CLOSE are ignored;
+ * any other message is logged.
+ */
+static void CALLBACK winwave_callback_in (
+    HWAVEIN *hwi,
+    UINT msg,
+    DWORD_PTR dwInstance,
+    DWORD_PTR dwParam1,
+    DWORD_PTR dwParam2
+    )
+{
+    WaveVoiceIn *voice = (WaveVoiceIn *) dwInstance;
+    WAVEHDR *hdr;
+
+    if (msg == WIM_CLOSE || msg == WIM_OPEN) {
+        return;
+    }
+
+    if (msg != WIM_DATA) {
+        dolog ("unknown wave in callback msg %x\n", msg);
+        return;
+    }
+
+    hdr = (WAVEHDR *) dwParam1;
+    if (hdr->dwUser) {
+        /* This buffer has already been counted. */
+        return;
+    }
+    hdr->dwUser = 1;
+
+    EnterCriticalSection (&voice->crit_sect);
+    voice->avail += conf.adc_samples;
+    LeaveCriticalSection (&voice->crit_sect);
+
+    if (voice->hw.poll_mode && !SetEvent (voice->event)) {
+        dolog ("ADC SetEvent failed %lx\n", GetLastError ());
+    }
+}
+
+/*
+ * Queue capture buffers covering @samples samples on the input device.
+ *
+ * One header is queued per whole conf.adc_samples chunk, starting at
+ * wave->curhdr and advancing round-robin over conf.adc_headers; a
+ * remainder smaller than one chunk is dropped.  In poll mode the event is
+ * reset first when at least one buffer is going to be queued.  Failures
+ * are logged and the loop carries on.
+ */
+static void winwave_add_buffers (WaveVoiceIn *wave, int samples)
+{
+    if (wave->hw.poll_mode
+        && samples >= conf.adc_samples
+        && !ResetEvent (wave->event)) {
+        dolog ("ADC ResetEvent failed %lx\n", GetLastError ());
+    }
+
+    for (; samples >= conf.adc_samples; samples -= conf.adc_samples) {
+        WAVEHDR *hdr = &wave->hdrs[wave->curhdr];
+        MMRESULT rc;
+
+        /* Clear the "already accounted" flag read by the callback. */
+        hdr->dwUser = 0;
+        rc = waveInAddBuffer (wave->hwi, hdr, sizeof (*hdr));
+        if (rc != MMSYSERR_NOERROR) {
+            winwave_logerr (rc, "waveInAddBuffer(%d)", wave->curhdr);
+        }
+        wave->curhdr = (wave->curhdr + 1) % conf.adc_headers;
+    }
+}
+
+static int winwave_init_in (HWVoiceIn *hw, struct audsettings *as)
+{
+    int i;
+    int err;
+    MMRESULT mr;
+    WAVEFORMATEX wfx;
+    WaveVoiceIn *wave;
+
+    wave = (WaveVoiceIn *) hw;
+
+    InitializeCriticalSection (&wave->crit_sect);
+
+    err = waveformat_from_audio_settings (&wfx, as);
+    if (err) {
+        goto err0;
+    }
+
+    mr = waveInOpen (&wave->hwi, WAVE_MAPPER, &wfx,
+                     (DWORD_PTR) winwave_callback_in,
+                     (DWORD_PTR) wave, CALLBACK_FUNCTION);
+    if (mr != MMSYSERR_NOERROR) {
+        winwave_logerr (mr, "waveInOpen");
+        goto err1;
+    }
+
+    wave->hdrs = audio_calloc (AUDIO_FUNC, conf.dac_headers,
+                               sizeof (*wave->hdrs));
+    if (!wave->hdrs) {
+        goto err2;
+    }
+
+    audio_pcm_init_info (&hw->info, as);
+    hw->samples = conf.adc_samples * conf.adc_headers;
+    wave->avail = 0;
+
+    wave->pcm_buf = audio_calloc (AUDIO_FUNC, conf.adc_samples,
+                                  conf.adc_headers << hw->info.shift);
+    if (!wave->pcm_buf) {
+        goto err3;
+    }
+
+    for (i = 0; i < conf.adc_headers; ++i) {
+        WAVEHDR *h = &wave->hdrs[i];
+
+        h->dwUser = 0;
+        h->dwBufferLength = conf.adc_samples << hw->info.shift;
+        h->lpData = advance (wave->pcm_buf, i * h->dwBufferLength);
+        h->dwFlags = 0;
+
+        mr = waveInPrepareHeader (wave->hwi, h, sizeof (*h));
+        if (mr != MMSYSERR_NOERROR) {
+            winwave_logerr (mr, "waveInPrepareHeader(%d)", i);
+            goto err4;
+        }
+    }
+
+    wave->paused = 1;
+    winwave_add_buffers (wave, hw->samples);
+    return 0;
+
+ err4:
+    g_free (wave->pcm_buf);
+ err3:
+    g_free (wave->hdrs);
+ err2:
+    winwave_anal_close_in (wave);
+ err1:
+ err0:
+    return -1;
+}
+
+/*
+ * Tear down a capture voice.
+ *
+ * Resets the input device, unprepares each of the conf.adc_headers wave
+ * headers, closes the device, unregisters and closes the poll event when
+ * one exists, and finally frees the sample buffer and the header array.
+ * Every failure is only logged, so the teardown always runs to completion.
+ */
+static void winwave_fini_in (HWVoiceIn *hw)
+{
+    WaveVoiceIn *voice = (WaveVoiceIn *) hw;
+    MMRESULT rc;
+    int idx;
+
+    rc = waveInReset (voice->hwi);
+    if (rc != MMSYSERR_NOERROR) {
+        winwave_logerr (rc, "waveInReset");
+    }
+
+    for (idx = 0; idx < conf.adc_headers; idx++) {
+        rc = waveInUnprepareHeader (voice->hwi, &voice->hdrs[idx],
+                                    sizeof (voice->hdrs[idx]));
+        if (rc == MMSYSERR_NOERROR) {
+            continue;
+        }
+        winwave_logerr (rc, "waveInUnprepareHeader(%d)", idx);
+    }
+
+    winwave_anal_close_in (voice);
+
+    if (voice->event) {
+        /* Stop the main loop from watching the handle before closing it. */
+        qemu_del_wait_object (voice->event, winwave_poll, voice);
+        if (!CloseHandle (voice->event)) {
+            dolog ("ADC CloseHandle failed %lx\n", GetLastError ());
+        }
+        voice->event = NULL;
+    }
+
+    /* Clear the pointers after freeing so stale values cannot be reused. */
+    g_free (voice->pcm_buf);
+    voice->pcm_buf = NULL;
+    g_free (voice->hdrs);
+    voice->hdrs = NULL;
+}
+
+/*
+ * Move captured samples from the driver's buffer into hw->conv_buf.
+ *
+ * The amount taken is the smaller of the free room in the hardware voice
+ * (hw->samples minus the live count) and wave->avail, which is read and
+ * decremented under the critical section.  The samples are converted in
+ * pieces that never run past the end of conv_buf, with both the read and
+ * the write position wrapping at hw->samples.  The consumed amount is then
+ * passed to winwave_add_buffers so the device gets its buffers back.
+ *
+ * Returns the number of samples transferred (0 when there is no room).
+ */
+static int winwave_run_in (HWVoiceIn *hw)
+{
+    WaveVoiceIn *voice = (WaveVoiceIn *) hw;
+    int live = audio_pcm_hw_get_live_in (hw);
+    int room = hw->samples - live;
+    int taken;
+    int remaining;
+
+    if (room == 0) {
+        return 0;
+    }
+
+    EnterCriticalSection (&voice->crit_sect);
+    taken = audio_MIN (room, voice->avail);
+    voice->avail -= taken;
+    LeaveCriticalSection (&voice->crit_sect);
+
+    for (remaining = taken; remaining != 0; ) {
+        int until_wrap = hw->samples - hw->wpos;
+        int chunk = audio_MIN (until_wrap, remaining);
+
+        hw->conv (hw->conv_buf + hw->wpos,
+                  advance (voice->pcm_buf, voice->rpos << hw->info.shift),
+                  chunk);
+
+        voice->rpos = (voice->rpos + chunk) % hw->samples;
+        hw->wpos = (hw->wpos + chunk) % hw->samples;
+        remaining -= chunk;
+    }
+
+    winwave_add_buffers (voice, taken);
+    return taken;
+}
+
+/*
+ * Read entry point of the driver: forwards unchanged to
+ * audio_pcm_sw_read and returns its result.
+ */
+static int winwave_read (SWVoiceIn *sw, void *buf, int size)
+{
+    int nread = audio_pcm_sw_read (sw, buf, size);
+
+    return nread;
+}
+
+/*
+ * Control entry point for a capture voice.
+ *
+ * VOICE_ENABLE takes one extra int argument (the requested poll mode).
+ * When polling is requested and no event exists yet, a manual-reset,
+ * initially signalled event is created; if an event is available it is
+ * registered with winwave_poll and hw->poll_mode records whether that
+ * registration succeeded.  A paused voice is then started with
+ * waveInStart and marked as running even if that call fails (the failure
+ * is logged).
+ *
+ * VOICE_DISABLE stops the device (marking the voice paused on success)
+ * and unregisters the event.
+ *
+ * Always returns 0.
+ * NOTE(review): winwave_ctl_out returns -1 for an unknown command while
+ * this function returns 0 -- confirm whether the difference is intended.
+ */
+static int winwave_ctl_in (HWVoiceIn *hw, int cmd, ...)
+{
+    WaveVoiceIn *voice = (WaveVoiceIn *) hw;
+    MMRESULT rc;
+
+    if (cmd == VOICE_ENABLE) {
+        va_list args;
+        int want_poll;
+
+        va_start (args, cmd);
+        want_poll = va_arg (args, int);
+        va_end (args);
+
+        if (want_poll && !voice->event) {
+            voice->event = CreateEvent (NULL, TRUE, TRUE, NULL);
+            if (!voice->event) {
+                dolog ("ADC CreateEvent: %lx, poll mode will be disabled\n",
+                       GetLastError ());
+            }
+        }
+
+        if (!voice->event) {
+            hw->poll_mode = 0;
+        }
+        else {
+            int added = qemu_add_wait_object (voice->event, winwave_poll,
+                                              voice);
+
+            hw->poll_mode = (added == 0);
+        }
+
+        if (voice->paused) {
+            rc = waveInStart (voice->hwi);
+            if (rc != MMSYSERR_NOERROR) {
+                winwave_logerr (rc, "waveInStart");
+            }
+            voice->paused = 0;
+        }
+        return 0;
+    }
+
+    if (cmd == VOICE_DISABLE) {
+        if (!voice->paused) {
+            rc = waveInStop (voice->hwi);
+            if (rc == MMSYSERR_NOERROR) {
+                voice->paused = 1;
+            }
+            else {
+                winwave_logerr (rc, "waveInStop");
+            }
+        }
+        if (voice->event) {
+            qemu_del_wait_object (voice->event, winwave_poll, voice);
+        }
+        return 0;
+    }
+
+    /* Any other command is accepted silently. */
+    return 0;
+}
+
+/*
+ * Driver init hook: there is no per-instance state, so the address of the
+ * file-level configuration is returned as the opaque handle.
+ */
+static void *winwave_audio_init (void)
+{
+    void *handle = &conf;
+
+    return handle;
+}
+
+/*
+ * Driver fini hook: nothing is released here; @opaque is deliberately
+ * ignored.
+ */
+static void winwave_audio_fini (void *opaque)
+{
+    /* Intentionally empty apart from marking the argument as unused. */
+    (void) opaque;
+}
+
+static struct audio_option winwave_options[] = { /* integer options; each .valp points at a field of conf */
+    {
+        .name        = "DAC_HEADERS",
+        .tag         = AUD_OPT_INT,
+        .valp        = &conf.dac_headers, /* loop bound when playback headers are unprepared */
+        .descr       = "DAC number of headers",
+    },
+    {
+        .name        = "DAC_SAMPLES",
+        .tag         = AUD_OPT_INT,
+        .valp        = &conf.dac_samples,
+        .descr       = "DAC number of samples per header",
+    },
+    {
+        .name        = "ADC_HEADERS",
+        .tag         = AUD_OPT_INT,
+        .valp        = &conf.adc_headers, /* number of capture headers prepared and cycled through */
+        .descr       = "ADC number of headers",
+    },
+    {
+        .name        = "ADC_SAMPLES",
+        .tag         = AUD_OPT_INT,
+        .valp        = &conf.adc_samples, /* samples carried by each capture header */
+        .descr       = "ADC number of samples per header",
+    },
+    { /* End of list */ }
+};
+
+static struct audio_pcm_ops winwave_pcm_ops = { /* PCM callback table referenced by winwave_audio_driver.pcm_ops */
+    .init_out = winwave_init_out, /* playback side */
+    .fini_out = winwave_fini_out,
+    .run_out  = winwave_run_out,
+    .write    = winwave_write,
+    .ctl_out  = winwave_ctl_out,
+    .init_in  = winwave_init_in, /* capture side */
+    .fini_in  = winwave_fini_in,
+    .run_in   = winwave_run_in,
+    .read     = winwave_read,
+    .ctl_in   = winwave_ctl_in
+};
+
+struct audio_driver winwave_audio_driver = { /* the only non-static symbol here: the driver descriptor */
+    .name           = "winwave",
+    .descr          = "Windows Waveform Audio http://msdn.microsoft.com",
+    .options        = winwave_options,
+    .init           = winwave_audio_init,
+    .fini           = winwave_audio_fini,
+    .pcm_ops        = &winwave_pcm_ops,
+    .can_be_default = 1,
+    .max_voices_out = INT_MAX, /* largest int: presumably "no driver-imposed limit" -- confirm */
+    .max_voices_in  = INT_MAX,
+    .voice_size_out = sizeof (WaveVoiceOut), /* the type the *_out callbacks cast their HWVoiceOut to */
+    .voice_size_in  = sizeof (WaveVoiceIn) /* the type the *_in callbacks cast their HWVoiceIn to */
+};
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -1,11 +1,8 @@
 common-obj-y += rng.o rng-egd.o
 common-obj-$(CONFIG_POSIX) += rng-random.o

-common-obj-y += msmouse.o testdev.o
+common-obj-y += msmouse.o
 common-obj-$(CONFIG_BRLAPI) += baum.o
-baum.o-cflags := $(SDL_CFLAGS)
+$(obj)/baum.o: QEMU_CFLAGS += $(SDL_CFLAGS) 

 common-obj-$(CONFIG_TPM) += tpm.o
-
-common-obj-y += hostmem.o hostmem-ram.o
-common-obj-$(CONFIG_LINUX) += hostmem-file.o
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "sysemu/char.h"
 #include "qemu/timer.h"
@@ -304,7 +303,7 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len)
                return 0;
            cur++;
        }
-        DPRINTF("Dropped %td bytes!\n", cur - buf);
+        DPRINTF("Dropped %d bytes!\n", cur - buf);
    }

 #define EAT(c) do {\
@@ -562,28 +561,18 @@ static void baum_close(struct CharDriverState *chr)
    g_free(baum);
 }

-static CharDriverState *chr_baum_init(const char *id,
-                                      ChardevBackend *backend,
-                                      ChardevReturn *ret,
-                                      Error **errp)
+CharDriverState *chr_baum_init(void)
 {
-    ChardevCommon *common = backend->u.braille;
    BaumDriverState *baum;
    CharDriverState *chr;
    brlapi_handle_t *handle;
-#if defined(CONFIG_SDL)
-#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+#ifdef CONFIG_SDL
    SDL_SysWMinfo info;
-#endif
 #endif
    int tty;

-    chr = qemu_chr_alloc(common, errp);
-    if (!chr) {
-        return NULL;
-    }
    baum = g_malloc0(sizeof(BaumDriverState));
-    baum->chr = chr;
+    baum->chr = chr = g_malloc0(sizeof(CharDriverState));

    chr->opaque = baum;
    chr->chr_write = baum_write;
@@ -595,33 +584,28 @@ static CharDriverState *chr_baum_init(const char *id,

    baum->brlapi_fd = brlapi__openConnection(handle, NULL, NULL);
    if (baum->brlapi_fd == -1) {
-        error_setg(errp, "brlapi__openConnection: %s",
-                   brlapi_strerror(brlapi_error_location()));
+        brlapi_perror("baum_init: brlapi_openConnection");
        goto fail_handle;
    }

    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);

    if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
-        error_setg(errp, "brlapi__getDisplaySize: %s",
-                   brlapi_strerror(brlapi_error_location()));
+        brlapi_perror("baum_init: brlapi_getDisplaySize");
        goto fail;
    }

-#if defined(CONFIG_SDL)
-#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0)
+#ifdef CONFIG_SDL
    memset(&info, 0, sizeof(info));
    SDL_VERSION(&info.version);
    if (SDL_GetWMInfo(&info))
        tty = info.info.x11.wmwindow;
    else
-#endif
 #endif
        tty = BRLAPI_TTY_DEFAULT;

    if (brlapi__enterTtyMode(handle, tty, NULL) == -1) {
-        error_setg(errp, "brlapi__enterTtyMode: %s",
-                   brlapi_strerror(brlapi_error_location()));
+        brlapi_perror("baum_init: brlapi_enterTtyMode");
        goto fail;
    }

@@ -641,8 +625,7 @@ fail_handle:

 static void register_types(void)
 {
-    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
-                         chr_baum_init);
+    register_char_driver_qapi("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL);
 }

 type_init(register_types);
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -1,133 +0,0 @@
-/*
- * QEMU Host Memory Backend for hugetlbfs
- *
- * Copyright (C) 2013-2014 Red Hat Inc
- *
- * Authors:
- *   Paolo Bonzini <pbonzini@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "sysemu/hostmem.h"
-#include "sysemu/sysemu.h"
-#include "qom/object_interfaces.h"
-
-/* hostmem-file.c */
-/**
- * @TYPE_MEMORY_BACKEND_FILE:
- * name of backend that uses mmap on a file descriptor
- */
-#define TYPE_MEMORY_BACKEND_FILE "memory-backend-file"
-
-#define MEMORY_BACKEND_FILE(obj) \
-    OBJECT_CHECK(HostMemoryBackendFile, (obj), TYPE_MEMORY_BACKEND_FILE)
-
-typedef struct HostMemoryBackendFile HostMemoryBackendFile;
-
-struct HostMemoryBackendFile {
-    HostMemoryBackend parent_obj;
-
-    bool share;
-    char *mem_path;
-};
-
-static void
-file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
-{
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
-
-    if (!backend->size) {
-        error_setg(errp, "can't create backend with size 0");
-        return;
-    }
-    if (!fb->mem_path) {
-        error_setg(errp, "mem-path property not set");
-        return;
-    }
-#ifndef CONFIG_LINUX
-    error_setg(errp, "-mem-path not supported on this host");
-#else
-    if (!memory_region_size(&backend->mr)) {
-        backend->force_prealloc = mem_prealloc;
-        memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
-                                 object_get_canonical_path(OBJECT(backend)),
-                                 backend->size, fb->share,
-                                 fb->mem_path, errp);
-    }
-#endif
-}
-
-static void
-file_backend_class_init(ObjectClass *oc, void *data)
-{
-    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
-
-    bc->alloc = file_backend_memory_alloc;
-}
-
-static char *get_mem_path(Object *o, Error **errp)
-{
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
-
-    return g_strdup(fb->mem_path);
-}
-
-static void set_mem_path(Object *o, const char *str, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
-
-    if (memory_region_size(&backend->mr)) {
-        error_setg(errp, "cannot change property value");
-        return;
-    }
-    g_free(fb->mem_path);
-    fb->mem_path = g_strdup(str);
-}
-
-static bool file_memory_backend_get_share(Object *o, Error **errp)
-{
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
-
-    return fb->share;
-}
-
-static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
-
-    if (memory_region_size(&backend->mr)) {
-        error_setg(errp, "cannot change property value");
-        return;
-    }
-    fb->share = value;
-}
-
-static void
-file_backend_instance_init(Object *o)
-{
-    object_property_add_bool(o, "share",
-                        file_memory_backend_get_share,
-                        file_memory_backend_set_share, NULL);
-    object_property_add_str(o, "mem-path", get_mem_path,
-                            set_mem_path, NULL);
-}
-
-static const TypeInfo file_backend_info = {
-    .name = TYPE_MEMORY_BACKEND_FILE,
-    .parent = TYPE_MEMORY_BACKEND,
-    .class_init = file_backend_class_init,
-    .instance_init = file_backend_instance_init,
-    .instance_size = sizeof(HostMemoryBackendFile),
-};
-
-static void register_types(void)
-{
-    type_register_static(&file_backend_info);
-}
-
-type_init(register_types);
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -1,54 +0,0 @@
-/*
- * QEMU Host Memory Backend
- *
- * Copyright (C) 2013-2014 Red Hat Inc
- *
- * Authors:
- *   Igor Mammedov <imammedo@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include "qemu/osdep.h"
-#include "sysemu/hostmem.h"
-#include "qom/object_interfaces.h"
-
-#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram"
-
-
-static void
-ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
-{
-    char *path;
-
-    if (!backend->size) {
-        error_setg(errp, "can't create backend with size 0");
-        return;
-    }
-
-    path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
-                           backend->size, errp);
-    g_free(path);
-}
-
-static void
-ram_backend_class_init(ObjectClass *oc, void *data)
-{
-    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
-
-    bc->alloc = ram_backend_memory_alloc;
-}
-
-static const TypeInfo ram_backend_info = {
-    .name = TYPE_MEMORY_BACKEND_RAM,
-    .parent = TYPE_MEMORY_BACKEND,
-    .class_init = ram_backend_class_init,
-};
-
-static void register_types(void)
-{
-    type_register_static(&ram_backend_info);
-}
-
-type_init(register_types);
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -1,375 +0,0 @@
-/*
- * QEMU Host Memory Backend
- *
- * Copyright (C) 2013-2014 Red Hat Inc
- *
- * Authors:
- *   Igor Mammedov <imammedo@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include "qemu/osdep.h"
-#include "sysemu/hostmem.h"
-#include "hw/boards.h"
-#include "qapi/visitor.h"
-#include "qapi-types.h"
-#include "qapi-visit.h"
-#include "qemu/config-file.h"
-#include "qom/object_interfaces.h"
-
-#ifdef CONFIG_NUMA
-#include <numaif.h>
-QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
-QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
-QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
-QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
-#endif
-
-static void
-host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
-                             void *opaque, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    uint64_t value = backend->size;
-
-    visit_type_size(v, name, &value, errp);
-}
-
-static void
-host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
-                             void *opaque, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    Error *local_err = NULL;
-    uint64_t value;
-
-    if (memory_region_size(&backend->mr)) {
-        error_setg(&local_err, "cannot change property value");
-        goto out;
-    }
-
-    visit_type_size(v, name, &value, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    if (!value) {
-        error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
-                   PRIu64 "'", object_get_typename(obj), name, value);
-        goto out;
-    }
-    backend->size = value;
-out:
-    error_propagate(errp, local_err);
-}
-
-static void
-host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
-                                   void *opaque, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    uint16List *host_nodes = NULL;
-    uint16List **node = &host_nodes;
-    unsigned long value;
-
-    value = find_first_bit(backend->host_nodes, MAX_NODES);
-    if (value == MAX_NODES) {
-        return;
-    }
-
-    *node = g_malloc0(sizeof(**node));
-    (*node)->value = value;
-    node = &(*node)->next;
-
-    do {
-        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
-        if (value == MAX_NODES) {
-            break;
-        }
-
-        *node = g_malloc0(sizeof(**node));
-        (*node)->value = value;
-        node = &(*node)->next;
-    } while (true);
-
-    visit_type_uint16List(v, name, &host_nodes, errp);
-}
-
-static void
-host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
-                                   void *opaque, Error **errp)
-{
-#ifdef CONFIG_NUMA
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    uint16List *l = NULL;
-
-    visit_type_uint16List(v, name, &l, errp);
-
-    while (l) {
-        bitmap_set(backend->host_nodes, l->value, 1);
-        l = l->next;
-    }
-#else
-    error_setg(errp, "NUMA node binding are not supported by this QEMU");
-#endif
-}
-
-static int
-host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    return backend->policy;
-}
-
-static void
-host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    backend->policy = policy;
-
-#ifndef CONFIG_NUMA
-    if (policy != HOST_MEM_POLICY_DEFAULT) {
-        error_setg(errp, "NUMA policies are not supported by this QEMU");
-    }
-#endif
-}
-
-static bool host_memory_backend_get_merge(Object *obj, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    return backend->merge;
-}
-
-static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    if (!memory_region_size(&backend->mr)) {
-        backend->merge = value;
-        return;
-    }
-
-    if (value != backend->merge) {
-        void *ptr = memory_region_get_ram_ptr(&backend->mr);
-        uint64_t sz = memory_region_size(&backend->mr);
-
-        qemu_madvise(ptr, sz,
-                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
-        backend->merge = value;
-    }
-}
-
-static bool host_memory_backend_get_dump(Object *obj, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    return backend->dump;
-}
-
-static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    if (!memory_region_size(&backend->mr)) {
-        backend->dump = value;
-        return;
-    }
-
-    if (value != backend->dump) {
-        void *ptr = memory_region_get_ram_ptr(&backend->mr);
-        uint64_t sz = memory_region_size(&backend->mr);
-
-        qemu_madvise(ptr, sz,
-                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
-        backend->dump = value;
-    }
-}
-
-static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    return backend->prealloc || backend->force_prealloc;
-}
-
-static void host_memory_backend_set_prealloc(Object *obj, bool value,
-                                             Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-
-    if (backend->force_prealloc) {
-        if (value) {
-            error_setg(errp,
-                       "remove -mem-prealloc to use the prealloc property");
-            return;
-        }
-    }
-
-    if (!memory_region_size(&backend->mr)) {
-        backend->prealloc = value;
-        return;
-    }
-
-    if (value && !backend->prealloc) {
-        int fd = memory_region_get_fd(&backend->mr);
-        void *ptr = memory_region_get_ram_ptr(&backend->mr);
-        uint64_t sz = memory_region_size(&backend->mr);
-
-        os_mem_prealloc(fd, ptr, sz);
-        backend->prealloc = true;
-    }
-}
-
-static void host_memory_backend_init(Object *obj)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
-    MachineState *machine = MACHINE(qdev_get_machine());
-
-    backend->merge = machine_mem_merge(machine);
-    backend->dump = machine_dump_guest_core(machine);
-    backend->prealloc = mem_prealloc;
-
-    object_property_add_bool(obj, "merge",
-                        host_memory_backend_get_merge,
-                        host_memory_backend_set_merge, NULL);
-    object_property_add_bool(obj, "dump",
-                        host_memory_backend_get_dump,
-                        host_memory_backend_set_dump, NULL);
-    object_property_add_bool(obj, "prealloc",
-                        host_memory_backend_get_prealloc,
-                        host_memory_backend_set_prealloc, NULL);
-    object_property_add(obj, "size", "int",
-                        host_memory_backend_get_size,
-                        host_memory_backend_set_size, NULL, NULL, NULL);
-    object_property_add(obj, "host-nodes", "int",
-                        host_memory_backend_get_host_nodes,
-                        host_memory_backend_set_host_nodes, NULL, NULL, NULL);
-    object_property_add_enum(obj, "policy", "HostMemPolicy",
-                             HostMemPolicy_lookup,
-                             host_memory_backend_get_policy,
-                             host_memory_backend_set_policy, NULL);
-}
-
-MemoryRegion *
-host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
-{
-    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
-}
-
-static void
-host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
-    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
-    Error *local_err = NULL;
-    void *ptr;
-    uint64_t sz;
-
-    if (bc->alloc) {
-        bc->alloc(backend, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-
-        ptr = memory_region_get_ram_ptr(&backend->mr);
-        sz = memory_region_size(&backend->mr);
-
-        if (backend->merge) {
-            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
-        }
-        if (!backend->dump) {
-            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
-        }
-#ifdef CONFIG_NUMA
-        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
-        /* lastbit == MAX_NODES means maxnode = 0 */
-        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
-        /* ensure policy won't be ignored in case memory is preallocated
-         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
-         * this doesn't catch hugepage case. */
-        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
-
-        /* check for invalid host-nodes and policies and give more verbose
-         * error messages than mbind(). */
-        if (maxnode && backend->policy == MPOL_DEFAULT) {
-            error_setg(errp, "host-nodes must be empty for policy default,"
-                       " or you should explicitly specify a policy other"
-                       " than default");
-            return;
-        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
-            error_setg(errp, "host-nodes must be set for policy %s",
-                       HostMemPolicy_lookup[backend->policy]);
-            return;
-        }
-
-        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
-         * as argument to mbind() due to an old Linux bug (feature?) which
-         * cuts off the last specified node. This means backend->host_nodes
-         * must have MAX_NODES+1 bits available.
-         */
-        assert(sizeof(backend->host_nodes) >=
-               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
-        assert(maxnode <= MAX_NODES);
-        if (mbind(ptr, sz, backend->policy,
-                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
-            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
-                error_setg_errno(errp, errno,
-                                 "cannot bind memory to host NUMA nodes");
-                return;
-            }
-        }
-#endif
-        /* Preallocate memory after the NUMA policy has been instantiated.
-         * This is necessary to guarantee memory is allocated with
-         * specified NUMA policy in place.
-         */
-        if (backend->prealloc) {
-            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
-        }
-    }
-}
-
-static bool
-host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
-{
-    MemoryRegion *mr;
-
-    mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp);
-    if (memory_region_is_mapped(mr)) {
-        return false;
-    } else {
-        return true;
-    }
-}
-
-static void
-host_memory_backend_class_init(ObjectClass *oc, void *data)
-{
-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
-
-    ucc->complete = host_memory_backend_memory_complete;
-    ucc->can_be_deleted = host_memory_backend_can_be_deleted;
-}
-
-static const TypeInfo host_memory_backend_info = {
-    .name = TYPE_MEMORY_BACKEND,
-    .parent = TYPE_OBJECT,
-    .abstract = true,
-    .class_size = sizeof(HostMemoryBackendClass),
-    .class_init = host_memory_backend_class_init,
-    .instance_size = sizeof(HostMemoryBackend),
-    .instance_init = host_memory_backend_init,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_USER_CREATABLE },
-        { }
-    }
-};
-
-static void register_types(void)
-{
-    type_register_static(&host_memory_backend_info);
-}
-
-type_init(register_types);
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -21,7 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
+#include <stdlib.h>
 #include "qemu-common.h"
 #include "sysemu/char.h"
 #include "ui/console.h"
@@ -63,18 +63,11 @@ static void msmouse_chr_close (struct CharDriverState *chr)
    g_free (chr);
 }

-static CharDriverState *qemu_chr_open_msmouse(const char *id,
-                                              ChardevBackend *backend,
-                                              ChardevReturn *ret,
-                                              Error **errp)
+CharDriverState *qemu_chr_open_msmouse(void)
 {
-    ChardevCommon *common = backend->u.msmouse;
    CharDriverState *chr;

-    chr = qemu_chr_alloc(common, errp);
-    if (!chr) {
-        return NULL;
-    }
+    chr = g_malloc0(sizeof(CharDriverState));
    chr->chr_write = msmouse_chr_write;
    chr->chr_close = msmouse_chr_close;
    chr->explicit_be_open = true;
@@ -86,8 +79,7 @@ static CharDriverState *qemu_chr_open_msmouse(const char *id,

 static void register_types(void)
 {
-    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
-                         qemu_chr_open_msmouse);
+    register_char_driver_qapi("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL);
 }

 type_init(register_types);
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -10,7 +10,6 @@
 * See the COPYING file in the top-level directory.
 */

-#include "qemu/osdep.h"
 #include "sysemu/rng.h"
 #include "sysemu/char.h"
 #include "qapi/qmp/qerror.h"
@@ -25,12 +24,33 @@ typedef struct RngEgd

    CharDriverState *chr;
    char *chr_name;
+
+    GSList *requests;
 } RngEgd;

-static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
+/* One outstanding entropy request that is filled from the EGD chardev. */
+typedef struct RngRequest
+{
+    EntropyReceiveFunc *receive_entropy; /* called once offset == size */
+    uint8_t *data;      /* g_malloc()ed buffer of 'size' bytes */
+    void *opaque;       /* handed back as first argument of receive_entropy */
+    size_t offset;      /* write position inside data; starts at 0 */
+    size_t size;        /* number of bytes that were requested */
+} RngRequest;
+
+static void rng_egd_request_entropy(RngBackend *b, size_t size,
+                                    EntropyReceiveFunc *receive_entropy,
+                                    void *opaque)
 {
    RngEgd *s = RNG_EGD(b);
-    size_t size = req->size;
+    RngRequest *req;
+
+    req = g_malloc(sizeof(*req));
+
+    req->offset = 0;
+    req->size = size;
+    req->receive_entropy = receive_entropy;
+    req->opaque = opaque;
+    req->data = g_malloc(req->size);

    while (size > 0) {
        uint8_t header[2];
@@ -44,15 +64,24 @@ static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)

        size -= len;
    }
+
+    s->requests = g_slist_append(s->requests, req);
+}
+
+/* Release a request together with the data buffer it points to. */
+static void rng_egd_free_request(RngRequest *req)
+{
+    g_free(req->data);
+    g_free(req);
 }

 static int rng_egd_chr_can_read(void *opaque)
 {
    RngEgd *s = RNG_EGD(opaque);
-    RngRequest *req;
+    GSList *i;
    int size = 0;

-    QSIMPLEQ_FOREACH(req, &s->parent.requests, next) {
+    for (i = s->requests; i; i = i->next) {
+        RngRequest *req = i->data;
        size += req->size - req->offset;
    }

@@ -64,8 +93,8 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
    RngEgd *s = RNG_EGD(opaque);
    size_t buf_offset = 0;

-    while (size > 0 && !QSIMPLEQ_EMPTY(&s->parent.requests)) {
-        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
+    while (size > 0 && s->requests) {
+        RngRequest *req = s->requests->data;
        int len = MIN(size, req->size - req->offset);

        memcpy(req->data + req->offset, buf + buf_offset, len);
@@ -74,32 +103,56 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
        size -= len;

        if (req->offset == req->size) {
+            s->requests = g_slist_remove_link(s->requests, s->requests);
+
            req->receive_entropy(req->opaque, req->data, req->size);

-            rng_backend_finalize_request(&s->parent, req);
+            rng_egd_free_request(req);
        }
    }
 }

+/*
+ * Drop every pending request of @s: free each RngRequest, then the list
+ * nodes themselves, and leave s->requests as an empty (NULL) list.
+ */
+static void rng_egd_free_requests(RngEgd *s)
+{
+    GSList *node = s->requests;
+
+    /* The payloads go first; g_slist_free() only releases the nodes. */
+    while (node != NULL) {
+        rng_egd_free_request(node->data);
+        node = node->next;
+    }
+
+    g_slist_free(s->requests);
+    s->requests = NULL;
+}
+
+/* RngBackendClass::cancel_requests hook for the EGD backend. */
+static void rng_egd_cancel_requests(RngBackend *b)
+{
+    /*
+     * Cancelling simply deletes the list of pending requests.  Data that is
+     * still sitting in the queue waiting to be read is okay, because there
+     * will always be more data than was originally requested.
+     */
+    rng_egd_free_requests(RNG_EGD(b));
+}
+
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
    RngEgd *s = RNG_EGD(b);

    if (s->chr_name == NULL) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                   "chardev", "a valid character device");
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "chardev", "a valid character device");
        return;
    }

    s->chr = qemu_chr_find(s->chr_name);
    if (s->chr == NULL) {
-        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-                  "Device '%s' not found", s->chr_name);
+        error_set(errp, QERR_DEVICE_NOT_FOUND, s->chr_name);
        return;
    }

    if (qemu_chr_fe_claim(s->chr) != 0) {
-        error_setg(errp, QERR_DEVICE_IN_USE, s->chr_name);
+        error_set(errp, QERR_DEVICE_IN_USE, s->chr_name);
        return;
    }

@@ -114,9 +167,8 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
    RngEgd *s = RNG_EGD(b);

    if (b->opened) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_set(errp, QERR_PERMISSION_DENIED);
    } else {
-        g_free(s->chr_name);
        s->chr_name = g_strdup(value);
    }
 }
@@ -149,6 +201,8 @@ static void rng_egd_finalize(Object *obj)
    }

    g_free(s->chr_name);
+
+    rng_egd_free_requests(s);
 }

 static void rng_egd_class_init(ObjectClass *klass, void *data)
@@ -156,6 +210,7 @@ static void rng_egd_class_init(ObjectClass *klass, void *data)
    RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);

    rbc->request_entropy = rng_egd_request_entropy;
+    rbc->cancel_requests = rng_egd_cancel_requests;
    rbc->opened = rng_egd_opened;
 }

--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -10,7 +10,6 @@
 * See the COPYING file in the top-level directory.
 */

-#include "qemu/osdep.h"
 #include "sysemu/rng-random.h"
 #include "sysemu/rng.h"
 #include "qapi/qmp/qerror.h"
@@ -22,6 +21,10 @@ struct RndRandom

    int fd;
    char *filename;
+
+    EntropyReceiveFunc *receive_func;
+    void *opaque;
+    size_t size;
 };

 /**
@@ -34,35 +37,36 @@ struct RndRandom
 static void entropy_available(void *opaque)
 {
    RndRandom *s = RNG_RANDOM(opaque);
+    uint8_t buffer[s->size];
+    ssize_t len;

-    while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
-        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
-        ssize_t len;
-
-        len = read(s->fd, req->data, req->size);
-        if (len < 0 && errno == EAGAIN) {
-            return;
-        }
-        g_assert(len != -1);
-
-        req->receive_entropy(req->opaque, req->data, len);
-
-        rng_backend_finalize_request(&s->parent, req);
+    len = read(s->fd, buffer, s->size);
+    if (len < 0 && errno == EAGAIN) {
+        return;
    }
+    g_assert(len != -1);
+
+    s->receive_func(s->opaque, buffer, len);
+    s->receive_func = NULL;

-    /* We've drained all requests, the fd handler can be reset. */
    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
 }

-static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
+static void rng_random_request_entropy(RngBackend *b, size_t size,
+                                        EntropyReceiveFunc *receive_entropy,
+                                        void *opaque)
 {
    RndRandom *s = RNG_RANDOM(b);

-    if (QSIMPLEQ_EMPTY(&s->parent.requests)) {
-        /* If there are no pending requests yet, we need to
-         * install our fd handler. */
-        qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
+    if (s->receive_func) {
+        s->receive_func(s->opaque, NULL, 0);
    }
+
+    s->receive_func = receive_entropy;
+    s->opaque = opaque;
+    s->size = size;
+
+    qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
 }

 static void rng_random_opened(RngBackend *b, Error **errp)
@@ -70,8 +74,8 @@ static void rng_random_opened(RngBackend *b, Error **errp)
    RndRandom *s = RNG_RANDOM(b);

    if (s->filename == NULL) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                   "filename", "a valid filename");
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "filename", "a valid filename");
    } else {
        s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
        if (s->fd == -1) {
@@ -84,7 +88,11 @@ static char *rng_random_get_filename(Object *obj, Error **errp)
 {
    RndRandom *s = RNG_RANDOM(obj);

-    return g_strdup(s->filename);
+    if (s->filename) {
+        return g_strdup(s->filename);
+    }
+
+    return NULL;
 }

 static void rng_random_set_filename(Object *obj, const char *filename,
@@ -94,11 +102,14 @@ static void rng_random_set_filename(Object *obj, const char *filename,
    RndRandom *s = RNG_RANDOM(obj);

    if (b->opened) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_set(errp, QERR_PERMISSION_DENIED);
        return;
    }

-    g_free(s->filename);
+    if (s->filename) {
+        g_free(s->filename);
+    }
+
    s->filename = g_strdup(filename);
 }

@@ -112,15 +123,15 @@ static void rng_random_init(Object *obj)
                            NULL);

    s->filename = g_strdup("/dev/random");
-    s->fd = -1;
 }

 static void rng_random_finalize(Object *obj)
 {
    RndRandom *s = RNG_RANDOM(obj);

+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+
    if (s->fd != -1) {
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
        qemu_close(s->fd);
    }

--- a/backends/rng.c
+++ b/backends/rng.c
@@ -10,30 +10,26 @@
 * See the COPYING file in the top-level directory.
 */

-#include "qemu/osdep.h"
 #include "sysemu/rng.h"
 #include "qapi/qmp/qerror.h"
-#include "qom/object_interfaces.h"

 void rng_backend_request_entropy(RngBackend *s, size_t size,
                                 EntropyReceiveFunc *receive_entropy,
                                 void *opaque)
 {
    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
-    RngRequest *req;

    if (k->request_entropy) {
-        req = g_malloc(sizeof(*req));
+        k->request_entropy(s, size, receive_entropy, opaque);
+    }
+}

-        req->offset = 0;
-        req->size = size;
-        req->receive_entropy = receive_entropy;
-        req->opaque = opaque;
-        req->data = g_malloc(req->size);
+void rng_backend_cancel_requests(RngBackend *s)
+{
+    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);

-        k->request_entropy(s, req);
-
-        QSIMPLEQ_INSERT_TAIL(&s->requests, req, next);
+    if (k->cancel_requests) {
+        k->cancel_requests(s);
    }
 }

@@ -44,99 +40,49 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
    return s->opened;
 }

-static void rng_backend_complete(UserCreatable *uc, Error **errp)
+void rng_backend_open(RngBackend *s, Error **errp)
 {
-    object_property_set_bool(OBJECT(uc), true, "opened", errp);
+    object_property_set_bool(OBJECT(s), true, "opened", errp);
 }

 static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
 {
    RngBackend *s = RNG_BACKEND(obj);
    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
-    Error *local_err = NULL;

    if (value == s->opened) {
        return;
    }

    if (!value && s->opened) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_set(errp, QERR_PERMISSION_DENIED);
        return;
    }

    if (k->opened) {
-        k->opened(s, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
+        k->opened(s, errp);
    }

-    s->opened = true;
-}
-
-static void rng_backend_free_request(RngRequest *req)
-{
-    g_free(req->data);
-    g_free(req);
-}
-
-static void rng_backend_free_requests(RngBackend *s)
-{
-    RngRequest *req, *next;
-
-    QSIMPLEQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        rng_backend_free_request(req);
+    if (!error_is_set(errp)) {
+        s->opened = value;
    }
-
-    QSIMPLEQ_INIT(&s->requests);
-}
-
-void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
-{
-    QSIMPLEQ_REMOVE(&s->requests, req, RngRequest, next);
-    rng_backend_free_request(req);
 }

 static void rng_backend_init(Object *obj)
 {
-    RngBackend *s = RNG_BACKEND(obj);
-
-    QSIMPLEQ_INIT(&s->requests);
-
    object_property_add_bool(obj, "opened",
                             rng_backend_prop_get_opened,
                             rng_backend_prop_set_opened,
                             NULL);
 }

-static void rng_backend_finalize(Object *obj)
-{
-    RngBackend *s = RNG_BACKEND(obj);
-
-    rng_backend_free_requests(s);
-}
-
-static void rng_backend_class_init(ObjectClass *oc, void *data)
-{
-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
-
-    ucc->complete = rng_backend_complete;
-}
-
 static const TypeInfo rng_backend_info = {
    .name = TYPE_RNG_BACKEND,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(RngBackend),
    .instance_init = rng_backend_init,
-    .instance_finalize = rng_backend_finalize,
    .class_size = sizeof(RngBackendClass),
-    .class_init = rng_backend_class_init,
    .abstract = true,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_USER_CREATABLE },
-        { }
-    }
 };

 static void register_types(void)
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -1,136 +0,0 @@
-/*
- * QEMU Char Device for testsuite control
- *
- * Copyright (c) 2014 Red Hat, Inc.
- *
- * Author: Paolo Bonzini <pbonzini@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "sysemu/char.h"
-
-#define BUF_SIZE 32
-
-typedef struct {
-    CharDriverState *chr;
-    uint8_t in_buf[32];
-    int in_buf_used;
-} TestdevCharState;
-
-/* Try to interpret a whole incoming packet */
-static int testdev_eat_packet(TestdevCharState *testdev)
-{
-    const uint8_t *cur = testdev->in_buf;
-    int len = testdev->in_buf_used;
-    uint8_t c;
-    int arg;
-
-#define EAT(c) do { \
-    if (!len--) {   \
-        return 0;   \
-    }               \
-    c = *cur++;     \
-} while (0)
-
-    EAT(c);
-
-    while (isspace(c)) {
-        EAT(c);
-    }
-
-    arg = 0;
-    while (isdigit(c)) {
-        arg = arg * 10 + c - '0';
-        EAT(c);
-    }
-
-    while (isspace(c)) {
-        EAT(c);
-    }
-
-    switch (c) {
-    case 'q':
-        exit((arg << 1) | 1);
-        break;
-    default:
-        break;
-    }
-    return cur - testdev->in_buf;
-}
-
-/* The other end is writing some data.  Store it and try to interpret */
-static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    TestdevCharState *testdev = chr->opaque;
-    int tocopy, eaten, orig_len = len;
-
-    while (len) {
-        /* Complete our buffer as much as possible */
-        tocopy = MIN(len, BUF_SIZE - testdev->in_buf_used);
-
-        memcpy(testdev->in_buf + testdev->in_buf_used, buf, tocopy);
-        testdev->in_buf_used += tocopy;
-        buf += tocopy;
-        len -= tocopy;
-
-        /* Interpret it as much as possible */
-        while (testdev->in_buf_used > 0 &&
-               (eaten = testdev_eat_packet(testdev)) > 0) {
-            memmove(testdev->in_buf, testdev->in_buf + eaten,
-                    testdev->in_buf_used - eaten);
-            testdev->in_buf_used -= eaten;
-        }
-    }
-    return orig_len;
-}
-
-static void testdev_close(struct CharDriverState *chr)
-{
-    TestdevCharState *testdev = chr->opaque;
-
-    g_free(testdev);
-}
-
-static CharDriverState *chr_testdev_init(const char *id,
-                                         ChardevBackend *backend,
-                                         ChardevReturn *ret,
-                                         Error **errp)
-{
-    TestdevCharState *testdev;
-    CharDriverState *chr;
-
-    testdev = g_new0(TestdevCharState, 1);
-    testdev->chr = chr = g_new0(CharDriverState, 1);
-
-    chr->opaque = testdev;
-    chr->chr_write = testdev_write;
-    chr->chr_close = testdev_close;
-
-    return chr;
-}
-
-static void register_types(void)
-{
-    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
-                         chr_testdev_init);
-}
-
-type_init(register_types);
--- a/backends/tpm.c
+++ b/backends/tpm.c
@@ -12,7 +12,6 @@
 * Based on backends/rng.c by Anthony Liguori
 */

-#include "qemu/osdep.h"
 #include "sysemu/tpm_backend.h"
 #include "qapi/qmp/qerror.h"
 #include "sysemu/tpm.h"
@@ -37,7 +36,7 @@ void tpm_backend_destroy(TPMBackend *s)
 {
    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);

-    k->ops->destroy(s);
+    return k->ops->destroy(s);
 }

 int tpm_backend_init(TPMBackend *s, TPMState *state,
@@ -97,20 +96,6 @@ bool tpm_backend_get_tpm_established_flag(TPMBackend *s)
    return k->ops->get_tpm_established_flag(s);
 }

-int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty)
-{
-    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
-
-    return k->ops->reset_tpm_established_flag(s, locty);
-}
-
-TPMVersion tpm_backend_get_tpm_version(TPMBackend *s)
-{
-    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
-
-    return k->ops->get_tpm_version(s);
-}
-
 static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
 {
    TPMBackend *s = TPM_BACKEND(obj);
@@ -127,26 +112,23 @@ static void tpm_backend_prop_set_opened(Object *obj, bool value, Error **errp)
 {
    TPMBackend *s = TPM_BACKEND(obj);
    TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
-    Error *local_err = NULL;

    if (value == s->opened) {
        return;
    }

    if (!value && s->opened) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_set(errp, QERR_PERMISSION_DENIED);
        return;
    }

    if (k->opened) {
-        k->opened(s, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
+        k->opened(s, errp);
    }

-    s->opened = true;
+    if (!error_is_set(errp)) {
+        s->opened = value;
+    }
 }

 static void tpm_backend_instance_init(Object *obj)
@@ -180,6 +162,17 @@ void tpm_backend_thread_end(TPMBackendThread *tbt)
    }
 }

+/*
+ * Push a TPM_BACKEND_CMD_TPM_RESET command onto the thread pool of @tbt.
+ * When no pool exists yet, tpm_backend_thread_create() is called with
+ * @func and @user_data instead; otherwise those two arguments are unused.
+ */
+void tpm_backend_thread_tpm_reset(TPMBackendThread *tbt,
+                                  GFunc func, gpointer user_data)
+{
+    if (tbt->pool) {
+        g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_TPM_RESET,
+                           NULL);
+        return;
+    }
+
+    tpm_backend_thread_create(tbt, func, user_data);
+}
+
 static const TypeInfo tpm_backend_info = {
    .name = TYPE_TPM_BACKEND,
    .parent = TYPE_OBJECT,
--- a/balloon.c
+++ b/balloon.c
@@ -24,45 +24,17 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "monitor/monitor.h"
 #include "exec/cpu-common.h"
 #include "sysemu/kvm.h"
 #include "sysemu/balloon.h"
 #include "trace.h"
 #include "qmp-commands.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"

 static QEMUBalloonEvent *balloon_event_fn;
 static QEMUBalloonStatus *balloon_stat_fn;
 static void *balloon_opaque;
-static bool balloon_inhibited;
-
-bool qemu_balloon_is_inhibited(void)
-{
-    return balloon_inhibited;
-}
-
-void qemu_balloon_inhibit(bool state)
-{
-    balloon_inhibited = state;
-}
-
-static bool have_balloon(Error **errp)
-{
-    if (kvm_enabled() && !kvm_has_sync_mmu()) {
-        error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
-                  "Using KVM without synchronous MMU, balloon unavailable");
-        return false;
-    }
-    if (!balloon_event_fn) {
-        error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
-                  "No balloon device has been activated");
-        return false;
-    }
-    return true;
-}

 int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
                             QEMUBalloonStatus *stat_func, void *opaque)
@@ -71,6 +43,7 @@ int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
        /* We're already registered one balloon handler.  How many can
         * a guest really have?
         */
+        error_report("Another balloon device already registered");
        return -1;
    }
    balloon_event_fn = event_func;
@@ -89,30 +62,71 @@ void qemu_remove_balloon_handler(void *opaque)
    balloon_opaque = NULL;
 }

+/*
+ * Hand a new balloon target to the registered event handler.
+ * Returns 1 when a handler was called, 0 when none is registered.
+ */
+static int qemu_balloon(ram_addr_t target)
+{
+    if (balloon_event_fn) {
+        trace_balloon_event(balloon_opaque, target);
+        balloon_event_fn(balloon_opaque, target);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Let the registered status handler fill in @info.
+ * Returns 1 when a handler was called, 0 when none is registered.
+ */
+static int qemu_balloon_status(BalloonInfo *info)
+{
+    if (balloon_stat_fn) {
+        balloon_stat_fn(balloon_opaque, info);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Emit a QEVENT_BALLOON_CHANGE monitor event whose data object carries
+ * @actual under the key 'actual'.
+ */
+void qemu_balloon_changed(int64_t actual)
+{
+    QObject *payload = qobject_from_jsonf("{ 'actual': %" PRId64 " }", actual);
+
+    monitor_protocol_event(QEVENT_BALLOON_CHANGE, payload);
+
+    /* Drop the reference taken when the object was built. */
+    qobject_decref(payload);
+}
+
+
 BalloonInfo *qmp_query_balloon(Error **errp)
 {
    BalloonInfo *info;

-    if (!have_balloon(errp)) {
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
        return NULL;
    }

    info = g_malloc0(sizeof(*info));
-    balloon_stat_fn(balloon_opaque, info);
+
+    if (qemu_balloon_status(info) == 0) {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
+        qapi_free_BalloonInfo(info);
+        return NULL;
+    }
+
    return info;
 }

-void qmp_balloon(int64_t target, Error **errp)
+void qmp_balloon(int64_t value, Error **errp)
 {
-    if (!have_balloon(errp)) {
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
        return;
    }

-    if (target <= 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
+    if (value <= 0) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
        return;
    }
-
-    trace_balloon_event(balloon_opaque, target);
-    balloon_event_fn(balloon_opaque, target);
+    
+    if (qemu_balloon(value) == 0) {
+        error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
+    }
 }
--- a/block-migration.c
+++ b/block-migration.c
@@ -13,18 +13,15 @@
 * GNU GPL, version 2 or (at your option) any later version.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "block/block.h"
-#include "qemu/error-report.h"
-#include "qemu/main-loop.h"
+#include "block/block_int.h"
 #include "hw/hw.h"
 #include "qemu/queue.h"
 #include "qemu/timer.h"
 #include "migration/block.h"
 #include "migration/migration.h"
 #include "sysemu/blockdev.h"
-#include "sysemu/block-backend.h"
+#include <assert.h>

 #define BLOCK_SIZE                       (1 << 20)
 #define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)
@@ -36,8 +33,6 @@

 #define MAX_IS_ALLOCATED_SEARCH 65536

-#define MAX_INFLIGHT_IO 512
-
 //#define DEBUG_BLK_MIGRATION

 #ifdef DEBUG_BLK_MIGRATION
@@ -54,25 +49,15 @@ typedef struct BlkMigDevState {
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
-    Error *blocker;

    /* Only used by migration thread.  Does not need a lock.  */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

-    /* Data in the aio_bitmap is protected by block migration lock.
-     * Allocation and free happen during setup and cleanup respectively.
-     */
-    unsigned long *aio_bitmap;
-
    /* Protected by block migration lock.  */
+    unsigned long *aio_bitmap;
    int64_t completed_sectors;
-
-    /* During migration this is protected by iothread lock / AioContext.
-     * Allocation and free happen during setup and cleanup respectively.
-     */
-    BdrvDirtyBitmap *dirty_bitmap;
 } BlkMigDevState;

 typedef struct BlkMigBlock {
@@ -83,7 +68,7 @@ typedef struct BlkMigBlock {
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
-    BlockAIOCB *aiocb;
+    BlockDriverAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
@@ -108,7 +93,7 @@ typedef struct BlkMigState {
    int prev_progress;
    int bulk_completed;

-    /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
+    /* Lock must be taken _inside_ the iothread lock.  */
    QemuMutex lock;
 } BlkMigState;

@@ -143,9 +128,9 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
                     | flags);

    /* device name */
-    len = strlen(bdrv_get_device_name(blk->bmds->bs));
+    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
-    qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
+    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    /* if a block is zero we need to flush here since the network
     * bandwidth is now a lot higher than the storage device bandwidth.
@@ -199,7 +184,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
 {
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

-    if (sector < bdrv_nb_sectors(bmds->bs)) {
+    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
@@ -236,7 +221,8 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

-    bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
@@ -272,13 +258,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
-        aio_context_acquire(bdrv_get_aio_context(bs));
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
-        aio_context_release(bdrv_get_aio_context(bs));
        qemu_mutex_unlock_iothread();
    }

@@ -298,7 +282,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
        nr_sectors = total_sectors - cur_sector;
    }

-    blk = g_new(BlkMigBlock, 1);
+    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
@@ -312,21 +296,11 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
    block_mig_state.submitted++;
    blk_mig_unlock();

-    /* We do not know if bs is under the main thread (and thus does
-     * not acquire the AioContext when doing AIO) or rather under
-     * dataplane.  Thus acquire both the iothread mutex and the
-     * AioContext.
-     *
-     * This is ugly and will disappear when we make bdrv_* thread-safe,
-     * without the need to acquire the AioContext.
-     */
    qemu_mutex_lock_iothread();
-    aio_context_acquire(bdrv_get_aio_context(bmds->bs));
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

-    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
-    aio_context_release(bdrv_get_aio_context(bmds->bs));
+    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
@@ -335,53 +309,51 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)

 /* Called with iothread lock taken.  */

-static int set_dirty_tracking(void)
+static void set_dirty_tracking(int enable)
 {
    BlkMigDevState *bmds;
-    int ret;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
-        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
-                                                      NULL, NULL);
-        aio_context_release(bdrv_get_aio_context(bmds->bs));
-        if (!bmds->dirty_bitmap) {
-            ret = -errno;
-            goto fail;
-        }
+        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
    }
-    return 0;
-
-fail:
-    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        if (bmds->dirty_bitmap) {
-            aio_context_acquire(bdrv_get_aio_context(bmds->bs));
-            bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
-            aio_context_release(bdrv_get_aio_context(bmds->bs));
-        }
-    }
-    return ret;
 }

-/* Called with iothread lock taken.  */
-
-static void unset_dirty_tracking(void)
+static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
 {
    BlkMigDevState *bmds;
+    int64_t sectors;

-    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
-        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
-        aio_context_release(bdrv_get_aio_context(bmds->bs));
+    if (!bdrv_is_read_only(bs)) {
+        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+        if (sectors <= 0) {
+            return;
+        }
+
+        bmds = g_malloc0(sizeof(BlkMigDevState));
+        bmds->bs = bs;
+        bmds->bulk_completed = 0;
+        bmds->total_sectors = sectors;
+        bmds->completed_sectors = 0;
+        bmds->shared_base = block_mig_state.shared_base;
+        alloc_aio_bitmap(bmds);
+        bdrv_set_in_use(bs, 1);
+        bdrv_ref(bs);
+
+        block_mig_state.total_sector_sum += sectors;
+
+        if (bmds->shared_base) {
+            DPRINTF("Start migration for %s with shared base image\n",
+                    bs->device_name);
+        } else {
+            DPRINTF("Start full migration for %s\n", bs->device_name);
+        }
+
+        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
 }

 static void init_blk_migration(QEMUFile *f)
 {
-    BlockDriverState *bs;
-    BlkMigDevState *bmds;
-    int64_t sectors;
-
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
@@ -390,38 +362,7 @@ static void init_blk_migration(QEMUFile *f)
    block_mig_state.bulk_completed = 0;
    block_mig_state.zero_blocks = migrate_zero_blocks();

-    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-        if (bdrv_is_read_only(bs)) {
-            continue;
-        }
-
-        sectors = bdrv_nb_sectors(bs);
-        if (sectors <= 0) {
-            return;
-        }
-
-        bmds = g_new0(BlkMigDevState, 1);
-        bmds->bs = bs;
-        bmds->bulk_completed = 0;
-        bmds->total_sectors = sectors;
-        bmds->completed_sectors = 0;
-        bmds->shared_base = block_mig_state.shared_base;
-        alloc_aio_bitmap(bmds);
-        error_setg(&bmds->blocker, "block device is in use by migration");
-        bdrv_op_block_all(bs, bmds->blocker);
-        bdrv_ref(bs);
-
-        block_mig_state.total_sector_sum += sectors;
-
-        if (bmds->shared_base) {
-            DPRINTF("Start migration for %s with shared base image\n",
-                    bdrv_get_device_name(bs));
-        } else {
-            DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
-        }
-
-        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
-    }
+    bdrv_iterate(init_blk_migration_it, NULL);
 }

 /* Called with no lock taken.  */
@@ -472,7 +413,7 @@ static void blk_mig_reset_dirty_cursor(void)
    }
 }

-/* Called with iothread lock and AioContext taken.  */
+/* Called with iothread lock taken.  */

 static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
@@ -487,18 +428,18 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
-            bdrv_drain(bmds->bs);
+            bdrv_drain_all();
        } else {
            blk_mig_unlock();
        }
-        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
+        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
-            blk = g_new(BlkMigBlock, 1);
+            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
@@ -527,7 +468,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                g_free(blk);
            }

-            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
+            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -555,9 +496,7 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
        ret = mig_save_device_dirty(f, bmds, is_async);
-        aio_context_release(bdrv_get_aio_context(bmds->bs));
        if (ret <= 0) {
            break;
        }
@@ -615,9 +554,7 @@ static int64_t get_remaining_dirty(void)
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        aio_context_acquire(bdrv_get_aio_context(bmds->bs));
-        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
-        aio_context_release(bdrv_get_aio_context(bmds->bs));
+        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty << BDRV_SECTOR_BITS;
@@ -625,32 +562,24 @@ static int64_t get_remaining_dirty(void)

 /* Called with iothread lock taken.  */

-static void block_migration_cleanup(void *opaque)
+static void blk_mig_cleanup(void)
 {
    BlkMigDevState *bmds;
    BlkMigBlock *blk;
-    AioContext *ctx;

    bdrv_drain_all();

-    unset_dirty_tracking();
+    set_dirty_tracking(0);

+    blk_mig_lock();
    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
-        bdrv_op_unblock_all(bmds->bs, bmds->blocker);
-        error_free(bmds->blocker);
-
-        /* Save ctx, because bmds->bs can disappear during bdrv_unref.  */
-        ctx = bdrv_get_aio_context(bmds->bs);
-        aio_context_acquire(ctx);
+        bdrv_set_in_use(bmds->bs, 0);
        bdrv_unref(bmds->bs);
-        aio_context_release(ctx);
-
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

-    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
@@ -659,6 +588,11 @@ static void block_migration_cleanup(void *opaque)
    blk_mig_unlock();
 }

+static void block_migration_cancel(void *opaque)
+{
+    blk_mig_cleanup();
+}
+
 static int block_save_setup(QEMUFile *f, void *opaque)
 {
    int ret;
@@ -670,14 +604,9 @@ static int block_save_setup(QEMUFile *f, void *opaque)
    init_blk_migration(f);

    /* start track dirty blocks */
-    ret = set_dirty_tracking();
-
+    set_dirty_tracking(1);
    qemu_mutex_unlock_iothread();

-    if (ret) {
-        return ret;
-    }
-
    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
@@ -689,7 +618,6 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
 {
    int ret;
    int64_t last_ftell = qemu_ftell(f);
-    int64_t delta_ftell;

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);
@@ -705,10 +633,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
-           qemu_file_get_rate_limit(f) &&
-           (block_mig_state.submitted +
-            block_mig_state.read_done) <
-           MAX_INFLIGHT_IO) {
+           qemu_file_get_rate_limit(f)) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
@@ -742,14 +667,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
-    delta_ftell = qemu_ftell(f) - last_ftell;
-    if (delta_ftell > 0) {
-        return 1;
-    } else if (delta_ftell < 0) {
-        return -1;
-    } else {
-        return 0;
-    }
+    return qemu_ftell(f) - last_ftell;
 }

 /* Called with iothread lock taken.  */
@@ -788,33 +706,30 @@ static int block_save_complete(QEMUFile *f, void *opaque)

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

+    blk_mig_cleanup();
    return 0;
 }

-static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-                               uint64_t *non_postcopiable_pending,
-                               uint64_t *postcopiable_pending)
+static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
 {
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
-    pending = get_remaining_dirty();
-    qemu_mutex_unlock_iothread();
-
    blk_mig_lock();
-    pending += block_mig_state.submitted * BLOCK_SIZE +
-               block_mig_state.read_done * BLOCK_SIZE;
-    blk_mig_unlock();
+    pending = get_remaining_dirty() +
+                       block_mig_state.submitted * BLOCK_SIZE +
+                       block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
-    if (pending <= max_size && !block_mig_state.bulk_completed) {
-        pending = max_size + BLOCK_SIZE;
+    if (pending == 0 && !block_mig_state.bulk_completed) {
+        pending = BLOCK_SIZE;
    }
+    blk_mig_unlock();
+    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
-    /* We don't do postcopy */
-    *non_postcopiable_pending += pending;
+    return pending;
 }

 static int block_load(QEMUFile *f, void *opaque, int version_id)
@@ -824,8 +739,6 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
-    BlockBackend *blk;
-    Error *local_err = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
@@ -843,33 +756,21 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

-            blk = blk_by_name(device_name);
-            if (!blk) {
-                fprintf(stderr, "Error unknown block device %s\n",
-                        device_name);
-                return -EINVAL;
-            }
-            bs = blk_bs(blk);
+            bs = bdrv_find(device_name);
            if (!bs) {
-                fprintf(stderr, "Block device %s has no medium\n",
+                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
-                total_sectors = bdrv_nb_sectors(bs);
+                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
-
-                bdrv_invalidate_cache(bs, &local_err);
-                if (local_err) {
-                    error_report_err(local_err);
-                    return -EINVAL;
-                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -879,8 +780,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
            }

            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
-                ret = bdrv_write_zeroes(bs, addr, nr_sectors,
-                                        BDRV_REQ_MAY_UNMAP);
+                ret = bdrv_write_zeroes(bs, addr, nr_sectors);
            } else {
                buf = g_malloc(BLOCK_SIZE);
                qemu_get_buffer(f, buf, BLOCK_SIZE);
@@ -926,14 +826,14 @@ static bool block_is_active(void *opaque)
    return block_mig_state.blk_enable == 1;
 }

-static SaveVMHandlers savevm_block_handlers = {
+SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
-    .save_live_complete_precopy = block_save_complete,
+    .save_live_complete = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
-    .cleanup = block_migration_cleanup,
+    .cancel = block_migration_cancel,
    .is_active = block_is_active,
 };

--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,44 +1,26 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
-block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += block-backend.o snapshot.o qapi.o
+block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o
-block-obj-y += throttle-groups.o

-block-obj-y += nbd.o nbd-client.o sheepdog.o
+ifeq ($(CONFIG_POSIX),y)
+block-obj-y += nbd.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
-block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o dirty-bitmap.o
-block-obj-y += write-threshold.o
+endif

 common-obj-y += stream.o
 common-obj-y += commit.o
+common-obj-y += mirror.o
 common-obj-y += backup.o

-iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
-iscsi.o-libs       := $(LIBISCSI_LIBS)
-curl.o-cflags      := $(CURL_CFLAGS)
-curl.o-libs        := $(CURL_LIBS)
-rbd.o-cflags       := $(RBD_CFLAGS)
-rbd.o-libs         := $(RBD_LIBS)
-gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
-gluster.o-libs     := $(GLUSTERFS_LIBS)
-ssh.o-cflags       := $(LIBSSH2_CFLAGS)
-ssh.o-libs         := $(LIBSSH2_LIBS)
-archipelago.o-libs := $(ARCHIPELAGO_LIBS)
-block-obj-m        += dmg.o
-dmg.o-libs         := $(BZIP2_LIBS)
-qcow.o-libs        := -lz
-linux-aio.o-libs   := -laio
+$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -1,173 +0,0 @@
-/*
- * QEMU System Emulator block accounting
- *
- * Copyright (c) 2011 Christoph Hellwig
- * Copyright (c) 2015 Igalia, S.L.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/accounting.h"
-#include "block/block_int.h"
-#include "qemu/timer.h"
-#include "sysemu/qtest.h"
-
-static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
-static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
-
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
-                     bool account_failed)
-{
-    stats->account_invalid = account_invalid;
-    stats->account_failed = account_failed;
-
-    if (qtest_enabled()) {
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-}
-
-void block_acct_cleanup(BlockAcctStats *stats)
-{
-    BlockAcctTimedStats *s, *next;
-    QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
-        g_free(s);
-    }
-}
-
-void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
-{
-    BlockAcctTimedStats *s;
-    unsigned i;
-
-    s = g_new0(BlockAcctTimedStats, 1);
-    s->interval_length = interval_length;
-    QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
-
-    for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
-        timed_average_init(&s->latency[i], clock_type,
-                           (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
-    }
-}
-
-BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
-                                              BlockAcctTimedStats *s)
-{
-    if (s == NULL) {
-        return QSLIST_FIRST(&stats->intervals);
-    } else {
-        return QSLIST_NEXT(s, entries);
-    }
-}
-
-void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
-                      int64_t bytes, enum BlockAcctType type)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    cookie->bytes = bytes;
-    cookie->start_time_ns = qemu_clock_get_ns(clock_type);
-    cookie->type = type;
-}
-
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    BlockAcctTimedStats *s;
-    int64_t time_ns = qemu_clock_get_ns(clock_type);
-    int64_t latency_ns = time_ns - cookie->start_time_ns;
-
-    if (qtest_enabled()) {
-        latency_ns = qtest_latency_ns;
-    }
-
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->nr_bytes[cookie->type] += cookie->bytes;
-    stats->nr_ops[cookie->type]++;
-    stats->total_time_ns[cookie->type] += latency_ns;
-    stats->last_access_time_ns = time_ns;
-
-    QSLIST_FOREACH(s, &stats->intervals, entries) {
-        timed_average_account(&s->latency[cookie->type], latency_ns);
-    }
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->failed_ops[cookie->type]++;
-
-    if (stats->account_failed) {
-        BlockAcctTimedStats *s;
-        int64_t time_ns = qemu_clock_get_ns(clock_type);
-        int64_t latency_ns = time_ns - cookie->start_time_ns;
-
-        if (qtest_enabled()) {
-            latency_ns = qtest_latency_ns;
-        }
-
-        stats->total_time_ns[cookie->type] += latency_ns;
-        stats->last_access_time_ns = time_ns;
-
-        QSLIST_FOREACH(s, &stats->intervals, entries) {
-            timed_average_account(&s->latency[cookie->type], latency_ns);
-        }
-    }
-}
-
-void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    /* block_acct_done() and block_acct_failed() update
-     * total_time_ns[], but this one does not. The reason is that
-     * invalid requests are accounted during their submission,
-     * therefore there's no actual I/O involved. */
-
-    stats->invalid_ops[type]++;
-
-    if (stats->account_invalid) {
-        stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
-    }
-}
-
-void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
-                      int num_requests)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-    stats->merged[type] += num_requests;
-}
-
-int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
-{
-    return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
-}
-
-double block_acct_queue_depth(BlockAcctTimedStats *stats,
-                              enum BlockAcctType type)
-{
-    uint64_t sum, elapsed;
-
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    sum = timed_average_sum(&stats->latency[type], &elapsed);
-
-    return (double) sum / elapsed;
-}
--- a/block/archipelago.c
+++ b/block/archipelago.c
--- a/block/backup.c
+++ b/block/backup.c
@@ -11,18 +11,20 @@
 *
 */

-#include "qemu/osdep.h"
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>

 #include "trace.h"
 #include "block/block.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
-#include "sysemu/block-backend.h"
-#include "qemu/bitmap.h"

-#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+#define BACKUP_CLUSTER_BITS 16
+#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
+#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
+
 #define SLICE_TIME 100000000ULL /* ns */

 typedef struct CowRequest {
@@ -35,25 +37,16 @@ typedef struct CowRequest {
 typedef struct BackupBlockJob {
    BlockJob common;
    BlockDriverState *target;
-    /* bitmap for sync=incremental */
-    BdrvDirtyBitmap *sync_bitmap;
    MirrorSyncMode sync_mode;
    RateLimit limit;
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    CoRwlock flush_rwlock;
    uint64_t sectors_read;
-    unsigned long *done_bitmap;
-    int64_t cluster_size;
+    HBitmap *bitmap;
    QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;

-/* Size of a cluster in sectors, instead of bytes. */
-static inline int64_t cluster_size_sectors(BackupBlockJob *job)
-{
-  return job->cluster_size / BDRV_SECTOR_SIZE;
-}
-
 /* See if in-flight requests overlap and wait for them to complete */
 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
                                                       int64_t start,
@@ -93,8 +86,7 @@ static void cow_request_end(CowRequest *req)

 static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
-                                      bool *error_is_read,
-                                      bool is_write_notifier)
+                                      bool *error_is_read)
 {
    BackupBlockJob *job = (BackupBlockJob *)bs->job;
    CowRequest cow_request;
@@ -102,14 +94,13 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
    QEMUIOVector bounce_qiov;
    void *bounce_buffer = NULL;
    int ret = 0;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int64_t start, end;
    int n;

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

-    start = sector_num / sectors_per_cluster;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+    start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);

    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);

@@ -117,32 +108,26 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
    cow_request_begin(&cow_request, job, start, end);

    for (; start < end; start++) {
-        if (test_bit(start, job->done_bitmap)) {
+        if (hbitmap_get(job->bitmap, start)) {
            trace_backup_do_cow_skip(job, start);
            continue; /* already copied */
        }

        trace_backup_do_cow_process(job, start);

-        n = MIN(sectors_per_cluster,
+        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
                job->common.len / BDRV_SECTOR_SIZE -
-                start * sectors_per_cluster);
+                start * BACKUP_SECTORS_PER_CLUSTER);

        if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
+            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
        }
        iov.iov_base = bounce_buffer;
        iov.iov_len = n * BDRV_SECTOR_SIZE;
        qemu_iovec_init_external(&bounce_qiov, &iov, 1);

-        if (is_write_notifier) {
-            ret = bdrv_co_readv_no_serialising(bs,
-                                           start * sectors_per_cluster,
-                                           n, &bounce_qiov);
-        } else {
-            ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
-                                &bounce_qiov);
-        }
+        ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+                            &bounce_qiov);
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, start, ret);
            if (error_is_read) {
@@ -153,11 +138,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,

        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
            ret = bdrv_co_write_zeroes(job->target,
-                                       start * sectors_per_cluster,
-                                       n, BDRV_REQ_MAY_UNMAP);
+                                       start * BACKUP_SECTORS_PER_CLUSTER, n);
        } else {
            ret = bdrv_co_writev(job->target,
-                                 start * sectors_per_cluster, n,
+                                 start * BACKUP_SECTORS_PER_CLUSTER, n,
                                 &bounce_qiov);
        }
        if (ret < 0) {
@@ -168,7 +152,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
            goto out;
        }

-        set_bit(start, job->done_bitmap);
+        hbitmap_set(job->bitmap, start, 1);

        /* Publish progress, guest I/O counts as progress too.  Note that the
         * offset field is an opaque progress value, it is not a disk offset.
@@ -196,13 +180,8 @@ static int coroutine_fn backup_before_write_notify(
        void *opaque)
 {
    BdrvTrackedRequest *req = opaque;
-    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
-    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

-    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-
-    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
+    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -210,7 +189,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);

    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
@@ -220,41 +199,7 @@ static void backup_iostatus_reset(BlockJob *job)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common);

-    if (s->target->blk) {
-        blk_iostatus_reset(s->target->blk);
-    }
-}
-
-static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDriverState *bs = job->common.bs;
-
-    if (ret < 0 || block_job_is_cancelled(&job->common)) {
-        /* Merge the successor back into the parent, delete nothing. */
-        bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
-        assert(bm);
-    } else {
-        /* Everything is fine, delete this bitmap and install the backup. */
-        bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
-        assert(bm);
-    }
-}
-
-static void backup_commit(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    if (s->sync_bitmap) {
-        backup_cleanup_sync_bitmap(s, 0);
-    }
-}
-
-static void backup_abort(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    if (s->sync_bitmap) {
-        backup_cleanup_sync_bitmap(s, -1);
-    }
+    bdrv_iostatus_reset(s->target);
 }

 static const BlockJobDriver backup_job_driver = {
@@ -262,8 +207,6 @@ static const BlockJobDriver backup_job_driver = {
    .job_type       = BLOCK_JOB_TYPE_BACKUP,
    .set_speed      = backup_set_speed,
    .iostatus_reset = backup_iostatus_reset,
-    .commit         = backup_commit,
-    .abort          = backup_abort,
 };

 static BlockErrorAction backup_error_action(BackupBlockJob *job,
@@ -278,112 +221,9 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
    }
 }

-typedef struct {
-    int ret;
-} BackupCompleteData;
-
-static void backup_complete(BlockJob *job, void *opaque)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    BackupCompleteData *data = opaque;
-
-    bdrv_unref(s->target);
-
-    block_job_completed(job, data->ret);
-    g_free(data);
-}
-
-static bool coroutine_fn yield_and_check(BackupBlockJob *job)
-{
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    /* we need to yield so that bdrv_drain_all() returns.
-     * (without, VM does not reboot)
-     */
-    if (job->common.speed) {
-        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->sectors_read);
-        job->sectors_read = 0;
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-    } else {
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-    }
-
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    return false;
-}
-
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
-{
-    bool error_is_read;
-    int ret = 0;
-    int clusters_per_iter;
-    uint32_t granularity;
-    int64_t sector;
-    int64_t cluster;
-    int64_t end;
-    int64_t last_cluster = -1;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    BlockDriverState *bs = job->common.bs;
-    HBitmapIter hbi;
-
-    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
-
-    /* Find the next dirty sector(s) */
-    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
-        cluster = sector / sectors_per_cluster;
-
-        /* Fake progress updates for any clusters we skipped */
-        if (cluster != last_cluster + 1) {
-            job->common.offset += ((cluster - last_cluster - 1) *
-                                   job->cluster_size);
-        }
-
-        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
-            do {
-                if (yield_and_check(job)) {
-                    return ret;
-                }
-                ret = backup_do_cow(bs, cluster * sectors_per_cluster,
-                                    sectors_per_cluster, &error_is_read,
-                                    false);
-                if ((ret < 0) &&
-                    backup_error_action(job, error_is_read, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    return ret;
-                }
-            } while (ret < 0);
-        }
-
-        /* If the bitmap granularity is smaller than the backup granularity,
-         * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
-        }
-
-        last_cluster = cluster - 1;
-    }
-
-    /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
-    if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
-    }
-
-    return ret;
-}
-
 static void coroutine_fn backup_run(void *opaque)
 {
    BackupBlockJob *job = opaque;
-    BackupCompleteData *data;
    BlockDriverState *bs = job->common.bs;
    BlockDriverState *target = job->target;
    BlockdevOnError on_target_error = job->on_target_error;
@@ -391,22 +231,20 @@ static void coroutine_fn backup_run(void *opaque)
        .notify = backup_before_write_notify,
    };
    int64_t start, end;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;

    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

    start = 0;
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
+    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
+                       BACKUP_SECTORS_PER_CLUSTER);

-    job->done_bitmap = bitmap_new(end);
+    job->bitmap = hbitmap_alloc(end, 0);

    bdrv_set_enable_write_cache(target, true);
-    if (target->blk) {
-        blk_set_on_error(target->blk, on_target_error, on_target_error);
-        blk_iostatus_enable(target->blk);
-    }
+    bdrv_set_on_error(target, on_target_error, on_target_error);
+    bdrv_iostatus_enable(target);

    bdrv_add_before_write_notifier(bs, &before_write);

@@ -418,13 +256,28 @@ static void coroutine_fn backup_run(void *opaque)
            qemu_coroutine_yield();
            job->common.busy = true;
        }
-    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (; start < end; start++) {
            bool error_is_read;
-            if (yield_and_check(job)) {
+
+            if (block_job_is_cancelled(&job->common)) {
+                break;
+            }
+
+            /* we need to yield so that qemu_aio_flush() returns.
+             * (without, VM does not reboot)
+             */
+            if (job->common.speed) {
+                uint64_t delay_ns = ratelimit_calculate_delay(
+                        &job->limit, job->sectors_read);
+                job->sectors_read = 0;
+                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+            } else {
+                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+            }
+
+            if (block_job_is_cancelled(&job->common)) {
                break;
            }

@@ -435,7 +288,7 @@ static void coroutine_fn backup_run(void *opaque)
                /* Check to see if these blocks are already in the
                 * backing file. */

-                for (i = 0; i < sectors_per_cluster;) {
+                for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are are all in the same state.
@@ -444,11 +297,11 @@ static void coroutine_fn backup_run(void *opaque)
                     * needed but at some point that is always the case. */
                    alloced =
                        bdrv_is_allocated(bs,
-                                start * sectors_per_cluster + i,
-                                sectors_per_cluster - i, &n);
+                                start * BACKUP_SECTORS_PER_CLUSTER + i,
+                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
                    i += n;

-                    if (alloced == 1 || n == 0) {
+                    if (alloced == 1) {
                        break;
                    }
                }
@@ -460,13 +313,13 @@ static void coroutine_fn backup_run(void *opaque)
                }
            }
            /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * sectors_per_cluster,
-                                sectors_per_cluster, &error_is_read, false);
+            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
+                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
                    backup_error_action(job, error_is_read, -ret);
-                if (action == BLOCK_ERROR_ACTION_REPORT) {
+                if (action == BDRV_ACTION_REPORT) {
                    break;
                } else {
                    start--;
@@ -481,82 +334,32 @@ static void coroutine_fn backup_run(void *opaque)
    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&job->flush_rwlock);
    qemu_co_rwlock_unlock(&job->flush_rwlock);
-    g_free(job->done_bitmap);

-    if (target->blk) {
-        blk_iostatus_disable(target->blk);
-    }
-    bdrv_op_unblock_all(target, job->common.blocker);
+    hbitmap_free(job->bitmap);

-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&job->common, backup_complete, data);
+    bdrv_iostatus_disable(target);
+    bdrv_unref(target);
+
+    block_job_completed(&job->common, ret);
 }

 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode sync_mode,
-                  BdrvDirtyBitmap *sync_bitmap,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  BlockCompletionFunc *cb, void *opaque,
-                  BlockJobTxn *txn, Error **errp)
+                  BlockDriverCompletionFunc *cb, void *opaque,
+                  Error **errp)
 {
    int64_t len;
-    BlockDriverInfo bdi;
-    int ret;

    assert(bs);
    assert(target);
    assert(cb);

-    if (bs == target) {
-        error_setg(errp, "Source and target cannot be the same");
-        return;
-    }
-
    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
-    if (!bdrv_is_inserted(bs)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(bs));
-        return;
-    }
-
-    if (!bdrv_is_inserted(target)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(target));
-        return;
-    }
-
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return;
-    }
-
-    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
-        return;
-    }
-
-    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        if (!sync_bitmap) {
-            error_setg(errp, "must provide a valid bitmap name for "
-                             "\"incremental\" sync mode");
-            return;
-        }
-
-        /* Create a new bitmap, and freeze/disable this one. */
-        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
-            return;
-        }
-    } else if (sync_bitmap) {
-        error_setg(errp,
-                   "a sync_bitmap was provided to backup_run, "
-                   "but received an incompatible sync_mode (%s)",
-                   MirrorSyncMode_lookup[sync_mode]);
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }

@@ -564,49 +367,20 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
    if (len < 0) {
        error_setg_errno(errp, -len, "unable to get length for '%s'",
                         bdrv_get_device_name(bs));
-        goto error;
+        return;
    }

    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
                                           cb, opaque, errp);
    if (!job) {
-        goto error;
+        return;
    }

    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->target = target;
    job->sync_mode = sync_mode;
-    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
-                       sync_bitmap : NULL;
-
-    /* If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(job->target, &bdi);
-    if (ret < 0 && !target->backing) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        goto error;
-    } else if (ret < 0 && target->backing) {
-        /* Not fatal; just trudge on ahead. */
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else {
-        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-    }
-
-    bdrv_op_block_all(target, job->common.blocker);
    job->common.len = len;
    job->common.co = qemu_coroutine_create(backup_run);
-    block_job_txn_add_job(txn, &job->common);
    qemu_coroutine_enter(job->common.co, job);
-    return;
-
- error:
-    if (sync_bitmap) {
-        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
-    }
 }
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -22,28 +22,22 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/config-file.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"
-#include "sysemu/qtest.h"

 typedef struct BDRVBlkdebugState {
    int state;
    int new_state;

-    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
+    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
 } BDRVBlkdebugState;

 typedef struct BlkdebugAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
 } BlkdebugAIOCB;
@@ -54,8 +48,11 @@ typedef struct BlkdebugSuspendedReq {
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;

+static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
+
 static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
+    .aiocb_size = sizeof(BlkdebugAIOCB),
+    .cancel     = blkdebug_aio_cancel,
 };

 enum {
@@ -65,7 +62,7 @@ enum {
 };

 typedef struct BlkdebugRule {
-    BlkdebugEvent event;
+    BlkDebugEvent event;
    int action;
    int state;
    union {
@@ -144,12 +141,59 @@ static QemuOptsList *config_groups[] = {
    NULL
 };

-static int get_event_by_name(const char *name, BlkdebugEvent *event)
+static const char *event_names[BLKDBG_EVENT_MAX] = {
+    [BLKDBG_L1_UPDATE]                      = "l1_update",
+    [BLKDBG_L1_GROW_ALLOC_TABLE]            = "l1_grow.alloc_table",
+    [BLKDBG_L1_GROW_WRITE_TABLE]            = "l1_grow.write_table",
+    [BLKDBG_L1_GROW_ACTIVATE_TABLE]         = "l1_grow.activate_table",
+
+    [BLKDBG_L2_LOAD]                        = "l2_load",
+    [BLKDBG_L2_UPDATE]                      = "l2_update",
+    [BLKDBG_L2_UPDATE_COMPRESSED]           = "l2_update_compressed",
+    [BLKDBG_L2_ALLOC_COW_READ]              = "l2_alloc.cow_read",
+    [BLKDBG_L2_ALLOC_WRITE]                 = "l2_alloc.write",
+
+    [BLKDBG_READ_AIO]                       = "read_aio",
+    [BLKDBG_READ_BACKING_AIO]               = "read_backing_aio",
+    [BLKDBG_READ_COMPRESSED]                = "read_compressed",
+
+    [BLKDBG_WRITE_AIO]                      = "write_aio",
+    [BLKDBG_WRITE_COMPRESSED]               = "write_compressed",
+
+    [BLKDBG_VMSTATE_LOAD]                   = "vmstate_load",
+    [BLKDBG_VMSTATE_SAVE]                   = "vmstate_save",
+
+    [BLKDBG_COW_READ]                       = "cow_read",
+    [BLKDBG_COW_WRITE]                      = "cow_write",
+
+    [BLKDBG_REFTABLE_LOAD]                  = "reftable_load",
+    [BLKDBG_REFTABLE_GROW]                  = "reftable_grow",
+    [BLKDBG_REFTABLE_UPDATE]                = "reftable_update",
+
+    [BLKDBG_REFBLOCK_LOAD]                  = "refblock_load",
+    [BLKDBG_REFBLOCK_UPDATE]                = "refblock_update",
+    [BLKDBG_REFBLOCK_UPDATE_PART]           = "refblock_update_part",
+    [BLKDBG_REFBLOCK_ALLOC]                 = "refblock_alloc",
+    [BLKDBG_REFBLOCK_ALLOC_HOOKUP]          = "refblock_alloc.hookup",
+    [BLKDBG_REFBLOCK_ALLOC_WRITE]           = "refblock_alloc.write",
+    [BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS]    = "refblock_alloc.write_blocks",
+    [BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE]     = "refblock_alloc.write_table",
+    [BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE]    = "refblock_alloc.switch_table",
+
+    [BLKDBG_CLUSTER_ALLOC]                  = "cluster_alloc",
+    [BLKDBG_CLUSTER_ALLOC_BYTES]            = "cluster_alloc_bytes",
+    [BLKDBG_CLUSTER_FREE]                   = "cluster_free",
+
+    [BLKDBG_FLUSH_TO_OS]                    = "flush_to_os",
+    [BLKDBG_FLUSH_TO_DISK]                  = "flush_to_disk",
+};
+
+static int get_event_by_name(const char *name, BlkDebugEvent *event)
 {
    int i;

-    for (i = 0; i < BLKDBG__MAX; i++) {
-        if (!strcmp(BlkdebugEvent_lookup[i], name)) {
+    for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+        if (!strcmp(event_names[i], name)) {
            *event = i;
            return 0;
        }
@@ -163,21 +207,17 @@ struct add_rule_data {
    int action;
 };

-static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
+static int add_rule(QemuOpts *opts, void *opaque)
 {
    struct add_rule_data *d = opaque;
    BDRVBlkdebugState *s = d->s;
    const char* event_name;
-    BlkdebugEvent event;
+    BlkDebugEvent event;
    struct BlkdebugRule *rule;

    /* Find the right event for the rule */
    event_name = qemu_opt_get(opts, "event");
-    if (!event_name) {
-        error_setg(errp, "Missing event name for rule");
-        return -1;
-    } else if (get_event_by_name(event_name, &event) < 0) {
-        error_setg(errp, "Invalid event name \"%s\"", event_name);
+    if (!event_name || get_event_by_name(event_name, &event) < 0) {
        return -1;
    }

@@ -231,60 +271,34 @@ static void remove_rule(BlkdebugRule *rule)
    g_free(rule);
 }

-static int read_config(BDRVBlkdebugState *s, const char *filename,
-                       QDict *options, Error **errp)
+static int read_config(BDRVBlkdebugState *s, const char *filename)
 {
-    FILE *f = NULL;
+    FILE *f;
    int ret;
    struct add_rule_data d;
-    Error *local_err = NULL;

-    if (filename) {
-        f = fopen(filename, "r");
-        if (f == NULL) {
-            error_setg_errno(errp, errno, "Could not read blkdebug config file");
-            return -errno;
-        }
-
-        ret = qemu_config_parse(f, config_groups, filename);
-        if (ret < 0) {
-            error_setg(errp, "Could not parse blkdebug config file");
-            ret = -EINVAL;
-            goto fail;
-        }
+    f = fopen(filename, "r");
+    if (f == NULL) {
+        return -errno;
    }

-    qemu_config_parse_qdict(options, config_groups, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
+    ret = qemu_config_parse(f, config_groups, filename);
+    if (ret < 0) {
        goto fail;
    }

    d.s = s;
    d.action = ACTION_INJECT_ERROR;
-    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
+    qemu_opts_foreach(&inject_error_opts, add_rule, &d, 0);

    d.action = ACTION_SET_STATE;
-    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
+    qemu_opts_foreach(&set_state_opts, add_rule, &d, 0);

    ret = 0;
 fail:
    qemu_opts_reset(&inject_error_opts);
    qemu_opts_reset(&set_state_opts);
-    if (f) {
-        fclose(f);
-    }
+    fclose(f);
    return ret;
 }

@@ -296,9 +310,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,

    /* Parse the blkdebug: prefix */
    if (!strstart(filename, "blkdebug:", &filename)) {
-        /* There was no prefix; therefore, all options have to be already
-           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        error_setg(errp, "File name string must start with 'blkdebug:'");
        return;
    }

@@ -334,11 +346,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "[internal use only, will be removed]",
        },
-        {
-            .name = "align",
-            .type = QEMU_OPT_SIZE,
-            .help = "Required alignment in bytes",
-        },
        { /* end of list */ }
    },
 };
@@ -349,53 +356,46 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkdebugState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *config;
-    uint64_t align;
+    const char *filename, *config;
    int ret;

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
+    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
-        goto out;
+        goto fail;
    }

-    /* Read rules from config file or command line options */
+    /* Read rules from config file */
    config = qemu_opt_get(opts, "config");
-    ret = read_config(s, config, options, errp);
-    if (ret) {
-        goto out;
+    if (config) {
+        ret = read_config(s, config);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not read blkdebug config file");
+            goto fail;
+        }
    }

    /* Set initial state */
    s->state = 1;

-    /* Open the image file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
-                               bs, &child_file, false, &local_err);
-    if (local_err) {
+    /* Open the backing file */
+    filename = qemu_opt_get(opts, "x-image");
+    if (filename == NULL) {
+        error_setg(errp, "Could not retrieve image file name");
        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto out;
+        goto fail;
    }

-    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
-    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
-        bs->request_alignment = align;
-    } else {
-        error_setg(errp, "Invalid alignment");
-        ret = -EINVAL;
-        goto fail_unref;
+    ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto fail;
    }

    ret = 0;
-    goto out;
-
-fail_unref:
-    bdrv_unref_child(bs, bs->file);
-out:
+fail:
    qemu_opts_del(opts);
    return ret;
 }
@@ -405,40 +405,44 @@ static void error_callback_bh(void *opaque)
    struct BlkdebugAIOCB *acb = opaque;
    qemu_bh_delete(acb->bh);
    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
 }

-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
+    qemu_aio_release(acb);
+}
+
+static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
+    BlockDriverCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
    int error = rule->options.inject.error;
    struct BlkdebugAIOCB *acb;
    QEMUBH *bh;
-    bool immediately = rule->options.inject.immediately;

    if (rule->options.inject.once) {
-        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
-        remove_rule(rule);
+        QSIMPLEQ_INIT(&s->active_rules);
    }

-    if (immediately) {
+    if (rule->options.inject.immediately) {
        return NULL;
    }

    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
    acb->ret = -error;

-    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
+    bh = qemu_bh_new(error_callback_bh, acb);
    acb->bh = bh;
    qemu_bh_schedule(bh);

    return &acb->common;
 }

-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -455,13 +459,12 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
-                          cb, opaque);
+    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }

-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkdebugState *s = bs->opaque;
    BlkdebugRule *rule = NULL;
@@ -478,27 +481,7 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
-                           cb, opaque);
-}
-
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
-    }
-
-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }


@@ -508,7 +491,7 @@ static void blkdebug_close(BlockDriverState *bs)
    BlkdebugRule *rule, *next;
    int i;

-    for (i = 0; i < BLKDBG__MAX; i++) {
+    for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
            remove_rule(rule);
        }
@@ -528,13 +511,9 @@ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
    remove_rule(rule);
    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);

-    if (!qtest_enabled()) {
-        printf("blkdebug: Suspended request '%s'\n", r.tag);
-    }
+    printf("blkdebug: Suspended request '%s'\n", r.tag);
    qemu_coroutine_yield();
-    if (!qtest_enabled()) {
-        printf("blkdebug: Resuming request '%s'\n", r.tag);
-    }
+    printf("blkdebug: Resuming request '%s'\n", r.tag);

    QLIST_REMOVE(&r, next);
    g_free(r.tag);
@@ -571,13 +550,13 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
    return injected;
 }

-static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
+static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
    BDRVBlkdebugState *s = bs->opaque;
    struct BlkdebugRule *rule, *next;
    bool injected;

-    assert((int)event >= 0 && event < BLKDBG__MAX);
+    assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);

    injected = false;
    s->new_state = s->state;
@@ -592,7 +571,7 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 {
    BDRVBlkdebugState *s = bs->opaque;
    struct BlkdebugRule *rule;
-    BlkdebugEvent blkdebug_event;
+    BlkDebugEvent blkdebug_event;

    if (get_event_by_name(event, &blkdebug_event) < 0) {
        return -ENOENT;
@@ -615,9 +594,9 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
 {
    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *next;
+    BlkdebugSuspendedReq *r;

-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
        if (!strcmp(r->tag, tag)) {
            qemu_coroutine_enter(r->co, NULL);
            return 0;
@@ -626,31 +605,6 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
    return -ENOENT;
 }

-static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
-                                            const char *tag)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *r_next;
-    BlkdebugRule *rule, *next;
-    int i, ret = -ENOENT;
-
-    for (i = 0; i < BLKDBG__MAX; i++) {
-        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
-            if (rule->action == ACTION_SUSPEND &&
-                !strcmp(rule->options.suspend.tag, tag)) {
-                remove_rule(rule);
-                ret = 0;
-            }
-        }
-    }
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
-        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
-            ret = 0;
-        }
-    }
-    return ret;
-}

 static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
@@ -667,62 +621,7 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)

 static int64_t blkdebug_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
-}
-
-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
-{
-    return bdrv_truncate(bs->file->bs, offset);
-}
-
-static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    QDict *opts;
-    const QDictEntry *e;
-    bool force_json = false;
-
-    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
-        if (strcmp(qdict_entry_key(e), "config") &&
-            strcmp(qdict_entry_key(e), "x-image"))
-        {
-            force_json = true;
-            break;
-        }
-    }
-
-    if (force_json && !bs->file->bs->full_open_options) {
-        /* The config file cannot be recreated, so creating a plain filename
-         * is impossible */
-        return;
-    }
-
-    if (!force_json && bs->file->bs->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s",
-                 qdict_get_try_str(options, "config") ?: "",
-                 bs->file->bs->exact_filename);
-    }
-
-    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
-
-    QINCREF(bs->file->bs->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
-
-    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
-        if (strcmp(qdict_entry_key(e), "x-image")) {
-            qobject_incref(qdict_entry_value(e));
-            qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
-        }
-    }
-
-    bs->full_open_options = opts;
-}
-
-static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
-                                   BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
+    return bdrv_getlength(bs->file);
 }

 static BlockDriver bdrv_blkdebug = {
@@ -733,19 +632,13 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_parse_filename    = blkdebug_parse_filename,
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
-    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
    .bdrv_getlength         = blkdebug_getlength,
-    .bdrv_truncate          = blkdebug_truncate,
-    .bdrv_refresh_filename  = blkdebug_refresh_filename,

    .bdrv_aio_readv         = blkdebug_aio_readv,
    .bdrv_aio_writev        = blkdebug_aio_writev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
-    .bdrv_debug_remove_breakpoint
-                                = blkdebug_debug_remove_breakpoint,
    .bdrv_debug_resume          = blkdebug_debug_resume,
    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
 };
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -7,19 +7,17 @@
 * See the COPYING file in the top-level directory.
 */

-#include "qemu/osdep.h"
+#include <stdarg.h>
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
 #include "block/block_int.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"

 typedef struct {
-    BdrvChild *test_file;
+    BlockDriverState *test_file;
 } BDRVBlkverifyState;

 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
 struct BlkverifyAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;

    /* Request metadata */
@@ -29,6 +27,7 @@ struct BlkverifyAIOCB {

    int ret;                    /* first completed request's result */
    unsigned int done;          /* completion counter */
+    bool *finished;             /* completion signal for cancel */

    QEMUIOVector *qiov;         /* user I/O vector */
    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
@@ -37,8 +36,21 @@ struct BlkverifyAIOCB {
    void (*verify)(BlkverifyAIOCB *acb);
 };

+static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+    bool finished = false;
+
+    /* Wait until request completes, invokes its callback, and frees itself */
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
 static const AIOCBInfo blkverify_aiocb_info = {
    .aiocb_size         = sizeof(BlkverifyAIOCB),
+    .cancel             = blkverify_aio_cancel,
 };

 static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
@@ -66,9 +78,7 @@ static void blkverify_parse_filename(const char *filename, QDict *options,

    /* Parse the blkverify: prefix */
    if (!strstart(filename, "blkverify:", &filename)) {
-        /* There was no prefix; therefore, all options have to be already
-           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        error_setg(errp, "File name string must start with 'blkverify:'");
        return;
    }

@@ -112,41 +122,50 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVBlkverifyState *s = bs->opaque;
    QemuOpts *opts;
    Error *local_err = NULL;
+    const char *filename, *raw;
    int ret;

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
+    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

-    /* Open the raw file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
-                               bs, &child_file, false, &local_err);
-    if (local_err) {
+    /* Parse the raw image filename */
+    raw = qemu_opt_get(opts, "x-raw");
+    if (raw == NULL) {
+        error_setg(errp, "Could not retrieve raw image filename");
        ret = -EINVAL;
+        goto fail;
+    }
+
+    ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err);
+    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    /* Open the test file */
-    s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
-                                   "test", bs, &child_format, false,
-                                   &local_err);
-    if (local_err) {
+    filename = qemu_opt_get(opts, "x-image");
+    if (filename == NULL) {
+        error_setg(errp, "Could not retrieve test image filename");
        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->test_file = bdrv_new("");
+    ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err);
+    if (ret < 0) {
        error_propagate(errp, local_err);
+        bdrv_unref(s->test_file);
+        s->test_file = NULL;
        goto fail;
    }

    ret = 0;
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
-    qemu_opts_del(opts);
    return ret;
 }

@@ -154,7 +173,7 @@ static void blkverify_close(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    bdrv_unref_child(bs, s->test_file);
+    bdrv_unref(s->test_file);
    s->test_file = NULL;
 }

@@ -162,13 +181,117 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    return bdrv_getlength(s->test_file->bs);
+    return bdrv_getlength(s->test_file);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a:          I/O vector
+ * @b:          I/O vector
+ * @ret:        Offset to first mismatching byte or -1 if match
+ */
+static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int i;
+    ssize_t offset = 0;
+
+    assert(a->niov == b->niov);
+    for (i = 0; i < a->niov; i++) {
+        size_t len = 0;
+        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
+        while (len < a->iov[i].iov_len && *p++ == *q++) {
+            len++;
+        }
+
+        offset += len;
+
+        if (len != a->iov[i].iov_len) {
+            return offset;
+        }
+    }
+    return -1;
+}
+
+typedef struct {
+    int src_index;
+    struct iovec *src_iov;
+    void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    /* Don't overflow */
+    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+        return -1;
+    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved.  This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
+                                  void *buf)
+{
+    IOVectorSortElem sortelems[src->niov];
+    void *last_end;
+    int i;
+
+    /* Sort by source iovecs by base address */
+    for (i = 0; i < src->niov; i++) {
+        sortelems[i].src_index = i;
+        sortelems[i].src_iov = &src->iov[i];
+    }
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+    /* Allocate buffer space taking into account overlapping iovecs */
+    last_end = NULL;
+    for (i = 0; i < src->niov; i++) {
+        struct iovec *cur = sortelems[i].src_iov;
+        ptrdiff_t rewind = 0;
+
+        /* Detect overlap */
+        if (last_end && last_end > cur->iov_base) {
+            rewind = last_end - cur->iov_base;
+        }
+
+        sortelems[i].dest_base = buf - rewind;
+        buf += cur->iov_len - MIN(rewind, cur->iov_len);
+        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+    }
+
+    /* Sort by source iovec index and build destination iovec */
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+    for (i = 0; i < src->niov; i++) {
+        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+    }
 }

 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         int64_t sector_num, QEMUIOVector *qiov,
                                         int nb_sectors,
-                                         BlockCompletionFunc *cb,
+                                         BlockDriverCompletionFunc *cb,
                                         void *opaque)
 {
    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
@@ -182,6 +305,7 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
    acb->qiov = qiov;
    acb->buf = NULL;
    acb->verify = NULL;
+    acb->finished = NULL;
    return acb;
 }

@@ -195,7 +319,10 @@ static void blkverify_aio_bh(void *opaque)
        qemu_vfree(acb->buf);
    }
    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    if (acb->finished) {
+        *acb->finished = true;
+    }
+    qemu_aio_release(acb);
 }

 static void blkverify_aio_cb(void *opaque, int ret)
@@ -216,8 +343,7 @@ static void blkverify_aio_cb(void *opaque, int ret)
            acb->verify(acb);
        }

-        acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                             blkverify_aio_bh, acb);
+        acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
        qemu_bh_schedule(acb->bh);
        break;
    }
@@ -225,140 +351,73 @@ static void blkverify_aio_cb(void *opaque, int ret)

 static void blkverify_verify_readv(BlkverifyAIOCB *acb)
 {
-    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
+    ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
    if (offset != -1) {
        blkverify_err(acb, "contents mismatch in sector %" PRId64,
                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
    }
 }

-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
                                            nb_sectors, cb, opaque);

    acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
+    acb->buf = qemu_blockalign(bs->file, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+    blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

-    bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
+    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    return &acb->common;
 }

-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
                                            nb_sectors, cb, opaque);

-    bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
    return &acb->common;
 }

-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    BDRVBlkverifyState *s = bs->opaque;

    /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
-}
-
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                  BlockDriverState *candidate)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
-    if (perm) {
-        return true;
-    }
-
-    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
-}
-
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_detach_aio_context(s->test_file->bs);
-}
-
-static void blkverify_attach_aio_context(BlockDriverState *bs,
-                                         AioContext *new_context)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_attach_aio_context(s->test_file->bs, new_context);
-}
-
-static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    /* bs->file->bs has already been refreshed */
-    bdrv_refresh_filename(s->test_file->bs);
-
-    if (bs->file->bs->full_open_options
-        && s->test_file->bs->full_open_options)
-    {
-        QDict *opts = qdict_new();
-        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
-
-        QINCREF(bs->file->bs->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
-        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put_obj(opts, "test",
-                      QOBJECT(s->test_file->bs->full_open_options));
-
-        bs->full_open_options = opts;
-    }
-
-    if (bs->file->bs->exact_filename[0]
-        && s->test_file->bs->exact_filename[0])
-    {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkverify:%s:%s",
-                 bs->file->bs->exact_filename,
-                 s->test_file->bs->exact_filename);
-    }
+    return bdrv_aio_flush(s->test_file, cb, opaque);
 }

 static BlockDriver bdrv_blkverify = {
-    .format_name                      = "blkverify",
-    .protocol_name                    = "blkverify",
-    .instance_size                    = sizeof(BDRVBlkverifyState),
+    .format_name            = "blkverify",
+    .protocol_name          = "blkverify",
+    .instance_size          = sizeof(BDRVBlkverifyState),

-    .bdrv_parse_filename              = blkverify_parse_filename,
-    .bdrv_file_open                   = blkverify_open,
-    .bdrv_close                       = blkverify_close,
-    .bdrv_getlength                   = blkverify_getlength,
-    .bdrv_refresh_filename            = blkverify_refresh_filename,
+    .bdrv_parse_filename    = blkverify_parse_filename,
+    .bdrv_file_open         = blkverify_open,
+    .bdrv_close             = blkverify_close,
+    .bdrv_getlength         = blkverify_getlength,

-    .bdrv_aio_readv                   = blkverify_aio_readv,
-    .bdrv_aio_writev                  = blkverify_aio_writev,
-    .bdrv_aio_flush                   = blkverify_aio_flush,
+    .bdrv_aio_readv         = blkverify_aio_readv,
+    .bdrv_aio_writev        = blkverify_aio_writev,
+    .bdrv_aio_flush         = blkverify_aio_flush,

-    .bdrv_attach_aio_context          = blkverify_attach_aio_context,
-    .bdrv_detach_aio_context          = blkverify_detach_aio_context,
-
-    .is_filter                        = true,
-    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
+    .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden,
 };

 static void bdrv_blkverify_init(void)
--- a/block/block-backend.c
+++ b/block/block-backend.c
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -22,7 +22,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
@@ -104,7 +103,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,

    bs->read_only = 1; // no write support yet

-    ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
+    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
        return ret;
    }
@@ -114,8 +113,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        strcmp(bochs.subtype, GROWING_TYPE) ||
 	((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
 	(le32_to_cpu(bochs.version) != HEADER_V1))) {
-        error_setg(errp, "Image not in Bochs format");
-        return -EINVAL;
+        return -EMEDIUMTYPE;
    }

    if (le32_to_cpu(bochs.version) == HEADER_V1) {
@@ -132,13 +130,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -EFBIG;
    }

-    s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size);
-    if (s->catalog_size && s->catalog_bitmap == NULL) {
-        error_setg(errp, "Could not allocate memory for catalog");
-        return -ENOMEM;
-    }
+    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

-    ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
+    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
                     s->catalog_size * 4);
    if (ret < 0) {
        goto fail;
@@ -153,26 +147,16 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
    s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;

    s->extent_size = le32_to_cpu(bochs.extent);
-    if (s->extent_size < BDRV_SECTOR_SIZE) {
-        /* bximage actually never creates extents smaller than 4k */
-        error_setg(errp, "Extent size must be at least 512");
-        ret = -EINVAL;
-        goto fail;
-    } else if (!is_power_of_2(s->extent_size)) {
-        error_setg(errp, "Extent size %" PRIu32 " is not a power of two",
-                   s->extent_size);
-        ret = -EINVAL;
-        goto fail;
+    if (s->extent_size == 0) {
+        error_setg(errp, "Extent size may not be zero");
+        return -EINVAL;
    } else if (s->extent_size > 0x800000) {
        error_setg(errp, "Extent size %" PRIu32 " is too large",
                   s->extent_size);
-        ret = -EINVAL;
-        goto fail;
+        return -EINVAL;
    }

-    if (s->catalog_size < DIV_ROUND_UP(bs->total_sectors,
-                                       s->extent_size / BDRV_SECTOR_SIZE))
-    {
+    if (s->catalog_size < bs->total_sectors / s->extent_size) {
        error_setg(errp, "Catalog size is too small for this disk size");
        ret = -EINVAL;
        goto fail;
@@ -192,14 +176,13 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
    uint64_t offset = sector_num * 512;
    uint64_t extent_index, extent_offset, bitmap_offset;
    char bitmap_entry;
-    int ret;

    // seek to sector
    extent_index = offset / s->extent_size;
    extent_offset = (offset % s->extent_size) / 512;

    if (s->catalog_bitmap[extent_index] == 0xffffffff) {
-	return 0; /* not allocated */
+	return -1; /* not allocated */
    }

    bitmap_offset = s->data_offset +
@@ -207,14 +190,13 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
        (s->extent_blocks + s->bitmap_blocks));

    /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
-                     &bitmap_entry, 1);
-    if (ret < 0) {
-        return ret;
+    if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+                   &bitmap_entry, 1) != 1) {
+        return -1;
    }

    if (!((bitmap_entry >> (extent_offset % 8)) & 1)) {
-	return 0; /* not allocated */
+	return -1; /* not allocated */
    }

    return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
@@ -227,16 +209,13 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,

    while (nb_sectors > 0) {
        int64_t block_offset = seek_to_sector(bs, sector_num);
-        if (block_offset < 0) {
-            return block_offset;
-        } else if (block_offset > 0) {
-            ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
-            if (ret < 0) {
-                return ret;
+        if (block_offset >= 0) {
+            ret = bdrv_pread(bs->file, block_offset, buf, 512);
+            if (ret != 512) {
+                return -1;
            }
-        } else {
+        } else
            memset(buf, 0, 512);
-        }
        nb_sectors--;
        sector_num++;
        buf += 512;
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
@@ -67,13 +66,13 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    bs->read_only = 1;

    /* read header */
-    ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
+    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
    if (ret < 0) {
        return ret;
    }
    s->block_size = be32_to_cpu(s->block_size);
    if (s->block_size % 512) {
-        error_setg(errp, "block_size %" PRIu32 " must be a multiple of 512",
+        error_setg(errp, "block_size %u must be a multiple of 512",
                   s->block_size);
        return -EINVAL;
    }
@@ -87,13 +86,13 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
     * need a buffer this big.
     */
    if (s->block_size > MAX_BLOCK_SIZE) {
-        error_setg(errp, "block_size %" PRIu32 " must be %u MB or less",
+        error_setg(errp, "block_size %u must be %u MB or less",
                   s->block_size,
                   MAX_BLOCK_SIZE / (1024 * 1024));
        return -EINVAL;
    }

-    ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
+    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    if (ret < 0) {
        return ret;
    }
@@ -102,7 +101,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    /* read offsets */
    if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
        /* Prevent integer overflow */
-        error_setg(errp, "n_blocks %" PRIu32 " must be %zu or less",
+        error_setg(errp, "n_blocks %u must be %zu or less",
                   s->n_blocks,
                   (UINT32_MAX - 1) / sizeof(uint64_t));
        return -EINVAL;
@@ -117,14 +116,9 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
                   "try increasing block size");
        return -EINVAL;
    }
+    s->offsets = g_malloc(offsets_size);

-    s->offsets = g_try_malloc(offsets_size);
-    if (s->offsets == NULL) {
-        error_setg(errp, "Could not allocate offsets table");
-        return -ENOMEM;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
+    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
    if (ret < 0) {
        goto fail;
    }
@@ -139,7 +133,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,

        if (s->offsets[i] < s->offsets[i - 1]) {
            error_setg(errp, "offsets not monotonically increasing at "
-                       "index %" PRIu32 ", image file is corrupt", i);
+                       "index %u, image file is corrupt", i);
            ret = -EINVAL;
            goto fail;
        }
@@ -152,8 +146,8 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         * ridiculous s->compressed_block allocation.
         */
        if (size > 2 * MAX_BLOCK_SIZE) {
-            error_setg(errp, "invalid compressed block size at index %" PRIu32
-                       ", image file is corrupt", i);
+            error_setg(errp, "invalid compressed block size at index %u, "
+                       "image file is corrupt", i);
            ret = -EINVAL;
            goto fail;
        }
@@ -164,20 +158,8 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* initialize zlib engine */
-    s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
-    if (s->compressed_block == NULL) {
-        error_setg(errp, "Could not allocate compressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    s->uncompressed_block = g_try_malloc(s->block_size);
-    if (s->uncompressed_block == NULL) {
-        error_setg(errp, "Could not allocate uncompressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    s->compressed_block = g_malloc(max_compressed_block_size + 1);
+    s->uncompressed_block = g_malloc(s->block_size);
    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
@@ -204,8 +186,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
        int ret;
        uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];

-        ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
-                         s->compressed_block, bytes);
+        ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
+                         bytes);
        if (ret != bytes) {
            return -1;
        }
--- a/block/commit.c
+++ b/block/commit.c
@@ -12,13 +12,10 @@
 *
 */

-#include "qemu/osdep.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
-#include "sysemu/block-backend.h"

 enum {
    /*
@@ -40,7 +37,6 @@ typedef struct CommitBlockJob {
    BlockdevOnError on_error;
    int base_flags;
    int orig_overlay_flags;
-    char *backing_file_str;
 } CommitBlockJob;

 static int coroutine_fn commit_populate(BlockDriverState *bs,
@@ -63,50 +59,17 @@ static int coroutine_fn commit_populate(BlockDriverState *bs,
    return 0;
 }

-typedef struct {
-    int ret;
-} CommitCompleteData;
-
-static void commit_complete(BlockJob *job, void *opaque)
+static void coroutine_fn commit_run(void *opaque)
 {
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
-    CommitCompleteData *data = opaque;
+    CommitBlockJob *s = opaque;
    BlockDriverState *active = s->active;
    BlockDriverState *top = s->top;
    BlockDriverState *base = s->base;
    BlockDriverState *overlay_bs;
-    int ret = data->ret;
-
-    if (!block_job_is_cancelled(&s->common) && ret == 0) {
-        /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
-    }
-
-    /* restore base open flags here if appropriate (e.g., change the base back
-     * to r/o). These reopens do not need to be atomic, since we won't abort
-     * even on failure here */
-    if (s->base_flags != bdrv_get_flags(base)) {
-        bdrv_reopen(base, s->base_flags, NULL);
-    }
-    overlay_bs = bdrv_find_overlay(active, top);
-    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
-        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
-    }
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, ret);
-    g_free(data);
-}
-
-static void coroutine_fn commit_run(void *opaque)
-{
-    CommitBlockJob *s = opaque;
-    CommitCompleteData *data;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
    int64_t sector_num, end;
    int ret = 0;
    int n = 0;
-    void *buf = NULL;
+    void *buf;
    int bytes_written = 0;
    int64_t base_len;

@@ -114,18 +77,18 @@ static void coroutine_fn commit_run(void *opaque)


    if (s->common.len < 0) {
-        goto out;
+        goto exit_restore_reopen;
    }

    ret = base_len = bdrv_getlength(base);
    if (base_len < 0) {
-        goto out;
+        goto exit_restore_reopen;
    }

    if (base_len < s->common.len) {
        ret = bdrv_truncate(base, s->common.len);
        if (ret) {
-            goto out;
+            goto exit_restore_reopen;
        }
    }

@@ -164,7 +127,7 @@ wait:
            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
-                goto out;
+                goto exit_free_buf;
            } else {
                n = 0;
                continue;
@@ -176,12 +139,27 @@ wait:

    ret = 0;

-out:
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
    qemu_vfree(buf);

-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, commit_complete, data);
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    overlay_bs = bdrv_find_overlay(active, top);
+    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_completed(&s->common, ret);
 }

 static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -189,7 +167,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
    CommitBlockJob *s = container_of(job, CommitBlockJob, common);

    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
@@ -203,8 +181,8 @@ static const BlockJobDriver commit_job_driver = {

 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockCompletionFunc *cb,
-                  void *opaque, const char *backing_file_str, Error **errp)
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
@@ -215,12 +193,18 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,

    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, "Invalid parameter combination");
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
        return;
    }

-    assert(top != bs);
    if (top == base) {
        error_setg(errp, "Invalid files for merge: top and base are the same");
        return;
@@ -237,14 +221,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
    orig_overlay_flags = bdrv_get_flags(overlay_bs);

    /* convert base & overlay_bs to r/w, if necessary */
-    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
-                                         orig_overlay_flags | BDRV_O_RDWR);
-    }
    if (!(orig_base_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
                                         orig_base_flags | BDRV_O_RDWR);
    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
    if (reopen_queue) {
        bdrv_reopen_multiple(reopen_queue, &local_err);
        if (local_err != NULL) {
@@ -266,8 +250,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
    s->base_flags          = orig_base_flags;
    s->orig_overlay_flags  = orig_overlay_flags;

-    s->backing_file_str = g_strdup(backing_file_str);
-
    s->on_error = on_error;
    s->common.co = qemu_coroutine_create(commit_run);

--- a/block/cow.c
+++ b/block/cow.c
@@ -0,0 +1,403 @@
+/*
+ * Block driver for the COW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+
+/**************************************************************/
+/* COW block driver using file system holes */
+
+/* user mode linux compatible COW file */
+#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
+#define COW_VERSION 2
+
+/*
+ * Header read from offset 0 of the image file.  The code in this file
+ * converts the multi-byte fields it uses with be32_to_cpu()/be64_to_cpu().
+ */
+struct cow_header_v2 {
+    uint32_t magic;             /* must equal COW_MAGIC */
+    uint32_t version;           /* must equal COW_VERSION */
+    char backing_file[1024];    /* copied into bs->backing_file on open */
+    int32_t mtime;
+    uint64_t size;              /* image size in bytes */
+    uint32_t sectorsize;
+};
+
+/* Per-image driver state, reached through bs->opaque */
+typedef struct BDRVCowState {
+    /* Taken by cow_co_read()/cow_co_write() around the actual I/O */
+    CoMutex lock;
+    /* Byte offset in bs->file of sector 0's data: header plus allocation
+     * bitmap, rounded up to a multiple of 512 (set in cow_open()) */
+    int64_t cow_sectors_offset;
+} BDRVCowState;
+
+/*
+ * Format probe.
+ *
+ * Returns 100 when @buf holds at least a complete header whose big-endian
+ * magic and version fields match COW_MAGIC and COW_VERSION, 0 otherwise.
+ * @filename is not looked at.
+ */
+static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct cow_header_v2 *hdr = (const void *)buf;
+
+    /* Too short to contain a full header */
+    if (buf_size < sizeof(struct cow_header_v2)) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->magic) != COW_MAGIC) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->version) != COW_VERSION) {
+        return 0;
+    }
+    return 100;
+}
+
+/*
+ * Open a COW image: validate the header at offset 0 of bs->file and
+ * derive the image geometry from it.
+ *
+ * On success bs->total_sectors and bs->backing_file are filled in, the
+ * offset of the sector data is stored in the driver state and the state
+ * lock is initialised.
+ *
+ * @options, @flags and @errp are not used by this function; an unsupported
+ * version is reported through qerror_report().
+ *
+ * Returns 0 on success, the negative value from bdrv_pread() on a read
+ * error, -EMEDIUMTYPE if the magic does not match, or -ENOTSUP if the
+ * version is not COW_VERSION.
+ */
+static int cow_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    BDRVCowState *s = bs->opaque;
+    struct cow_header_v2 cow_header;
+    /* 64 bit wide: total_sectors / 8 does not fit an int for huge images */
+    int64_t bitmap_size;
+    int64_t size;
+    int ret;
+
+    /* see if it is a cow image */
+    ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    }
+
+    if (be32_to_cpu(cow_header.version) != COW_VERSION) {
+        char version[64];
+        /* The field is stored big-endian; convert it before printing so
+         * the reported number is the real version on every host */
+        snprintf(version, sizeof(version), "COW version %u",
+                 (unsigned)be32_to_cpu(cow_header.version));
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "cow", version);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    /* cow image found */
+    size = be64_to_cpu(cow_header.size);
+    bs->total_sectors = size / 512;
+
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+            cow_header.backing_file);
+
+    /* One bitmap bit per sector, stored right behind the header */
+    bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
+    /* Sector data starts at the next multiple of 512 */
+    s->cow_sectors_offset = (bitmap_size + 511) & ~511;
+    qemu_co_mutex_init(&s->lock);
+    return 0;
+ fail:
+    return ret;
+}
+
+/*
+ * Mark one sector as allocated in the on-disk bitmap.
+ *
+ * The byte holding bit @bitnum is read; if the bit is already set nothing
+ * is written.  Otherwise, when *@first is true, bs->file is flushed once
+ * and *@first is cleared, then the updated byte is written back.
+ *
+ * Returns 0 on success or the negative value of the failing
+ * bdrv_pread()/bdrv_flush()/bdrv_pwrite() call.
+ *
+ * XXX(hch): right now these functions are extremely inefficient.
+ * We should just read the whole bitmap we'll need in one go instead.
+ */
+static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first)
+{
+    const uint64_t byte_pos = sizeof(struct cow_header_v2) + bitnum / 8;
+    const uint8_t mask = 1 << (bitnum % 8);
+    uint8_t cur;
+    int rc;
+
+    rc = bdrv_pread(bs->file, byte_pos, &cur, sizeof(cur));
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Already marked, no write needed */
+    if (cur & mask) {
+        return 0;
+    }
+
+    /* Flush once before the first bitmap modification of a request */
+    if (*first) {
+        rc = bdrv_flush(bs->file);
+        if (rc < 0) {
+            return rc;
+        }
+        *first = false;
+    }
+
+    cur |= mask;
+    rc = bdrv_pwrite(bs->file, byte_pos, &cur, sizeof(cur));
+    return rc < 0 ? rc : 0;
+}
+
+#define BITS_PER_BITMAP_SECTOR (512 * 8)
+
+/*
+ * Return 1 if bit @bitnum is set in @bitmap, 0 otherwise.  Bits are
+ * numbered from the least significant bit of byte 0.
+ *
+ * Cannot use bitmap.c on big-endian machines.
+ */
+static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
+{
+    const uint8_t byte = bitmap[bitnum / 8];
+    const int mask = 1 << (bitnum & 7);
+
+    return (byte & mask) ? 1 : 0;
+}
+
+/*
+ * Count how many consecutive bits, beginning at bit @start of @bitmap,
+ * have the state @value.  The scan stops after @nb_sectors bits or at
+ * the end of one bitmap sector (BITS_PER_BITMAP_SECTOR), whichever
+ * comes first.
+ */
+static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
+{
+    const int full_byte = value ? 0xFF : 0;
+    const int limit = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
+    int pos;
+
+    for (pos = start; pos < limit; ) {
+        if (!(pos & 7) && bitmap[pos / 8] == full_byte) {
+            /* Byte-aligned and the whole byte matches: take all 8 bits */
+            pos += 8;
+        } else if (cow_test_bit(pos, bitmap) == value) {
+            pos++;
+        } else {
+            break;
+        }
+    }
+
+    /* The 8-bit step may have run past the limit */
+    return MIN(pos, limit) - start;
+}
+
+/*
+ * Tell whether sector @sector_num has been changed (i.e. its current
+ * version lives in the COW file).
+ *
+ * Returns 1 if it has, 0 if it has not, or the negative value from
+ * bdrv_pread() on failure.  On success *@num_same is set to the number of
+ * consecutive sectors, starting at @sector_num, that share this state; at
+ * most @nb_sectors and never past the bitmap sector that was read.
+ */
+static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *num_same)
+{
+    uint8_t sector_buf[BDRV_SECTOR_SIZE];
+    /* Bit index counted from the start of the file, header included */
+    int64_t bit = sector_num + sizeof(struct cow_header_v2) * 8;
+    /* Start of the sector-sized chunk of the file holding that bit */
+    uint64_t read_pos = (bit / 8) & -BDRV_SECTOR_SIZE;
+    int state;
+    int rc;
+
+    rc = bdrv_pread(bs->file, read_pos, &sector_buf, sizeof(sector_buf));
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Reduce to a bit index inside the chunk just read */
+    bit &= BITS_PER_BITMAP_SECTOR - 1;
+    state = cow_test_bit(bit, sector_buf);
+    *num_same = cow_find_streak(sector_buf, state, bit, nb_sectors);
+    return state;
+}
+
+/*
+ * Report the allocation status of @sector_num.
+ *
+ * Returns a negative value if the bitmap lookup fails.  Otherwise returns
+ * the byte offset of the sector's data in the image file combined with
+ * BDRV_BLOCK_OFFSET_VALID, plus BDRV_BLOCK_DATA when the sector is
+ * allocated.  *@num_same is filled in by cow_co_is_allocated().
+ */
+static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *num_same)
+{
+    BDRVCowState *s = bs->opaque;
+    int64_t data_pos;
+    int64_t status = BDRV_BLOCK_OFFSET_VALID;
+    int allocated;
+
+    allocated = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+    if (allocated < 0) {
+        return allocated;
+    }
+
+    data_pos = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
+    if (allocated) {
+        status |= BDRV_BLOCK_DATA;
+    }
+    return status | data_pos;
+}
+
+/*
+ * Mark @nb_sectors sectors starting at @sector_num as allocated, one bit
+ * at a time.  Stops at the first failure.
+ *
+ * Returns 0 on success or the non-zero value returned by cow_set_bit().
+ */
+static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
+        int nb_sectors)
+{
+    /* Shared across all bits so cow_set_bit() flushes at most once */
+    bool first = true;
+    int i;
+
+    for (i = 0; i < nb_sectors; i++) {
+        int rc = cow_set_bit(bs, sector_num + i, &first);
+
+        if (rc) {
+            return rc;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Read @nb_sectors sectors starting at @sector_num into @buf.
+ *
+ * The request is served in runs of sectors with the same allocation
+ * state: allocated runs come from the image file, unallocated runs come
+ * from bs->backing_hd if there is one and are zero-filled otherwise.
+ *
+ * Returns 0 on success or the negative value of the failing call.
+ */
+static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
+                                 uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *s = bs->opaque;
+
+    while (nb_sectors > 0) {
+        int run;
+        int rc = cow_co_is_allocated(bs, sector_num, nb_sectors, &run);
+
+        if (rc < 0) {
+            return rc;
+        }
+
+        if (rc) {
+            /* Data lives in the COW file itself */
+            rc = bdrv_pread(bs->file,
+                            s->cow_sectors_offset + sector_num * 512,
+                            buf, run * 512);
+        } else if (bs->backing_hd) {
+            /* read from the base image */
+            rc = bdrv_read(bs->backing_hd, sector_num, buf, run);
+        } else {
+            memset(buf, 0, run * 512);
+            rc = 0;
+        }
+        if (rc < 0) {
+            return rc;
+        }
+
+        buf += run * 512;
+        sector_num += run;
+        nb_sectors -= run;
+    }
+    return 0;
+}
+
+/*
+ * Coroutine entry point for reads: runs cow_read() with the driver
+ * state lock held and passes its result through.
+ */
+static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num,
+                                    uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *state = bs->opaque;
+    int rc;
+
+    qemu_co_mutex_lock(&state->lock);
+    rc = cow_read(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&state->lock);
+
+    return rc;
+}
+
+/*
+ * Write nb_sectors sectors from buf at sector_num.
+ *
+ * The data is written to bs->file first.  Only when that write did not
+ * fail are the sectors recorded through cow_update_bitmap(), whose result
+ * is then returned; a negative bdrv_pwrite() result is returned as is.
+ */
+static int cow_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *cow = bs->opaque;
+    int written;
+
+    written = bdrv_pwrite(bs->file,
+                          cow->cow_sectors_offset + sector_num * 512,
+                          buf, nb_sectors * 512);
+
+    return written < 0 ? written
+                       : cow_update_bitmap(bs, sector_num, nb_sectors);
+}
+
+/*
+ * Write entry point of the driver (installed as bdrv_cow.bdrv_write).
+ *
+ * Runs cow_write() while holding the coroutine mutex kept in the driver
+ * state and returns cow_write()'s result.
+ */
+static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num,
+                                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *cow = bs->opaque;
+    int status;
+
+    qemu_co_mutex_lock(&cow->lock);
+    status = cow_write(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&cow->lock);
+
+    return status;
+}
+
+/* Close callback of the driver (bdrv_cow.bdrv_close); it performs no work. */
+static void cow_close(BlockDriverState *bs)
+{
+}
+
+/*
+ * Create a new image at filename.
+ *
+ * BLOCK_OPT_SIZE (a byte count, rounded down to whole 512-byte sectors)
+ * and BLOCK_OPT_BACKING_FILE are picked out of options.  The file is then
+ * created and opened, a cow_header_v2 is written at offset 0, and the file
+ * is resized to hold the header plus one bitmap bit per sector.
+ *
+ * Returns the result of bdrv_truncate() when all earlier steps succeeded,
+ * otherwise the negative value of the first failing step.  Failures to
+ * create or open the file are reported with qerror_report_err(); errp is
+ * not written by this function.
+ */
+static int cow_create(const char *filename, QEMUOptionParameter *options,
+                      Error **errp)
+{
+    struct cow_header_v2 hdr;
+    const char *backing = NULL;
+    int64_t sectors = 0;
+    Error *local_err = NULL;
+    BlockDriverState *cow_bs;
+    int ret;
+
+    /* Read out options */
+    for (; options && options->name; options++) {
+        if (strcmp(options->name, BLOCK_OPT_SIZE) == 0) {
+            sectors = options->value.n / 512;
+        } else if (strcmp(options->name, BLOCK_OPT_BACKING_FILE) == 0) {
+            backing = options->value.s;
+        }
+    }
+
+    /* options now points at the terminating entry of the list (or is NULL) */
+    ret = bdrv_create_file(filename, options, &local_err);
+    if (ret < 0) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return ret;
+    }
+
+    ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    if (ret < 0) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return ret;
+    }
+
+    memset(&hdr, 0, sizeof(hdr));
+    hdr.magic = cpu_to_be32(COW_MAGIC);
+    hdr.version = cpu_to_be32(COW_VERSION);
+    if (backing) {
+        struct stat st;
+
+        /* Note: if the file cannot be stat()ed, we put a dummy mtime */
+        if (stat(backing, &st) == 0) {
+            hdr.mtime = cpu_to_be32(st.st_mtime);
+        } else {
+            hdr.mtime = cpu_to_be32(0);
+        }
+        pstrcpy(hdr.backing_file, sizeof(hdr.backing_file), backing);
+    }
+    hdr.sectorsize = cpu_to_be32(512);
+    hdr.size = cpu_to_be64(sectors * 512);
+
+    ret = bdrv_pwrite(cow_bs, 0, &hdr, sizeof(hdr));
+    if (ret >= 0) {
+        /* resize to include at least all the bitmap */
+        ret = bdrv_truncate(cow_bs, sizeof(hdr) + ((sectors + 7) >> 3));
+    }
+
+    bdrv_unref(cow_bs);
+    return ret;
+}
+
+/*
+ * Creation options advertised by the driver (bdrv_cow.create_options).
+ * These are the two option names cow_create() looks for.  The list ends
+ * with a { NULL } entry.
+ */
+static QEMUOptionParameter cow_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    {
+        .name = BLOCK_OPT_BACKING_FILE,
+        .type = OPT_STRING,
+        .help = "File name of a base image"
+    },
+    { NULL }
+};
+
+/*
+ * Driver description for the "cow" format: ties the callbacks defined in
+ * this file to the BlockDriver slots and sizes the per-image state as
+ * BDRVCowState.
+ */
+static BlockDriver bdrv_cow = {
+    .format_name    = "cow",
+    .instance_size  = sizeof(BDRVCowState),
+
+    .bdrv_probe     = cow_probe,
+    .bdrv_open      = cow_open,
+    .bdrv_close     = cow_close,
+    .bdrv_create    = cow_create,
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+
+    /* the read/write slots use the mutex-taking wrappers */
+    .bdrv_read              = cow_co_read,
+    .bdrv_write             = cow_co_write,
+    .bdrv_co_get_block_status   = cow_co_get_block_status,
+
+    .create_options = cow_create_options,
+};
+
+/* Register the "cow" driver description with bdrv_register(). */
+static void bdrv_cow_init(void)
+{
+    bdrv_register(&bdrv_cow);
+}
+
+/* Hand bdrv_cow_init to the block_init() hook. */
+block_init(bdrv_cow_init);
--- a/block/curl.c
+++ b/block/curl.c
@@ -21,16 +21,11 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "qemu/error-report.h"
 #include "block/block_int.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qstring.h"
-#include "crypto/secret.h"
 #include <curl/curl.h>

-// #define DEBUG_CURL
+// #define DEBUG
 // #define DEBUG_VERBOSE

 #ifdef DEBUG_CURL
@@ -42,21 +37,6 @@
 #if LIBCURL_VERSION_NUM >= 0x071000
 /* The multi interface timer callback was introduced in 7.16.0 */
 #define NEED_CURL_TIMER_CALLBACK
-#define HAVE_SOCKET_ACTION
-#endif
-
-#ifndef HAVE_SOCKET_ACTION
-/* If curl_multi_socket_action isn't available, define it statically here in
- * terms of curl_multi_socket. Note that ev_bitmask will be ignored, which is
- * less efficient but still safe. */
-static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
-                                            curl_socket_t sockfd,
-                                            int ev_bitmask,
-                                            int *running_handles)
-{
-    return curl_multi_socket(multi_handle, sockfd, running_handles);
-}
-#define curl_multi_socket_action __curl_multi_socket_action
 #endif

 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
@@ -66,28 +46,16 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
-#define READ_AHEAD_DEFAULT (256 * 1024)
-#define CURL_TIMEOUT_DEFAULT 5
-#define CURL_TIMEOUT_MAX 10000
+#define READ_AHEAD_SIZE (256 * 1024)

 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
 #define FIND_RET_WAIT   2

-#define CURL_BLOCK_OPT_URL       "url"
-#define CURL_BLOCK_OPT_READAHEAD "readahead"
-#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
-#define CURL_BLOCK_OPT_TIMEOUT "timeout"
-#define CURL_BLOCK_OPT_COOKIE    "cookie"
-#define CURL_BLOCK_OPT_USERNAME "username"
-#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
-#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
-#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
-
 struct BDRVCURLState;

 typedef struct CURLAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    QEMUIOVector *qiov;

@@ -103,7 +71,6 @@ typedef struct CURLState
    struct BDRVCURLState *s;
    CURLAIOCB *acb[CURL_NUM_ACB];
    CURL *curl;
-    curl_socket_t sock_fd;
    char *orig_buf;
    size_t buf_start;
    size_t buf_off;
@@ -120,20 +87,11 @@ typedef struct BDRVCURLState {
    CURLState states[CURL_NUM_STATES];
    char *url;
    size_t readahead_size;
-    bool sslverify;
-    uint64_t timeout;
-    char *cookie;
    bool accept_range;
-    AioContext *aio_context;
-    char *username;
-    char *password;
-    char *proxyusername;
-    char *proxypassword;
 } BDRVCURLState;

 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);
-static void curl_multi_read(void *arg);

 #ifdef NEED_CURL_TIMER_CALLBACK
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
@@ -153,31 +111,21 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 #endif

 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
-                        void *userp, void *sp)
+                        void *s, void *sp)
 {
-    BDRVCURLState *s;
-    CURLState *state = NULL;
-    curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
-    state->sock_fd = fd;
-    s = state->s;
-
    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
    switch (action) {
        case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, state);
+            qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
            break;
        case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, state);
+            qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
            break;
        case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, state);
+            qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
            break;
        case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL);
+            qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
            break;
    }

@@ -207,7 +155,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
    DPRINTF("CURL: Just reading %zd bytes\n", realsize);

    if (!s || !s->orig_buf)
-        return 0;
+        goto read_end;

    if (s->buf_off >= s->buf_len) {
        /* buffer full, read nothing */
@@ -227,11 +175,12 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                acb->end - acb->start);
            acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_unref(acb);
+            qemu_aio_release(acb);
            s->acb[i] = NULL;
        }
    }

+read_end:
    return realsize;
 }

@@ -266,8 +215,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
        }

        // Wait for unfinished chunks
-        if (state->in_use &&
-            (start >= state->buf_start) &&
+        if ((start >= state->buf_start) &&
            (start <= buf_fend) &&
            (end >= state->buf_start) &&
            (end <= buf_fend))
@@ -289,81 +237,68 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
    return FIND_RET_NONE;
 }

-static void curl_multi_check_completion(BDRVCURLState *s)
+static void curl_multi_read(BDRVCURLState *s)
 {
    int msgs_in_queue;

    /* Try to find done transfers, so we can free the easy
     * handle again. */
-    for (;;) {
+    do {
        CURLMsg *msg;
        msg = curl_multi_info_read(s->multi, &msgs_in_queue);

-        /* Quit when there are no more completions */
        if (!msg)
            break;
-
-        if (msg->msg == CURLMSG_DONE) {
-            CURLState *state = NULL;
-            curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE,
-                              (char **)&state);
-
-            /* ACBs for successful messages get completed in curl_read_cb */
-            if (msg->data.result != CURLE_OK) {
-                int i;
-                static int errcount = 100;
-
-                /* Don't lose the original error message from curl, since
-                 * it contains extra data.
-                 */
-                if (errcount > 0) {
-                    error_report("curl: %s", state->errmsg);
-                    if (--errcount == 0) {
-                        error_report("curl: further errors suppressed");
-                    }
-                }
-
-                for (i = 0; i < CURL_NUM_ACB; i++) {
-                    CURLAIOCB *acb = state->acb[i];
-
-                    if (acb == NULL) {
-                        continue;
-                    }
-
-                    acb->common.cb(acb->common.opaque, -EPROTO);
-                    qemu_aio_unref(acb);
-                    state->acb[i] = NULL;
-                }
-            }
-
-            curl_clean_state(state);
+        if (msg->msg == CURLMSG_NONE)
            break;
+
+        switch (msg->msg) {
+            case CURLMSG_DONE:
+            {
+                CURLState *state = NULL;
+                curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char**)&state);
+
+                /* ACBs for successful messages get completed in curl_read_cb */
+                if (msg->data.result != CURLE_OK) {
+                    int i;
+                    for (i = 0; i < CURL_NUM_ACB; i++) {
+                        CURLAIOCB *acb = state->acb[i];
+
+                        if (acb == NULL) {
+                            continue;
+                        }
+
+                        acb->common.cb(acb->common.opaque, -EIO);
+                        qemu_aio_release(acb);
+                        state->acb[i] = NULL;
+                    }
+                }
+
+                curl_clean_state(state);
+                break;
+            }
+            default:
+                msgs_in_queue = 0;
+                break;
        }
-    }
+    } while(msgs_in_queue);
 }

 static void curl_multi_do(void *arg)
 {
-    CURLState *s = (CURLState *)arg;
+    BDRVCURLState *s = (BDRVCURLState *)arg;
    int running;
    int r;

-    if (!s->s->multi) {
+    if (!s->multi) {
        return;
    }

    do {
-        r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
+        r = curl_multi_socket_all(s->multi, &running);
    } while(r == CURLM_CALL_MULTI_PERFORM);

-}
-
-static void curl_multi_read(void *arg)
-{
-    CURLState *s = (CURLState *)arg;
-
-    curl_multi_do(arg);
-    curl_multi_check_completion(s->s);
+    curl_multi_read(s);
 }

 static void curl_multi_timeout_do(void *arg)
@@ -378,13 +313,13 @@ static void curl_multi_timeout_do(void *arg)

    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);

-    curl_multi_check_completion(s);
+    curl_multi_read(s);
 #else
    abort();
 #endif
 }

-static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
+static CURLState *curl_init_state(BDRVCURLState *s)
 {
    CURLState *state = NULL;
    int i, j;
@@ -402,62 +337,44 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
            break;
        }
        if (!state) {
-            aio_poll(bdrv_get_aio_context(bs), true);
+            g_usleep(100);
+            curl_multi_do(s);
        }
    } while(!state);

-    if (!state->curl) {
-        state->curl = curl_easy_init();
-        if (!state->curl) {
-            return NULL;
-        }
-        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
-        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
-                         (long) s->sslverify);
-        if (s->cookie) {
-            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
-        }
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
-                         (void *)curl_read_cb);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
-        curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
-        curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
-        curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
-        curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+    if (state->curl)
+        goto has_curl;

-        if (s->username) {
-            curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
-        }
-        if (s->password) {
-            curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
-        }
-        if (s->proxyusername) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYUSERNAME, s->proxyusername);
-        }
-        if (s->proxypassword) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYPASSWORD, s->proxypassword);
-        }
+    state->curl = curl_easy_init();
+    if (!state->curl)
+        return NULL;
+    curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
+    curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
+    curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb);
+    curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
+    curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
+    curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
+    curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
+    curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
+    curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
+    curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);

-        /* Restrict supported protocols to avoid security issues in the more
-         * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
-         * CVE-2013-0249.
-         *
-         * Restricting protocols is only supported from 7.19.4 upwards.
-         */
+    /* Restrict supported protocols to avoid security issues in the more
+     * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
+     * CVE-2013-0249.
+     *
+     * Restricting protocols is only supported from 7.19.4 upwards.
+     */
 #if LIBCURL_VERSION_NUM >= 0x071304
-        curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
-        curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
+    curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
+    curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
 #endif

 #ifdef DEBUG_VERBOSE
-        curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
+    curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
 #endif
-    }
+
+has_curl:

    state->s = s;

@@ -474,50 +391,43 @@ static void curl_clean_state(CURLState *s)
 static void curl_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
-}

-static void curl_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVCURLState *s = bs->opaque;
-    int i;
+    #define RA_OPTSTR ":readahead="
+    char *file;
+    char *ra;
+    const char *ra_val;
+    int parse_state = 0;

-    for (i = 0; i < CURL_NUM_STATES; i++) {
-        if (s->states[i].in_use) {
-            curl_clean_state(&s->states[i]);
+    file = g_strdup(filename);
+
+    /* Parse a trailing ":readahead=#:" param, if present. */
+    ra = file + strlen(file) - 1;
+    while (ra >= file) {
+        if (parse_state == 0) {
+            if (*ra == ':') {
+                parse_state++;
+            } else {
+                break;
+            }
+        } else if (parse_state == 1) {
+            if (*ra > '9' || *ra < '0') {
+                char *opt_start = ra - strlen(RA_OPTSTR) + 1;
+                if (opt_start > file &&
+                    strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
+                    ra_val = ra + 1;
+                    ra -= strlen(RA_OPTSTR) - 1;
+                    *ra = '\0';
+                    qdict_put(options, "readahead", qstring_from_str(ra_val));
+                }
+                break;
+            }
        }
-        if (s->states[i].curl) {
-            curl_easy_cleanup(s->states[i].curl);
-            s->states[i].curl = NULL;
-        }
-        g_free(s->states[i].orig_buf);
-        s->states[i].orig_buf = NULL;
-    }
-    if (s->multi) {
-        curl_multi_cleanup(s->multi);
-        s->multi = NULL;
+        ra--;
    }

-    timer_del(&s->timer);
-}
+    qdict_put(options, "url", qstring_from_str(file));

-static void curl_attach_aio_context(BlockDriverState *bs,
-                                    AioContext *new_context)
-{
-    BDRVCURLState *s = bs->opaque;
-
-    aio_timer_init(new_context, &s->timer,
-                   QEMU_CLOCK_REALTIME, SCALE_NS,
-                   curl_multi_timeout_do, s);
-
-    assert(!s->multi);
-    s->multi = curl_multi_init();
-    s->aio_context = new_context;
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
+    g_free(file);
 }

 static QemuOptsList runtime_opts = {
@@ -525,55 +435,19 @@ static QemuOptsList runtime_opts = {
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
-            .name = CURL_BLOCK_OPT_URL,
+            .name = "url",
            .type = QEMU_OPT_STRING,
            .help = "URL to open",
        },
        {
-            .name = CURL_BLOCK_OPT_READAHEAD,
+            .name = "readahead",
            .type = QEMU_OPT_SIZE,
            .help = "Readahead size",
        },
-        {
-            .name = CURL_BLOCK_OPT_SSLVERIFY,
-            .type = QEMU_OPT_BOOL,
-            .help = "Verify SSL certificate"
-        },
-        {
-            .name = CURL_BLOCK_OPT_TIMEOUT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Curl timeout"
-        },
-        {
-            .name = CURL_BLOCK_OPT_COOKIE,
-            .type = QEMU_OPT_STRING,
-            .help = "Pass the cookie or list of cookies with each request"
-        },
-        {
-            .name = CURL_BLOCK_OPT_USERNAME,
-            .type = QEMU_OPT_STRING,
-            .help = "Username for HTTP auth"
-        },
-        {
-            .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as password for HTTP auth",
-        },
-        {
-            .name = CURL_BLOCK_OPT_PROXY_USERNAME,
-            .type = QEMU_OPT_STRING,
-            .help = "Username for HTTP proxy auth"
-        },
-        {
-            .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as password for HTTP proxy auth",
-        },
        { /* end of list */ }
    },
 };

-
 static int curl_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -582,79 +456,46 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *file;
-    const char *cookie;
    double d;
-    const char *secretid;

    static int inited = 0;

    if (flags & BDRV_O_RDWR) {
-        error_setg(errp, "curl block device does not support writes");
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "curl block device does not support writes");
        return -EROFS;
    }

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
        goto out_noclean;
    }

-    s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD,
-                                          READ_AHEAD_DEFAULT);
+    s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
    if ((s->readahead_size & 0x1ff) != 0) {
-        error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
-                   s->readahead_size);
+        fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
+                s->readahead_size);
        goto out_noclean;
    }

-    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
-                                     CURL_TIMEOUT_DEFAULT);
-    if (s->timeout > CURL_TIMEOUT_MAX) {
-        error_setg(errp, "timeout parameter is too large or negative");
-        goto out_noclean;
-    }
-
-    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
-
-    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    s->cookie = g_strdup(cookie);
-
-    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
+    file = qemu_opt_get(opts, "url");
    if (file == NULL) {
-        error_setg(errp, "curl block driver requires an 'url' option");
+        qerror_report(ERROR_CLASS_GENERIC_ERROR, "curl block driver requires "
+                      "an 'url' option");
        goto out_noclean;
    }

-    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
-    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
-
-    if (secretid) {
-        s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
-        if (!s->password) {
-            goto out_noclean;
-        }
-    }
-
-    s->proxyusername = g_strdup(
-        qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
-    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
-    if (secretid) {
-        s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
-        if (!s->proxypassword) {
-            goto out_noclean;
-        }
-    }
-
    if (!inited) {
        curl_global_init(CURL_GLOBAL_ALL);
        inited = 1;
    }

    DPRINTF("CURL: Opening %s\n", file);
-    s->aio_context = bdrv_get_aio_context(bs);
    s->url = g_strdup(file);
-    state = curl_init_state(bs, s);
+    state = curl_init_state(s);
    if (!state)
        goto out_noclean;

@@ -685,31 +526,49 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

-    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
+    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
+    // Now we know the file exists and its size, so let's
+    // initialize the multi interface!
+
+    s->multi = curl_multi_init();
+    curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
+    curl_multi_do(s);

    qemu_opts_del(opts);
    return 0;

 out:
-    error_setg(errp, "CURL: Error opening file: %s", state->errmsg);
+    fprintf(stderr, "CURL: Error opening file: %s\n", state->errmsg);
    curl_easy_cleanup(state->curl);
    state->curl = NULL;
 out_noclean:
-    g_free(s->cookie);
    g_free(s->url);
    qemu_opts_del(opts);
    return -EINVAL;
 }

+/*
+ * Cancel callback referenced by curl_aiocb_info.  It is a no-op: the
+ * request identified by blockacb is left untouched.
+ */
+static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    // Do we have to implement canceling? Seems to work without...
+}
+
 static const AIOCBInfo curl_aiocb_info = {
    .aiocb_size         = sizeof(CURLAIOCB),
+    .cancel             = curl_aio_cancel,
 };


 static void curl_readv_bh_cb(void *p)
 {
    CURLState *state;
-    int running;

    CURLAIOCB *acb = p;
    BDRVCURLState *s = acb->common.bs->opaque;
@@ -724,7 +583,7 @@ static void curl_readv_bh_cb(void *p)
    // we can just call the callback and be done.
    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
        case FIND_RET_OK:
-            qemu_aio_unref(acb);
+            qemu_aio_release(acb);
            // fall through
        case FIND_RET_WAIT:
            return;
@@ -733,10 +592,10 @@ static void curl_readv_bh_cb(void *p)
    }

    // No cache found, so let's start a new request
-    state = curl_init_state(acb->common.bs, s);
+    state = curl_init_state(s);
    if (!state) {
        acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_unref(acb);
+        qemu_aio_release(acb);
        return;
    }

@@ -744,17 +603,12 @@ static void curl_readv_bh_cb(void *p)
    acb->end = (acb->nb_sectors * SECTOR_SIZE);

    state->buf_off = 0;
-    g_free(state->orig_buf);
+    if (state->orig_buf)
+        g_free(state->orig_buf);
    state->buf_start = start;
    state->buf_len = acb->end + s->readahead_size;
    end = MIN(start + state->buf_len, s->len) - 1;
-    state->orig_buf = g_try_malloc(state->buf_len);
-    if (state->buf_len && state->orig_buf == NULL) {
-        curl_clean_state(state);
-        acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_unref(acb);
-        return;
-    }
+    state->orig_buf = g_malloc(state->buf_len);
    state->acb[0] = acb;

    snprintf(state->range, 127, "%zd-%zd", start, end);
@@ -763,14 +617,13 @@ static void curl_readv_bh_cb(void *p)
    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);

    curl_multi_add_handle(s->multi, state->curl);
+    curl_multi_do(s);

-    /* Tell curl it needs to kick things off */
-    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 }

-static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    CURLAIOCB *acb;

@@ -780,7 +633,7 @@ static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
    acb->sector_num = sector_num;
    acb->nb_sectors = nb_sectors;

-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
+    acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
    qemu_bh_schedule(acb->bh);
    return &acb->common;
 }
@@ -788,11 +641,26 @@ static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
 static void curl_close(BlockDriverState *bs)
 {
    BDRVCURLState *s = bs->opaque;
+    int i;

    DPRINTF("CURL: Close\n");
-    curl_detach_aio_context(bs);
+    for (i=0; i<CURL_NUM_STATES; i++) {
+        if (s->states[i].in_use)
+            curl_clean_state(&s->states[i]);
+        if (s->states[i].curl) {
+            curl_easy_cleanup(s->states[i].curl);
+            s->states[i].curl = NULL;
+        }
+        if (s->states[i].orig_buf) {
+            g_free(s->states[i].orig_buf);
+            s->states[i].orig_buf = NULL;
+        }
+    }
+    if (s->multi)
+        curl_multi_cleanup(s->multi);
+
+    timer_del(&s->timer);

-    g_free(s->cookie);
    g_free(s->url);
 }

@@ -803,83 +671,68 @@ static int64_t curl_getlength(BlockDriverState *bs)
 }

 static BlockDriver bdrv_http = {
-    .format_name                = "http",
-    .protocol_name              = "http",
+    .format_name            = "http",
+    .protocol_name          = "http",

-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .instance_size          = sizeof(BDRVCURLState),
+    .bdrv_parse_filename    = curl_parse_filename,
+    .bdrv_file_open         = curl_open,
+    .bdrv_close             = curl_close,
+    .bdrv_getlength         = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
+    .bdrv_aio_readv         = curl_aio_readv,
 };

 static BlockDriver bdrv_https = {
-    .format_name                = "https",
-    .protocol_name              = "https",
+    .format_name            = "https",
+    .protocol_name          = "https",

-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .instance_size          = sizeof(BDRVCURLState),
+    .bdrv_parse_filename    = curl_parse_filename,
+    .bdrv_file_open         = curl_open,
+    .bdrv_close             = curl_close,
+    .bdrv_getlength         = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
+    .bdrv_aio_readv         = curl_aio_readv,
 };

 static BlockDriver bdrv_ftp = {
-    .format_name                = "ftp",
-    .protocol_name              = "ftp",
+    .format_name            = "ftp",
+    .protocol_name          = "ftp",

-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .instance_size          = sizeof(BDRVCURLState),
+    .bdrv_parse_filename    = curl_parse_filename,
+    .bdrv_file_open         = curl_open,
+    .bdrv_close             = curl_close,
+    .bdrv_getlength         = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
+    .bdrv_aio_readv         = curl_aio_readv,
 };

 static BlockDriver bdrv_ftps = {
-    .format_name                = "ftps",
-    .protocol_name              = "ftps",
+    .format_name            = "ftps",
+    .protocol_name          = "ftps",

-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .instance_size          = sizeof(BDRVCURLState),
+    .bdrv_parse_filename    = curl_parse_filename,
+    .bdrv_file_open         = curl_open,
+    .bdrv_close             = curl_close,
+    .bdrv_getlength         = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
+    .bdrv_aio_readv         = curl_aio_readv,
 };

 static BlockDriver bdrv_tftp = {
-    .format_name                = "tftp",
-    .protocol_name              = "tftp",
+    .format_name            = "tftp",
+    .protocol_name          = "tftp",

-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .instance_size          = sizeof(BDRVCURLState),
+    .bdrv_parse_filename    = curl_parse_filename,
+    .bdrv_file_open         = curl_open,
+    .bdrv_close             = curl_close,
+    .bdrv_getlength         = curl_getlength,

-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
+    .bdrv_aio_readv         = curl_aio_readv,
 };

 static void curl_block_init(void)
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -1,387 +0,0 @@
-/*
- * Block Dirty Bitmap
- *
- * Copyright (c) 2016 Red Hat. Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "config-host.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-
-/**
- * A BdrvDirtyBitmap can be in three possible states:
- * (1) successor is NULL and disabled is false: full r/w mode
- * (2) successor is NULL and disabled is true: read only mode ("disabled")
- * (3) successor is set: frozen mode.
- *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
- *     or enabled. A frozen bitmap can only abdicate() or reclaim().
- */
-struct BdrvDirtyBitmap {
-    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
-    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
-    char *name;                 /* Optional non-empty unique ID */
-    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
-    QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
-{
-    BdrvDirtyBitmap *bm;
-
-    assert(name);
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->name && !strcmp(name, bm->name)) {
-            return bm;
-        }
-    }
-    return NULL;
-}
-
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    g_free(bitmap->name);
-    bitmap->name = NULL;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
-                                          uint32_t granularity,
-                                          const char *name,
-                                          Error **errp)
-{
-    int64_t bitmap_size;
-    BdrvDirtyBitmap *bitmap;
-    uint32_t sector_granularity;
-
-    assert((granularity & (granularity - 1)) == 0);
-
-    if (name && bdrv_find_dirty_bitmap(bs, name)) {
-        error_setg(errp, "Bitmap already exists: %s", name);
-        return NULL;
-    }
-    sector_granularity = granularity >> BDRV_SECTOR_BITS;
-    assert(sector_granularity);
-    bitmap_size = bdrv_nb_sectors(bs);
-    if (bitmap_size < 0) {
-        error_setg_errno(errp, -bitmap_size, "could not get length of device");
-        errno = -bitmap_size;
-        return NULL;
-    }
-    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
-    bitmap->size = bitmap_size;
-    bitmap->name = g_strdup(name);
-    bitmap->disabled = false;
-    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    return bitmap;
-}
-
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->successor;
-}
-
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
-{
-    return !(bitmap->disabled || bitmap->successor);
-}
-
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        return DIRTY_BITMAP_STATUS_FROZEN;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        return DIRTY_BITMAP_STATUS_DISABLED;
-    } else {
-        return DIRTY_BITMAP_STATUS_ACTIVE;
-    }
-}
-
-/**
- * Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
- */
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
-                                       BdrvDirtyBitmap *bitmap, Error **errp)
-{
-    uint64_t granularity;
-    BdrvDirtyBitmap *child;
-
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        error_setg(errp, "Cannot create a successor for a bitmap that is "
-                   "currently frozen");
-        return -1;
-    }
-    assert(!bitmap->successor);
-
-    /* Create an anonymous successor */
-    granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
-    if (!child) {
-        return -1;
-    }
-
-    /* Successor will be on or off based on our current state. */
-    child->disabled = bitmap->disabled;
-
-    /* Install the successor and freeze the parent */
-    bitmap->successor = child;
-    return 0;
-}
-
-/**
- * For a bitmap with a successor, yield our name to the successor,
- * delete the old bitmap, and return a handle to the new bitmap.
- */
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
-                                            BdrvDirtyBitmap *bitmap,
-                                            Error **errp)
-{
-    char *name;
-    BdrvDirtyBitmap *successor = bitmap->successor;
-
-    if (successor == NULL) {
-        error_setg(errp, "Cannot relinquish control if "
-                   "there's no successor present");
-        return NULL;
-    }
-
-    name = bitmap->name;
-    bitmap->name = NULL;
-    successor->name = name;
-    bitmap->successor = NULL;
-    bdrv_release_dirty_bitmap(bs, bitmap);
-
-    return successor;
-}
-
-/**
- * In cases of failure where we can no longer safely delete the parent,
- * we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
- */
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
-                                           BdrvDirtyBitmap *parent,
-                                           Error **errp)
-{
-    BdrvDirtyBitmap *successor = parent->successor;
-
-    if (!successor) {
-        error_setg(errp, "Cannot reclaim a successor when none is present");
-        return NULL;
-    }
-
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
-    bdrv_release_dirty_bitmap(bs, successor);
-    parent->successor = NULL;
-
-    return parent;
-}
-
-/**
- * Truncates _all_ bitmaps attached to a BDS.
- */
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bitmap;
-    uint64_t size = bdrv_nb_sectors(bs);
-
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        assert(!bdrv_dirty_bitmap_frozen(bitmap));
-        hbitmap_truncate(bitmap->bitmap, size);
-        bitmap->size = size;
-    }
-}
-
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
-{
-    BdrvDirtyBitmap *bm, *next;
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
-            assert(!bdrv_dirty_bitmap_frozen(bm));
-            QLIST_REMOVE(bm, list);
-            hbitmap_free(bm->bitmap);
-            g_free(bm->name);
-            g_free(bm);
-
-            if (bitmap) {
-                return;
-            }
-        }
-    }
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
-}
-
-/**
- * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
- */
-void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
-}
-
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = true;
-}
-
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = false;
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDirtyInfoList *list = NULL;
-    BlockDirtyInfoList **plist = &list;
-
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
-        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm);
-        info->granularity = bdrv_dirty_bitmap_granularity(bm);
-        info->has_name = !!bm->name;
-        info->name = g_strdup(bm->name);
-        info->status = bdrv_dirty_bitmap_status(bm);
-        entry->value = info;
-        *plist = entry;
-        plist = &entry->next;
-    }
-
-    return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                   int64_t sector)
-{
-    if (bitmap) {
-        return hbitmap_get(bitmap->bitmap, sector);
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Chooses a default granularity based on the existing cluster size,
- * but clamped between [4K, 64K]. Defaults to 64K in the case that there
- * is no cluster size information available.
- */
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    uint32_t granularity;
-
-    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
-        granularity = MAX(4096, bdi.cluster_size);
-        granularity = MIN(65536, granularity);
-    } else {
-        granularity = 65536;
-    }
-
-    return granularity;
-}
-
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
-{
-    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
-    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    if (!out) {
-        hbitmap_reset_all(bitmap->bitmap);
-    } else {
-        HBitmap *backup = bitmap->bitmap;
-        bitmap->bitmap = hbitmap_alloc(bitmap->size,
-                                       hbitmap_granularity(backup));
-        *out = backup;
-    }
-}
-
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
-{
-    HBitmap *tmp = bitmap->bitmap;
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    bitmap->bitmap = in;
-    hbitmap_free(tmp);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
-{
-    BdrvDirtyBitmap *bitmap;
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-            continue;
-        }
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-    }
-}
-
-/**
- * Advance an HBitmapIter to an arbitrary offset.
- */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
-{
-    assert(hbi->hb);
-    hbitmap_iter_init(hbi, hbi->hb, offset);
-}
-
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
-{
-    return hbitmap_count(bitmap->bitmap);
-}
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -21,17 +21,11 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/bswap.h"
-#include "qemu/error-report.h"
 #include "qemu/module.h"
 #include <zlib.h>
-#ifdef CONFIG_BZIP2
-#include <bzlib.h>
-#endif
-#include <glib.h>

 enum {
    /* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -61,9 +55,6 @@ typedef struct BDRVDMGState {
    uint8_t *compressed_chunk;
    uint8_t *uncompressed_chunk;
    z_stream zstream;
-#ifdef CONFIG_BZIP2
-    bz_stream bzstream;
-#endif
 } BDRVDMGState;

 static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -86,7 +77,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
    uint64_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
+    ret = bdrv_pread(bs->file, offset, &buffer, 8);
    if (ret < 0) {
        return ret;
    }
@@ -100,7 +91,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    uint32_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
+    ret = bdrv_pread(bs->file, offset, &buffer, 4);
    if (ret < 0) {
        return ret;
    }
@@ -109,16 +100,6 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    return 0;
 }

-static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset)
-{
-    return be64_to_cpu(*(uint64_t *)&buffer[offset]);
-}
-
-static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset)
-{
-    return be32_to_cpu(*(uint32_t *)&buffer[offset]);
-}
-
 /* Increase max chunk sizes, if necessary.  This function is used to calculate
 * the buffer sizes needed for compressed/uncompressed chunk I/O.
 */
@@ -131,7 +112,6 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,

    switch (s->types[chunk]) {
    case 0x80000005: /* zlib compressed */
-    case 0x80000006: /* bzip2 compressed */
        compressed_size = s->lengths[chunk];
        uncompressed_sectors = s->sectorcounts[chunk];
        break;
@@ -139,9 +119,7 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
        uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
        break;
    case 2: /* zero */
-        /* as the all-zeroes block may be large, it is treated specially: the
-         * sector is not copied from a large buffer, a simple memset is used
-         * instead. Therefore uncompressed_sectors does not need to be set. */
+        uncompressed_sectors = s->sectorcounts[chunk];
        break;
    }

@@ -153,377 +131,161 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
    }
 }

-static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
-{
-    int64_t length;
-    int64_t offset = 0;
-    uint8_t buffer[515];
-    int i, ret;
-
-    /* bdrv_getlength returns a multiple of block size (512), rounded up. Since
-     * dmg images can have odd sizes, try to look for the "koly" magic which
-     * marks the begin of the UDIF trailer (512 bytes). This magic can be found
-     * in the last 511 bytes of the second-last sector or the first 4 bytes of
-     * the last sector (search space: 515 bytes) */
-    length = bdrv_getlength(file_bs);
-    if (length < 0) {
-        error_setg_errno(errp, -length,
-            "Failed to get file size while reading UDIF trailer");
-        return length;
-    } else if (length < 512) {
-        error_setg(errp, "dmg file must be at least 512 bytes long");
-        return -EINVAL;
-    }
-    if (length > 511 + 512) {
-        offset = length - 511 - 512;
-    }
-    length = length < 515 ? length : 515;
-    ret = bdrv_pread(file_bs, offset, buffer, length);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
-        return ret;
-    }
-    for (i = 0; i < length - 3; i++) {
-        if (buffer[i] == 'k' && buffer[i+1] == 'o' &&
-            buffer[i+2] == 'l' && buffer[i+3] == 'y') {
-            return offset + i;
-        }
-    }
-    error_setg(errp, "Could not locate UDIF trailer in dmg file");
-    return -EINVAL;
-}
-
-/* used when building the sector table */
-typedef struct DmgHeaderState {
-    /* used internally by dmg_read_mish_block to remember offsets of blocks
-     * across calls */
-    uint64_t data_fork_offset;
-    /* exported for dmg_open */
-    uint32_t max_compressed_size;
-    uint32_t max_sectors_per_chunk;
-} DmgHeaderState;
-
-static bool dmg_is_known_block_type(uint32_t entry_type)
-{
-    switch (entry_type) {
-    case 0x00000001:    /* uncompressed */
-    case 0x00000002:    /* zeroes */
-    case 0x80000005:    /* zlib */
-#ifdef CONFIG_BZIP2
-    case 0x80000006:    /* bzip2 */
-#endif
-        return true;
-    default:
-        return false;
-    }
-}
-
-static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
-                               uint8_t *buffer, uint32_t count)
-{
-    uint32_t type, i;
-    int ret;
-    size_t new_size;
-    uint32_t chunk_count;
-    int64_t offset = 0;
-    uint64_t data_offset;
-    uint64_t in_offset = ds->data_fork_offset;
-    uint64_t out_offset;
-
-    type = buff_read_uint32(buffer, offset);
-    /* skip data that is not a valid MISH block (invalid magic or too small) */
-    if (type != 0x6d697368 || count < 244) {
-        /* assume success for now */
-        return 0;
-    }
-
-    /* chunk offsets are relative to this sector number */
-    out_offset = buff_read_uint64(buffer, offset + 8);
-
-    /* location in data fork for (compressed) blob (in bytes) */
-    data_offset = buff_read_uint64(buffer, offset + 0x18);
-    in_offset += data_offset;
-
-    /* move to begin of chunk entries */
-    offset += 204;
-
-    chunk_count = (count - 204) / 40;
-    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
-    s->types = g_realloc(s->types, new_size / 2);
-    s->offsets = g_realloc(s->offsets, new_size);
-    s->lengths = g_realloc(s->lengths, new_size);
-    s->sectors = g_realloc(s->sectors, new_size);
-    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
-
-    for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
-        s->types[i] = buff_read_uint32(buffer, offset);
-        if (!dmg_is_known_block_type(s->types[i])) {
-            chunk_count--;
-            i--;
-            offset += 40;
-            continue;
-        }
-
-        /* sector number */
-        s->sectors[i] = buff_read_uint64(buffer, offset + 8);
-        s->sectors[i] += out_offset;
-
-        /* sector count */
-        s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
-
-        /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
-         * therefore be unbounded. */
-        if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
-            error_report("sector count %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        /* offset in (compressed) data fork */
-        s->offsets[i] = buff_read_uint64(buffer, offset + 0x18);
-        s->offsets[i] += in_offset;
-
-        /* length in (compressed) data fork */
-        s->lengths[i] = buff_read_uint64(buffer, offset + 0x20);
-
-        if (s->lengths[i] > DMG_LENGTHS_MAX) {
-            error_report("length %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->lengths[i], i, DMG_LENGTHS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        update_max_chunk_size(s, i, &ds->max_compressed_size,
-                              &ds->max_sectors_per_chunk);
-        offset += 40;
-    }
-    s->n_chunks += chunk_count;
-    return 0;
-
-fail:
-    return ret;
-}
-
-static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
-                                  uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint32_t count, rsrc_data_offset;
-    uint8_t *buffer = NULL;
-    uint64_t info_end;
-    uint64_t offset;
-
-    /* read offset from begin of resource fork (info_begin) to resource data */
-    ret = read_uint32(bs, info_begin, &rsrc_data_offset);
-    if (ret < 0) {
-        goto fail;
-    } else if (rsrc_data_offset > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* read length of resource data */
-    ret = read_uint32(bs, info_begin + 8, &count);
-    if (ret < 0) {
-        goto fail;
-    } else if (count == 0 || rsrc_data_offset + count > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* begin of resource data (consisting of one or more resources) */
-    offset = info_begin + rsrc_data_offset;
-
-    /* end of resource data (there is possibly a following resource map
-     * which will be ignored). */
-    info_end = offset + count;
-
-    /* read offsets (mish blocks) from one or more resources in resource data */
-    while (offset < info_end) {
-        /* size of following resource */
-        ret = read_uint32(bs, offset, &count);
-        if (ret < 0) {
-            goto fail;
-        } else if (count == 0 || count > info_end - offset) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        offset += 4;
-
-        buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file->bs, offset, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        ret = dmg_read_mish_block(s, ds, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-        /* advance offset by size of resource */
-        offset += count;
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
-static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
-                              uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint8_t *buffer = NULL;
-    char *data_begin, *data_end;
-
-    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
-     * safe upper cap on the data length. A test sample had a XML length of
-     * about 1 MiB. */
-    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    buffer = g_malloc(info_length + 1);
-    buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
-    if (ret != info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
-     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
-     * and line feeds. */
-    data_end = (char *)buffer;
-    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
-        guchar *mish;
-        gsize out_len = 0;
-
-        data_begin += 6;
-        data_end = strstr(data_begin, "</data>");
-        /* malformed XML? */
-        if (data_end == NULL) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        *data_end++ = '\0';
-        mish = g_base64_decode(data_begin, &out_len);
-        ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len);
-        g_free(mish);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
 static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVDMGState *s = bs->opaque;
-    DmgHeaderState ds;
-    uint64_t rsrc_fork_offset, rsrc_fork_length;
-    uint64_t plist_xml_offset, plist_xml_length;
+    uint64_t info_begin, info_end, last_in_offset, last_out_offset;
+    uint32_t count, tmp;
+    uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
    int64_t offset;
    int ret;

    bs->read_only = 1;
    s->n_chunks = 0;
    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
-    /* used by dmg_read_mish_block to keep track of the current I/O position */
-    ds.data_fork_offset = 0;
-    ds.max_compressed_size = 1;
-    ds.max_sectors_per_chunk = 1;

-    /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file->bs, errp);
+    /* read offset of info blocks */
+    offset = bdrv_getlength(bs->file);
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
+    offset -= 0x1d8;

-    /* offset of data fork (DataForkOffset) */
-    ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
+    ret = read_uint64(bs, offset, &info_begin);
    if (ret < 0) {
        goto fail;
-    } else if (ds.data_fork_offset > offset) {
+    } else if (info_begin == 0) {
        ret = -EINVAL;
        goto fail;
    }

-    /* offset of resource fork (RsrcForkOffset) */
-    ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
+    ret = read_uint32(bs, info_begin, &tmp);
    if (ret < 0) {
        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (rsrc_fork_offset >= offset ||
-        rsrc_fork_length > offset - rsrc_fork_offset) {
+    } else if (tmp != 0x100) {
        ret = -EINVAL;
        goto fail;
    }
-    /* offset of property list (XMLOffset) */
-    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
+
+    ret = read_uint32(bs, info_begin + 4, &count);
    if (ret < 0) {
        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (plist_xml_offset >= offset ||
-        plist_xml_length > offset - plist_xml_offset) {
+    } else if (count == 0) {
        ret = -EINVAL;
        goto fail;
    }
-    ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (bs->total_sectors < 0) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (rsrc_fork_length != 0) {
-        ret = dmg_read_resource_fork(bs, &ds,
-                                     rsrc_fork_offset, rsrc_fork_length);
+    info_end = info_begin + count;
+
+    offset = info_begin + 0x100;
+
+    /* read offsets */
+    last_in_offset = last_out_offset = 0;
+    while (offset < info_end) {
+        uint32_t type;
+
+        ret = read_uint32(bs, offset, &count);
+        if (ret < 0) {
+            goto fail;
+        } else if (count == 0) {
+            ret = -EINVAL;
+            goto fail;
+        }
+        offset += 4;
+
+        ret = read_uint32(bs, offset, &type);
        if (ret < 0) {
            goto fail;
        }
-    } else if (plist_xml_length != 0) {
-        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
-        if (ret < 0) {
-            goto fail;
+
+        if (type == 0x6d697368 && count >= 244) {
+            size_t new_size;
+            uint32_t chunk_count;
+
+            offset += 4;
+            offset += 200;
+
+            chunk_count = (count - 204) / 40;
+            new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+            s->types = g_realloc(s->types, new_size / 2);
+            s->offsets = g_realloc(s->offsets, new_size);
+            s->lengths = g_realloc(s->lengths, new_size);
+            s->sectors = g_realloc(s->sectors, new_size);
+            s->sectorcounts = g_realloc(s->sectorcounts, new_size);
+
+            for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
+                ret = read_uint32(bs, offset, &s->types[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 4;
+                if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
+                    s->types[i] != 2) {
+                    if (s->types[i] == 0xffffffff && i > 0) {
+                        last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
+                        last_out_offset = s->sectors[i - 1] +
+                                          s->sectorcounts[i - 1];
+                    }
+                    chunk_count--;
+                    i--;
+                    offset += 36;
+                    continue;
+                }
+                offset += 4;
+
+                ret = read_uint64(bs, offset, &s->sectors[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                s->sectors[i] += last_out_offset;
+                offset += 8;
+
+                ret = read_uint64(bs, offset, &s->sectorcounts[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 8;
+
+                if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
+                    error_report("sector count %" PRIu64 " for chunk %u is "
+                                 "larger than max (%u)",
+                                 s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
+                ret = read_uint64(bs, offset, &s->offsets[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                s->offsets[i] += last_in_offset;
+                offset += 8;
+
+                ret = read_uint64(bs, offset, &s->lengths[i]);
+                if (ret < 0) {
+                    goto fail;
+                }
+                offset += 8;
+
+                if (s->lengths[i] > DMG_LENGTHS_MAX) {
+                    error_report("length %" PRIu64 " for chunk %u is larger "
+                                 "than max (%u)",
+                                 s->lengths[i], i, DMG_LENGTHS_MAX);
+                    ret = -EINVAL;
+                    goto fail;
+                }
+
+                update_max_chunk_size(s, i, &max_compressed_size,
+                                      &max_sectors_per_chunk);
+            }
+            s->n_chunks += chunk_count;
        }
-    } else {
-        ret = -EINVAL;
-        goto fail;
    }

    /* initialize zlib engine */
-    s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
-                                              ds.max_compressed_size + 1);
-    s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
-                                                512 * ds.max_sectors_per_chunk);
-    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    s->compressed_chunk = g_malloc(max_compressed_size + 1);
+    s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
@@ -540,8 +302,8 @@ fail:
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
+    g_free(s->compressed_chunk);
+    g_free(s->uncompressed_chunk);
    return ret;
 }

@@ -580,20 +342,17 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
    if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
        int ret;
        uint32_t chunk = search_chunk(s, sector_num);
-#ifdef CONFIG_BZIP2
-        uint64_t total_out;
-#endif

        if (chunk >= s->n_chunks) {
            return -1;
        }

        s->current_chunk = s->n_chunks;
-        switch (s->types[chunk]) { /* block entry type */
+        switch (s->types[chunk]) {
        case 0x80000005: { /* zlib compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->compressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -613,44 +372,15 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
                return -1;
            }
            break; }
-#ifdef CONFIG_BZIP2
-        case 0x80000006: /* bzip2 compressed */
-            /* we need to buffer, because only the chunk as whole can be
-             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
-                return -1;
-            }
-
-            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
-            if (ret != BZ_OK) {
-                return -1;
-            }
-            s->bzstream.next_in = (char *)s->compressed_chunk;
-            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
-            s->bzstream.next_out = (char *)s->uncompressed_chunk;
-            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
-            ret = BZ2_bzDecompress(&s->bzstream);
-            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
-                        s->bzstream.total_out_lo32;
-            BZ2_bzDecompressEnd(&s->bzstream);
-            if (ret != BZ_STREAM_END ||
-                total_out != 512 * s->sectorcounts[chunk]) {
-                return -1;
-            }
-            break;
-#endif /* CONFIG_BZIP2 */
        case 1: /* copy */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+            ret = bdrv_pread(bs->file, s->offsets[chunk],
                             s->uncompressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
            }
            break;
        case 2: /* zero */
-            /* see dmg_read, it is treated specially. No buffer needs to be
-             * pre-filled, the zeroes can be set directly. */
+            memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
            break;
        }
        s->current_chunk = chunk;
@@ -669,13 +399,6 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
        if (dmg_read_chunk(bs, sector_num + i) != 0) {
            return -1;
        }
-        /* Special case: current chunk is all zeroes. Do not perform a memcpy as
-         * s->uncompressed_chunk may be too small to cover the large all-zeroes
-         * section. dmg_read_chunk is called to find s->current_chunk */
-        if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
-            memset(buf + i * 512, 0, 512);
-            continue;
-        }
        sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
        memcpy(buf + i * 512,
               s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
@@ -703,8 +426,8 @@ static void dmg_close(BlockDriverState *bs)
    g_free(s->lengths);
    g_free(s->sectors);
    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
+    g_free(s->compressed_chunk);
+    g_free(s->uncompressed_chunk);

    inflateEnd(&s->zstream);
 }
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -3,28 +3,42 @@
 *
 * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
 *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
 *
+ * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
+ *                         Josh Durgin <josh.durgin@dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
 */
-#include "qemu/osdep.h"
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
+#include "qemu/sockets.h"
 #include "qemu/uri.h"

 typedef struct GlusterAIOCB {
+    BlockDriverAIOCB common;
    int64_t size;
    int ret;
+    bool *finished;
    QEMUBH *bh;
-    Coroutine *coroutine;
-    AioContext *aio_context;
 } GlusterAIOCB;

 typedef struct BDRVGlusterState {
    struct glfs *glfs;
+    int fds[2];
    struct glfs_fd *fd;
+    int event_reader_pos;
+    GlusterAIOCB *event_acb;
 } BDRVGlusterState;

+#define GLUSTER_FD_READ  0
+#define GLUSTER_FD_WRITE 1
+
 typedef struct GlusterConf {
    char *server;
    int port;
@@ -35,13 +49,11 @@ typedef struct GlusterConf {

 static void qemu_gluster_gconf_free(GlusterConf *gconf)
 {
-    if (gconf) {
-        g_free(gconf->server);
-        g_free(gconf->volname);
-        g_free(gconf->image);
-        g_free(gconf->transport);
-        g_free(gconf);
-    }
+    g_free(gconf->server);
+    g_free(gconf->volname);
+    g_free(gconf->image);
+    g_free(gconf->transport);
+    g_free(gconf);
 }

 static int parse_volume_options(GlusterConf *gconf, char *path)
@@ -82,7 +94,7 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
 * 'server' specifies the server where the volume file specification for
 * the given volume resides. This can be either hostname, ipv4 address
 * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
+ * If transport type is 'unix', then 'server' field should not be specifed.
 * The 'socket' field needs to be populated with the path to unix domain
 * socket.
 *
@@ -119,7 +131,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
    }

    /* transport */
-    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
+    if (!strcmp(uri->scheme, "gluster")) {
        gconf->transport = g_strdup("tcp");
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
        gconf->transport = g_strdup("tcp");
@@ -155,7 +167,7 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
        }
        gconf->server = g_strdup(qp->p[0].value);
    } else {
-        gconf->server = g_strdup(uri->server ? uri->server : "localhost");
+        gconf->server = g_strdup(uri->server);
        gconf->port = uri->port;
    }

@@ -167,8 +179,7 @@ out:
    return ret;
 }

-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
-                                      Error **errp)
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
 {
    struct glfs *glfs = NULL;
    int ret;
@@ -176,8 +187,8 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,

    ret = qemu_gluster_parseuri(gconf, filename);
    if (ret < 0) {
-        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
-                   "volname/image[?socket=...]");
+        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+            "volname/image[?socket=...]");
        errno = -ret;
        goto out;
    }
@@ -204,16 +215,9 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,

    ret = glfs_init(glfs);
    if (ret) {
-        error_setg_errno(errp, errno,
-                         "Gluster connection failed for server=%s port=%d "
-                         "volume=%s image=%s transport=%s", gconf->server,
-                         gconf->port, gconf->volname, gconf->image,
-                         gconf->transport);
-
-        /* glfs_init sometimes doesn't set errno although docs suggest that */
-        if (errno == 0)
-            errno = EINVAL;
-
+        error_report("Gluster connection failed for server=%s port=%d "
+             "volume=%s image=%s transport=%s", gconf->server, gconf->port,
+             gconf->volname, gconf->image, gconf->transport);
        goto out;
    }
    return glfs;
@@ -227,32 +231,46 @@ out:
    return NULL;
 }

-static void qemu_gluster_complete_aio(void *opaque)
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
 {
-    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
+    int ret;
+    bool *finished = acb->finished;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;

-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    qemu_coroutine_enter(acb->coroutine, NULL);
-}
-
-/*
- * AIO callback routine called from GlusterFS thread.
- */
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-
-    if (!ret || ret == acb->size) {
-        acb->ret = 0; /* Success */
-    } else if (ret < 0) {
-        acb->ret = ret; /* Read/Write failed */
+    if (!acb->ret || acb->ret == acb->size) {
+        ret = 0; /* Success */
+    } else if (acb->ret < 0) {
+        ret = acb->ret; /* Read/Write failed */
    } else {
-        acb->ret = -EIO; /* Partial read/write - fail it */
+        ret = -EIO; /* Partial read/write - fail it */
    }

-    acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
-    qemu_bh_schedule(acb->bh);
+    qemu_aio_release(acb);
+    cb(opaque, ret);
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_acb;
+
+        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_acb)) {
+                s->event_reader_pos = 0;
+                qemu_gluster_complete_aio(s->event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
 }

 /* TODO Convert to fine grained options */
@@ -269,57 +287,60 @@ static QemuOptsList runtime_opts = {
    },
 };

-static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
-{
-    assert(open_flags != NULL);
-
-    *open_flags |= O_BINARY;
-
-    if (bdrv_flags & BDRV_O_RDWR) {
-        *open_flags |= O_RDWR;
-    } else {
-        *open_flags |= O_RDONLY;
-    }
-
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        *open_flags |= O_DIRECT;
-    }
-}
-
 static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
                             int bdrv_flags, Error **errp)
 {
    BDRVGlusterState *s = bs->opaque;
-    int open_flags = 0;
+    int open_flags = O_BINARY;
    int ret = 0;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
        ret = -EINVAL;
        goto out;
    }

    filename = qemu_opt_get(opts, "filename");

-    s->glfs = qemu_gluster_init(gconf, filename, errp);
+
+    s->glfs = qemu_gluster_init(gconf, filename);
    if (!s->glfs) {
        ret = -errno;
        goto out;
    }

-    qemu_gluster_parse_flags(bdrv_flags, &open_flags);
+    if (bdrv_flags & BDRV_O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        open_flags |= O_DIRECT;
+    }

    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
    if (!s->fd) {
        ret = -errno;
+        goto out;
    }

+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, s);
+
 out:
    qemu_opts_del(opts);
    qemu_gluster_gconf_free(gconf);
@@ -335,176 +356,26 @@ out:
    return ret;
 }

-typedef struct BDRVGlusterReopenState {
-    struct glfs *glfs;
-    struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
-static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
-                                       BlockReopenQueue *queue, Error **errp)
-{
-    int ret = 0;
-    BDRVGlusterReopenState *reop_s;
-    GlusterConf *gconf = NULL;
-    int open_flags = 0;
-
-    assert(state != NULL);
-    assert(state->bs != NULL);
-
-    state->opaque = g_new0(BDRVGlusterReopenState, 1);
-    reop_s = state->opaque;
-
-    qemu_gluster_parse_flags(state->flags, &open_flags);
-
-    gconf = g_new0(GlusterConf, 1);
-
-    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
-    if (reop_s->glfs == NULL) {
-        ret = -errno;
-        goto exit;
-    }
-
-    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
-    if (reop_s->fd == NULL) {
-        /* reops->glfs will be cleaned up in _abort */
-        ret = -errno;
-        goto exit;
-    }
-
-exit:
-    /* state->opaque will be freed in either the _abort or _commit */
-    qemu_gluster_gconf_free(gconf);
-    return ret;
-}
-
-static void qemu_gluster_reopen_commit(BDRVReopenState *state)
-{
-    BDRVGlusterReopenState *reop_s = state->opaque;
-    BDRVGlusterState *s = state->bs->opaque;
-
-
-    /* close the old */
-    if (s->fd) {
-        glfs_close(s->fd);
-    }
-    if (s->glfs) {
-        glfs_fini(s->glfs);
-    }
-
-    /* use the newly opened image / connection */
-    s->fd         = reop_s->fd;
-    s->glfs       = reop_s->glfs;
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-
-static void qemu_gluster_reopen_abort(BDRVReopenState *state)
-{
-    BDRVGlusterReopenState *reop_s = state->opaque;
-
-    if (reop_s == NULL) {
-        return;
-    }
-
-    if (reop_s->fd) {
-        glfs_close(reop_s->fd);
-    }
-
-    if (reop_s->glfs) {
-        glfs_fini(reop_s->glfs);
-    }
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    int ret;
-    GlusterAIOCB acb;
-    BDRVGlusterState *s = bs->opaque;
-    off_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-
-    acb.size = size;
-    acb.ret = 0;
-    acb.coroutine = qemu_coroutine_self();
-    acb.aio_context = bdrv_get_aio_context(bs);
-
-    ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    qemu_coroutine_yield();
-    return acb.ret;
-}
-
-static inline bool gluster_supports_zerofill(void)
-{
-    return 1;
-}
-
-static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
-{
-    return glfs_zerofill(fd, offset, size);
-}
-
-#else
-static inline bool gluster_supports_zerofill(void)
-{
-    return 0;
-}
-
-static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
-{
-    return 0;
-}
-#endif
-
 static int qemu_gluster_create(const char *filename,
-                               QemuOpts *opts, Error **errp)
+        QEMUOptionParameter *options, Error **errp)
 {
    struct glfs *glfs;
    struct glfs_fd *fd;
    int ret = 0;
-    int prealloc = 0;
    int64_t total_size = 0;
-    char *tmp = NULL;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));

-    glfs = qemu_gluster_init(gconf, filename, errp);
+    glfs = qemu_gluster_init(gconf, filename);
    if (!glfs) {
        ret = -errno;
        goto out;
    }

-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    if (!tmp || !strcmp(tmp, "off")) {
-        prealloc = 0;
-    } else if (!strcmp(tmp, "full") &&
-               gluster_supports_zerofill()) {
-        prealloc = 1;
-    } else {
-        error_setg(errp, "Invalid preallocation mode: '%s'"
-            " or GlusterFS doesn't support zerofill API",
-            tmp);
-        ret = -EINVAL;
-        goto out;
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
    }

    fd = glfs_creat(glfs, gconf->image,
@@ -512,20 +383,14 @@ static int qemu_gluster_create(const char *filename,
    if (!fd) {
        ret = -errno;
    } else {
-        if (!glfs_ftruncate(fd, total_size)) {
-            if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
-                ret = -errno;
-            }
-        } else {
+        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
            ret = -errno;
        }
-
        if (glfs_close(fd) != 0) {
            ret = -errno;
        }
    }
 out:
-    g_free(tmp);
    qemu_gluster_gconf_free(gconf);
    if (glfs) {
        glfs_fini(glfs);
@@ -533,34 +398,75 @@ out:
    return ret;
 }

-static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+    bool finished = false;
+
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static const AIOCBInfo gluster_aiocb_info = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+    .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVGlusterState *s = bs->opaque;
+    int retval;
+
+    acb->ret = ret;
+    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+    if (retval != sizeof(acb)) {
+        /*
+         * Gluster AIO callback thread failed to notify the waiting
+         * QEMU thread about IO completion.
+         */
+        error_report("Gluster AIO completion failed: %s", strerror(errno));
+        abort();
+    }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
 {
    int ret;
-    GlusterAIOCB acb;
+    GlusterAIOCB *acb;
    BDRVGlusterState *s = bs->opaque;
-    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+    size_t size;
+    off_t offset;

-    acb.size = size;
-    acb.ret = 0;
-    acb.coroutine = qemu_coroutine_self();
-    acb.aio_context = bdrv_get_aio_context(bs);
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb->size = size;
+    acb->ret = 0;
+    acb->finished = NULL;

    if (write) {
        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+            &gluster_finish_aiocb, acb);
    } else {
        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-            gluster_finish_aiocb, &acb);
+            &gluster_finish_aiocb, acb);
    }

    if (ret < 0) {
-        return -errno;
+        goto out;
    }
+    return &acb->common;

-    qemu_coroutine_yield();
-    return acb.ret;
+out:
+    qemu_aio_release(acb);
+    return NULL;
 }

 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -576,60 +482,71 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
    return 0;
 }

-static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
-    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 }

-static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
-    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
 }

-static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
 {
    int ret;
-    GlusterAIOCB acb;
+    GlusterAIOCB *acb;
    BDRVGlusterState *s = bs->opaque;

-    acb.size = 0;
-    acb.ret = 0;
-    acb.coroutine = qemu_coroutine_self();
-    acb.aio_context = bdrv_get_aio_context(bs);
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;

-    ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
    if (ret < 0) {
-        return -errno;
+        goto out;
    }
+    return &acb->common;

-    qemu_coroutine_yield();
-    return acb.ret;
+out:
+    qemu_aio_release(acb);
+    return NULL;
 }

 #ifdef CONFIG_GLUSTERFS_DISCARD
-static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
+static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
+        void *opaque)
 {
    int ret;
-    GlusterAIOCB acb;
+    GlusterAIOCB *acb;
    BDRVGlusterState *s = bs->opaque;
-    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+    size_t size;
+    off_t offset;

-    acb.size = 0;
-    acb.ret = 0;
-    acb.coroutine = qemu_coroutine_self();
-    acb.aio_context = bdrv_get_aio_context(bs);
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;

-    ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;
+
+    ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
    if (ret < 0) {
-        return -errno;
+        goto out;
    }
+    return &acb->common;

-    qemu_coroutine_yield();
-    return acb.ret;
+out:
+    qemu_aio_release(acb);
+    return NULL;
 }
 #endif

@@ -664,6 +581,10 @@ static void qemu_gluster_close(BlockDriverState *bs)
 {
    BDRVGlusterState *s = bs->opaque;

+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
+
    if (s->fd) {
        glfs_close(s->fd);
        s->fd = NULL;
@@ -677,22 +598,13 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
    return 0;
 }

-static QemuOptsList qemu_gluster_create_opts = {
-    .name = "qemu-gluster-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        { /* end of list */ }
-    }
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
 };

 static BlockDriver bdrv_gluster = {
@@ -701,25 +613,19 @@ static BlockDriver bdrv_gluster = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_aio_discard             = qemu_gluster_aio_discard,
 #endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
+    .create_options               = qemu_gluster_create_options,
 };

 static BlockDriver bdrv_gluster_tcp = {
@@ -728,25 +634,19 @@ static BlockDriver bdrv_gluster_tcp = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_aio_discard             = qemu_gluster_aio_discard,
 #endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
+    .create_options               = qemu_gluster_create_options,
 };

 static BlockDriver bdrv_gluster_unix = {
@@ -755,25 +655,19 @@ static BlockDriver bdrv_gluster_unix = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_aio_discard             = qemu_gluster_aio_discard,
 #endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
+    .create_options               = qemu_gluster_create_options,
 };

 static BlockDriver bdrv_gluster_rdma = {
@@ -782,25 +676,19 @@ static BlockDriver bdrv_gluster_rdma = {
    .instance_size                = sizeof(BDRVGlusterState),
    .bdrv_needs_filename          = true,
    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
    .bdrv_close                   = qemu_gluster_close,
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
+    .bdrv_aio_discard             = qemu_gluster_aio_discard,
 #endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
+    .create_options               = qemu_gluster_create_options,
 };

 static void bdrv_gluster_init(void)
--- a/block/io.c
+++ b/block/io.c
--- a/block/iscsi.c
+++ b/block/iscsi.c
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -7,7 +7,6 @@
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/aio.h"
 #include "qemu/queue.h"
@@ -26,42 +25,22 @@
 */
 #define MAX_EVENTS 128

-#define MAX_QUEUED_IO  128
-
 struct qemu_laiocb {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    struct qemu_laio_state *ctx;
    struct iocb iocb;
    ssize_t ret;
    size_t nbytes;
    QEMUIOVector *qiov;
    bool is_read;
-    QSIMPLEQ_ENTRY(qemu_laiocb) next;
+    QLIST_ENTRY(qemu_laiocb) node;
 };

-typedef struct {
-    int plugged;
-    unsigned int n;
-    bool blocked;
-    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
-} LaioQueue;
-
 struct qemu_laio_state {
    io_context_t ctx;
    EventNotifier e;
-
-    /* io queue for submit at batch */
-    LaioQueue io_q;
-
-    /* I/O completion processing */
-    QEMUBH *completion_bh;
-    struct io_event events[MAX_EVENTS];
-    int event_idx;
-    int event_max;
 };

-static void ioq_submit(struct qemu_laio_state *s);
-
 static inline ssize_t io_event_ret(struct io_event *ev)
 {
    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
@@ -88,159 +67,77 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
                ret = -EINVAL;
            }
        }
-    }
-    laiocb->common.cb(laiocb->common.opaque, ret);

-    qemu_aio_unref(laiocb);
-}
-
-/* The completion BH fetches completed I/O requests and invokes their
- * callbacks.
- *
- * The function is somewhat tricky because it supports nested event loops, for
- * example when a request callback invokes aio_poll().  In order to do this,
- * the completion events array and index are kept in qemu_laio_state.  The BH
- * reschedules itself as long as there are completions pending so it will
- * either be called again in a nested event loop or will be called after all
- * events have been completed.  When there are no events left to complete, the
- * BH returns without rescheduling.
- */
-static void qemu_laio_completion_bh(void *opaque)
-{
-    struct qemu_laio_state *s = opaque;
-
-    /* Fetch more completion events when empty */
-    if (s->event_idx == s->event_max) {
-        do {
-            struct timespec ts = { 0 };
-            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
-                                        s->events, &ts);
-        } while (s->event_max == -EINTR);
-
-        s->event_idx = 0;
-        if (s->event_max <= 0) {
-            s->event_max = 0;
-            return; /* no more events */
-        }
+        laiocb->common.cb(laiocb->common.opaque, ret);
    }

-    /* Reschedule so nested event loops see currently pending completions */
-    qemu_bh_schedule(s->completion_bh);
-
-    /* Process completion events */
-    while (s->event_idx < s->event_max) {
-        struct iocb *iocb = s->events[s->event_idx].obj;
-        struct qemu_laiocb *laiocb =
-                container_of(iocb, struct qemu_laiocb, iocb);
-
-        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
-        s->event_idx++;
-
-        qemu_laio_process_completion(s, laiocb);
-    }
-
-    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
-    }
+    qemu_aio_release(laiocb);
 }

 static void qemu_laio_completion_cb(EventNotifier *e)
 {
    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);

-    if (event_notifier_test_and_clear(&s->e)) {
-        qemu_bh_schedule(s->completion_bh);
+    while (event_notifier_test_and_clear(&s->e)) {
+        struct io_event events[MAX_EVENTS];
+        struct timespec ts = { 0 };
+        int nevents, i;
+
+        do {
+            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
+        } while (nevents == -EINTR);
+
+        for (i = 0; i < nevents; i++) {
+            struct iocb *iocb = events[i].obj;
+            struct qemu_laiocb *laiocb =
+                    container_of(iocb, struct qemu_laiocb, iocb);
+
+            laiocb->ret = io_event_ret(&events[i]);
+            qemu_laio_process_completion(s, laiocb);
+        }
    }
 }

-static void laio_cancel(BlockAIOCB *blockacb)
+static void laio_cancel(BlockDriverAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
    int ret;

-    if (laiocb->ret != -EINPROGRESS) {
+    if (laiocb->ret != -EINPROGRESS)
        return;
-    }
+
+    /*
+     * Note that as of Linux 2.6.31 neither the block device code nor any
+     * filesystem implements cancellation of AIO request.
+     * Thus the polling loop below is the normal code path.
+     */
    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
+    if (ret == 0) {
+        laiocb->ret = -ECANCELED;
        return;
    }

-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
+    /*
+     * We have to wait for the iocb to finish.
+     *
+     * The only way to get the iocb status update is by polling the io context.
+     * We might be able to do this slightly more optimal by removing the
+     * O_NONBLOCK flag.
+     */
+    while (laiocb->ret == -EINPROGRESS) {
+        qemu_laio_completion_cb(&laiocb->ctx->e);
+    }
 }

 static const AIOCBInfo laio_aiocb_info = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
+    .cancel             = laio_cancel,
 };

-static void ioq_init(LaioQueue *io_q)
-{
-    QSIMPLEQ_INIT(&io_q->pending);
-    io_q->plugged = 0;
-    io_q->n = 0;
-    io_q->blocked = false;
-}
-
-static void ioq_submit(struct qemu_laio_state *s)
-{
-    int ret, len;
-    struct qemu_laiocb *aiocb;
-    struct iocb *iocbs[MAX_QUEUED_IO];
-    QSIMPLEQ_HEAD(, qemu_laiocb) completed;
-
-    do {
-        len = 0;
-        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
-            iocbs[len++] = &aiocb->iocb;
-            if (len == MAX_QUEUED_IO) {
-                break;
-            }
-        }
-
-        ret = io_submit(s->ctx, len, iocbs);
-        if (ret == -EAGAIN) {
-            break;
-        }
-        if (ret < 0) {
-            abort();
-        }
-
-        s->io_q.n -= ret;
-        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
-        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
-    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.n > 0);
-}
-
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
-{
-    struct qemu_laio_state *s = aio_ctx;
-
-    s->io_q.plugged++;
-}
-
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
-{
-    struct qemu_laio_state *s = aio_ctx;
-
-    assert(s->io_q.plugged > 0 || !unplug);
-
-    if (unplug && --s->io_q.plugged > 0) {
-        return;
-    }
-
-    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
-    }
-}
-
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
 {
    struct qemu_laio_state *s = aio_ctx;
    struct qemu_laiocb *laiocb;
@@ -271,36 +168,15 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

-    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.n++;
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
-        ioq_submit(s);
-    }
+    if (io_submit(s->ctx, 1, &iocbs) < 0)
+        goto out_free_aiocb;
    return &laiocb->common;

 out_free_aiocb:
-    qemu_aio_unref(laiocb);
+    qemu_aio_release(laiocb);
    return NULL;
 }

-void laio_detach_aio_context(void *s_, AioContext *old_context)
-{
-    struct qemu_laio_state *s = s_;
-
-    aio_set_event_notifier(old_context, &s->e, false, NULL);
-    qemu_bh_delete(s->completion_bh);
-}
-
-void laio_attach_aio_context(void *s_, AioContext *new_context)
-{
-    struct qemu_laio_state *s = s_;
-
-    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb);
-}
-
 void *laio_init(void)
 {
    struct qemu_laio_state *s;
@@ -314,7 +190,7 @@ void *laio_init(void)
        goto out_close_efd;
    }

-    ioq_init(&s->io_q);
+    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);

    return s;

@@ -324,16 +200,3 @@ out_free_state:
    g_free(s);
    return NULL;
 }
-
-void laio_cleanup(void *s_)
-{
-    struct qemu_laio_state *s = s_;
-
-    event_notifier_cleanup(&s->e);
-
-    if (io_destroy(s->ctx) != 0) {
-        fprintf(stderr, "%s: destroy AIO context %p failed\n",
-                        __func__, &s->ctx);
-    }
-    g_free(s);
-}
--- a/block/mirror.c
+++ b/block/mirror.c
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,439 +0,0 @@
-/*
- * QEMU Block driver for  NBD
- *
- * Copyright (C) 2008 Bull S.A.S.
- *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * Some parts:
- *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "nbd-client.h"
-
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
-
-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
-{
-    int i;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        }
-    }
-}
-
-static void nbd_teardown_connection(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-
-    if (!client->ioc) { /* Already closed */
-        return;
-    }
-
-    /* finish any pending coroutines */
-    qio_channel_shutdown(client->ioc,
-                         QIO_CHANNEL_SHUTDOWN_BOTH,
-                         NULL);
-    nbd_recv_coroutines_enter_all(client);
-
-    nbd_client_detach_aio_context(bs);
-    object_unref(OBJECT(client->sioc));
-    client->sioc = NULL;
-    object_unref(OBJECT(client->ioc));
-    client->ioc = NULL;
-}
-
-static void nbd_reply_ready(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    NbdClientSession *s = nbd_get_client_session(bs);
-    uint64_t i;
-    int ret;
-
-    if (!s->ioc) { /* Already closed */
-        return;
-    }
-
-    if (s->reply.handle == 0) {
-        /* No reply already in flight.  Fetch a header.  It is possible
-         * that another thread has done the same thing in parallel, so
-         * the socket is not readable anymore.
-         */
-        ret = nbd_receive_reply(s->ioc, &s->reply);
-        if (ret == -EAGAIN) {
-            return;
-        }
-        if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
-        }
-    }
-
-    /* There's no need for a mutex on the receive side, because the
-     * handler acts as a synchronization point and ensures that only
-     * one coroutine is called until the reply finishes.  */
-    i = HANDLE_TO_INDEX(s, s->reply.handle);
-    if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
-    }
-
-    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        return;
-    }
-
-fail:
-    nbd_teardown_connection(bs);
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-
-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
-}
-
-static int nbd_co_send_request(BlockDriverState *bs,
-                               struct nbd_request *request,
-                               QEMUIOVector *qiov, int offset)
-{
-    NbdClientSession *s = nbd_get_client_session(bs);
-    AioContext *aio_context;
-    int rc, ret, i;
-
-    qemu_co_mutex_lock(&s->send_mutex);
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i] == NULL) {
-            s->recv_coroutine[i] = qemu_coroutine_self();
-            break;
-        }
-    }
-
-    g_assert(qemu_in_coroutine());
-    assert(i < MAX_NBD_REQUESTS);
-    request->handle = INDEX_TO_HANDLE(s, i);
-
-    if (!s->ioc) {
-        qemu_co_mutex_unlock(&s->send_mutex);
-        return -EPIPE;
-    }
-
-    s->send_coroutine = qemu_coroutine_self();
-    aio_context = bdrv_get_aio_context(bs);
-
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, bs);
-    if (qiov) {
-        qio_channel_set_cork(s->ioc, true);
-        rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 0);
-            if (ret != request->len) {
-                rc = -EIO;
-            }
-        }
-        qio_channel_set_cork(s->ioc, false);
-    } else {
-        rc = nbd_send_request(s->ioc, request);
-    }
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, bs);
-    s->send_coroutine = NULL;
-    qemu_co_mutex_unlock(&s->send_mutex);
-    return rc;
-}
-
-static void nbd_co_receive_reply(NbdClientSession *s,
-    struct nbd_request *request, struct nbd_reply *reply,
-    QEMUIOVector *qiov, int offset)
-{
-    int ret;
-
-    /* Wait until we're woken up by the read handler.  TODO: perhaps
-     * peek at the next reply and avoid yielding if it's ours?  */
-    qemu_coroutine_yield();
-    *reply = s->reply;
-    if (reply->handle != request->handle ||
-        !s->ioc) {
-        reply->error = EIO;
-    } else {
-        if (qiov && reply->error == 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 1);
-            if (ret != request->len) {
-                reply->error = EIO;
-            }
-        }
-
-        /* Tell the read handler to read another header.  */
-        s->reply.handle = 0;
-    }
-}
-
-static void nbd_coroutine_start(NbdClientSession *s,
-   struct nbd_request *request)
-{
-    /* Poor man semaphore.  The free_sema is locked when no other request
-     * can be accepted, and unlocked after receiving one reply.  */
-    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
-        qemu_co_mutex_lock(&s->free_sema);
-        assert(s->in_flight < MAX_NBD_REQUESTS);
-    }
-    s->in_flight++;
-
-    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
-}
-
-static void nbd_coroutine_end(NbdClientSession *s,
-    struct nbd_request *request)
-{
-    int i = HANDLE_TO_INDEX(s, request->handle);
-    s->recv_coroutine[i] = NULL;
-    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_unlock(&s->free_sema);
-    }
-}
-
-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov,
-                          int offset)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_READ };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, qiov, offset);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-
-}
-
-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
-                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_WRITE };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if (!bdrv_enable_write_cache(bs) &&
-        (client->nbdflags & NBD_FLAG_SEND_FUA)) {
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, qiov, offset);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-}
-
-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_co_flush(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_FLUSH };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
-        return 0;
-    }
-
-    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
-    request.from = 0;
-    request.len = 0;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-}
-
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_TRIM };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
-        return 0;
-    }
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-
-}
-
-void nbd_client_detach_aio_context(BlockDriverState *bs)
-{
-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL);
-}
-
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, bs);
-}
-
-void nbd_client_close(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
-        .type = NBD_CMD_DISC,
-        .from = 0,
-        .len = 0
-    };
-
-    if (client->ioc == NULL) {
-        return;
-    }
-
-    nbd_send_request(client->ioc, &request);
-
-    nbd_teardown_connection(bs);
-}
-
-int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sioc,
-                    const char *export,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    Error **errp)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    int ret;
-
-    /* NBD handshake */
-    logout("session init %s\n", export);
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
-
-    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
-                                &client->nbdflags,
-                                tlscreds, hostname,
-                                &client->ioc,
-                                &client->size, errp);
-    if (ret < 0) {
-        logout("Failed to negotiate with the NBD server\n");
-        return ret;
-    }
-
-    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_mutex_init(&client->free_sema);
-    client->sioc = sioc;
-    object_ref(OBJECT(client->sioc));
-
-    if (!client->ioc) {
-        client->ioc = QIO_CHANNEL(sioc);
-        object_ref(OBJECT(client->ioc));
-    }
-
-    /* Now that we're connected, set the socket to be non-blocking and
-     * kick the reply mechanism.  */
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-
-    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-    logout("Established connection with NBD server\n");
-    return 0;
-}
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -1,59 +0,0 @@
-#ifndef NBD_CLIENT_H
-#define NBD_CLIENT_H
-
-#include "qemu-common.h"
-#include "block/nbd.h"
-#include "block/block_int.h"
-#include "io/channel-socket.h"
-
-/* #define DEBUG_NBD */
-
-#if defined(DEBUG_NBD)
-#define logout(fmt, ...) \
-    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
-#else
-#define logout(fmt, ...) ((void)0)
-#endif
-
-#define MAX_NBD_REQUESTS    16
-
-typedef struct NbdClientSession {
-    QIOChannelSocket *sioc; /* The master data channel */
-    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint32_t nbdflags;
-    off_t size;
-
-    CoMutex send_mutex;
-    CoMutex free_sema;
-    Coroutine *send_coroutine;
-    int in_flight;
-
-    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-    struct nbd_reply reply;
-
-    bool is_unix;
-} NbdClientSession;
-
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
-
-int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sock,
-                    const char *export_name,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    Error **errp);
-void nbd_client_close(BlockDriverState *bs);
-
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors);
-int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov);
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov);
-
-void nbd_client_detach_aio_context(BlockDriverState *bs);
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context);
-
-#endif /* NBD_CLIENT_H */
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -26,21 +26,51 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
-#include "block/nbd-client.h"
+#include "qemu-common.h"
+#include "block/nbd.h"
 #include "qemu/uri.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
-#include "qapi/qmp/qdict.h"
+#include "qemu/sockets.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"

+#include <sys/types.h>
+#include <unistd.h>

 #define EN_OPTSTR ":exportname="

+/* #define DEBUG_NBD */
+
+#if defined(DEBUG_NBD)
+#define logout(fmt, ...) \
+                fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
+#else
+#define logout(fmt, ...) ((void)0)
+#endif
+
+#define MAX_NBD_REQUESTS	16
+#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
+#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
+
 typedef struct BDRVNBDState {
-    NbdClientSession client;
+    int sock;
+    uint32_t nbdflags;
+    off_t size;
+    size_t blocksize;
+
+    CoMutex send_mutex;
+    CoMutex free_sema;
+    Coroutine *send_coroutine;
+    int in_flight;
+
+    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
+    struct nbd_reply reply;
+
+    bool is_unix;
+    QemuOpts *socket_opts;
+
+    char *export_name; /* An NBD server may export several devices */
 } BDRVNBDState;

 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -174,7 +204,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        InetSocketAddress *addr = NULL;

        addr = inet_parse(host_spec, errp);
-        if (!addr) {
+        if (error_is_set(errp)) {
            goto out;
        }

@@ -187,320 +217,468 @@ out:
    g_free(file);
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
-                                 Error **errp)
+static int nbd_config(BDRVNBDState *s, QDict *options)
 {
-    SocketAddress *saddr;
-
-    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
-        if (qdict_haskey(options, "path")) {
-            error_setg(errp, "path and host may not be used at the same time.");
-        } else {
-            error_setg(errp, "one of path and host must be specified.");
-        }
-        return NULL;
-    }
-
-    saddr = g_new0(SocketAddress, 1);
+    Error *local_err = NULL;

    if (qdict_haskey(options, "path")) {
-        UnixSocketAddress *q_unix;
-        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
-        q_unix = saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
-        q_unix->path = g_strdup(qdict_get_str(options, "path"));
-        qdict_del(options, "path");
-    } else {
-        InetSocketAddress *inet;
-        saddr->type = SOCKET_ADDRESS_KIND_INET;
-        inet = saddr->u.inet = g_new0(InetSocketAddress, 1);
-        inet->host = g_strdup(qdict_get_str(options, "host"));
-        if (!qdict_get_try_str(options, "port")) {
-            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
-        } else {
-            inet->port = g_strdup(qdict_get_str(options, "port"));
+        if (qdict_haskey(options, "host")) {
+            qerror_report(ERROR_CLASS_GENERIC_ERROR, "path and host may not "
+                          "be used at the same time.");
+            return -EINVAL;
        }
-        qdict_del(options, "host");
-        qdict_del(options, "port");
+        s->is_unix = true;
+    } else if (qdict_haskey(options, "host")) {
+        s->is_unix = false;
+    } else {
+        return -EINVAL;
    }

-    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
+    s->socket_opts = qemu_opts_create_nofail(&socket_optslist);

-    *export = g_strdup(qdict_get_try_str(options, "export"));
-    if (*export) {
+    qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return -EINVAL;
+    }
+
+    if (!qemu_opt_get(s->socket_opts, "port")) {
+        qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
+    }
+
+    s->export_name = g_strdup(qdict_get_try_str(options, "export"));
+    if (s->export_name) {
        qdict_del(options, "export");
    }

-    return saddr;
+    return 0;
 }

-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
+
+static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
+{
+    int i;
+
+    /* Poor man semaphore.  The free_sema is locked when no other request
+     * can be accepted, and unlocked after receiving one reply.  */
+    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
+        qemu_co_mutex_lock(&s->free_sema);
+        assert(s->in_flight < MAX_NBD_REQUESTS);
+    }
+    s->in_flight++;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->recv_coroutine[i] == NULL) {
+            s->recv_coroutine[i] = qemu_coroutine_self();
+            break;
+        }
+    }
+
+    assert(i < MAX_NBD_REQUESTS);
+    request->handle = INDEX_TO_HANDLE(s, i);
+}
+
+static void nbd_reply_ready(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+    uint64_t i;
+    int ret;
+
+    if (s->reply.handle == 0) {
+        /* No reply already in flight.  Fetch a header.  It is possible
+         * that another thread has done the same thing in parallel, so
+         * the socket is not readable anymore.
+         */
+        ret = nbd_receive_reply(s->sock, &s->reply);
+        if (ret == -EAGAIN) {
+            return;
+        }
+        if (ret < 0) {
+            s->reply.handle = 0;
+            goto fail;
+        }
+    }
+
+    /* There's no need for a mutex on the receive side, because the
+     * handler acts as a synchronization point and ensures that only
+     * one coroutine is called until the reply finishes.  */
+    i = HANDLE_TO_INDEX(s, s->reply.handle);
+    if (i >= MAX_NBD_REQUESTS) {
+        goto fail;
+    }
+
+    if (s->recv_coroutine[i]) {
+        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+        return;
+    }
+
+fail:
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (s->recv_coroutine[i]) {
+            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+        }
+    }
+}
+
+static void nbd_restart_write(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+    qemu_coroutine_enter(s->send_coroutine, NULL);
+}
+
+static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
+                               QEMUIOVector *qiov, int offset)
+{
+    int rc, ret;
+
+    qemu_co_mutex_lock(&s->send_mutex);
+    s->send_coroutine = qemu_coroutine_self();
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+    if (qiov) {
+        if (!s->is_unix) {
+            socket_set_cork(s->sock, 1);
+        }
+        rc = nbd_send_request(s->sock, request);
+        if (rc >= 0) {
+            ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
+                                offset, request->len);
+            if (ret != request->len) {
+                rc = -EIO;
+            }
+        }
+        if (!s->is_unix) {
+            socket_set_cork(s->sock, 0);
+        }
+    } else {
+        rc = nbd_send_request(s->sock, request);
+    }
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+    s->send_coroutine = NULL;
+    qemu_co_mutex_unlock(&s->send_mutex);
+    return rc;
+}
+
+static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
+                                 struct nbd_reply *reply,
+                                 QEMUIOVector *qiov, int offset)
+{
+    int ret;
+
+    /* Wait until we're woken up by the read handler.  TODO: perhaps
+     * peek at the next reply and avoid yielding if it's ours?  */
+    qemu_coroutine_yield();
+    *reply = s->reply;
+    if (reply->handle != request->handle) {
+        reply->error = EIO;
+    } else {
+        if (qiov && reply->error == 0) {
+            ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
+                                offset, request->len);
+            if (ret != request->len) {
+                reply->error = EIO;
+            }
+        }
+
+        /* Tell the read handler to read another header.  */
+        s->reply.handle = 0;
+    }
+}
+
+static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
+{
+    int i = HANDLE_TO_INDEX(s, request->handle);
+    s->recv_coroutine[i] = NULL;
+    if (s->in_flight-- == MAX_NBD_REQUESTS) {
+        qemu_co_mutex_unlock(&s->free_sema);
+    }
+}
+
+static int nbd_establish_connection(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;
-    return &s->client;
+    int sock;
+    int ret;
+    off_t size;
+    size_t blocksize;
+
+    if (s->is_unix) {
+        sock = unix_socket_outgoing(qemu_opt_get(s->socket_opts, "path"));
+    } else {
+        sock = tcp_socket_outgoing_opts(s->socket_opts);
+        if (sock >= 0) {
+            socket_set_nodelay(sock);
+        }
+    }
+
+    /* Failed to establish connection */
+    if (sock < 0) {
+        logout("Failed to establish connection to NBD server\n");
+        return -errno;
+    }
+
+    /* NBD handshake */
+    ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
+                                &blocksize);
+    if (ret < 0) {
+        logout("Failed to negotiate with the NBD server\n");
+        closesocket(sock);
+        return ret;
+    }
+
+    /* Now that we're connected, set the socket to be non-blocking and
+     * kick the reply mechanism.  */
+    qemu_set_nonblock(sock);
+    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, s);
+
+    s->sock = sock;
+    s->size = size;
+    s->blocksize = blocksize;
+
+    logout("Established connection with NBD server\n");
+    return 0;
 }

-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
+static void nbd_teardown_connection(BlockDriverState *bs)
 {
-    QIOChannelSocket *sioc;
-    Error *local_err = NULL;
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;

-    sioc = qio_channel_socket_new();
+    request.type = NBD_CMD_DISC;
+    request.from = 0;
+    request.len = 0;
+    nbd_send_request(s->sock, &request);

-    qio_channel_socket_connect_sync(sioc,
-                                    saddr,
-                                    &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return NULL;
-    }
-
-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
-
-    return sioc;
+    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+    closesocket(s->sock);
 }

-
-static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
-{
-    Object *obj;
-    QCryptoTLSCreds *creds;
-
-    obj = object_resolve_path_component(
-        object_get_objects_root(), id);
-    if (!obj) {
-        error_setg(errp, "No TLS credentials with id '%s'",
-                   id);
-        return NULL;
-    }
-    creds = (QCryptoTLSCreds *)
-        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
-    if (!creds) {
-        error_setg(errp, "Object with id '%s' is not TLS credentials",
-                   id);
-        return NULL;
-    }
-
-    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
-        error_setg(errp,
-                   "Expecting TLS credentials with a client endpoint");
-        return NULL;
-    }
-    object_ref(obj);
-    return creds;
-}
-
-
 static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
-    char *export = NULL;
-    QIOChannelSocket *sioc = NULL;
-    SocketAddress *saddr;
-    const char *tlscredsid;
-    QCryptoTLSCreds *tlscreds = NULL;
-    const char *hostname = NULL;
-    int ret = -EINVAL;
+    int result;
+
+    qemu_co_mutex_init(&s->send_mutex);
+    qemu_co_mutex_init(&s->free_sema);

    /* Pop the config into our state object. Exit if invalid. */
-    saddr = nbd_config(s, options, &export, errp);
-    if (!saddr) {
-        goto error;
-    }
-
-    tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
-    if (tlscredsid) {
-        qdict_del(options, "tls-creds");
-        tlscreds = nbd_get_tls_creds(tlscredsid, errp);
-        if (!tlscreds) {
-            goto error;
-        }
-
-        if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
-            error_setg(errp, "TLS only supported over IP sockets");
-            goto error;
-        }
-        hostname = saddr->u.inet->host;
+    result = nbd_config(s, options);
+    if (result != 0) {
+        return result;
    }

    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    sioc = nbd_establish_connection(saddr, errp);
-    if (!sioc) {
-        ret = -ECONNREFUSED;
-        goto error;
+    result = nbd_establish_connection(bs);
+
+    return result;
+}
+
+static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
+                          int nb_sectors, QEMUIOVector *qiov,
+                          int offset)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    request.type = NBD_CMD_READ;
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(s, &request);
+    ret = nbd_co_send_request(s, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(s, &request, &reply, qiov, offset);
+    }
+    nbd_coroutine_end(s, &request);
+    return -reply.error;
+
+}
+
+static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
+                           int nb_sectors, QEMUIOVector *qiov,
+                           int offset)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    request.type = NBD_CMD_WRITE;
+    if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
+        request.type |= NBD_CMD_FLAG_FUA;
    }

-    /* NBD handshake */
-    ret = nbd_client_init(bs, sioc, export,
-                          tlscreds, hostname, errp);
- error:
-    if (sioc) {
-        object_unref(OBJECT(sioc));
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(s, &request);
+    ret = nbd_co_send_request(s, &request, qiov, offset);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
    }
-    if (tlscreds) {
-        object_unref(OBJECT(tlscreds));
-    }
-    qapi_free_SocketAddress(saddr);
-    g_free(export);
-    return ret;
+    nbd_coroutine_end(s, &request);
+    return -reply.error;
 }

+/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
+ * remain aligned to 4K. */
+#define NBD_MAX_SECTORS 2040
+
 static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
 {
-    return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
 }

 static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
 {
-    return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov);
+    int offset = 0;
+    int ret;
+    while (nb_sectors > NBD_MAX_SECTORS) {
+        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+        if (ret < 0) {
+            return ret;
+        }
+        offset += NBD_MAX_SECTORS * 512;
+        sector_num += NBD_MAX_SECTORS;
+        nb_sectors -= NBD_MAX_SECTORS;
+    }
+    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
 }

 static int nbd_co_flush(BlockDriverState *bs)
 {
-    return nbd_client_co_flush(bs);
-}
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+    ssize_t ret;

-static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
-    bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
+    if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
+        return 0;
+    }
+
+    request.type = NBD_CMD_FLUSH;
+    if (s->nbdflags & NBD_FLAG_SEND_FUA) {
+        request.type |= NBD_CMD_FLAG_FUA;
+    }
+
+    request.from = 0;
+    request.len = 0;
+
+    nbd_coroutine_start(s, &request);
+    ret = nbd_co_send_request(s, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
+    }
+    nbd_coroutine_end(s, &request);
+    return -reply.error;
 }

 static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors)
 {
-    return nbd_client_co_discard(bs, sector_num, nb_sectors);
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request request;
+    struct nbd_reply reply;
+    ssize_t ret;
+
+    if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
+        return 0;
+    }
+    request.type = NBD_CMD_TRIM;
+    request.from = sector_num * 512;
+    request.len = nb_sectors * 512;
+
+    nbd_coroutine_start(s, &request);
+    ret = nbd_co_send_request(s, &request, NULL, 0);
+    if (ret < 0) {
+        reply.error = -ret;
+    } else {
+        nbd_co_receive_reply(s, &request, &reply, NULL, 0);
+    }
+    nbd_coroutine_end(s, &request);
+    return -reply.error;
 }

 static void nbd_close(BlockDriverState *bs)
 {
-    nbd_client_close(bs);
+    BDRVNBDState *s = bs->opaque;
+    g_free(s->export_name);
+    qemu_opts_del(s->socket_opts);
+
+    nbd_teardown_connection(bs);
 }

 static int64_t nbd_getlength(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;

-    return s->client.size;
-}
-
-static void nbd_detach_aio_context(BlockDriverState *bs)
-{
-    nbd_client_detach_aio_context(bs);
-}
-
-static void nbd_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    nbd_client_attach_aio_context(bs, new_context);
-}
-
-static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    QDict *opts = qdict_new();
-    const char *path   = qdict_get_try_str(options, "path");
-    const char *host   = qdict_get_try_str(options, "host");
-    const char *port   = qdict_get_try_str(options, "port");
-    const char *export = qdict_get_try_str(options, "export");
-    const char *tlscreds = qdict_get_try_str(options, "tls-creds");
-
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
-
-    if (path && export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:///%s?socket=%s", export, path);
-    } else if (path && !export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix://?socket=%s", path);
-    } else if (!path && export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s/%s", host, port, export);
-    } else if (!path && export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s/%s", host, export);
-    } else if (!path && !export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s", host, port);
-    } else if (!path && !export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s", host);
-    }
-
-    if (path) {
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
-    } else if (port) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
-    } else {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-    }
-    if (export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
-    }
-    if (tlscreds) {
-        qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
-    }
-
-    bs->full_open_options = opts;
+    return s->size;
 }

 static BlockDriver bdrv_nbd = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
+    .format_name         = "nbd",
+    .protocol_name       = "nbd",
+    .instance_size       = sizeof(BDRVNBDState),
+    .bdrv_parse_filename = nbd_parse_filename,
+    .bdrv_file_open      = nbd_open,
+    .bdrv_co_readv       = nbd_co_readv,
+    .bdrv_co_writev      = nbd_co_writev,
+    .bdrv_close          = nbd_close,
+    .bdrv_co_flush_to_os = nbd_co_flush,
+    .bdrv_co_discard     = nbd_co_discard,
+    .bdrv_getlength      = nbd_getlength,
 };

 static BlockDriver bdrv_nbd_tcp = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd+tcp",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
+    .format_name         = "nbd",
+    .protocol_name       = "nbd+tcp",
+    .instance_size       = sizeof(BDRVNBDState),
+    .bdrv_parse_filename = nbd_parse_filename,
+    .bdrv_file_open      = nbd_open,
+    .bdrv_co_readv       = nbd_co_readv,
+    .bdrv_co_writev      = nbd_co_writev,
+    .bdrv_close          = nbd_close,
+    .bdrv_co_flush_to_os = nbd_co_flush,
+    .bdrv_co_discard     = nbd_co_discard,
+    .bdrv_getlength      = nbd_getlength,
 };

 static BlockDriver bdrv_nbd_unix = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd+unix",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
+    .format_name         = "nbd",
+    .protocol_name       = "nbd+unix",
+    .instance_size       = sizeof(BDRVNBDState),
+    .bdrv_parse_filename = nbd_parse_filename,
+    .bdrv_file_open      = nbd_open,
+    .bdrv_co_readv       = nbd_co_readv,
+    .bdrv_co_writev      = nbd_co_writev,
+    .bdrv_close          = nbd_close,
+    .bdrv_co_flush_to_os = nbd_co_flush,
+    .bdrv_co_discard     = nbd_co_discard,
+    .bdrv_getlength      = nbd_getlength,
 };

 static void bdrv_nbd_init(void)
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -1,561 +0,0 @@
-/*
- * QEMU Block driver for native access to files on NFS shares
- *
- * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-
-#include <poll.h>
-#include "qemu-common.h"
-#include "qemu/config-file.h"
-#include "qemu/error-report.h"
-#include "block/block_int.h"
-#include "trace.h"
-#include "qemu/iov.h"
-#include "qemu/uri.h"
-#include "sysemu/sysemu.h"
-#include <nfsc/libnfs.h>
-
-#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
-#define QEMU_NFS_MAX_DEBUG_LEVEL 2
-
-typedef struct NFSClient {
-    struct nfs_context *context;
-    struct nfsfh *fh;
-    int events;
-    bool has_zero_init;
-    AioContext *aio_context;
-    blkcnt_t st_blocks;
-} NFSClient;
-
-typedef struct NFSRPC {
-    int ret;
-    int complete;
-    QEMUIOVector *iov;
-    struct stat *st;
-    Coroutine *co;
-    QEMUBH *bh;
-    NFSClient *client;
-} NFSRPC;
-
-static void nfs_process_read(void *arg);
-static void nfs_process_write(void *arg);
-
-static void nfs_set_events(NFSClient *client)
-{
-    int ev = nfs_which_events(client->context);
-    if (ev != client->events) {
-        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false,
-                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
-
-    }
-    client->events = ev;
-}
-
-static void nfs_process_read(void *arg)
-{
-    NFSClient *client = arg;
-    nfs_service(client->context, POLLIN);
-    nfs_set_events(client);
-}
-
-static void nfs_process_write(void *arg)
-{
-    NFSClient *client = arg;
-    nfs_service(client->context, POLLOUT);
-    nfs_set_events(client);
-}
-
-static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
-{
-    *task = (NFSRPC) {
-        .co             = qemu_coroutine_self(),
-        .client         = client,
-    };
-}
-
-static void nfs_co_generic_bh_cb(void *opaque)
-{
-    NFSRPC *task = opaque;
-    task->complete = 1;
-    qemu_bh_delete(task->bh);
-    qemu_coroutine_enter(task->co, NULL);
-}
-
-static void
-nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
-                  void *private_data)
-{
-    NFSRPC *task = private_data;
-    task->ret = ret;
-    if (task->ret > 0 && task->iov) {
-        if (task->ret <= task->iov->size) {
-            qemu_iovec_from_buf(task->iov, 0, data, task->ret);
-        } else {
-            task->ret = -EIO;
-        }
-    }
-    if (task->ret == 0 && task->st) {
-        memcpy(task->st, data, sizeof(struct stat));
-    }
-    if (task->ret < 0) {
-        error_report("NFS Error: %s", nfs_get_error(nfs));
-    }
-    if (task->co) {
-        task->bh = aio_bh_new(task->client->aio_context,
-                              nfs_co_generic_bh_cb, task);
-        qemu_bh_schedule(task->bh);
-    } else {
-        task->complete = 1;
-    }
-}
-
-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
-                                     int64_t sector_num, int nb_sectors,
-                                     QEMUIOVector *iov)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-
-    nfs_co_init_task(client, &task);
-    task.iov = iov;
-
-    if (nfs_pread_async(client->context, client->fh,
-                        sector_num * BDRV_SECTOR_SIZE,
-                        nb_sectors * BDRV_SECTOR_SIZE,
-                        nfs_co_generic_cb, &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    if (task.ret < 0) {
-        return task.ret;
-    }
-
-    /* zero pad short reads */
-    if (task.ret < iov->size) {
-        qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
-    }
-
-    return 0;
-}
-
-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
-                                        int64_t sector_num, int nb_sectors,
-                                        QEMUIOVector *iov)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-    char *buf = NULL;
-
-    nfs_co_init_task(client, &task);
-
-    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
-    if (nb_sectors && buf == NULL) {
-        return -ENOMEM;
-    }
-
-    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
-
-    if (nfs_pwrite_async(client->context, client->fh,
-                         sector_num * BDRV_SECTOR_SIZE,
-                         nb_sectors * BDRV_SECTOR_SIZE,
-                         buf, nfs_co_generic_cb, &task) != 0) {
-        g_free(buf);
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    g_free(buf);
-
-    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
-        return task.ret < 0 ? task.ret : -EIO;
-    }
-
-    return 0;
-}
-
-static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-
-    nfs_co_init_task(client, &task);
-
-    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
-                        &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    return task.ret;
-}
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "nfs",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the NFS file",
-        },
-        { /* end of list */ }
-    },
-};
-
-static void nfs_detach_aio_context(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-
-    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL);
-    client->events = 0;
-}
-
-static void nfs_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    NFSClient *client = bs->opaque;
-
-    client->aio_context = new_context;
-    nfs_set_events(client);
-}
-
-static void nfs_client_close(NFSClient *client)
-{
-    if (client->context) {
-        if (client->fh) {
-            nfs_close(client->context, client->fh);
-        }
-        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL);
-        nfs_destroy_context(client->context);
-    }
-    memset(client, 0, sizeof(NFSClient));
-}
-
-static void nfs_file_close(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    nfs_client_close(client);
-}
-
-static int64_t nfs_client_open(NFSClient *client, const char *filename,
-                               int flags, Error **errp)
-{
-    int ret = -EINVAL, i;
-    struct stat st;
-    URI *uri;
-    QueryParams *qp = NULL;
-    char *file = NULL, *strp = NULL;
-
-    uri = uri_parse(filename);
-    if (!uri) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    if (!uri->server) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    strp = strrchr(uri->path, '/');
-    if (strp == NULL) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    file = g_strdup(strp);
-    *strp = 0;
-
-    client->context = nfs_init_context();
-    if (client->context == NULL) {
-        error_setg(errp, "Failed to init NFS context");
-        goto fail;
-    }
-
-    qp = query_params_parse(uri->query);
-    for (i = 0; i < qp->n; i++) {
-        unsigned long long val;
-        if (!qp->p[i].value) {
-            error_setg(errp, "Value for NFS parameter expected: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (parse_uint_full(qp->p[i].value, &val, 0)) {
-            error_setg(errp, "Illegal value for NFS parameter: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (!strcmp(qp->p[i].name, "uid")) {
-            nfs_set_uid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "gid")) {
-            nfs_set_gid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            nfs_set_tcp_syncnt(client->context, val);
-#ifdef LIBNFS_FEATURE_READAHEAD
-        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
-                error_report("NFS Warning: Truncating NFS readahead"
-                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
-                val = QEMU_NFS_MAX_READAHEAD_SIZE;
-            }
-            nfs_set_readahead(client->context, val);
-#endif
-#ifdef LIBNFS_FEATURE_DEBUG
-        } else if (!strcmp(qp->p[i].name, "debug")) {
-            /* limit the maximum debug level to avoid potential flooding
-             * of our log files. */
-            if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
-                error_report("NFS Warning: Limiting NFS debug level"
-                             " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
-                val = QEMU_NFS_MAX_DEBUG_LEVEL;
-            }
-            nfs_set_debug(client->context, val);
-#endif
-        } else {
-            error_setg(errp, "Unknown NFS parameter name: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-    }
-
-    ret = nfs_mount(client->context, uri->server, uri->path);
-    if (ret < 0) {
-        error_setg(errp, "Failed to mount nfs share: %s",
-                   nfs_get_error(client->context));
-        goto fail;
-    }
-
-    if (flags & O_CREAT) {
-        ret = nfs_creat(client->context, file, 0600, &client->fh);
-        if (ret < 0) {
-            error_setg(errp, "Failed to create file: %s",
-                       nfs_get_error(client->context));
-            goto fail;
-        }
-    } else {
-        ret = nfs_open(client->context, file, flags, &client->fh);
-        if (ret < 0) {
-            error_setg(errp, "Failed to open file : %s",
-                       nfs_get_error(client->context));
-            goto fail;
-        }
-    }
-
-    ret = nfs_fstat(client->context, client->fh, &st);
-    if (ret < 0) {
-        error_setg(errp, "Failed to fstat file: %s",
-                   nfs_get_error(client->context));
-        goto fail;
-    }
-
-    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
-    client->st_blocks = st.st_blocks;
-    client->has_zero_init = S_ISREG(st.st_mode);
-    goto out;
-fail:
-    nfs_client_close(client);
-out:
-    if (qp) {
-        query_params_free(qp);
-    }
-    uri_free(uri);
-    g_free(file);
-    return ret;
-}
-
-static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp) {
-    NFSClient *client = bs->opaque;
-    int64_t ret;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-
-    client->aio_context = bdrv_get_aio_context(bs);
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
-                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp);
-    if (ret < 0) {
-        goto out;
-    }
-    bs->total_sectors = ret;
-    ret = 0;
-out:
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static QemuOptsList nfs_create_opts = {
-    .name = "nfs-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
-{
-    int ret = 0;
-    int64_t total_size = 0;
-    NFSClient *client = g_new0(NFSClient, 1);
-
-    client->aio_context = qemu_get_aio_context();
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    ret = nfs_client_open(client, url, O_CREAT, errp);
-    if (ret < 0) {
-        goto out;
-    }
-    ret = nfs_ftruncate(client->context, client->fh, total_size);
-    nfs_client_close(client);
-out:
-    g_free(client);
-    return ret;
-}
-
-static int nfs_has_zero_init(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    return client->has_zero_init;
-}
-
-static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task = {0};
-    struct stat st;
-
-    if (bdrv_is_read_only(bs) &&
-        !(bs->open_flags & BDRV_O_NOCACHE)) {
-        return client->st_blocks * 512;
-    }
-
-    task.st = &st;
-    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
-                        &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        aio_poll(client->aio_context, true);
-    }
-
-    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
-}
-
-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
-{
-    NFSClient *client = bs->opaque;
-    return nfs_ftruncate(client->context, client->fh, offset);
-}
-
-/* Note that this will not re-establish a connection with the NFS server
- * - it is effectively a NOP.  */
-static int nfs_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    NFSClient *client = state->bs->opaque;
-    struct stat st;
-    int ret = 0;
-
-    if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
-        error_setg(errp, "Cannot open a read-only mount as read-write");
-        return -EACCES;
-    }
-
-    /* Update cache for read-only reopens */
-    if (!(state->flags & BDRV_O_RDWR)) {
-        ret = nfs_fstat(client->context, client->fh, &st);
-        if (ret < 0) {
-            error_setg(errp, "Failed to fstat file: %s",
-                       nfs_get_error(client->context));
-            return ret;
-        }
-        client->st_blocks = st.st_blocks;
-    }
-
-    return 0;
-}
-
-static BlockDriver bdrv_nfs = {
-    .format_name                    = "nfs",
-    .protocol_name                  = "nfs",
-
-    .instance_size                  = sizeof(NFSClient),
-    .bdrv_needs_filename            = true,
-    .create_opts                    = &nfs_create_opts,
-
-    .bdrv_has_zero_init             = nfs_has_zero_init,
-    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
-    .bdrv_truncate                  = nfs_file_truncate,
-
-    .bdrv_file_open                 = nfs_file_open,
-    .bdrv_close                     = nfs_file_close,
-    .bdrv_create                    = nfs_file_create,
-    .bdrv_reopen_prepare            = nfs_reopen_prepare,
-
-    .bdrv_co_readv                  = nfs_co_readv,
-    .bdrv_co_writev                 = nfs_co_writev,
-    .bdrv_co_flush_to_disk          = nfs_co_flush,
-
-    .bdrv_detach_aio_context        = nfs_detach_aio_context,
-    .bdrv_attach_aio_context        = nfs_attach_aio_context,
-};
-
-static void nfs_block_init(void)
-{
-    bdrv_register(&bdrv_nfs);
-}
-
-block_init(nfs_block_init);
--- a/block/null.c
+++ b/block/null.c
@@ -1,223 +0,0 @@
-/*
- * Null block driver
- *
- * Authors:
- *  Fam Zheng <famz@redhat.com>
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "block/block_int.h"
-
-#define NULL_OPT_LATENCY "latency-ns"
-
-typedef struct {
-    int64_t length;
-    int64_t latency_ns;
-} BDRVNullState;
-
-static QemuOptsList runtime_opts = {
-    .name = "null",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "",
-        },
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "size of the null block",
-        },
-        {
-            .name = NULL_OPT_LATENCY,
-            .type = QEMU_OPT_NUMBER,
-            .help = "nanoseconds (approximated) to wait "
-                    "before completing request",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    QemuOpts *opts;
-    BDRVNullState *s = bs->opaque;
-    int ret = 0;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &error_abort);
-    s->length =
-        qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
-    s->latency_ns =
-        qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
-    if (s->latency_ns < 0) {
-        error_setg(errp, "latency-ns is invalid");
-        ret = -EINVAL;
-    }
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void null_close(BlockDriverState *bs)
-{
-}
-
-static int64_t null_getlength(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-    return s->length;
-}
-
-static coroutine_fn int null_co_common(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-
-    if (s->latency_ns) {
-        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
-                        s->latency_ns);
-    }
-    return 0;
-}
-
-static coroutine_fn int null_co_readv(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      QEMUIOVector *qiov)
-{
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_writev(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors,
-                                       QEMUIOVector *qiov)
-{
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_flush(BlockDriverState *bs)
-{
-    return null_co_common(bs);
-}
-
-typedef struct {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    QEMUTimer timer;
-} NullAIOCB;
-
-static const AIOCBInfo null_aiocb_info = {
-    .aiocb_size = sizeof(NullAIOCB),
-};
-
-static void null_bh_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
-}
-
-static void null_timer_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    timer_deinit(&acb->timer);
-    qemu_aio_unref(acb);
-}
-
-static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
-                                          BlockCompletionFunc *cb,
-                                          void *opaque)
-{
-    NullAIOCB *acb;
-    BDRVNullState *s = bs->opaque;
-
-    acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
-    /* Only emulate latency after vcpu is running. */
-    if (s->latency_ns) {
-        aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
-                       QEMU_CLOCK_REALTIME, SCALE_NS,
-                       null_timer_cb, acb);
-        timer_mod_ns(&acb->timer,
-                     qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
-    } else {
-        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-        qemu_bh_schedule(acb->bh);
-    }
-    return &acb->common;
-}
-
-static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
-                                  int64_t sector_num, QEMUIOVector *qiov,
-                                  int nb_sectors,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static int null_reopen_prepare(BDRVReopenState *reopen_state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static BlockDriver bdrv_null_co = {
-    .format_name            = "null-co",
-    .protocol_name          = "null-co",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_co_readv          = null_co_readv,
-    .bdrv_co_writev         = null_co_writev,
-    .bdrv_co_flush_to_disk  = null_co_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-};
-
-static BlockDriver bdrv_null_aio = {
-    .format_name            = "null-aio",
-    .protocol_name          = "null-aio",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_aio_readv         = null_aio_readv,
-    .bdrv_aio_writev        = null_aio_writev,
-    .bdrv_aio_flush         = null_aio_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-};
-
-static void bdrv_null_init(void)
-{
-    bdrv_register(&bdrv_null_co);
-    bdrv_register(&bdrv_null_aio);
-}
-
-block_init(bdrv_null_init);
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -2,12 +2,8 @@
 * Block driver for Parallels disk image format
 *
 * Copyright (c) 2007 Alex Beregszaszi
- * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
 *
- * This code was originally based on comparing different disk images created
- * by Parallels. Currently it is based on opened OpenVZ sources
- * available at
- *     http://git.openvz.org/?p=ploop;a=summary
+ * This code is based on comparing different disk images created by Parallels.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -27,569 +23,73 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
-#include "sysemu/block-backend.h"
 #include "qemu/module.h"
-#include "qemu/bitmap.h"
-#include "qapi/util.h"

 /**************************************************************/

 #define HEADER_MAGIC "WithoutFreeSpace"
-#define HEADER_MAGIC2 "WithouFreSpacExt"
 #define HEADER_VERSION 2
-#define HEADER_INUSE_MAGIC  (0x746F6E59)
-
-#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */
-
+#define HEADER_SIZE 64

 // always little-endian
-typedef struct ParallelsHeader {
+struct parallels_header {
    char magic[16]; // "WithoutFreeSpace"
    uint32_t version;
    uint32_t heads;
    uint32_t cylinders;
    uint32_t tracks;
-    uint32_t bat_entries;
-    uint64_t nb_sectors;
-    uint32_t inuse;
-    uint32_t data_off;
-    char padding[12];
-} QEMU_PACKED ParallelsHeader;
-
-
-typedef enum ParallelsPreallocMode {
-    PRL_PREALLOC_MODE_FALLOCATE = 0,
-    PRL_PREALLOC_MODE_TRUNCATE = 1,
-    PRL_PREALLOC_MODE__MAX = 2,
-} ParallelsPreallocMode;
-
-static const char *prealloc_mode_lookup[] = {
-    "falloc",
-    "truncate",
-    NULL,
-};
-
+    uint32_t catalog_entries;
+    uint32_t nb_sectors;
+    char padding[24];
+} QEMU_PACKED;

 typedef struct BDRVParallelsState {
-    /** Locking is conservative, the lock protects
-     *   - image file extending (truncate, fallocate)
-     *   - any access to block allocation table
-     */
    CoMutex lock;

-    ParallelsHeader *header;
-    uint32_t header_size;
-    bool header_unclean;
-
-    unsigned long *bat_dirty_bmap;
-    unsigned int  bat_dirty_block;
-
-    uint32_t *bat_bitmap;
-    unsigned int bat_size;
-
-    int64_t  data_end;
-    uint64_t prealloc_size;
-    ParallelsPreallocMode prealloc_mode;
+    uint32_t *catalog_bitmap;
+    unsigned int catalog_size;

    unsigned int tracks;
-
-    unsigned int off_multiplier;
 } BDRVParallelsState;

-
-#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
-#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
-
-static QemuOptsList parallels_runtime_opts = {
-    .name = "parallels",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
-    .desc = {
-        {
-            .name = PARALLELS_OPT_PREALLOC_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Preallocation size on image expansion",
-            .def_value_str = "128MiB",
-        },
-        {
-            .name = PARALLELS_OPT_PREALLOC_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode on image expansion "
-                    "(allowed values: falloc, truncate)",
-            .def_value_str = "falloc",
-        },
-        { /* end of list */ },
-    },
-};
-
-
-static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
+static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
-    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
-}
+    const struct parallels_header *ph = (const void *)buf;

-static uint32_t bat_entry_off(uint32_t idx)
-{
-    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
-}
+    if (buf_size < HEADER_SIZE)
+	return 0;

-static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
-{
-    uint32_t index, offset;
-
-    index = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
-
-    /* not allocated */
-    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
-        return -1;
-    }
-    return bat2sect(s, index) + offset;
-}
-
-static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
-        int nb_sectors)
-{
-    int ret = s->tracks - sector_num % s->tracks;
-    return MIN(nb_sectors, ret);
-}
-
-static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    int64_t start_off = -2, prev_end_off = -2;
-
-    *pnum = 0;
-    while (nb_sectors > 0 || start_off == -2) {
-        int64_t offset = seek_to_sector(s, sector_num);
-        int to_end;
-
-        if (start_off == -2) {
-            start_off = offset;
-            prev_end_off = offset;
-        } else if (offset != prev_end_off) {
-            break;
-        }
-
-        to_end = cluster_remainder(s, sector_num, nb_sectors);
-        nb_sectors -= to_end;
-        sector_num += to_end;
-        *pnum += to_end;
-
-        if (offset > 0) {
-            prev_end_off += to_end;
-        }
-    }
-    return start_off;
-}
-
-static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors, int *pnum)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
-
-    pos = block_status(s, sector_num, nb_sectors, pnum);
-    if (pos > 0) {
-        return pos;
-    }
-
-    idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
-    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
-    space = to_allocate * s->tracks;
-    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
-        int ret;
-        space += s->prealloc_size;
-        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
-        } else {
-            ret = bdrv_truncate(bs->file->bs,
-                                (s->data_end + space) << BDRV_SECTOR_BITS);
-        }
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    for (i = 0; i < to_allocate; i++) {
-        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
-        s->data_end += s->tracks;
-        bitmap_set(s->bat_dirty_bmap,
-                   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
-    }
-
-    return bat2sect(s, idx) + sector_num % s->tracks;
-}
-
-
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
-    unsigned long bit;
-
-    qemu_co_mutex_lock(&s->lock);
-
-    bit = find_first_bit(s->bat_dirty_bmap, size);
-    while (bit < size) {
-        uint32_t off = bit * s->bat_dirty_block;
-        uint32_t to_write = s->bat_dirty_block;
-        int ret;
-
-        if (off + to_write > s->header_size) {
-            to_write = s->header_size - off;
-        }
-        ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
-                          to_write);
-        if (ret < 0) {
-            qemu_co_mutex_unlock(&s->lock);
-            return ret;
-        }
-        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
-    }
-    bitmap_zero(s->bat_dirty_bmap, size);
-
-    qemu_co_mutex_unlock(&s->lock);
-    return 0;
-}
-
-
-static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t offset;
-
-    qemu_co_mutex_lock(&s->lock);
-    offset = block_status(s, sector_num, nb_sectors, pnum);
-    qemu_co_mutex_unlock(&s->lock);
-
-    if (offset < 0) {
-        return 0;
-    }
-
-    *file = bs->file->bs;
-    return (offset << BDRV_SECTOR_BITS) |
-        BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-}
-
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-        if (position < 0) {
-            ret = (int)position;
-            break;
-        }
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-        ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
-        if (ret < 0) {
-            break;
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = block_status(s, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        if (position < 0) {
-            qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
-        } else {
-            qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-            ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
-            if (ret < 0) {
-                break;
-            }
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-
-static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t size, prev_off, high_off;
-    int ret;
-    uint32_t i;
-    bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
-
-    size = bdrv_getlength(bs->file->bs);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    if (s->header_unclean) {
-        fprintf(stderr, "%s image was not closed correctly\n",
-                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-        res->corruptions++;
-        if (fix & BDRV_FIX_ERRORS) {
-            /* parallels_close will do the job right */
-            res->corruptions_fixed++;
-            s->header_unclean = false;
-        }
-    }
-
-    res->bfi.total_clusters = s->bat_size;
-    res->bfi.compressed_clusters = 0; /* compression is not supported */
-
-    high_off = 0;
-    prev_off = 0;
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-        if (off == 0) {
-            prev_off = 0;
-            continue;
-        }
-
-        /* cluster outside the image */
-        if (off > size) {
-            fprintf(stderr, "%s cluster %u is outside image\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
-            res->corruptions++;
-            if (fix & BDRV_FIX_ERRORS) {
-                prev_off = 0;
-                s->bat_bitmap[i] = 0;
-                res->corruptions_fixed++;
-                flush_bat = true;
-                continue;
-            }
-        }
-
-        res->bfi.allocated_clusters++;
-        if (off > high_off) {
-            high_off = off;
-        }
-
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
-            res->bfi.fragmented_clusters++;
-        }
-        prev_off = off;
-    }
-
-    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
-        if (ret < 0) {
-            res->check_errors++;
-            return ret;
-        }
-    }
-
-    res->image_end_offset = high_off + cluster_size;
-    if (size > res->image_end_offset) {
-        int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
-        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
-                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
-                size - res->image_end_offset);
-        res->leaks += count;
-        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
-            if (ret < 0) {
-                res->check_errors++;
-                return ret;
-            }
-            res->leaks_fixed += count;
-        }
-    }
+    if (!memcmp(ph->magic, HEADER_MAGIC, 16) &&
+	(le32_to_cpu(ph->version) == HEADER_VERSION))
+	return 100;

    return 0;
 }

-
-static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int64_t total_size, cl_size;
-    uint8_t tmp[BDRV_SECTOR_SIZE];
-    Error *local_err = NULL;
-    BlockBackend *file;
-    uint32_t bat_entries, bat_sectors;
-    ParallelsHeader header;
-    int ret;
-
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
-                          DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-
-    file = blk_new_open("image", filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                        &local_err);
-    if (file == NULL) {
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    blk_set_allow_write_beyond_eof(file, true);
-
-    ret = blk_truncate(file, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    bat_entries = DIV_ROUND_UP(total_size, cl_size);
-    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
-    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
-
-    memset(&header, 0, sizeof(header));
-    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
-    header.version = cpu_to_le32(HEADER_VERSION);
-    /* don't care much about geometry, it is not used on image level */
-    header.heads = cpu_to_le32(16);
-    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
-    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
-    header.bat_entries = cpu_to_le32(bat_entries);
-    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
-    header.data_off = cpu_to_le32(bat_sectors);
-
-    /* write all the data */
-    memset(tmp, 0, sizeof(tmp));
-    memcpy(tmp, &header, sizeof(header));
-
-    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = 0;
-
-done:
-    blk_unref(file);
-    return ret;
-
-exit:
-    error_setg_errno(errp, -ret, "Failed to create Parallels image");
-    goto done;
-}
-
-
-static int parallels_probe(const uint8_t *buf, int buf_size,
-                           const char *filename)
-{
-    const ParallelsHeader *ph = (const void *)buf;
-
-    if (buf_size < sizeof(ParallelsHeader)) {
-        return 0;
-    }
-
-    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
-        return 100;
-    }
-
-    return 0;
-}
-
-static int parallels_update_header(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
-                        sizeof(ParallelsHeader));
-
-    if (size > s->header_size) {
-        size = s->header_size;
-    }
-    return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
-}
-
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
    BDRVParallelsState *s = bs->opaque;
-    ParallelsHeader ph;
-    int ret, size, i;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    char *buf;
+    int i;
+    struct parallels_header ph;
+    int ret;

-    ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
+    bs->read_only = 1; // no write support yet
+
+    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
    }

-    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
+    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
+        (le32_to_cpu(ph.version) != HEADER_VERSION)) {
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    }

-    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
-        goto fail_format;
-    }
-    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
-        s->off_multiplier = 1;
-        bs->total_sectors = 0xffffffff & bs->total_sectors;
-    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
-        s->off_multiplier = le32_to_cpu(ph.tracks);
-    } else {
-        goto fail_format;
-    }
+    bs->total_sectors = le32_to_cpu(ph.nb_sectors);

    s->tracks = le32_to_cpu(ph.tracks);
    if (s->tracks == 0) {
@@ -597,165 +97,87 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
        ret = -EINVAL;
        goto fail;
    }
-    if (s->tracks > INT32_MAX/513) {
-        error_setg(errp, "Invalid image: Too big cluster");
-        ret = -EFBIG;
-        goto fail;
-    }

-    s->bat_size = le32_to_cpu(ph.bat_entries);
-    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
+    s->catalog_size = le32_to_cpu(ph.catalog_entries);
+    if (s->catalog_size > INT_MAX / 4) {
        error_setg(errp, "Catalog too large");
        ret = -EFBIG;
        goto fail;
    }
+    s->catalog_bitmap = g_malloc(s->catalog_size * 4);

-    size = bat_entry_off(s->bat_size);
-    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
-    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
-    if (s->header == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
    if (ret < 0) {
        goto fail;
    }
-    s->bat_bitmap = (uint32_t *)(s->header + 1);

-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
-    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
-        s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
-    }
-
-    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    s->prealloc_size =
-        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
-            PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
-    g_free(buf);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-    if (!bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
-        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
-    }
-
-    if (flags & BDRV_O_RDWR) {
-        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
-        ret = parallels_update_header(bs);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    s->bat_dirty_block = 4 * getpagesize();
-    s->bat_dirty_bmap =
-        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
+    for (i = 0; i < s->catalog_size; i++)
+	le32_to_cpus(&s->catalog_bitmap[i]);

    qemu_co_mutex_init(&s->lock);
    return 0;

-fail_format:
-    error_setg(errp, "Image not in Parallels format");
-    ret = -EINVAL;
 fail:
-    qemu_vfree(s->header);
+    g_free(s->catalog_bitmap);
    return ret;
-
-fail_options:
-    error_propagate(errp, local_err);
-    ret = -EINVAL;
-    goto fail;
 }

+static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t index, offset;
+
+    index = sector_num / s->tracks;
+    offset = sector_num % s->tracks;
+
+    /* not allocated */
+    if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
+	return -1;
+    return (uint64_t)(s->catalog_bitmap[index] + offset) * 512;
+}
+
+static int parallels_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    while (nb_sectors > 0) {
+        int64_t position = seek_to_sector(bs, sector_num);
+        if (position >= 0) {
+            if (bdrv_pread(bs->file, position, buf, 512) != 512)
+                return -1;
+        } else {
+            memset(buf, 0, 512);
+        }
+        nb_sectors--;
+        sector_num++;
+        buf += 512;
+    }
+    return 0;
+}
+
+static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num,
+                                          uint8_t *buf, int nb_sectors)
+{
+    int ret;
+    BDRVParallelsState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = parallels_read(bs, sector_num, buf, nb_sectors);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}

 static void parallels_close(BlockDriverState *bs)
 {
    BDRVParallelsState *s = bs->opaque;
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        s->header->inuse = 0;
-        parallels_update_header(bs);
-    }
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
-    }
-
-    g_free(s->bat_dirty_bmap);
-    qemu_vfree(s->header);
+    g_free(s->catalog_bitmap);
 }

-static QemuOptsList parallels_create_opts = {
-    .name = "parallels-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size",
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Parallels image cluster size",
-            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
-        },
-        { /* end of list */ }
-    }
-};
-
 static BlockDriver bdrv_parallels = {
    .format_name	= "parallels",
    .instance_size	= sizeof(BDRVParallelsState),
    .bdrv_probe		= parallels_probe,
    .bdrv_open		= parallels_open,
+    .bdrv_read          = parallels_co_read,
    .bdrv_close		= parallels_close,
-    .bdrv_co_get_block_status = parallels_co_get_block_status,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
-    .bdrv_co_readv  = parallels_co_readv,
-    .bdrv_co_writev = parallels_co_writev,
-
-    .bdrv_create    = parallels_create,
-    .bdrv_check     = parallels_check,
-    .create_opts    = &parallels_create_opts,
 };

 static void bdrv_parallels_init(void)
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -22,126 +22,12 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include "block/qapi.h"
 #include "block/block_int.h"
-#include "block/throttle-groups.h"
-#include "block/write-threshold.h"
 #include "qmp-commands.h"
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
-#include "sysemu/block-backend.h"
-
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
-{
-    ImageInfo **p_image_info;
-    BlockDriverState *bs0;
-    BlockDeviceInfo *info = g_malloc0(sizeof(*info));
-
-    info->file                   = g_strdup(bs->filename);
-    info->ro                     = bs->read_only;
-    info->drv                    = g_strdup(bs->drv->format_name);
-    info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = bdrv_key_required(bs);
-
-    info->cache = g_new(BlockdevCacheInfo, 1);
-    *info->cache = (BlockdevCacheInfo) {
-        .writeback      = bdrv_enable_write_cache(bs),
-        .direct         = !!(bs->open_flags & BDRV_O_NOCACHE),
-        .no_flush       = !!(bs->open_flags & BDRV_O_NO_FLUSH),
-    };
-
-    if (bs->node_name[0]) {
-        info->has_node_name = true;
-        info->node_name = g_strdup(bs->node_name);
-    }
-
-    if (bs->backing_file[0]) {
-        info->has_backing_file = true;
-        info->backing_file = g_strdup(bs->backing_file);
-    }
-
-    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
-    info->detect_zeroes = bs->detect_zeroes;
-
-    if (bs->throttle_state) {
-        ThrottleConfig cfg;
-
-        throttle_group_get_config(bs, &cfg);
-
-        info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
-        info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
-        info->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
-
-        info->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
-        info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
-        info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
-
-        info->has_bps_max     = cfg.buckets[THROTTLE_BPS_TOTAL].max;
-        info->bps_max         = cfg.buckets[THROTTLE_BPS_TOTAL].max;
-        info->has_bps_rd_max  = cfg.buckets[THROTTLE_BPS_READ].max;
-        info->bps_rd_max      = cfg.buckets[THROTTLE_BPS_READ].max;
-        info->has_bps_wr_max  = cfg.buckets[THROTTLE_BPS_WRITE].max;
-        info->bps_wr_max      = cfg.buckets[THROTTLE_BPS_WRITE].max;
-
-        info->has_iops_max    = cfg.buckets[THROTTLE_OPS_TOTAL].max;
-        info->iops_max        = cfg.buckets[THROTTLE_OPS_TOTAL].max;
-        info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
-        info->iops_rd_max     = cfg.buckets[THROTTLE_OPS_READ].max;
-        info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
-        info->iops_wr_max     = cfg.buckets[THROTTLE_OPS_WRITE].max;
-
-        info->has_bps_max_length     = info->has_bps_max;
-        info->bps_max_length         =
-            cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
-        info->has_bps_rd_max_length  = info->has_bps_rd_max;
-        info->bps_rd_max_length      =
-            cfg.buckets[THROTTLE_BPS_READ].burst_length;
-        info->has_bps_wr_max_length  = info->has_bps_wr_max;
-        info->bps_wr_max_length      =
-            cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
-
-        info->has_iops_max_length    = info->has_iops_max;
-        info->iops_max_length        =
-            cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
-        info->has_iops_rd_max_length = info->has_iops_rd_max;
-        info->iops_rd_max_length     =
-            cfg.buckets[THROTTLE_OPS_READ].burst_length;
-        info->has_iops_wr_max_length = info->has_iops_wr_max;
-        info->iops_wr_max_length     =
-            cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
-
-        info->has_iops_size = cfg.op_size;
-        info->iops_size = cfg.op_size;
-
-        info->has_group = true;
-        info->group = g_strdup(throttle_group_get_name(bs));
-    }
-
-    info->write_threshold = bdrv_write_threshold_get(bs);
-
-    bs0 = bs;
-    p_image_info = &info->image;
-    while (1) {
-        Error *local_err = NULL;
-        bdrv_query_image_info(bs0, p_image_info, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            qapi_free_BlockDeviceInfo(info);
-            return NULL;
-        }
-        if (bs0->drv && bs0->backing) {
-            bs0 = bs0->backing->bs;
-            (*p_image_info)->has_backing_image = true;
-            p_image_info = &((*p_image_info)->backing_image);
-        } else {
-            break;
-        }
-    }
-
-    return info;
-}

 /*
 * Returns 0 on success, with *p_list either set to describe snapshot
@@ -224,26 +110,19 @@ void bdrv_query_image_info(BlockDriverState *bs,
                           ImageInfo **p_info,
                           Error **errp)
 {
-    int64_t size;
+    uint64_t total_sectors;
    const char *backing_filename;
+    char backing_filename2[1024];
    BlockDriverInfo bdi;
    int ret;
    Error *err = NULL;
-    ImageInfo *info;
+    ImageInfo *info = g_new0(ImageInfo, 1);

-    aio_context_acquire(bdrv_get_aio_context(bs));
+    bdrv_get_geometry(bs, &total_sectors);

-    size = bdrv_getlength(bs);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get size of device '%s'",
-                         bdrv_get_device_name(bs));
-        goto out;
-    }
-
-    info = g_new0(ImageInfo, 1);
    info->filename        = g_strdup(bs->filename);
    info->format          = g_strdup(bdrv_get_format_name(bs));
-    info->virtual_size    = size;
+    info->virtual_size    = total_sectors * 512;
    info->actual_size     = bdrv_get_allocated_file_size(bs);
    info->has_actual_size = info->actual_size >= 0;
    if (bdrv_is_encrypted(bs)) {
@@ -263,23 +142,14 @@ void bdrv_query_image_info(BlockDriverState *bs,

    backing_filename = bs->backing_file;
    if (backing_filename[0] != '\0') {
-        char *backing_filename2 = g_malloc0(PATH_MAX);
        info->backing_filename = g_strdup(backing_filename);
        info->has_backing_filename = true;
-        bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
-        if (err) {
-            /* Can't reconstruct the full backing filename, so we must omit
-             * this field and apply a Best Effort to this query. */
-            g_free(backing_filename2);
-            backing_filename2 = NULL;
-            error_free(err);
-            err = NULL;
-        }
+        bdrv_get_full_backing_filename(bs, backing_filename2,
+                                       sizeof(backing_filename2));

-        /* Always report the full_backing_filename if present, even if it's the
-         * same as backing_filename. That they are same is useful info. */
-        if (backing_filename2) {
-            info->full_backing_filename = g_strdup(backing_filename2);
+        if (strcmp(backing_filename, backing_filename2) != 0) {
+            info->full_backing_filename =
+                        g_strdup(backing_filename2);
            info->has_full_backing_filename = true;
        }

@@ -287,7 +157,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
            info->backing_filename_format = g_strdup(bs->backing_format);
            info->has_backing_filename_format = true;
        }
-        g_free(backing_filename2);
    }

    ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
@@ -305,46 +174,116 @@ void bdrv_query_image_info(BlockDriverState *bs,
    default:
        error_propagate(errp, err);
        qapi_free_ImageInfo(info);
-        goto out;
+        return;
    }

    *p_info = info;
-
-out:
-    aio_context_release(bdrv_get_aio_context(bs));
 }

 /* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
-                            Error **errp)
+void bdrv_query_info(BlockDriverState *bs,
+                     BlockInfo **p_info,
+                     Error **errp)
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
-    BlockDriverState *bs = blk_bs(blk);
-    info->device = g_strdup(blk_name(blk));
+    BlockDriverState *bs0;
+    ImageInfo **p_image_info;
+    Error *local_err = NULL;
+    info->device = g_strdup(bs->device_name);
    info->type = g_strdup("unknown");
-    info->locked = blk_dev_is_medium_locked(blk);
-    info->removable = blk_dev_has_removable_media(blk);
+    info->locked = bdrv_dev_is_medium_locked(bs);
+    info->removable = bdrv_dev_has_removable_media(bs);

-    if (blk_dev_has_tray(blk)) {
+    if (bdrv_dev_has_removable_media(bs)) {
        info->has_tray_open = true;
-        info->tray_open = blk_dev_is_tray_open(blk);
+        info->tray_open = bdrv_dev_is_tray_open(bs);
    }

-    if (blk_iostatus_is_enabled(blk)) {
+    if (bdrv_iostatus_is_enabled(bs)) {
        info->has_io_status = true;
-        info->io_status = blk_iostatus(blk);
+        info->io_status = bs->iostatus;
    }

-    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
-        info->has_dirty_bitmaps = true;
-        info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
+    if (bs->dirty_bitmap) {
+        info->has_dirty = true;
+        info->dirty = g_malloc0(sizeof(*info->dirty));
+        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+        info->dirty->granularity =
+         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
    }

-    if (bs && bs->drv) {
+    if (bs->drv) {
        info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(bs, errp);
-        if (info->inserted == NULL) {
-            goto err;
+        info->inserted = g_malloc0(sizeof(*info->inserted));
+        info->inserted->file = g_strdup(bs->filename);
+        info->inserted->ro = bs->read_only;
+        info->inserted->drv = g_strdup(bs->drv->format_name);
+        info->inserted->encrypted = bs->encrypted;
+        info->inserted->encryption_key_missing = bdrv_key_required(bs);
+
+        if (bs->backing_file[0]) {
+            info->inserted->has_backing_file = true;
+            info->inserted->backing_file = g_strdup(bs->backing_file);
+        }
+
+        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+        if (bs->io_limits_enabled) {
+            ThrottleConfig cfg;
+            throttle_get_config(&bs->throttle_state, &cfg);
+            info->inserted->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
+            info->inserted->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
+            info->inserted->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
+
+            info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
+            info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
+            info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
+
+            info->inserted->has_bps_max     =
+                cfg.buckets[THROTTLE_BPS_TOTAL].max;
+            info->inserted->bps_max         =
+                cfg.buckets[THROTTLE_BPS_TOTAL].max;
+            info->inserted->has_bps_rd_max  =
+                cfg.buckets[THROTTLE_BPS_READ].max;
+            info->inserted->bps_rd_max      =
+                cfg.buckets[THROTTLE_BPS_READ].max;
+            info->inserted->has_bps_wr_max  =
+                cfg.buckets[THROTTLE_BPS_WRITE].max;
+            info->inserted->bps_wr_max      =
+                cfg.buckets[THROTTLE_BPS_WRITE].max;
+
+            info->inserted->has_iops_max    =
+                cfg.buckets[THROTTLE_OPS_TOTAL].max;
+            info->inserted->iops_max        =
+                cfg.buckets[THROTTLE_OPS_TOTAL].max;
+            info->inserted->has_iops_rd_max =
+                cfg.buckets[THROTTLE_OPS_READ].max;
+            info->inserted->iops_rd_max     =
+                cfg.buckets[THROTTLE_OPS_READ].max;
+            info->inserted->has_iops_wr_max =
+                cfg.buckets[THROTTLE_OPS_WRITE].max;
+            info->inserted->iops_wr_max     =
+                cfg.buckets[THROTTLE_OPS_WRITE].max;
+
+            info->inserted->has_iops_size = cfg.op_size;
+            info->inserted->iops_size = cfg.op_size;
+        }
+
+        bs0 = bs;
+        p_image_info = &info->inserted->image;
+        while (1) {
+            bdrv_query_image_info(bs0, p_image_info, &local_err);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
+                goto err;
+            }
+            if (bs0->drv && bs0->backing_hd) {
+                bs0 = bs0->backing_hd;
+                (*p_image_info)->has_backing_image = true;
+                p_image_info = &((*p_image_info)->backing_image);
+            } else {
+                break;
+            }
        }
    }

@@ -355,116 +294,31 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing);
-
-static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk)
-{
-    BlockAcctStats *stats = blk_get_stats(blk);
-    BlockAcctTimedStats *ts = NULL;
-
-    s->has_device = true;
-    s->device = g_strdup(blk_name(blk));
-
-    s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
-    s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
-    s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
-    s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
-    s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
-    s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
-    s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
-    s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
-    s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
-    s->stats->invalid_flush_operations =
-        stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
-    s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
-    s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
-    s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
-    s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
-    s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
-    s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
-    if (s->stats->has_idle_time_ns) {
-        s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
-    }
-
-    s->stats->account_invalid = stats->account_invalid;
-    s->stats->account_failed = stats->account_failed;
-
-    while ((ts = block_acct_interval_next(stats, ts))) {
-        BlockDeviceTimedStatsList *timed_stats =
-            g_malloc0(sizeof(*timed_stats));
-        BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
-        timed_stats->next = s->stats->timed_stats;
-        timed_stats->value = dev_stats;
-        s->stats->timed_stats = timed_stats;
-
-        TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
-        TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
-        TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
-
-        dev_stats->interval_length = ts->interval_length;
-
-        dev_stats->min_rd_latency_ns = timed_average_min(rd);
-        dev_stats->max_rd_latency_ns = timed_average_max(rd);
-        dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
-
-        dev_stats->min_wr_latency_ns = timed_average_min(wr);
-        dev_stats->max_wr_latency_ns = timed_average_max(wr);
-        dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
-
-        dev_stats->min_flush_latency_ns = timed_average_min(fl);
-        dev_stats->max_flush_latency_ns = timed_average_max(fl);
-        dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
-
-        dev_stats->avg_rd_queue_depth =
-            block_acct_queue_depth(ts, BLOCK_ACCT_READ);
-        dev_stats->avg_wr_queue_depth =
-            block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
-    }
-}
-
-static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
-                                 bool query_backing)
-{
-    if (bdrv_get_node_name(bs)[0]) {
-        s->has_node_name = true;
-        s->node_name = g_strdup(bdrv_get_node_name(bs));
-    }
-
-    s->stats->wr_highest_offset = bs->wr_highest_offset;
-
-    if (bs->file) {
-        s->has_parent = true;
-        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
-    }
-
-    if (query_backing && bs->backing) {
-        s->has_backing = true;
-        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
-    }
-
-}
-
-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing)
+BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 {
    BlockStats *s;

    s = g_malloc0(sizeof(*s));
-    s->stats = g_malloc0(sizeof(*s->stats));

-    if (blk) {
-        bdrv_query_blk_stats(s, blk);
+    if (bs->device_name[0]) {
+        s->has_device = true;
+        s->device = g_strdup(bs->device_name);
    }
-    if (bs) {
-        bdrv_query_bds_stats(s, bs, query_backing);
+
+    s->stats = g_malloc0(sizeof(*s->stats));
+    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
+    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
+    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
+    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
+    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
+    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
+    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
+    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
+    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
+
+    if (bs->file) {
+        s->has_parent = true;
+        s->parent = bdrv_query_stats(bs->file);
    }

    return s;
@@ -473,17 +327,15 @@ static BlockStats *bdrv_query_stats(BlockBackend *blk,
 BlockInfoList *qmp_query_block(Error **errp)
 {
    BlockInfoList *head = NULL, **p_next = &head;
-    BlockBackend *blk;
+    BlockDriverState *bs = NULL;
    Error *local_err = NULL;

-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+     while ((bs = bdrv_next(bs))) {
        BlockInfoList *info = g_malloc0(sizeof(*info));
-        bdrv_query_info(blk, &info->value, &local_err);
-        if (local_err) {
+        bdrv_query_info(bs, &info->value, &local_err);
+        if (error_is_set(&local_err)) {
            error_propagate(errp, local_err);
-            g_free(info);
-            qapi_free_BlockInfoList(head);
-            return NULL;
+            goto err;
        }

        *p_next = info;
@@ -491,41 +343,20 @@ BlockInfoList *qmp_query_block(Error **errp)
    }

    return head;
+
+ err:
+    qapi_free_BlockInfoList(head);
+    return NULL;
 }

-static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
-                           bool query_nodes)
-{
-    if (query_nodes) {
-        *bs = bdrv_next_node(*bs);
-        return !!*bs;
-    }
-
-    *blk = blk_next(*blk);
-    *bs = *blk ? blk_bs(*blk) : NULL;
-
-    return !!*blk;
-}
-
-BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
-                                     bool query_nodes,
-                                     Error **errp)
+BlockStatsList *qmp_query_blockstats(Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
-    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;

-    /* Just to be safe if query_nodes is not always initialized */
-    query_nodes = has_query_nodes && query_nodes;
-
-    while (next_query_bds(&blk, &bs, query_nodes)) {
+     while ((bs = bdrv_next(bs))) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
-        AioContext *ctx = blk ? blk_get_aio_context(blk)
-                              : bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(blk, bs, !query_nodes);
-        aio_context_release(ctx);
+        info->value = bdrv_query_stats(bs);

        *p_next = info;
        p_next = &info->next;
@@ -538,7 +369,7 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,

 static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
 {
-    static const char suffixes[NB_SUFFIXES] = {'K', 'M', 'G', 'T'};
+    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

@@ -634,9 +465,18 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
        }
        case QTYPE_QBOOL: {
            QBool *value = qobject_to_qbool(obj);
-            func_fprintf(f, "%s", qbool_get_bool(value) ? "true" : "false");
+            func_fprintf(f, "%s", qbool_get_int(value) ? "true" : "false");
            break;
        }
+        case QTYPE_QERROR: {
+            QString *value = qerror_human((QError *)obj);
+            func_fprintf(f, "%s", qstring_get_str(value));
+            QDECREF(value);
+            break;
+        }
+        case QTYPE_NONE:
+            break;
+        case QTYPE_MAX:
        default:
            abort();
    }
@@ -649,7 +489,7 @@ static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
    int i = 0;

    for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
-        QType type = qobject_type(entry->value);
+        qtype_code type = qobject_type(entry->value);
        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
        const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";

@@ -667,7 +507,7 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
    const QDictEntry *entry;

    for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
-        QType type = qobject_type(entry->value);
+        qtype_code type = qobject_type(entry->value);
        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
        const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
        char key[strlen(entry->key) + 1];
@@ -690,11 +530,12 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
 void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
                                   ImageInfoSpecific *info_spec)
 {
+    Error *local_err = NULL;
    QmpOutputVisitor *ov = qmp_output_visitor_new();
    QObject *obj, *data;

-    visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
-                                 &error_abort);
+    visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
+                                 &local_err);
    obj = qmp_output_get_qobject(ov);
    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
@@ -737,10 +578,7 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,

    if (info->has_backing_filename) {
        func_fprintf(f, "backing file: %s", info->backing_filename);
-        if (!info->has_full_backing_filename) {
-            func_fprintf(f, " (cannot determine actual path)");
-        } else if (strcmp(info->backing_filename,
-                          info->full_backing_filename) != 0) {
+        if (info->has_full_backing_filename) {
            func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
        }
        func_fprintf(f, "\n");
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -21,14 +21,11 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
-#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
-#include "qapi/qmp/qerror.h"
-#include "crypto/cipher.h"
+#include "qemu/aes.h"
 #include "migration/migration.h"

 /**************************************************************/
@@ -74,8 +71,10 @@ typedef struct BDRVQcowState {
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
-    QCryptoCipher *cipher; /* NULL if no key yet */
+    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;
+    AES_KEY aes_encrypt_key;
+    AES_KEY aes_decrypt_key;
    CoMutex lock;
    Error *migration_blocker;
 } BDRVQcowState;
@@ -102,7 +101,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    QCowHeader header;

-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
@@ -116,16 +115,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
-        error_setg(errp, "Image not in qcow format");
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
-        snprintf(version, sizeof(version), "QCOW version %" PRIu32,
-                 header.version);
-        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                   bdrv_get_device_or_node_name(bs), "qcow", version);
+        snprintf(version, sizeof(version), "QCOW version %d", header.version);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }
@@ -150,12 +147,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (header.crypt_method > QCOW_CRYPT_AES) {
-        error_setg(errp, "invalid encryption method in qcow header");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
-        error_setg(errp, "AES cipher not available");
        ret = -EINVAL;
        goto fail;
    }
@@ -188,14 +179,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_table == NULL) {
-        error_setg(errp, "Could not allocate memory for L1 table");
-        ret = -ENOMEM;
-        goto fail;
-    }
+    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));

-    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -204,16 +190,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    for(i = 0;i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }
-
-    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
-    s->l2_cache =
-        qemu_try_blockalign(bs->file->bs,
-                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (s->l2_cache == NULL) {
-        error_setg(errp, "Could not allocate L2 table cache");
-        ret = -ENOMEM;
-        goto fail;
-    }
+    /* alloc L2 cache */
+    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    s->cluster_cache = g_malloc(s->cluster_size);
    s->cluster_data = g_malloc(s->cluster_size);
    s->cluster_cache_offset = -1;
@@ -221,12 +199,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
-        if (len > 1023 || len >= sizeof(bs->backing_file)) {
+        if (len > 1023) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
                   bs->backing_file, len);
        if (ret < 0) {
            goto fail;
@@ -235,9 +213,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Disable migration when qcow images are used */
-    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "qcow", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
@@ -245,7 +223,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,

 fail:
    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
+    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    return ret;
@@ -265,7 +243,6 @@ static int qcow_set_key(BlockDriverState *bs, const char *key)
    BDRVQcowState *s = bs->opaque;
    uint8_t keybuf[16];
    int len, i;
-    Error *err;

    memset(keybuf, 0, 16);
    len = strlen(key);
@@ -276,68 +253,38 @@ static int qcow_set_key(BlockDriverState *bs, const char *key)
    for(i = 0;i < len;i++) {
        keybuf[i] = key[i];
    }
-    assert(bs->encrypted);
+    s->crypt_method = s->crypt_method_header;

-    qcrypto_cipher_free(s->cipher);
-    s->cipher = qcrypto_cipher_new(
-        QCRYPTO_CIPHER_ALG_AES_128,
-        QCRYPTO_CIPHER_MODE_CBC,
-        keybuf, G_N_ELEMENTS(keybuf),
-        &err);
-
-    if (!s->cipher) {
-        /* XXX would be nice if errors in this method could
-         * be properly propagate to the caller. Would need
-         * the bdrv_set_key() API signature to be fixed. */
-        error_free(err);
+    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+        return -1;
+    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
        return -1;
-    }
    return 0;
 }

 /* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported */
-static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                           uint8_t *out_buf, const uint8_t *in_buf,
-                           int nb_sectors, bool enc, Error **errp)
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                            uint8_t *out_buf, const uint8_t *in_buf,
+                            int nb_sectors, int enc,
+                            const AES_KEY *key)
 {
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;
-    int ret;

    for(i = 0; i < nb_sectors; i++) {
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
+        AES_cbc_encrypt(in_buf, out_buf, 512, key,
+                        ivec.b, enc);
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
-    return 0;
 }

 /* 'allocate' is:
@@ -371,13 +318,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        if (!allocate)
            return 0;
        /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file->bs);
+        l2_offset = bdrv_getlength(bs->file);
        /* round to cluster size */
        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file->bs,
+        if (bdrv_pwrite_sync(bs->file,
                s->l1_table_offset + l1_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
@@ -407,12 +354,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
+        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
                s->l2_size * sizeof(uint64_t)) < 0)
            return 0;
    } else {
-        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
-                       s->l2_size * sizeof(uint64_t)) !=
+        if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t))
            return 0;
    }
@@ -433,42 +379,34 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
               overwritten */
            if (decompress_cluster(bs, cluster_offset) < 0)
                return 0;
-            cluster_offset = bdrv_getlength(bs->file->bs);
+            cluster_offset = bdrv_getlength(bs->file);
            cluster_offset = (cluster_offset + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
-            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
-                            s->cluster_size) !=
+            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
                s->cluster_size)
                return -1;
        } else {
-            cluster_offset = bdrv_getlength(bs->file->bs);
+            cluster_offset = bdrv_getlength(bs->file);
            if (allocate == 1) {
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
-                if (bs->encrypted &&
+                if (s->crypt_method &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
-                    assert(s->cipher);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
-                            Error *err = NULL;
-                            if (encrypt_sectors(s, start_sect + i,
-                                                s->cluster_data,
-                                                s->cluster_data + 512, 1,
-                                                true, &err) < 0) {
-                                error_free(err);
-                                errno = EIO;
-                                return -1;
-                            }
-                            if (bdrv_pwrite(bs->file->bs,
-                                            cluster_offset + i * 512,
+                            encrypt_sectors(s, start_sect + i,
+                                            s->cluster_data,
+                                            s->cluster_data + 512, 1, 1,
+                                            &s->aes_encrypt_key);
+                            if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512)
                                return -1;
                        }
@@ -482,7 +420,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
+        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
    }
@@ -490,7 +428,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 }

 static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+        int64_t sector_num, int nb_sectors, int *pnum)
 {
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster, n;
@@ -507,11 +445,10 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
    if (!cluster_offset) {
        return 0;
    }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
+    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
        return BDRV_BLOCK_DATA;
    }
    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
-    *file = bs->file->bs;
    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
 }

@@ -552,7 +489,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
    if (s->cluster_cache_offset != coffset) {
        csize = cluster_offset >> (63 - s->cluster_bits);
        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
+        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
        if (ret != csize)
            return -1;
        if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -575,13 +512,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;
-    Error *err = NULL;

    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
+        buf = orig_buf = qemu_blockalign(bs, qiov->size);
    } else {
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
@@ -600,13 +533,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
        }

        if (!cluster_offset) {
-            if (bs->backing) {
+            if (bs->backing_hd) {
                /* read from the base image */
                hd_iov.iov_base = (void *)buf;
                hd_iov.iov_len = n * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->backing->bs, sector_num,
+                ret = bdrv_co_readv(bs->backing_hd, sector_num,
                                    n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
@@ -631,19 +564,17 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
            hd_iov.iov_len = n * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
+            ret = bdrv_co_readv(bs->file,
                                (cluster_offset >> 9) + index_in_cluster,
                                n, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                break;
            }
-            if (bs->encrypted) {
-                assert(s->cipher);
-                if (encrypt_sectors(s, sector_num, buf, buf,
-                                    n, false, &err) < 0) {
-                    goto fail;
-                }
+            if (s->crypt_method) {
+                encrypt_sectors(s, sector_num, buf, buf,
+                                n, 0,
+                                &s->aes_decrypt_key);
            }
        }
        ret = 0;
@@ -664,7 +595,6 @@ done:
    return ret;

 fail:
-    error_free(err);
    ret = -EIO;
    goto done;
 }
@@ -686,10 +616,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    s->cluster_cache_offset = -1; /* disable compressed cache */

    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
+        buf = orig_buf = qemu_blockalign(bs, qiov->size);
        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
    } else {
        orig_buf = NULL;
@@ -712,18 +639,12 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
            ret = -EIO;
            break;
        }
-        if (bs->encrypted) {
-            Error *err = NULL;
-            assert(s->cipher);
+        if (s->crypt_method) {
            if (!cluster_data) {
                cluster_data = g_malloc0(s->cluster_size);
            }
-            if (encrypt_sectors(s, sector_num, cluster_data, buf,
-                                n, true, &err) < 0) {
-                error_free(err);
-                ret = -EIO;
-                break;
-            }
+            encrypt_sectors(s, sector_num, cluster_data, buf,
+                            n, 1, &s->aes_encrypt_key);
            src_buf = cluster_data;
        } else {
            src_buf = buf;
@@ -733,7 +654,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
-        ret = bdrv_co_writev(bs->file->bs,
+        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + index_in_cluster,
                             n, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
@@ -760,10 +681,8 @@ static void qcow_close(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;

-    qcrypto_cipher_free(s->cipher);
-    s->cipher = NULL;
    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
+    g_free(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);

@@ -771,44 +690,46 @@ static void qcow_close(BlockDriverState *bs)
    error_free(s->migration_blocker);
 }

-static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
+static int qcow_create(const char *filename, QEMUOptionParameter *options,
+                       Error **errp)
 {
    int header_size, backing_filename_len, l1_size, shift, i;
    QCowHeader header;
    uint8_t *tmp;
    int64_t total_size = 0;
-    char *backing_file = NULL;
+    const char *backing_file = NULL;
    int flags = 0;
    Error *local_err = NULL;
    int ret;
-    BlockBackend *qcow_blk;
+    BlockDriverState *qcow_bs;

    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        flags |= BLOCK_FLAG_ENCRYPT;
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / 512;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+        }
+        options++;
    }

-    ret = bdrv_create_file(filename, opts, &local_err);
+    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto cleanup;
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return ret;
    }

-    qcow_blk = blk_new_open("image", filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                            &local_err);
-    if (qcow_blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto cleanup;
+    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    if (ret < 0) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return ret;
    }

-    blk_set_allow_write_beyond_eof(qcow_blk, true);
-
-    ret = blk_truncate(qcow_blk, 0);
+    ret = bdrv_truncate(qcow_bs, 0);
    if (ret < 0) {
        goto exit;
    }
@@ -816,7 +737,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    memset(&header, 0, sizeof(header));
    header.magic = cpu_to_be32(QCOW_MAGIC);
    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size);
+    header.size = cpu_to_be64(total_size * 512);
    header_size = sizeof(header);
    backing_filename_len = 0;
    if (backing_file) {
@@ -830,7 +751,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
-                                    unmodified sectors */
+                                    unmodifyed sectors */
        header.l2_bits = 12; /* 32 KB L2 tables */
    } else {
        header.cluster_bits = 12; /* 4 KB clusters */
@@ -838,7 +759,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }
    header_size = (header_size + 7) & ~7;
    shift = header.cluster_bits + header.l2_bits;
-    l1_size = (total_size + (1LL << shift) - 1) >> shift;
+    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
    if (flags & BLOCK_FLAG_ENCRYPT) {
@@ -848,13 +769,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    /* write all the data */
-    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
-        ret = blk_pwrite(qcow_blk, sizeof(header),
+        ret = bdrv_pwrite(qcow_bs, sizeof(header),
            backing_file, backing_filename_len);
        if (ret != backing_filename_len) {
            goto exit;
@@ -864,7 +785,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
        BDRV_SECTOR_SIZE); i++) {
-        ret = blk_pwrite(qcow_blk, header_size +
+        ret = bdrv_pwrite(qcow_bs, header_size +
            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
        if (ret != BDRV_SECTOR_SIZE) {
            g_free(tmp);
@@ -875,9 +796,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    g_free(tmp);
    ret = 0;
 exit:
-    blk_unref(qcow_blk);
-cleanup:
-    g_free(backing_file);
+    bdrv_unref(qcow_bs);
    return ret;
 }

@@ -888,10 +807,10 @@ static int qcow_make_empty(BlockDriverState *bs)
    int ret;

    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
+    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

@@ -971,7 +890,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
        }

        cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
        }
@@ -990,28 +909,24 @@ static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    return 0;
 }

-static QemuOptsList qcow_create_opts = {
-    .name = "qcow-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_ENCRYPT,
-            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image",
-            .def_value_str = "off"
-        },
-        { /* end of list */ }
-    }
+
+static QEMUOptionParameter qcow_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    {
+        .name = BLOCK_OPT_BACKING_FILE,
+        .type = OPT_STRING,
+        .help = "File name of a base image"
+    },
+    {
+        .name = BLOCK_OPT_ENCRYPT,
+        .type = OPT_FLAG,
+        .help = "Encrypt the image"
+    },
+    { NULL }
 };

 static BlockDriver bdrv_qcow = {
@@ -1020,10 +935,9 @@ static BlockDriver bdrv_qcow = {
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
-    .bdrv_reopen_prepare    = qcow_reopen_prepare,
-    .bdrv_create            = qcow_create,
+    .bdrv_reopen_prepare = qcow_reopen_prepare,
+    .bdrv_create	= qcow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .supports_backing       = true,

    .bdrv_co_readv          = qcow_co_readv,
    .bdrv_co_writev         = qcow_co_writev,
@@ -1034,7 +948,7 @@ static BlockDriver bdrv_qcow = {
    .bdrv_write_compressed  = qcow_write_compressed,
    .bdrv_get_info          = qcow_get_info,

-    .create_opts            = &qcow_create_opts,
+    .create_options = qcow_create_options,
 };

 static void bdrv_qcow_init(void)
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -22,132 +22,52 @@
 * THE SOFTWARE.
 */

-/* Needed for CONFIG_MADVISE */
-#include "qemu/osdep.h"
-
-#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
-#include <sys/mman.h>
-#endif
-
 #include "block/block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
 #include "trace.h"

 typedef struct Qcow2CachedTable {
-    int64_t  offset;
-    uint64_t lru_counter;
-    int      ref;
-    bool     dirty;
+    void*   table;
+    int64_t offset;
+    bool    dirty;
+    int     cache_hits;
+    int     ref;
 } Qcow2CachedTable;

 struct Qcow2Cache {
-    Qcow2CachedTable       *entries;
-    struct Qcow2Cache      *depends;
+    Qcow2CachedTable*       entries;
+    struct Qcow2Cache*      depends;
    int                     size;
    bool                    depends_on_flush;
-    void                   *table_array;
-    uint64_t                lru_counter;
-    uint64_t                cache_clean_lru_counter;
 };

-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
-                    Qcow2Cache *c, int table)
-{
-    BDRVQcow2State *s = bs->opaque;
-    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
-}
-
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
-                  Qcow2Cache *c, void *table)
-{
-    BDRVQcow2State *s = bs->opaque;
-    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
-    int idx = table_offset / s->cluster_size;
-    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
-    return idx;
-}
-
-static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
-                                      int i, int num_tables)
-{
-#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
-    BDRVQcow2State *s = bs->opaque;
-    void *t = qcow2_cache_get_table_addr(bs, c, i);
-    int align = getpagesize();
-    size_t mem_size = (size_t) s->cluster_size * num_tables;
-    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
-    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
-    if (length > 0) {
-        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
-    }
-#endif
-}
-
-static inline bool can_clean_entry(Qcow2Cache *c, int i)
-{
-    Qcow2CachedTable *t = &c->entries[i];
-    return t->ref == 0 && !t->dirty && t->offset != 0 &&
-        t->lru_counter <= c->cache_clean_lru_counter;
-}
-
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
-{
-    int i = 0;
-    while (i < c->size) {
-        int to_clean = 0;
-
-        /* Skip the entries that we don't need to clean */
-        while (i < c->size && !can_clean_entry(c, i)) {
-            i++;
-        }
-
-        /* And count how many we can clean in a row */
-        while (i < c->size && can_clean_entry(c, i)) {
-            c->entries[i].offset = 0;
-            c->entries[i].lru_counter = 0;
-            i++;
-            to_clean++;
-        }
-
-        if (to_clean > 0) {
-            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
-        }
-    }
-
-    c->cache_clean_lru_counter = c->lru_counter;
-}
-
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
+    int i;

-    c = g_new0(Qcow2Cache, 1);
+    c = g_malloc0(sizeof(*c));
    c->size = num_tables;
-    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file->bs,
-                                         (size_t) num_tables * s->cluster_size);
+    c->entries = g_malloc0(sizeof(*c->entries) * num_tables);

-    if (!c->entries || !c->table_array) {
-        qemu_vfree(c->table_array);
-        g_free(c->entries);
-        g_free(c);
-        c = NULL;
+    for (i = 0; i < c->size; i++) {
+        c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
    }

    return c;
 }

-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
 {
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
+        qemu_vfree(c->entries[i].table);
    }

-    qemu_vfree(c->table_array);
    g_free(c->entries);
    g_free(c);

@@ -171,7 +91,7 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)

 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
@@ -184,7 +104,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
-        ret = bdrv_flush(bs->file->bs);
+        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
@@ -215,8 +135,8 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

-    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
+        s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@@ -228,7 +148,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)

 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;
@@ -243,7 +163,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
    }

    if (result == 0) {
-        ret = bdrv_flush(bs->file->bs);
+        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
@@ -292,55 +212,66 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        c->entries[i].offset = 0;
-        c->entries[i].lru_counter = 0;
+        c->entries[i].cache_hits = 0;
    }

-    qcow2_cache_table_release(bs, c, 0, c->size);
-
-    c->lru_counter = 0;
-
    return 0;
 }

+static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
+{
+    int i;
+    int min_count = INT_MAX;
+    int min_index = -1;
+
+
+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].ref) {
+            continue;
+        }
+
+        if (c->entries[i].cache_hits < min_count) {
+            min_index = i;
+            min_count = c->entries[i].cache_hits;
+        }
+
+        /* Give newer hits priority */
+        /* TODO Check how to optimize the replacement strategy */
+        c->entries[i].cache_hits /= 2;
+    }
+
+    if (min_index == -1) {
+        /* This can't happen in current synchronous code, but leave the check
+         * here as a reminder for whoever starts using AIO with the cache */
+        abort();
+    }
+    return min_index;
+}
+
 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    uint64_t offset, void **table, bool read_from_disk)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    int i;
    int ret;
-    int lookup_index;
-    uint64_t min_lru_counter = UINT64_MAX;
-    int min_lru_index = -1;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
-    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
-    do {
-        const Qcow2CachedTable *t = &c->entries[i];
-        if (t->offset == offset) {
+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].offset == offset) {
            goto found;
        }
-        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
-            min_lru_counter = t->lru_counter;
-            min_lru_index = i;
-        }
-        if (++i == c->size) {
-            i = 0;
-        }
-    } while (i != lookup_index);
-
-    if (min_lru_index == -1) {
-        /* This can't happen in current synchronous code, but leave the check
-         * here as a reminder for whoever starts using AIO with the cache */
-        abort();
    }

-    /* Cache miss: write a table back and replace it */
-    i = min_lru_index;
+    /* If not, write a table back and replace it */
+    i = qcow2_cache_find_entry_to_replace(c);
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);
+    if (i < 0) {
+        return i;
+    }

    ret = qcow2_cache_entry_flush(bs, c, i);
    if (ret < 0) {
@@ -355,20 +286,22 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

-        ret = bdrv_pread(bs->file->bs, offset,
-                         qcow2_cache_get_table_addr(bs, c, i),
-                         s->cluster_size);
+        ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
        if (ret < 0) {
            return ret;
        }
    }

+    /* Give the table some hits for the start so that it won't be replaced
+     * immediately. The number 32 is completely arbitrary. */
+    c->entries[i].cache_hits = 32;
    c->entries[i].offset = offset;

    /* And return the right table */
 found:
+    c->entries[i].cache_hits++;
    c->entries[i].ref++;
-    *table = qcow2_cache_get_table_addr(bs, c, i);
+    *table = c->entries[i].table;

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
@@ -388,24 +321,36 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    return qcow2_cache_do_get(bs, c, offset, table, false);
 }

-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, *table);
+    int i;

+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].table == *table) {
+            goto found;
+        }
+    }
+    return -ENOENT;
+
+found:
    c->entries[i].ref--;
    *table = NULL;

-    if (c->entries[i].ref == 0) {
-        c->entries[i].lru_counter = ++c->lru_counter;
-    }
-
    assert(c->entries[i].ref >= 0);
+    return 0;
 }

-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table)
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, table);
-    assert(c->entries[i].offset != 0);
+    int i;
+
+    for (i = 0; i < c->size; i++) {
+        if (c->entries[i].table == table) {
+            goto found;
+        }
+    }
+    abort();
+
+found:
    c->entries[i].dirty = true;
 }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -22,15 +22,13 @@
 * THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "block/qcow2.h"
-#include "qemu/error-report.h"

 void qcow2_free_snapshots(BlockDriverState *bs)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    int i;

    for(i = 0; i < s->nb_snapshots; i++) {
@@ -44,7 +42,7 @@ void qcow2_free_snapshots(BlockDriverState *bs)

 int qcow2_read_snapshots(BlockDriverState *bs)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    QCowSnapshot *sn;
@@ -60,12 +58,12 @@ int qcow2_read_snapshots(BlockDriverState *bs)
    }

    offset = s->snapshots_offset;
-    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
+    s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));

    for(i = 0; i < s->nb_snapshots; i++) {
        /* Read statically sized part of the snapshot header */
        offset = align_offset(offset, 8);
-        ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
+        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
@@ -84,7 +82,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
        name_size = be16_to_cpu(h.name_size);

        /* Read extra data */
-        ret = bdrv_pread(bs->file->bs, offset, &extra,
+        ret = bdrv_pread(bs->file, offset, &extra,
                         MIN(sizeof(extra), extra_data_size));
        if (ret < 0) {
            goto fail;
@@ -103,7 +101,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot ID */
        sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
+        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
@@ -112,20 +110,14 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot name */
        sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
+        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
        sn->name[name_size] = '\0';
-
-        if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
-            ret = -EFBIG;
-            goto fail;
-        }
    }

-    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

@@ -137,7 +129,7 @@ fail:
 /* add at the end of the file a new list of snapshots */
 static int qcow2_write_snapshots(BlockDriverState *bs)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
@@ -146,7 +138,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
-    int64_t offset, snapshots_offset = 0;
+    int64_t offset, snapshots_offset;
    int ret;

    /* compute the size of the snapshots */
@@ -158,14 +150,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        offset += sizeof(extra);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);
-
-        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
-            ret = -EFBIG;
-            goto fail;
-        }
    }
-
-    assert(offset <= INT_MAX);
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
@@ -215,25 +200,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        h.name_size = cpu_to_be16(name_size);
        offset = align_offset(offset, 8);

-        ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
+        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

-        ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
+        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

-        ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
+        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

-        ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
+        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
@@ -255,7 +240,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
@@ -279,7 +264,7 @@ fail:
 static void find_new_snapshot_id(BlockDriverState *bs,
                                 char *id_str, int id_str_size)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    int i;
    unsigned long id, id_max = 0;
@@ -297,7 +282,7 @@ static int find_snapshot_by_id_and_name(BlockDriverState *bs,
                                        const char *id,
                                        const char *name)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    int i;

    if (id && name) {
@@ -339,7 +324,7 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs,
 /* if no id is provided, a new one is constructed */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *new_snapshot_list = NULL;
    QCowSnapshot *old_snapshot_list = NULL;
    QCowSnapshot sn1, *sn = &sn1;
@@ -353,8 +338,10 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    memset(sn, 0, sizeof(*sn));

-    /* Generate an ID */
-    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    /* Generate an ID if it wasn't passed */
+    if (sn_info->id_str[0] == '\0') {
+        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    }

    /* Check that the ID is unique */
    if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
@@ -381,12 +368,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

-    l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_size && l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
+    l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }
@@ -397,7 +379,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto fail;
    }

-    ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
+    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
                      s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -417,7 +399,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    }

    /* Append the new snapshot to the snapshot list */
-    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
+    new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
@@ -441,7 +423,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
                           align_offset(sn->vm_state_size, s->cluster_size)
                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER, false);
+                           QCOW2_DISCARD_NEVER);

 #ifdef DEBUG_ALLOC
    {
@@ -462,7 +444,7 @@ fail:
 /* copy the snapshot 'snapshot_name' into the current disk image */
 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    int i, snapshot_index;
    int cur_l1_bytes, sn_l1_bytes;
@@ -504,14 +486,9 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
-    sn_l1_table = g_try_malloc0(cur_l1_bytes);
-    if (cur_l1_bytes && sn_l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
+    sn_l1_table = g_malloc0(cur_l1_bytes);

-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
-                     sn_l1_table, sn_l1_bytes);
+    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
        goto fail;
    }
@@ -528,7 +505,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto fail;
    }

-    ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
+    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
                           cur_l1_bytes);
    if (ret < 0) {
        goto fail;
@@ -589,7 +566,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
                          const char *name,
                          Error **errp)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot sn;
    int snapshot_index, ret;

@@ -608,8 +585,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    s->nb_snapshots--;
    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Failed to remove snapshot from snapshot list");
+        error_setg(errp, "Failed to remove snapshot from snapshot list");
        return ret;
    }

@@ -627,7 +603,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
                                         sn.l1_size, -1);
    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
+        error_setg(errp, "Failed to free the cluster and L1 table");
        return ret;
    }
    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
@@ -636,8 +612,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
    /* must update the copied flag on the current cluster offsets */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Failed to update snapshot status in disk");
+        error_setg(errp, "Failed to update snapshot status in disk");
        return ret;
    }

@@ -652,7 +627,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,

 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 {
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QEMUSnapshotInfo *sn_tab, *sn_info;
    QCowSnapshot *sn;
    int i;
@@ -662,7 +637,7 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        return s->nb_snapshots;
    }

-    sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
+    sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
    for(i = 0; i < s->nb_snapshots; i++) {
        sn_info = sn_tab + i;
        sn = s->snapshots + i;
@@ -679,13 +654,10 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    return s->nb_snapshots;
 }

-int qcow2_snapshot_load_tmp(BlockDriverState *bs,
-                            const char *snapshot_id,
-                            const char *name,
-                            Error **errp)
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
 {
    int i, snapshot_index;
-    BDRVQcow2State *s = bs->opaque;
+    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    uint64_t *new_l1_table;
    int new_l1_bytes;
@@ -694,36 +666,28 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
    assert(bs->read_only);

    /* Search the snapshot */
-    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
+    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
    if (snapshot_index < 0) {
-        error_setg(errp,
-                   "Can't find snapshot");
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    /* Allocate and read in the snapshot's L1 table */
-    if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
-        error_setg(errp, "Snapshot L1 table too large");
+    if (sn->l1_size > QCOW_MAX_L1_SIZE) {
+        error_report("Snapshot L1 table too large");
        return -EFBIG;
    }
    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = qemu_try_blockalign(bs->file->bs,
-                                       align_offset(new_l1_bytes, 512));
-    if (new_l1_table == NULL) {
-        return -ENOMEM;
-    }
+    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));

-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
-                     new_l1_table, new_l1_bytes);
+    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
    if (ret < 0) {
-        error_setg(errp, "Failed to read l1 table for snapshot");
-        qemu_vfree(new_l1_table);
+        g_free(new_l1_table);
        return ret;
    }

    /* Switch the L1 table */
-    qemu_vfree(s->l1_table);
+    g_free(s->l1_table);

    s->l1_size = sn->l1_size;
    s->l1_table_offset = sn->l1_table_offset;
--- a/block/qcow2.c
+++ b/block/qcow2.c
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,8 +25,8 @@
 #ifndef BLOCK_QCOW2_H
 #define BLOCK_QCOW2_H

-#include "crypto/cipher.h"
-#include "qemu/coroutine.h"
+#include "qemu/aes.h"
+#include "block/coroutine.h"

 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -48,10 +48,6 @@
 * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
 #define QCOW_MAX_L1_SIZE 0x2000000

-/* Allow for an average of 1k per snapshot table entry, should be plenty of
- * space for snapshot names and IDs */
-#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
-
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -59,22 +55,15 @@
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1ULL << 0)

+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
 #define MIN_CLUSTER_BITS 9
 #define MAX_CLUSTER_BITS 21

-/* Must be at least 2 to cover COW */
-#define MIN_L2_CACHE_SIZE 2 /* clusters */
+#define L2_CACHE_SIZE 16

 /* Must be at least 4 to cover all cases of refcount table growth */
-#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
-
-/* Whichever is more */
-#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
-#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
+#define REFCOUNT_CACHE_SIZE 4

 #define DEFAULT_CLUSTER_SIZE 65536

@@ -84,7 +73,6 @@
 #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
 #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
 #define QCOW2_OPT_OVERLAP "overlap-check"
-#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
 #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
 #define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
@@ -93,10 +81,6 @@
 #define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
 #define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
 #define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
-#define QCOW2_OPT_CACHE_SIZE "cache-size"
-#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
-#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
-#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"

 typedef struct QCowHeader {
    uint32_t magic;
@@ -217,12 +201,7 @@ typedef struct Qcow2DiscardRegion {
    QTAILQ_ENTRY(Qcow2DiscardRegion) next;
 } Qcow2DiscardRegion;

-typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
-                                      uint64_t index);
-typedef void Qcow2SetRefcountFunc(void *refcount_array,
-                                  uint64_t index, uint64_t value);
-
-typedef struct BDRVQcow2State {
+typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
    int cluster_sectors;
@@ -230,8 +209,6 @@ typedef struct BDRVQcow2State {
    int l2_size;
    int l1_size;
    int l1_vm_state_index;
-    int refcount_block_bits;
-    int refcount_block_size;
    int csize_shift;
    int csize_mask;
    uint64_t cluster_offset_mask;
@@ -240,8 +217,6 @@ typedef struct BDRVQcow2State {

    Qcow2Cache* l2_table_cache;
    Qcow2Cache* refcount_block_cache;
-    QEMUTimer *cache_clean_timer;
-    unsigned cache_clean_interval;

    uint8_t *cluster_cache;
    uint8_t *cluster_data;
@@ -256,8 +231,10 @@ typedef struct BDRVQcow2State {

    CoMutex lock;

-    QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
+    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;
+    AES_KEY aes_encrypt_key;
+    AES_KEY aes_decrypt_key;
    uint64_t snapshots_offset;
    int snapshots_size;
    unsigned int nb_snapshots;
@@ -267,16 +244,10 @@ typedef struct BDRVQcow2State {
    int qcow_version;
    bool use_lazy_refcounts;
    int refcount_order;
-    int refcount_bits;
-    uint64_t refcount_max;
-
-    Qcow2GetRefcountFunc *get_refcount;
-    Qcow2SetRefcountFunc *set_refcount;

    bool discard_passthrough[QCOW2_DISCARD_MAX];

    int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
-    bool signaled_corruption;

    uint64_t incompatible_features;
    uint64_t compatible_features;
@@ -287,13 +258,20 @@ typedef struct BDRVQcow2State {
    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
    QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
    bool cache_discards;
+} BDRVQcowState;

-    /* Backing file path and format as stored in the image (this is not the
-     * effective path/format, which may be the result of a runtime option
-     * override) */
-    char *image_backing_file;
-    char *image_backing_format;
-} BDRVQcow2State;
+/* XXX: use std qcow open function ? */
+typedef struct QCowCreateState {
+    int cluster_size;
+    int cluster_bits;
+    uint16_t *refcount_block;
+    uint64_t *refcount_table;
+    int64_t l1_table_offset;
+    int64_t refcount_table_offset;
+    int64_t refcount_block_offset;
+} QCowCreateState;
+
+struct QCowAIOCB;

 typedef struct Qcow2COWRegion {
    /**
@@ -397,34 +375,34 @@ typedef enum QCow2MetadataOverlap {
 #define QCOW2_OL_ALL \
    (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)

-#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
 #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL

-#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+#define REFT_OFFSET_MASK 0xffffffffffffff00ULL

-static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset)
+static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
 {
    return offset & ~(s->cluster_size - 1);
 }

-static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset)
+static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
 {
    return offset & (s->cluster_size - 1);
 }

-static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size)
+static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
 {
    return (size + (s->cluster_size - 1)) >> s->cluster_bits;
 }

-static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
+static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
 {
    int shift = s->cluster_bits + s->l2_bits;
    return (size + (1ULL << shift) - 1) >> shift;
 }

-static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
+static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
 {
    return (offset >> s->cluster_bits) & (s->l2_size - 1);
 }
@@ -435,12 +413,12 @@ static inline int64_t align_offset(int64_t offset, int n)
    return offset;
 }

-static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
+static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
 {
    return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
 }

-static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
+static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
 {
    return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
 }
@@ -459,7 +437,7 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
 }

 /* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s)
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
 {
    return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
 }
@@ -475,11 +453,6 @@ static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
        + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
 }

-static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
-{
-    return r1 > r2 ? r1 - r2 : r2 - r1;
-}
-
 // FIXME Need qcow2_ prefix to global functions

 /* qcow2.c functions */
@@ -491,24 +464,16 @@ int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);

-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
-                             int64_t size, const char *message_format, ...)
-                             GCC_FMT_ATTR(5, 6);
-
 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);

-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint64_t *refcount);
-
 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
-                                  uint64_t addend, bool decrease,
-                                  enum qcow2_discard_type type);
+                                  int addend, enum qcow2_discard_type type);

 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters);
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
 void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size,
@@ -529,36 +494,31 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
                                  int64_t size);

-int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
-                                BlockDriverAmendStatusCB *status_cb,
-                                void *cb_opaque, Error **errp);
-
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size);
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 void qcow2_l2_cache_reset(BlockDriverState *bs);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc, Error **errp);
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                     uint8_t *out_buf, const uint8_t *in_buf,
+                     int nb_sectors, int enc,
+                     const AES_KEY *key);

 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
    int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *host_offset, QCowL2Meta **m);
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                         uint64_t offset,
                                         int compressed_size);

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
+    int nb_sectors, enum qcow2_discard_type type);
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);

-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque);
+int qcow2_expand_zero_clusters(BlockDriverState *bs);

 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
@@ -568,10 +528,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
                          const char *name,
                          Error **errp);
 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs,
-                            const char *snapshot_id,
-                            const char *name,
-                            Error **errp);
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);

 void qcow2_free_snapshots(BlockDriverState *bs);
 int qcow2_read_snapshots(BlockDriverState *bs);
@@ -580,20 +537,18 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);

-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table);
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);

-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);

 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);

 #endif
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -11,7 +11,6 @@
 *
 */

-#include "qemu/osdep.h"
 #include "qed.h"

 typedef struct {
@@ -228,10 +227,8 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
    };
    int ret;

-    check.used_clusters = g_try_new0(uint32_t, (check.nclusters + 31) / 32);
-    if (check.nclusters && check.used_clusters == NULL) {
-        return -ENOMEM;
-    }
+    check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
+                                       sizeof(check.used_clusters[0]));

    check.result->bfi.total_clusters =
        (s->header.image_size + s->header.cluster_size - 1) /
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -12,7 +12,6 @@
 *
 */

-#include "qemu/osdep.h"
 #include "qed.h"

 /**
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -11,10 +11,9 @@
 *
 */

-#include "qemu/osdep.h"
 #include "qed.h"

-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
 {
    GenericCB *gencb = g_malloc(len);
    gencb->cb = cb;
@@ -25,7 +24,7 @@ void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
 void gencb_complete(void *opaque, int ret)
 {
    GenericCB *gencb = opaque;
-    BlockCompletionFunc *cb = gencb->cb;
+    BlockDriverCompletionFunc *cb = gencb->cb;
    void *user_opaque = gencb->opaque;

    g_free(gencb);
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -50,7 +50,6 @@
 * table will be deleted in favor of the existing cache entry.
 */

-#include "qemu/osdep.h"
 #include "trace.h"
 #include "qed.h"

--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -12,7 +12,6 @@
 *
 */

-#include "qemu/osdep.h"
 #include "trace.h"
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
 #include "qed.h"
@@ -50,7 +49,7 @@ out:
 }

 static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockCompletionFunc *cb, void *opaque)
+                           BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
                                                cb, opaque);
@@ -64,7 +63,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,

    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
+    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
                   qiov->size / BDRV_SECTOR_SIZE,
                   qed_read_table_cb, read_table_cb);
 }
@@ -120,7 +119,7 @@ out:
 */
 static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
                            unsigned int index, unsigned int n, bool flush,
-                            BlockCompletionFunc *cb, void *opaque)
+                            BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDWriteTableCB *write_table_cb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
@@ -153,7 +152,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                    &write_table_cb->qiov,
                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_write_table_cb, write_table_cb);
@@ -174,14 +173,14 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
    qed_read_table(s, s->header.l1_table_offset,
                   s->l1_table, qed_sync_cb, &ret);
    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        qemu_aio_wait();
    }

    return ret;
 }

 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque)
+                        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
    qed_write_table(s, s->header.l1_table_offset,
@@ -195,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,

    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        qemu_aio_wait();
    }

    return ret;
@@ -236,7 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
 }

 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque)
+                       BlockDriverCompletionFunc *cb, void *opaque)
 {
    QEDReadL2TableCB *read_l2_table_cb;

@@ -268,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset

    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        qemu_aio_wait();
    }

    return ret;
@@ -276,7 +275,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset

 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque)
+                        BlockDriverCompletionFunc *cb, void *opaque)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
    qed_write_table(s, request->l2_table->offset,
@@ -290,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,

    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
+        qemu_aio_wait();
    }

    return ret;
--- a/block/qed.c
+++ b/block/qed.c
@@ -12,16 +12,27 @@
 *
 */

-#include "qemu/osdep.h"
 #include "qemu/timer.h"
 #include "trace.h"
 #include "qed.h"
 #include "qapi/qmp/qerror.h"
 #include "migration/migration.h"
-#include "sysemu/block-backend.h"
+
+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    bool finished = false;
+
+    /* Wait for the request to finish */
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}

 static const AIOCBInfo qed_aiocb_info = {
    .aiocb_size         = sizeof(QEDAIOCB),
+    .cancel             = qed_aio_cancel,
 };

 static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
@@ -84,7 +95,7 @@ int qed_write_header_sync(BDRVQEDState *s)
    int ret;

    qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
+    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
    if (ret != sizeof(le)) {
        return ret;
    }
@@ -121,7 +132,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
    /* Update header */
    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);

-    bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
+    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
                    write_header_cb->nsectors, qed_write_header_cb,
                    write_header_cb);
 }
@@ -132,7 +143,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
-static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
                             void *opaque)
 {
    /* We must write full sectors for O_DIRECT but cannot necessarily generate
@@ -154,7 +165,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

-    bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
+    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
                   qed_write_header_read_cb, write_header_cb);
 }

@@ -356,37 +367,10 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s)
    timer_del(s->need_check_timer);
 }

-static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
+static void bdrv_qed_rebind(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;
-
-    qed_cancel_need_check_timer(s);
-    timer_free(s->need_check_timer);
-}
-
-static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
-                                        AioContext *new_context)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    s->need_check_timer = aio_timer_new(new_context,
-                                        QEMU_CLOCK_VIRTUAL, SCALE_NS,
-                                        qed_need_check_timer_cb, s);
-    if (s->header.features & QED_F_NEED_CHECK) {
-        qed_start_need_check_timer(s);
-    }
-}
-
-static void bdrv_qed_drain(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    /* Cancel timer and start doing I/O that were meant to happen as if it
-     * fired, that way we get bdrv_drain() taking care of the ongoing requests
-     * correctly. */
-    qed_cancel_need_check_timer(s);
-    qed_plug_allocating_write_reqs(s);
-    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+    s->bs = bs;
 }

 static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
@@ -400,23 +384,22 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

-    ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
+    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
    qed_header_le_to_cpu(&le_header, &s->header);

    if (s->header.magic != QED_MAGIC) {
-        error_setg(errp, "Image not in QED format");
-        return -EINVAL;
+        return -EMEDIUMTYPE;
    }
    if (s->header.features & ~QED_FEATURE_MASK) {
        /* image uses unsupported feature bits */
        char buf[64];
        snprintf(buf, sizeof(buf), "%" PRIx64,
            s->header.features & ~QED_FEATURE_MASK);
-        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                   bdrv_get_device_or_node_name(bs), "QED", buf);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "QED", buf);
        return -ENOTSUP;
    }
    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -424,7 +407,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Round down file size to the last cluster */
-    file_size = bdrv_getlength(bs->file->bs);
+    file_size = bdrv_getlength(bs->file);
    if (file_size < 0) {
        return file_size;
    }
@@ -444,14 +427,9 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,

    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                      sizeof(uint64_t);
-    s->l2_shift = ctz32(s->header.cluster_size);
+    s->l2_shift = ffs(s->header.cluster_size) - 1;
    s->l2_mask = s->table_nelems - 1;
-    s->l1_shift = s->l2_shift + ctz32(s->table_nelems);
-
-    /* Header size calculation must not overflow uint32_t */
-    if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
-        return -EINVAL;
-    }
+    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;

    if ((s->header.features & QED_F_BACKING_FILE)) {
        if ((uint64_t)s->header.backing_filename_offset +
@@ -460,7 +438,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
            return -EINVAL;
        }

-        ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
+        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
@@ -479,7 +457,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
-        !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) {
+        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
@@ -488,7 +466,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        }

        /* From here on only known autoclear feature bits are valid */
-        bdrv_flush(bs->file->bs);
+        bdrv_flush(bs->file);
    }

    s->l1_table = qed_alloc_table(s);
@@ -506,8 +484,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
-        if (!bdrv_is_read_only(bs->file->bs) &&
-            !(flags & BDRV_O_INACTIVE)) {
+        if (!bdrv_is_read_only(bs->file) &&
+            !(flags & BDRV_O_INCOMING)) {
            BdrvCheckResult result = {0};

            ret = qed_check(s, &result, true);
@@ -517,7 +495,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

-    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
+    s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                            qed_need_check_timer_cb, s);

 out:
    if (ret) {
@@ -527,13 +506,6 @@ out:
    return ret;
 }

-static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
-}
-
 /* We have nothing to do for QED reopen, stubs just return
 * success */
 static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -546,10 +518,11 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;

-    bdrv_qed_detach_aio_context(bs);
+    qed_cancel_need_check_timer(s);
+    timer_free(s->need_check_timer);

    /* Ensure writes reach stable storage */
-    bdrv_flush(bs->file->bs);
+    bdrv_flush(bs->file);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
@@ -563,8 +536,7 @@ static void bdrv_qed_close(BlockDriverState *bs)

 static int qed_create(const char *filename, uint32_t cluster_size,
                      uint64_t image_size, uint32_t table_size,
-                      const char *backing_file, const char *backing_fmt,
-                      QemuOpts *opts, Error **errp)
+                      const char *backing_file, const char *backing_fmt)
 {
    QEDHeader header = {
        .magic = QED_MAGIC,
@@ -581,26 +553,25 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    size_t l1_size = header.cluster_size * header.table_size;
    Error *local_err = NULL;
    int ret = 0;
-    BlockBackend *blk;
+    BlockDriverState *bs = NULL;

-    ret = bdrv_create_file(filename, opts, &local_err);
+    ret = bdrv_create_file(filename, NULL, &local_err);
    if (ret < 0) {
-        error_propagate(errp, local_err);
+        qerror_report_err(local_err);
+        error_free(local_err);
        return ret;
    }

-    blk = blk_new_open("image", filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                       &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        return -EIO;
+    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB,
+                         &local_err);
+    if (ret < 0) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return ret;
    }

-    blk_set_allow_write_beyond_eof(blk, true);
-
    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0);
+    ret = bdrv_truncate(bs, 0);
    if (ret < 0) {
        goto out;
    }
@@ -616,18 +587,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    }

    qed_header_cpu_to_le(&header, &le_header);
-    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
+    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        goto out;
    }
-    ret = blk_pwrite(blk, sizeof(le_header), backing_file,
-                     header.backing_filename_size);
+    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
+                      header.backing_filename_size);
    if (ret < 0) {
        goto out;
    }

    l1_table = g_malloc0(l1_size);
-    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
+    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
    if (ret < 0) {
        goto out;
    }
@@ -635,58 +606,57 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    ret = 0; /* success */
 out:
    g_free(l1_table);
-    blk_unref(blk);
+    bdrv_unref(bs);
    return ret;
 }

-static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
+static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
+                           Error **errp)
 {
    uint64_t image_size = 0;
    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
-    char *backing_file = NULL;
-    char *backing_fmt = NULL;
-    int ret;
+    const char *backing_file = NULL;
+    const char *backing_fmt = NULL;

-    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
-    cluster_size = qemu_opt_get_size_del(opts,
-                                         BLOCK_OPT_CLUSTER_SIZE,
-                                         QED_DEFAULT_CLUSTER_SIZE);
-    table_size = qemu_opt_get_size_del(opts, BLOCK_OPT_TABLE_SIZE,
-                                       QED_DEFAULT_TABLE_SIZE);
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            image_size = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+            backing_fmt = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+            if (options->value.n) {
+                cluster_size = options->value.n;
+            }
+        } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
+            if (options->value.n) {
+                table_size = options->value.n;
+            }
+        }
+        options++;
+    }

    if (!qed_is_cluster_size_valid(cluster_size)) {
-        error_setg(errp, "QED cluster size must be within range [%u, %u] "
-                         "and power of 2",
-                   QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
-        ret = -EINVAL;
-        goto finish;
+        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
+                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
+        return -EINVAL;
    }
    if (!qed_is_table_size_valid(table_size)) {
-        error_setg(errp, "QED table size must be within range [%u, %u] "
-                         "and power of 2",
-                   QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
-        ret = -EINVAL;
-        goto finish;
+        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
+                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
+        return -EINVAL;
    }
    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
-        error_setg(errp, "QED image size must be a non-zero multiple of "
-                         "cluster size and less than %" PRIu64 " bytes",
-                   qed_max_image_size(cluster_size, table_size));
-        ret = -EINVAL;
-        goto finish;
+        fprintf(stderr, "QED image size must be a non-zero multiple of "
+                        "cluster size and less than %" PRIu64 " bytes\n",
+                qed_max_image_size(cluster_size, table_size));
+        return -EINVAL;
    }

-    ret = qed_create(filename, cluster_size, image_size, table_size,
-                     backing_file, backing_fmt, opts, errp);
-
-finish:
-    g_free(backing_file);
-    g_free(backing_fmt);
-    return ret;
+    return qed_create(filename, cluster_size, image_size, table_size,
+                      backing_file, backing_fmt);
 }

 typedef struct {
@@ -695,7 +665,6 @@ typedef struct {
    uint64_t pos;
    int64_t status;
    int *pnum;
-    BlockDriverState **file;
 } QEDIsAllocatedCB;

 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
@@ -707,7 +676,6 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
    case QED_CLUSTER_FOUND:
        offset |= qed_offset_into_cluster(s, cb->pos);
        cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
-        *cb->file = cb->bs->file->bs;
        break;
    case QED_CLUSTER_ZERO:
        cb->status = BDRV_BLOCK_ZERO;
@@ -729,8 +697,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l

 static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
                                                 int64_t sector_num,
-                                                 int nb_sectors, int *pnum,
-                                                 BlockDriverState **file)
+                                                 int nb_sectors, int *pnum)
 {
    BDRVQEDState *s = bs->opaque;
    size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
@@ -739,7 +706,6 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
        .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
        .status = BDRV_BLOCK_OFFSET_MASK,
        .pnum = pnum,
-        .file = file,
    };
    QEDRequest request = { .l2_table = NULL };

@@ -756,6 +722,11 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
    return cb.status;
 }

+static int bdrv_qed_make_empty(BlockDriverState *bs)
+{
+    return -ENOTSUP;
+}
+
 static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 {
    return acb->common.bs->opaque;
@@ -764,20 +735,18 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 /**
 * Read from the backing file or zero-fill if no backing file
 *
- * @s:              QED state
- * @pos:            Byte position in device
- * @qiov:           Destination I/O vector
- * @backing_qiov:   Possibly shortened copy of qiov, to be allocated here
- * @cb:             Completion function
- * @opaque:         User data for completion function
+ * @s:          QED state
+ * @pos:        Byte position in device
+ * @qiov:       Destination I/O vector
+ * @cb:         Completion function
+ * @opaque:     User data for completion function
 *
 * This function reads qiov->size bytes starting at pos from the backing file.
 * If there is no backing file then zeroes are read.
 */
 static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
                                  QEMUIOVector *qiov,
-                                  QEMUIOVector **backing_qiov,
-                                  BlockCompletionFunc *cb, void *opaque)
+                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
    uint64_t backing_length = 0;
    size_t size;
@@ -785,8 +754,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    /* If there is a backing file, get its length.  Treat the absence of a
     * backing file like a zero length backing file.
     */
-    if (s->bs->backing) {
-        int64_t l = bdrv_getlength(s->bs->backing->bs);
+    if (s->bs->backing_hd) {
+        int64_t l = bdrv_getlength(s->bs->backing_hd);
        if (l < 0) {
            cb(opaque, l);
            return;
@@ -809,21 +778,15 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    /* If the read straddles the end of the backing file, shorten it */
    size = MIN((uint64_t)backing_length - pos, qiov->size);

-    assert(*backing_qiov == NULL);
-    *backing_qiov = g_new(QEMUIOVector, 1);
-    qemu_iovec_init(*backing_qiov, qiov->niov);
-    qemu_iovec_concat(*backing_qiov, qiov, 0, size);
-
    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
-                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+                   qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }

 typedef struct {
    GenericCB gencb;
    BDRVQEDState *s;
    QEMUIOVector qiov;
-    QEMUIOVector *backing_qiov;
    struct iovec iov;
    uint64_t offset;
 } CopyFromBackingFileCB;
@@ -840,19 +803,13 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
    CopyFromBackingFileCB *copy_cb = opaque;
    BDRVQEDState *s = copy_cb->s;

-    if (copy_cb->backing_qiov) {
-        qemu_iovec_destroy(copy_cb->backing_qiov);
-        g_free(copy_cb->backing_qiov);
-        copy_cb->backing_qiov = NULL;
-    }
-
    if (ret) {
        qed_copy_from_backing_file_cb(copy_cb, ret);
        return;
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
                    &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_copy_from_backing_file_cb, copy_cb);
 }
@@ -869,7 +826,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
 */
 static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
                                       uint64_t len, uint64_t offset,
-                                       BlockCompletionFunc *cb,
+                                       BlockDriverCompletionFunc *cb,
                                       void *opaque)
 {
    CopyFromBackingFileCB *copy_cb;
@@ -883,12 +840,11 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
    copy_cb->s = s;
    copy_cb->offset = offset;
-    copy_cb->backing_qiov = NULL;
    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
    copy_cb->iov.iov_len = len;
    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);

-    qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
+    qed_read_backing_file(s, pos, &copy_cb->qiov,
                          qed_copy_from_backing_file_write, copy_cb);
 }

@@ -920,15 +876,21 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
 static void qed_aio_complete_bh(void *opaque)
 {
    QEDAIOCB *acb = opaque;
-    BlockCompletionFunc *cb = acb->common.cb;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;
+    bool *finished = acb->finished;

    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);

    /* Invoke callback */
    cb(user_opaque, ret);
+
+    /* Signal cancel completion */
+    if (finished) {
+        *finished = true;
+    }
 }

 static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -949,8 +911,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)

    /* Arrange for a bh to invoke the completion function */
    acb->bh_ret = ret;
-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         qed_aio_complete_bh, acb);
+    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
    qemu_bh_schedule(acb->bh);

    /* Start next allocating write request waiting behind this one.  Note that
@@ -1068,7 +1029,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);

-    if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
+    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
        qed_aio_complete(acb, -EIO);
    }
 }
@@ -1082,7 +1043,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    BDRVQEDState *s = acb_to_s(acb);
    uint64_t offset = acb->cur_cluster +
                      qed_offset_into_cluster(s, acb->cur_pos);
-    BlockCompletionFunc *next_fn;
+    BlockDriverCompletionFunc *next_fn;

    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);

@@ -1094,7 +1055,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
        next_fn = qed_aio_next_io;
    } else {
-        if (s->bs->backing) {
+        if (s->bs->backing_hd) {
            next_fn = qed_aio_write_flush_before_l2_update;
        } else {
            next_fn = qed_aio_write_l2_update_cb;
@@ -1102,7 +1063,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                    next_fn, acb);
 }
@@ -1152,7 +1113,7 @@ static void qed_aio_write_prefill(void *opaque, int ret)
 static bool qed_should_set_need_check(BDRVQEDState *s)
 {
    /* The flush before L2 update path ensures consistency */
-    if (s->bs->backing) {
+    if (s->bs->backing_hd) {
        return false;
    }

@@ -1182,7 +1143,7 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
 static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
    BDRVQEDState *s = acb_to_s(acb);
-    BlockCompletionFunc *cb;
+    BlockDriverCompletionFunc *cb;

    /* Cancel timer when the first allocating request comes in */
    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1239,11 +1200,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
        struct iovec *iov = acb->qiov->iov;

        if (!iov->iov_base) {
-            iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
-            if (iov->iov_base == NULL) {
-                qed_aio_complete(acb, -ENOMEM);
-                return;
-            }
+            iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
            memset(iov->iov_base, 0, iov->iov_len);
        }
    }
@@ -1329,12 +1286,12 @@ static void qed_aio_read_data(void *opaque, int ret,
        return;
    } else if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              &acb->backing_qiov, qed_aio_next_io, acb);
+                              qed_aio_next_io, acb);
        return;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                   qed_aio_next_io, acb);
    return;
@@ -1355,12 +1312,6 @@ static void qed_aio_next_io(void *opaque, int ret)

    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);

-    if (acb->backing_qiov) {
-        qemu_iovec_destroy(acb->backing_qiov);
-        g_free(acb->backing_qiov);
-        acb->backing_qiov = NULL;
-    }
-
    /* Handle I/O error */
    if (ret) {
        qed_aio_complete(acb, ret);
@@ -1383,11 +1334,11 @@ static void qed_aio_next_io(void *opaque, int ret)
                      io_fn, acb);
 }

-static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov, int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque, int flags)
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov, int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque, int flags)
 {
    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);

@@ -1395,11 +1346,11 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
                        opaque, flags);

    acb->flags = flags;
+    acb->finished = NULL;
    acb->qiov = qiov;
    acb->qiov_offset = 0;
    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
-    acb->backing_qiov = NULL;
    acb->request.l2_table = NULL;
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);

@@ -1408,20 +1359,20 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov, int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov, int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 }

-static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov, int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
                         opaque, QED_AIOCB_WRITE);
@@ -1446,17 +1397,16 @@ static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)

 static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
                                                 int64_t sector_num,
-                                                 int nb_sectors,
-                                                 BdrvRequestFlags flags)
+                                                 int nb_sectors)
 {
-    BlockAIOCB *blockacb;
+    BlockDriverAIOCB *blockacb;
    BDRVQEDState *s = bs->opaque;
    QEDWriteZeroesCB cb = { .done = false };
    QEMUIOVector qiov;
    struct iovec iov;

    /* Refuse if there are untouched backing file sectors */
-    if (bs->backing) {
+    if (bs->backing_hd) {
        if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
            return -ENOTSUP;
        }
@@ -1524,8 +1474,6 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    memset(bdi, 0, sizeof(*bdi));
    bdi->cluster_size = s->header.cluster_size;
    bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
-    bdi->unallocated_blocks_are_zero = true;
-    bdi->can_write_zeroes_with_unmap = true;
    return 0;
 }

@@ -1593,7 +1541,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
    }

    /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
+    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
    g_free(buffer);
    if (ret == 0) {
        memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1601,30 +1549,13 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
    return ret;
 }

-static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;

    bdrv_qed_close(bs);
-
-    bdrv_invalidate_cache(bs->file->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
    memset(s, 0, sizeof(BDRVQEDState));
-    ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "Could not reopen qed layer: ");
-        return;
-    } else if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not reopen qed layer");
-        return;
-    }
+    bdrv_qed_open(bs, NULL, bs->open_flags, NULL);
 }

 static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
@@ -1635,66 +1566,55 @@ static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
    return qed_check(s, result, !!fix);
 }

-static QemuOptsList qed_create_opts = {
-    .name = "qed-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qed_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FMT,
-            .type = QEMU_OPT_STRING,
-            .help = "Image format of the base image"
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Cluster size (in bytes)",
-            .def_value_str = stringify(QED_DEFAULT_CLUSTER_SIZE)
-        },
-        {
-            .name = BLOCK_OPT_TABLE_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "L1/L2 table size (in clusters)"
-        },
-        { /* end of list */ }
-    }
+static QEMUOptionParameter qed_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size (in bytes)"
+    }, {
+        .name = BLOCK_OPT_BACKING_FILE,
+        .type = OPT_STRING,
+        .help = "File name of a base image"
+    }, {
+        .name = BLOCK_OPT_BACKING_FMT,
+        .type = OPT_STRING,
+        .help = "Image format of the base image"
+    }, {
+        .name = BLOCK_OPT_CLUSTER_SIZE,
+        .type = OPT_SIZE,
+        .help = "Cluster size (in bytes)",
+        .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
+    }, {
+        .name = BLOCK_OPT_TABLE_SIZE,
+        .type = OPT_SIZE,
+        .help = "L1/L2 table size (in clusters)"
+    },
+    { /* end of list */ }
 };

 static BlockDriver bdrv_qed = {
    .format_name              = "qed",
    .instance_size            = sizeof(BDRVQEDState),
-    .create_opts              = &qed_create_opts,
-    .supports_backing         = true,
+    .create_options           = qed_create_options,

    .bdrv_probe               = bdrv_qed_probe,
+    .bdrv_rebind              = bdrv_qed_rebind,
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
    .bdrv_create              = bdrv_qed_create,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
+    .bdrv_make_empty          = bdrv_qed_make_empty,
    .bdrv_aio_readv           = bdrv_qed_aio_readv,
    .bdrv_aio_writev          = bdrv_qed_aio_writev,
    .bdrv_co_write_zeroes     = bdrv_qed_co_write_zeroes,
    .bdrv_truncate            = bdrv_qed_truncate,
    .bdrv_getlength           = bdrv_qed_getlength,
    .bdrv_get_info            = bdrv_qed_get_info,
-    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
    .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
    .bdrv_check               = bdrv_qed_check,
-    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
-    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_drain               = bdrv_qed_drain,
 };

 static void bdrv_qed_init(void)
--- a/block/qed.h
+++ b/block/qed.h
@@ -43,7 +43,7 @@
 *
 * All fields are little-endian on disk.
 */
-#define  QED_DEFAULT_CLUSTER_SIZE  65536
+
 enum {
    QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,

@@ -69,6 +69,7 @@ enum {
     */
    QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
+    QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,

    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
     * a multiple of clusters so large maximum image sizes can be supported
@@ -128,11 +129,12 @@ enum {
 };

 typedef struct QEDAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int bh_ret;                     /* final return status for completion bh */
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    int flags;                      /* QED_AIOCB_* bits ORed together */
+    bool *finished;                 /* signal for cancel completion */
    uint64_t end_pos;               /* request end on block device, in bytes */

    /* User scatter-gather list */
@@ -141,7 +143,6 @@ typedef struct QEDAIOCB {

    /* Current cluster scatter-gather list */
    QEMUIOVector cur_qiov;
-    QEMUIOVector *backing_qiov;
    uint64_t cur_pos;               /* position on block device, in bytes */
    uint64_t cur_cluster;           /* cluster offset in image file */
    unsigned int cur_nclusters;     /* number of clusters being accessed */
@@ -202,11 +203,11 @@ typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t l
 * Generic callback for chaining async callbacks
 */
 typedef struct {
-    BlockCompletionFunc *cb;
+    BlockDriverCompletionFunc *cb;
    void *opaque;
 } GenericCB;

-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
 void gencb_complete(void *opaque, int ret);

 /**
@@ -229,16 +230,16 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 */
 int qed_read_l1_table_sync(BDRVQEDState *s);
 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque);
+                        BlockDriverCompletionFunc *cb, void *opaque);
 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
 void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque);
+                       BlockDriverCompletionFunc *cb, void *opaque);
 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque);
+                        BlockDriverCompletionFunc *cb, void *opaque);
 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);

--- a/block/quorum.c
+++ b/block/quorum.c
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -21,10 +21,9 @@
 #define QEMU_AIO_IOCTL        0x0004
 #define QEMU_AIO_FLUSH        0x0008
 #define QEMU_AIO_DISCARD      0x0010
-#define QEMU_AIO_WRITE_ZEROES 0x0020
 #define QEMU_AIO_TYPE_MASK \
        (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
-         QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
+         QEMU_AIO_DISCARD)

 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
@@ -34,29 +33,19 @@
 /* linux-aio.c - Linux native implementation */
 #ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
-void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
-void laio_detach_aio_context(void *s, AioContext *old_context);
-void laio_attach_aio_context(void *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
 #endif

 #ifdef _WIN32
 typedef struct QEMUWin32AIOState QEMUWin32AIOState;
 QEMUWin32AIOState *win32_aio_init(void);
-void win32_aio_cleanup(QEMUWin32AIOState *aio);
 int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
-void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *old_context);
-void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *new_context);
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
 #endif

 #endif /* QEMU_RAW_AIO_H */
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -21,7 +21,6 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/timer.h"
 #include "block/block_int.h"
@@ -30,7 +29,6 @@
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
-#include "qapi/qmp/qstring.h"
 #include <windows.h>
 #include <winioctl.h>

@@ -38,6 +36,8 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2

+static QEMUWin32AIOState *aio;
+
 typedef struct RawWin32AIOData {
    BlockDriverState *bs;
    HANDLE hfile;
@@ -103,7 +103,7 @@ static int aio_worker(void *arg)
    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
    case QEMU_AIO_READ:
        count = handle_aiocb_rw(aiocb);
-        if (count < aiocb->aio_nbytes) {
+        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
            /* A short read means that we have reached EOF. Pad the buffer
             * with zeros for bytes after EOF. */
            iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
@@ -120,9 +120,9 @@ static int aio_worker(void *arg)
    case QEMU_AIO_WRITE:
        count = handle_aiocb_rw(aiocb);
        if (count == aiocb->aio_nbytes) {
-            ret = 0;
+            count = 0;
        } else {
-            ret = -EINVAL;
+            count = -EINVAL;
        }
        break;
    case QEMU_AIO_FLUSH:
@@ -136,15 +136,15 @@ static int aio_worker(void *arg)
        break;
    }

-    g_free(aiocb);
+    g_slice_free(RawWin32AIOData, aiocb);
    return ret;
 }

-static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
 {
-    RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
+    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
    ThreadPool *pool;

    acb->bs = bs;
@@ -202,54 +202,6 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }

-static void raw_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
-    }
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_attach_aio_context(s->aio, new_context);
-    }
-}
-
-static void raw_probe_alignment(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
-    DISK_GEOMETRY_EX dg;
-    BOOL status;
-
-    if (s->type == FTYPE_CD) {
-        bs->request_alignment = 2048;
-        return;
-    }
-    if (s->type == FTYPE_HARDDISK) {
-        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
-                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
-        if (status != 0) {
-            bs->request_alignment = dg.Geometry.BytesPerSector;
-            return;
-        }
-        /* try GetDiskFreeSpace too */
-    }
-
-    if (s->drive_path[0]) {
-        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
-                         &dg.Geometry.BytesPerSector,
-                         &freeClusters, &totalClusters);
-        bs->request_alignment = dg.Geometry.BytesPerSector;
-    }
-}
-
 static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
 {
    assert(access_flags != NULL);
@@ -270,17 +222,6 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
    }
 }

-static void raw_parse_filename(const char *filename, QDict *options,
-                               Error **errp)
-{
-    /* The filename does not have to be prefixed by the protocol name, since
-     * "file" is the default protocol; therefore, the return value of this
-     * function call can be ignored. */
-    strstart(filename, "file:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
 static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -307,9 +248,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    s->type = FTYPE_FILE;

-    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&raw_runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
+    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
@@ -319,15 +260,13 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,

    raw_parse_flags(flags, &access_flags, &overlapped);

-    if (filename[0] && filename[1] == ':') {
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
-    } else if (filename[0] == '\\' && filename[1] == '\\') {
-        s->drive_path[0] = 0;
-    } else {
-        /* Relative path.  */
-        char buf[MAX_PATH];
-        GetCurrentDirectory(MAX_PATH, buf);
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+    if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
+        aio = win32_aio_init();
+        if (aio == NULL) {
+            error_setg(errp, "Could not initialize AIO");
+            ret = -EINVAL;
+            goto fail;
+        }
    }

    s->hfile = CreateFile(filename, access_flags,
@@ -345,35 +284,24 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (flags & BDRV_O_NATIVE_AIO) {
-        s->aio = win32_aio_init();
-        if (s->aio == NULL) {
-            CloseHandle(s->hfile);
-            error_setg(errp, "Could not initialize AIO");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        ret = win32_aio_attach(s->aio, s->hfile);
+        ret = win32_aio_attach(aio, s->hfile);
        if (ret < 0) {
-            win32_aio_cleanup(s->aio);
            CloseHandle(s->hfile);
            error_setg_errno(errp, -ret, "Could not enable AIO");
            goto fail;
        }
-
-        win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
+        s->aio = aio;
    }

-    raw_probe_alignment(bs);
    ret = 0;
 fail:
    qemu_opts_del(opts);
    return ret;
 }

-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                         BlockCompletionFunc *cb, void *opaque)
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -385,9 +313,9 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
    }
 }

-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
+                          BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
@@ -399,8 +327,8 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
    }
 }

-static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
-                         BlockCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
@@ -409,17 +337,7 @@ static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
 static void raw_close(BlockDriverState *bs)
 {
    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
-        win32_aio_cleanup(s->aio);
-        s->aio = NULL;
-    }
-
    CloseHandle(s->hfile);
-    if (bs->open_flags & BDRV_O_TEMPORARY) {
-        unlink(bs->filename);
-    }
 }

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -505,16 +423,19 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
    return st.st_size;
 }

-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
+static int raw_create(const char *filename, QEMUOptionParameter *options,
+                      Error **errp)
 {
    int fd;
    int64_t total_size = 0;

-    strstart(filename, "file:", &filename);
-
    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / 512;
+        }
+        options++;
+    }

    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
                   0644);
@@ -523,34 +444,28 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        return -EIO;
    }
    set_sparse(fd);
-    ftruncate(fd, total_size);
+    ftruncate(fd, total_size * 512);
    qemu_close(fd);
    return 0;
 }

-
-static QemuOptsList raw_create_opts = {
-    .name = "raw-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
+static QEMUOptionParameter raw_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
 };

-BlockDriver bdrv_file = {
+static BlockDriver bdrv_file = {
    .format_name	= "file",
    .protocol_name	= "file",
    .instance_size	= sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
-    .bdrv_parse_filename = raw_parse_filename,
-    .bdrv_file_open     = raw_open,
-    .bdrv_close         = raw_close,
-    .bdrv_create        = raw_create,
+    .bdrv_file_open	= raw_open,
+    .bdrv_close		= raw_close,
+    .bdrv_create	= raw_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,

    .bdrv_aio_readv     = raw_aio_readv,
@@ -562,7 +477,7 @@ BlockDriver bdrv_file = {
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,

-    .create_opts        = &raw_create_opts,
+    .create_options = raw_create_options,
 };

 /***********************************************/
@@ -623,15 +538,6 @@ static int hdev_probe_device(const char *filename)
    return 0;
 }

-static void hdev_parse_filename(const char *filename, QDict *options,
-                                Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_device:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -644,10 +550,9 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    const char *filename;

-    QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
-                                      &error_abort);
+    QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
+    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto done;
@@ -702,7 +607,6 @@ static BlockDriver bdrv_host_device = {
    .protocol_name	= "host_device",
    .instance_size	= sizeof(BDRVRawState),
    .bdrv_needs_filename = true,
-    .bdrv_parse_filename = hdev_parse_filename,
    .bdrv_probe_device	= hdev_probe_device,
    .bdrv_file_open	= hdev_open,
    .bdrv_close		= raw_close,
@@ -711,9 +615,6 @@ static BlockDriver bdrv_host_device = {
    .bdrv_aio_writev    = raw_aio_writev,
    .bdrv_aio_flush     = raw_aio_flush,

-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
    .bdrv_getlength      = raw_getlength,
    .has_variable_length = true,

--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -26,21 +26,16 @@
 * IN THE SOFTWARE.
 */

-#include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/option.h"

-static QemuOptsList raw_create_opts = {
-    .name = "raw-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
+static QEMUOptionParameter raw_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { 0 }
 };

 static int raw_reopen_prepare(BDRVReopenState *reopen_state,
@@ -53,145 +48,98 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
 {
    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+    return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
 }

 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
-    void *buf = NULL;
-    BlockDriver *drv;
-    QEMUIOVector local_qiov;
-    int ret;
-
-    if (bs->probed && sector_num == 0) {
-        /* As long as these conditions are true, we can't get partial writes to
-         * the probe buffer and can just directly check the request. */
-        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
-        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
-
-        if (nb_sectors == 0) {
-            /* qemu_iovec_to_buf() would fail, but we want to return success
-             * instead of -EINVAL in this case. */
-            return 0;
-        }
-
-        buf = qemu_try_blockalign(bs->file->bs, 512);
-        if (!buf) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-
-        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
-        if (ret != 512) {
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        drv = bdrv_probe_all(buf, 512, NULL);
-        if (drv != bs->drv) {
-            ret = -EPERM;
-            goto fail;
-        }
-
-        /* Use the checked buffer, a malicious guest might be overwriting its
-         * original buffer in the background. */
-        qemu_iovec_init(&local_qiov, qiov->niov + 1);
-        qemu_iovec_add(&local_qiov, buf, 512);
-        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
-        qiov = &local_qiov;
-    }
-
    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
-
-fail:
-    if (qiov == &local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(buf);
-    return ret;
+    return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
 }

 static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                            int64_t sector_num,
-                                            int nb_sectors, int *pnum,
-                                            BlockDriverState **file)
+                                            int nb_sectors, int *pnum)
 {
    *pnum = nb_sectors;
-    *file = bs->file->bs;
    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
           (sector_num << BDRV_SECTOR_BITS);
 }

 static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
-                                            int64_t sector_num, int nb_sectors,
-                                            BdrvRequestFlags flags)
+                                            int64_t sector_num, int nb_sectors)
 {
-    return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+    return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
 }

 static int coroutine_fn raw_co_discard(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors)
 {
-    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    return bdrv_co_discard(bs->file, sector_num, nb_sectors);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_getlength(bs->file);
 }

 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-    return bdrv_get_info(bs->file->bs, bdi);
-}
-
-static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    bs->bl = bs->file->bs->bl;
+    return bdrv_get_info(bs->file, bdi);
 }

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file->bs, offset);
+    return bdrv_truncate(bs->file, offset);
+}
+
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    return bdrv_is_inserted(bs->file);
 }

 static int raw_media_changed(BlockDriverState *bs)
 {
-    return bdrv_media_changed(bs->file->bs);
+    return bdrv_media_changed(bs->file);
 }

 static void raw_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file->bs, eject_flag);
+    bdrv_eject(bs->file, eject_flag);
 }

 static void raw_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file->bs, locked);
+    bdrv_lock_medium(bs->file, locked);
 }

-static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-                                 unsigned long int req, void *buf,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque)
+static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
-    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
+    return bdrv_ioctl(bs->file, req, buf);
+}
+
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+                                       unsigned long int req, void *buf,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
+{
+    return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
 }

 static int raw_has_zero_init(BlockDriverState *bs)
 {
-    return bdrv_has_zero_init(bs->file->bs);
+    return bdrv_has_zero_init(bs->file);
 }

-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
+static int raw_create(const char *filename, QEMUOptionParameter *options,
+                      Error **errp)
 {
    Error *local_err = NULL;
    int ret;

-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (local_err) {
+    ret = bdrv_create_file(filename, options, &local_err);
+    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
    }
    return ret;
@@ -200,19 +148,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
-    bs->sg = bs->file->bs->sg;
-
-    if (bs->probed && !bdrv_is_read_only(bs)) {
-        fprintf(stderr,
-                "WARNING: Image format was not specified for '%s' and probing "
-                "guessed raw.\n"
-                "         Automatically detecting the format is dangerous for "
-                "raw images, write operations on block 0 will be restricted.\n"
-                "         Specify the 'raw' format explicitly to remove the "
-                "restrictions.\n",
-                bs->file->bs->filename);
-    }
-
+    bs->sg = bs->file->sg;
    return 0;
 }

@@ -228,17 +164,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
    return 1;
 }

-static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
-{
-    return bdrv_probe_blocksizes(bs->file->bs, bsz);
-}
-
-static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
-{
-    return bdrv_probe_geometry(bs->file->bs, geo);
-}
-
-BlockDriver bdrv_raw = {
+static BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
@@ -254,14 +180,13 @@ BlockDriver bdrv_raw = {
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
    .bdrv_get_info        = &raw_get_info,
-    .bdrv_refresh_limits  = &raw_refresh_limits,
-    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
-    .bdrv_probe_geometry  = &raw_probe_geometry,
+    .bdrv_is_inserted     = &raw_is_inserted,
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
+    .bdrv_ioctl           = &raw_ioctl,
    .bdrv_aio_ioctl       = &raw_aio_ioctl,
-    .create_opts          = &raw_create_opts,
+    .create_options       = &raw_create_options[0],
    .bdrv_has_zero_init   = &raw_has_zero_init
 };

--- a/block/rbd.c
+++ b/block/rbd.c
@@ -11,12 +11,11 @@
 * GNU GPL, version 2 or (at your option) any later version.
 */

-#include "qemu/osdep.h"
+#include <inttypes.h>

 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
-#include "crypto/secret.h"

 #include <rbd/librbd.h>

@@ -69,36 +68,49 @@ typedef enum {
 } RBDAIOCmd;

 typedef struct RBDAIOCB {
-    BlockAIOCB common;
+    BlockDriverAIOCB common;
    QEMUBH *bh;
    int64_t ret;
    QEMUIOVector *qiov;
    char *bounce;
    RBDAIOCmd cmd;
+    int64_t sector_num;
    int error;
    struct BDRVRBDState *s;
+    int cancelled;
+    int status;
 } RBDAIOCB;

 typedef struct RADOSCB {
+    int rcbid;
    RBDAIOCB *acb;
    struct BDRVRBDState *s;
+    int done;
    int64_t size;
    char *buf;
    int64_t ret;
 } RADOSCB;

+#define RBD_FD_READ 0
+#define RBD_FD_WRITE 1
+
 typedef struct BDRVRBDState {
+    int fds[2];
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char *snap;
+    int event_reader_pos;
+    RADOSCB *event_rcb;
 } BDRVRBDState;

+static void rbd_aio_bh_cb(void *opaque);
+
 static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
-                             char **p, Error **errp)
+                             char **p)
 {
    int l;
    char *end;
@@ -121,10 +133,10 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
    }
    l = strlen(src);
    if (l >= dst_len) {
-        error_setg(errp, "%s too long", name);
+        error_report("%s too long", name);
        return -EINVAL;
    } else if (l == 0) {
-        error_setg(errp, "%s too short", name);
+        error_report("%s too short", name);
        return -EINVAL;
    }

@@ -150,15 +162,13 @@ static int qemu_rbd_parsename(const char *filename,
                              char *pool, int pool_len,
                              char *snap, int snap_len,
                              char *name, int name_len,
-                              char *conf, int conf_len,
-                              Error **errp)
+                              char *conf, int conf_len)
 {
    const char *start;
    char *p, *buf;
    int ret;

    if (!strstart(filename, "rbd:", &start)) {
-        error_setg(errp, "File name must start with 'rbd:'");
        return -EINVAL;
    }

@@ -167,8 +177,7 @@ static int qemu_rbd_parsename(const char *filename,
    *snap = '\0';
    *conf = '\0';

-    ret = qemu_rbd_next_tok(pool, pool_len, p,
-                            '/', "pool name", &p, errp);
+    ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
    if (ret < 0 || !p) {
        ret = -EINVAL;
        goto done;
@@ -176,25 +185,21 @@ static int qemu_rbd_parsename(const char *filename,
    qemu_rbd_unescape(pool);

    if (strchr(p, '@')) {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                '@', "object name", &p, errp);
+        ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
        if (ret < 0) {
            goto done;
        }
-        ret = qemu_rbd_next_tok(snap, snap_len, p,
-                                ':', "snap name", &p, errp);
+        ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
        qemu_rbd_unescape(snap);
    } else {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                ':', "object name", &p, errp);
+        ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
    }
    qemu_rbd_unescape(name);
    if (ret < 0 || !p) {
        goto done;
    }

-    ret = qemu_rbd_next_tok(conf, conf_len, p,
-                            '\0', "configuration", &p, errp);
+    ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', "configuration", &p);

 done:
    g_free(buf);
@@ -229,30 +234,7 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
    return NULL;
 }

-
-static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
-                             Error **errp)
-{
-    if (secretid == 0) {
-        return 0;
-    }
-
-    gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
-                                                    errp);
-    if (!secret) {
-        return -1;
-    }
-
-    rados_conf_set(cluster, "key", secret);
-    g_free(secret);
-
-    return 0;
-}
-
-
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
-                             bool only_read_conf_file,
-                             Error **errp)
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
 {
    char *p, *buf;
    char name[RBD_MAX_CONF_NAME_SIZE];
@@ -264,41 +246,37 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,

    while (p) {
        ret = qemu_rbd_next_tok(name, sizeof(name), p,
-                                '=', "conf option name", &p, errp);
+                                '=', "conf option name", &p);
        if (ret < 0) {
            break;
        }
        qemu_rbd_unescape(name);

        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
+            error_report("conf option %s has no value", name);
            ret = -EINVAL;
            break;
        }

        ret = qemu_rbd_next_tok(value, sizeof(value), p,
-                                ':', "conf option value", &p, errp);
+                                ':', "conf option value", &p);
        if (ret < 0) {
            break;
        }
        qemu_rbd_unescape(value);

        if (strcmp(name, "conf") == 0) {
-            /* read the conf file alone, so it doesn't override more
-               specific settings for a particular device */
-            if (only_read_conf_file) {
-                ret = rados_conf_read_file(cluster, value);
-                if (ret < 0) {
-                    error_setg(errp, "error reading conf file %s", value);
-                    break;
-                }
+            ret = rados_conf_read_file(cluster, value);
+            if (ret < 0) {
+                error_report("error reading conf file %s", value);
+                break;
            }
        } else if (strcmp(name, "id") == 0) {
            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
-        } else if (!only_read_conf_file) {
+        } else {
            ret = rados_conf_set(cluster, name, value);
            if (ret < 0) {
-                error_setg(errp, "invalid conf option %s", name);
+                error_report("invalid conf option %s", name);
                ret = -EINVAL;
                break;
            }
@@ -309,9 +287,9 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
    return ret;
 }

-static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
+static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
+                           Error **errp)
 {
-    Error *local_err = NULL;
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
@@ -321,73 +299,64 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    char conf[RBD_MAX_CONF_SIZE];
    char clientname_buf[RBD_MAX_CONF_SIZE];
    char *clientname;
-    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
    int ret;

-    secretid = qemu_opt_get(opts, "password-secret");
-
    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           name, sizeof(name),
-                           conf, sizeof(conf), &local_err) < 0) {
-        error_propagate(errp, local_err);
+                           conf, sizeof(conf)) < 0) {
        return -EINVAL;
    }

    /* Read out options */
-    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                     BDRV_SECTOR_SIZE);
-    objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
-    if (objsize) {
-        if ((objsize - 1) & objsize) {    /* not a power of 2? */
-            error_setg(errp, "obj size needs to be power of 2");
-            return -EINVAL;
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            bytes = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+            if (options->value.n) {
+                objsize = options->value.n;
+                if ((objsize - 1) & objsize) {    /* not a power of 2? */
+                    error_report("obj size needs to be power of 2");
+                    return -EINVAL;
+                }
+                if (objsize < 4096) {
+                    error_report("obj size too small");
+                    return -EINVAL;
+                }
+                obj_order = ffs(objsize) - 1;
+            }
        }
-        if (objsize < 4096) {
-            error_setg(errp, "obj size too small");
-            return -EINVAL;
-        }
-        obj_order = ctz32(objsize);
+        options++;
    }

    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
    if (rados_create(&cluster, clientname) < 0) {
-        error_setg(errp, "error initializing");
+        error_report("error initializing");
        return -EIO;
    }

    if (strstr(conf, "conf=") == NULL) {
        /* try default location, but ignore failure */
        rados_conf_read_file(cluster, NULL);
-    } else if (conf[0] != '\0' &&
-               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
-        rados_shutdown(cluster);
-        error_propagate(errp, local_err);
-        return -EIO;
    }

    if (conf[0] != '\0' &&
-        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
-        rados_shutdown(cluster);
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
+        qemu_rbd_set_conf(cluster, conf) < 0) {
+        error_report("error setting config options");
        rados_shutdown(cluster);
        return -EIO;
    }

    if (rados_connect(cluster) < 0) {
-        error_setg(errp, "error connecting");
+        error_report("error connecting");
        rados_shutdown(cluster);
        return -EIO;
    }

    if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
-        error_setg(errp, "error opening pool %s", pool);
+        error_report("error opening pool %s", pool);
        rados_shutdown(cluster);
        return -EIO;
    }
@@ -400,8 +369,9 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 }

 /*
- * This aio completion is being called from rbd_finish_bh() and runs in qemu
- * BH context.
+ * This aio completion is being called from qemu_rbd_aio_event_reader()
+ * and runs in qemu context. It schedules a bh, but just in case the aio
+ * was not cancelled before.
 */
 static void qemu_rbd_complete_aio(RADOSCB *rcb)
 {
@@ -431,16 +401,36 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
            acb->ret = r;
        }
    }
-
+    /* Note that acb->bh can be NULL in case where the aio was cancelled */
+    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
+    qemu_bh_schedule(acb->bh);
    g_free(rcb);
+}

-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+/*
+ * aio fd read handler. It runs in the qemu context and calls the
+ * completion handling of completed rados aio operations.
+ */
+static void qemu_rbd_aio_event_reader(void *opaque)
+{
+    BDRVRBDState *s = opaque;

-    qemu_aio_unref(acb);
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_rcb;
+
+        /* now read the rcb pointer that was sent from a non qemu thread */
+        ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_rcb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_rcb)) {
+                s->event_reader_pos = 0;
+                qemu_rbd_complete_aio(s->event_rcb);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
 }

 /* TODO Convert to fine grained options */
@@ -453,11 +443,6 @@ static QemuOptsList runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "Specification of the rbd image",
        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
        { /* end of list */ }
    },
 };
@@ -471,27 +456,26 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    char conf[RBD_MAX_CONF_SIZE];
    char clientname_buf[RBD_MAX_CONF_SIZE];
    char *clientname;
-    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
    const char *filename;
    int r;

-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
        qemu_opts_del(opts);
        return -EINVAL;
    }

    filename = qemu_opt_get(opts, "filename");
-    secretid = qemu_opt_get(opts, "password-secret");

    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           s->name, sizeof(s->name),
-                           conf, sizeof(conf), errp) < 0) {
+                           conf, sizeof(conf)) < 0) {
        r = -EINVAL;
        goto failed_opts;
    }
@@ -499,7 +483,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
-        error_setg(errp, "error initializing");
+        error_report("error initializing");
        goto failed_opts;
    }

@@ -508,28 +492,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        s->snap = g_strdup(snap_buf);
    }

-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(s->cluster, NULL);
-    } else if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
-    if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
-    if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
-        r = -EIO;
-        goto failed_shutdown;
-    }
-
    /*
     * Fallback to more conservative semantics if setting cache
     * options fails. Ignore errors from setting rbd_cache because the
@@ -543,29 +505,56 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
        rados_conf_set(s->cluster, "rbd_cache", "true");
    }

+    if (strstr(conf, "conf=") == NULL) {
+        /* try default location, but ignore failure */
+        rados_conf_read_file(s->cluster, NULL);
+    }
+
+    if (conf[0] != '\0') {
+        r = qemu_rbd_set_conf(s->cluster, conf);
+        if (r < 0) {
+            error_report("error setting config options");
+            goto failed_shutdown;
+        }
+    }
+
    r = rados_connect(s->cluster);
    if (r < 0) {
-        error_setg(errp, "error connecting");
+        error_report("error connecting");
        goto failed_shutdown;
    }

    r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
    if (r < 0) {
-        error_setg(errp, "error opening pool %s", pool);
+        error_report("error opening pool %s", pool);
        goto failed_shutdown;
    }

    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
-        error_setg(errp, "error reading header from %s", s->name);
+        error_report("error reading header from %s", s->name);
        goto failed_open;
    }

    bs->read_only = (s->snap != NULL);

+    s->event_reader_pos = 0;
+    r = qemu_pipe(s->fds);
+    if (r < 0) {
+        error_report("error opening eventfd");
+        goto failed;
+    }
+    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
+                            NULL, s);
+
+
    qemu_opts_del(opts);
    return 0;

+failed:
+    rbd_close(s->image);
 failed_open:
    rados_ioctx_destroy(s->io_ctx);
 failed_shutdown:
@@ -580,21 +569,65 @@ static void qemu_rbd_close(BlockDriverState *bs)
 {
    BDRVRBDState *s = bs->opaque;

+    close(s->fds[0]);
+    close(s->fds[1]);
+    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
+
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
    rados_shutdown(s->cluster);
 }

+/*
+ * Cancel aio. Since we don't reference acb in a non qemu threads,
+ * it is safe to access it here.
+ */
+static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    RBDAIOCB *acb = (RBDAIOCB *) blockacb;
+    acb->cancelled = 1;
+
+    while (acb->status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+
+    qemu_aio_release(acb);
+}
+
 static const AIOCBInfo rbd_aiocb_info = {
    .aiocb_size = sizeof(RBDAIOCB),
+    .cancel = qemu_rbd_aio_cancel,
 };

-static void rbd_finish_bh(void *opaque)
+static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
 {
-    RADOSCB *rcb = opaque;
-    qemu_bh_delete(rcb->acb->bh);
-    qemu_rbd_complete_aio(rcb);
+    int ret = 0;
+    while (1) {
+        fd_set wfd;
+        int fd = s->fds[RBD_FD_WRITE];
+
+        /* send the op pointer to the qemu thread that is responsible
+           for the aio/op completion. Must do it in a qemu thread context */
+        ret = write(fd, (void *)&rcb, sizeof(rcb));
+        if (ret >= 0) {
+            break;
+        }
+        if (errno == EINTR) {
+            continue;
+        }
+        if (errno != EAGAIN) {
+            break;
+        }
+
+        FD_ZERO(&wfd);
+        FD_SET(fd, &wfd);
+        do {
+            ret = select(fd + 1, NULL, &wfd, NULL, NULL);
+        } while (ret < 0 && errno == EINTR);
+    }
+
+    return ret;
 }

 /*
@@ -602,19 +635,40 @@ static void rbd_finish_bh(void *opaque)
 *
 * Note: this function is being called from a non qemu thread so
 * we need to be careful about what we do here. Generally we only
- * schedule a BH, and do the rest of the io completion handling
- * from rbd_finish_bh() which runs in a qemu context.
+ * write to the block notification pipe, and do the rest of the
+ * io completion handling from qemu_rbd_aio_event_reader() which
+ * runs in a qemu context.
 */
 static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
 {
-    RBDAIOCB *acb = rcb->acb;
-
+    int ret;
    rcb->ret = rbd_aio_get_return_value(c);
    rbd_aio_release(c);
+    ret = qemu_rbd_send_pipe(rcb->s, rcb);
+    if (ret < 0) {
+        error_report("failed writing to acb->s->fds");
+        g_free(rcb);
+    }
+}

-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         rbd_finish_bh, rcb);
-    qemu_bh_schedule(acb->bh);
+/* Callback when all queued rbd_aio requests are complete */
+
+static void rbd_aio_bh_cb(void *opaque)
+{
+    RBDAIOCB *acb = opaque;
+
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    acb->status = 0;
+
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }

 static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -639,16 +693,16 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
 #endif
 }

-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov,
-                                 int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque,
-                                 RBDAIOCmd cmd)
+static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov,
+                                       int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque,
+                                       RBDAIOCmd cmd)
 {
    RBDAIOCB *acb;
-    RADOSCB *rcb = NULL;
+    RADOSCB *rcb;
    rbd_completion_t c;
    int64_t off, size;
    char *buf;
@@ -662,15 +716,14 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
        acb->bounce = NULL;
    } else {
-        acb->bounce = qemu_try_blockalign(bs, qiov->size);
-        if (acb->bounce == NULL) {
-            goto failed;
-        }
+        acb->bounce = qemu_blockalign(bs, qiov->size);
    }
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;
+    acb->cancelled = 0;
    acb->bh = NULL;
+    acb->status = -EINPROGRESS;

    if (cmd == RBD_AIO_WRITE) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -681,7 +734,8 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

-    rcb = g_new(RADOSCB, 1);
+    rcb = g_malloc(sizeof(RADOSCB));
+    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
    rcb->s = acb->s;
@@ -709,46 +763,43 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    }

    if (r < 0) {
-        goto failed_completion;
+        goto failed;
    }

    return &acb->common;

-failed_completion:
-    rbd_aio_release(c);
 failed:
    g_free(rcb);
-    qemu_vfree(acb->bounce);
-    qemu_aio_unref(acb);
+    qemu_aio_release(acb);
    return NULL;
 }

-static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_READ);
 }

-static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov,
-                                       int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov,
+                                             int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
 {
    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
                         RBD_AIO_WRITE);
 }

 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
 {
    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
 }
@@ -890,7 +941,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
    int max_snaps = RBD_MAX_SNAPS;

    do {
-        snaps = g_new(rbd_snap_info_t, max_snaps);
+        snaps = g_malloc(sizeof(*snaps) * max_snaps);
        snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
        if (snap_count <= 0) {
            g_free(snaps);
@@ -901,7 +952,7 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
        goto done;
    }

-    sn_tab = g_new0(QEMUSnapshotInfo, snap_count);
+    sn_tab = g_malloc0(snap_count * sizeof(QEMUSnapshotInfo));

    for (i = 0; i < snap_count; i++) {
        const char *snap_name = snaps[i].name;
@@ -924,50 +975,29 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 }

 #ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
-                                        int64_t sector_num,
-                                        int nb_sectors,
-                                        BlockCompletionFunc *cb,
-                                        void *opaque)
+static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
 {
    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif

-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-static void qemu_rbd_invalidate_cache(BlockDriverState *bs,
-                                      Error **errp)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r = rbd_invalidate_cache(s->image);
-    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to invalidate the cache");
-    }
-}
-#endif
-
-static QemuOptsList qemu_rbd_create_opts = {
-    .name = "rbd-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "RBD object size"
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-        { /* end of list */ }
-    }
+static QEMUOptionParameter qemu_rbd_create_options[] = {
+    {
+     .name = BLOCK_OPT_SIZE,
+     .type = OPT_SIZE,
+     .help = "Virtual disk size"
+    },
+    {
+     .name = BLOCK_OPT_CLUSTER_SIZE,
+     .type = OPT_SIZE,
+     .help = "RBD object size"
+    },
+    {NULL}
 };

 static BlockDriver bdrv_rbd = {
@@ -979,7 +1009,7 @@ static BlockDriver bdrv_rbd = {
    .bdrv_create        = qemu_rbd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_get_info      = qemu_rbd_getinfo,
-    .create_opts        = &qemu_rbd_create_opts,
+    .create_options     = qemu_rbd_create_options,
    .bdrv_getlength     = qemu_rbd_getlength,
    .bdrv_truncate      = qemu_rbd_truncate,
    .protocol_name      = "rbd",
@@ -1001,9 +1031,6 @@ static BlockDriver bdrv_rbd = {
    .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
    .bdrv_snapshot_list     = qemu_rbd_snap_list,
    .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-    .bdrv_invalidate_cache  = qemu_rbd_invalidate_cache,
-#endif
 };

 static void bdrv_rbd_init(void)
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .5.50
 .7.2