Update VERSION for 1.5.2 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
qga: escape cmdline args when registering win32 service (CVE-2013-2231)
2013-07-25 14:52:08 -05:00 · 2013-07-23 12:02:47 -05:00 · 2013-07-23 12:02:47 -05:00 · 2013-07-23 12:02:47 -05:00 · 2013-07-23 12:01:16 -05:00 · 2013-07-23 11:40:26 -05:00
1133 changed files with 24778 additions and 47420 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -82,6 +82,7 @@ fsdev/virtfs-proxy-helper.pod
 *.swp
 *.orig
 .pc
+*.patch
 *.gcda
 *.gcno
 patches
--- a/.gitmodules
+++ b/.gitmodules
@@ -15,7 +15,7 @@
 	url = git://git.qemu.org/openbios.git
 [submodule "roms/qemu-palcode"]
 	path = roms/qemu-palcode
-	url = git://github.com/rth7680/qemu-palcode.git
+	url = git://repo.or.cz/qemu-palcode.git
 [submodule "roms/sgabios"]
 	path = roms/sgabios
 	url = git://git.qemu.org/sgabios.git
--- a/.mailmap
+++ b/.mailmap
@@ -2,7 +2,7 @@
 # into proper addresses so that they are counted properly in git shortlog output.
 #
 Andrzej Zaborowski <balrogg@gmail.com> balrog <balrog@c046a42c-6fe2-441c-8c8c-71466251a162>
-Anthony Liguori <anthony@codemonkey.ws> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
+Anthony Liguori <aliguori@us.ibm.com> aliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>
 Aurelien Jarno <aurelien@aurel32.net> aurel32 <aurel32@c046a42c-6fe2-441c-8c8c-71466251a162>
 Blue Swirl <blauwirbel@gmail.com> blueswir1 <blueswir1@c046a42c-6fe2-441c-8c8c-71466251a162>
 Edgar E. Iglesias <edgar.iglesias@gmail.com> edgar_igl <edgar_igl@c046a42c-6fe2-441c-8c8c-71466251a162>
--- a/19
+++ b/19
@@ -40,23 +40,8 @@ speaking, the size of guest memory can always fit into ram_addr_t but
 it would not be correct to store an actual guest physical address in a
 ram_addr_t.

-For CPU virtual addresses there are several possible types.
-vaddr is the best type to use to hold a CPU virtual address in
-target-independent code. It is guaranteed to be large enough to hold a
-virtual address for any target, and it does not change size from target
-to target. It is always unsigned.
-target_ulong is a type the size of a virtual address on the CPU; this means
-it may be 32 or 64 bits depending on which target is being built. It should
-therefore be used only in target-specific code, and in some
-performance-critical built-per-target core code such as the TLB code.
-There is also a signed version, target_long.
-abi_ulong is for the *-user targets, and represents a type the size of
-'void *' in that target's ABI. (This may not be the same as the size of a
-full CPU virtual address in the case of target ABIs which use 32 bit pointers
-on 64 bit CPUs, like sparc32plus.) Definitions of structures that must match
-the target's ABI must use this type for anything that on the target is defined
-to be an 'unsigned long' or a pointer type.
-There is also a signed version, abi_long.
+Use target_ulong (or abi_ulong) for CPU virtual addresses, however
+devices should not need to use target_ulong.

 Of course, take all of the above with a grain of salt.  If you're about
 to use some system interface that requires a type like size_t, pid_t or
--- a/15
+++ b/15
@@ -1,21 +1,16 @@
 The following points clarify the QEMU license:

-1) QEMU as a whole is released under the GNU General Public License,
-version 2.
+1) QEMU as a whole is released under the GNU General Public License

 2) Parts of QEMU have specific licenses which are compatible with the
-GNU General Public License, version 2. Hence each source file contains
-its own licensing information.  Source files with no licensing information
-are released under the GNU General Public License, version 2 or (at your
-option) any later version.
+GNU General Public License. Hence each source file contains its own
+licensing information.

-As of July 2013, contributions under version 2 of the GNU General Public
-License (and no later version) are only accepted for the following files
-or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.
+Many hardware device emulation sources are released under the BSD license.

 3) The Tiny Code Generator (TCG) is released under the BSD license
   (see license headers in files).

 4) QEMU is a trademark of Fabrice Bellard.

-Fabrice Bellard and the QEMU team
+Fabrice Bellard.
--- a/109
+++ b/109
@@ -50,7 +50,7 @@ Descriptions of section entries:

 General Project Administration
 ------------------------------
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 M: Paul Brook <paul@codesourcery.com>

 Guest CPU cores (TCG):
@@ -80,7 +80,6 @@ M: Michael Walle <michael@walle.cc>
 S: Maintained
 F: target-lm32/
 F: hw/lm32/
-F: hw/char/lm32_*

 M68K
 M: Paul Brook <paul@codesourcery.com>
@@ -105,12 +104,6 @@ M: Anthony Green <green@moxielogic.com>
 S: Maintained
 F: target-moxie/

-OpenRISC
-M: Jia Liu <proljc@gmail.com>
-S: Maintained
-F: target-openrisc/
-F: hw/openrisc/
-
 PowerPC
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
@@ -162,7 +155,7 @@ Guest CPU Cores (KVM):

 Overall
 M: Gleb Natapov <gleb@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marcelo Tosatti <mtosatti@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
 F: kvm-*
@@ -225,7 +218,7 @@ ARM Machines
 Exynos
 M: Evgeny Voevodin <e.voevodin@samsung.com>
 M: Maksim Kozlov <m.kozlov@samsung.com>
-M: Igor Mitsyanko <i.mitsyanko@gmail.com>
+M: Igor Mitsyanko <i.mitsyanko@samsung.com>
 M: Dmitry Solodkiy <d.solodkiy@samsung.com>
 S: Maintained
 F: hw/*/exynos*
@@ -310,7 +303,11 @@ Axis Dev88
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
 F: hw/cris/axis_dev88.c
-F: hw/*/etraxfs_*.c
+
+etraxfs
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: hw/cris/etraxfs.c

 LM32 Machines
 -------------
@@ -346,7 +343,7 @@ MicroBlaze Machines
 petalogix_s3adsp1800
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
-F: hw/microblaze/petalogix_s3adsp1800_mmu.c
+F: hw/microblaze/petalogix_s3adsp1800.c

 petalogix_ml605
 M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
@@ -375,13 +372,6 @@ M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: hw/mips/mips_r4k.c

-OpenRISC Machines
-----------------
-or1k-sim
-M: Jia Liu <proljc@gmail.com>
-S: Maintained
-F: hw/openrisc/openrisc_sim.c
-
 PowerPC Machines
 ----------------
 405
@@ -417,8 +407,8 @@ M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
 F: hw/ppc/mac_newworld.c
-F: hw/pci-host/uninorth.c
-F: hw/pci-bridge/dec.[hc]
+F: hw/pci/devices/host-uninorth.c
+F: hw/pci/devices/host-dec.[hc]
 F: hw/misc/macio/

 Old World
@@ -426,7 +416,7 @@ M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
 F: hw/ppc/mac_oldworld.c
-F: hw/pci-host/grackle.c
+F: hw/pci/devices/host-grackle.c
 F: hw/misc/macio/

 PReP
@@ -434,36 +424,33 @@ M: Andreas Färber <andreas.faerber@web.de>
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
 F: hw/ppc/prep.c
-F: hw/pci-host/prep.[hc]
+F: hw/pci/devices/host-prep.[hc]
 F: hw/isa/pc87312.[hc]

 sPAPR
+M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/*/spapr*
-F: include/hw/*/spapr*
-F: hw/*/xics*
-F: include/hw/*/xics*
-F: pc-bios/spapr-rtas/*

 virtex_ml507
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
-F: hw/ppc/virtex_ml507.c
+F: hw/pci/virtex_ml507.c

 SH4 Machines
 ------------
 R2D
 M: Magnus Damm <magnus.damm@gmail.com>
 S: Maintained
-F: hw/sh4/r2d.c
+F: hw/sh/r2d.c

 Shix
 M: Magnus Damm <magnus.damm@gmail.com>
 S: Orphan
-F: hw/sh4/shix.c
+F: hw/sh/shix.c

 SPARC Machines
 --------------
@@ -488,7 +475,7 @@ S390 Machines
 S390 Virtio
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: hw/s390x/s390-*.c
+F: hw/s390/s390-*.c

 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -509,7 +496,7 @@ F: hw/unicore32/
 X86 Machines
 ------------
 PC
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 S: Supported
 F: hw/i386/pc.[ch]
 F: hw/i386/pc_piix.c
@@ -557,7 +544,7 @@ M: Alexander Graf <agraf@suse.de>
 M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
-F: hw/ppc/e500*
+F: hw/ppc/e500_*

 SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
@@ -585,7 +572,7 @@ F: hw/usb/*
 VFIO
 M: Alex Williamson <alex.williamson@redhat.com>
 S: Supported
-F: hw/misc/vfio.c
+F: hw/pci/vfio.c

 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
@@ -593,7 +580,7 @@ S: Supported
 F: hw/*/*vhost*

 virtio
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 S: Supported
 F: hw/*/virtio*

@@ -622,11 +609,6 @@ S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c

-nvme
-M: Keith Busch <keith.busch@intel.com>
-S: Supported
-F: hw/block/nvme*
-
 Xilinx EDK
 M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
@@ -651,7 +633,7 @@ F: block/
 F: hw/block/

 Character Devices
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 S: Maintained
 F: qemu-char.c

@@ -659,7 +641,7 @@ CPU
 M: Andreas Färber <afaerber@suse.de>
 S: Supported
 F: qom/cpu.c
-F: include/qom/cpu.h
+F: include/qemu/cpu.h
 F: target-i386/cpu.c

 ICC Bus
@@ -672,7 +654,7 @@ Device Tree
 M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
-F: device_tree.[ch]
+F: device-tree.[ch]

 GDB stub
 M: qemu-devel@nongnu.org
@@ -683,13 +665,13 @@ F: gdb-xml/
 SPICE
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Supported
-F: include/ui/qemu-spice.h
+F: ui/qemu-spice.h
 F: ui/spice-*.c
 F: audio/spiceaudio.c
 F: hw/display/qxl*

 Graphics
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 S: Maintained
 F: ui/

@@ -699,19 +681,18 @@ S: Odd Fixes
 F: ui/cocoa.m

 Main loop
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 S: Supported
 F: vl.c

-Human Monitor (HMP)
+Monitor (QMP/HMP)
 M: Luiz Capitulino <lcapitulino@redhat.com>
+M: Markus Armbruster <armbru@redhat.com>
 S: Supported
 F: monitor.c
-F: hmp.c
-F: hmp-commands.hx

 Network device layer
-M: Anthony Liguori <anthony@codemonkey.ws>
+M: Anthony Liguori <aliguori@us.ibm.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: net/
@@ -725,27 +706,6 @@ F: nbd.*
 F: qemu-nbd.c
 T: git git://github.com/bonzini/qemu.git nbd-next

-QAPI
-M: Luiz Capitulino <lcapitulino@redhat.com>
-M: Michael Roth <mdroth@linux.vnet.ibm.com>
-S: Supported
-F: qapi/
-
-QAPI Schema
-M: Eric Blake <eblake@redhat.com>
-M: Luiz Capitulino <lcapitulino@redhat.com>
-M: Markus Armbruster <armbru@redhat.com>
-S: Supported
-F: qapi-schema.json
-
-QMP
-M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Supported
-F: qmp.c
-F: monitor.c
-F: qmp-commands.hx
-F: QMP/
-
 SLIRP
 M: Jan Kiszka <jan.kiszka@siemens.com>
 S: Maintained
@@ -782,16 +742,9 @@ Tiny Code Generator (TCG)
 -------------------------
 Common code
 M: qemu-devel@nongnu.org
-M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: tcg/

-AArch64 target
-M: Claudio Fontana <claudio.fontana@huawei.com>
-M: Claudio Fontana <claudio.fontana@gmail.com>
-S: Maintained
-F: tcg/aarch64/
-
 ARM target
 M: Andrzej Zaborowski <balrogg@gmail.com>
 S: Maintained
--- a/87
+++ b/87
@@ -146,11 +146,10 @@ $(SRC_PATH)/pixman/configure:
 	(cd $(SRC_PATH)/pixman; autoreconf -v --install)

 DTC_MAKE_ARGS=-I$(SRC_PATH)/dtc VPATH=$(SRC_PATH)/dtc -C dtc V="$(V)" LIBFDT_srcdir=$(SRC_PATH)/dtc/libfdt
-DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS)
-DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
+DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS) -I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt

 subdir-dtc:dtc/libfdt dtc/tests
-	$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)
+	$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)

 dtc/%:
 	mkdir -p $@
@@ -167,8 +166,11 @@ recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

 bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)

-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
+version.o: $(SRC_PATH)/version.rc config-host.h | version.lo
+version.lo: $(SRC_PATH)/version.rc config-host.h
+
+version-obj-$(CONFIG_WIN32) += version.o
+version-lobj-$(CONFIG_WIN32) += version.lo

 Makefile: $(version-obj-y) $(version-lobj-y)

@@ -176,7 +178,7 @@ Makefile: $(version-obj-y) $(version-lobj-y)
 # Build libraries

 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o
+libqemuutil.a: $(util-obj-y)

 ######################################################################

@@ -184,7 +186,7 @@ qemu-img.o: qemu-img-cmds.h

 qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o cmd.o $(block-obj-y) libqemuutil.a libqemustub.a

 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o

@@ -213,10 +215,10 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)

 qapi-types.c qapi-types.h :\
 $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -b < $<, "  GEN   $@")
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." < $<, "  GEN   $@")
 qapi-visit.c qapi-visit.h :\
 $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
-	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." -b < $<, "  GEN   $@")
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "."  < $<, "  GEN   $@")
 qmp-commands.h qmp-marshal.c :\
 $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o "." < $<, "  GEN   $@")
@@ -287,7 +289,7 @@ pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
 efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
 efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
-qemu-icon.bmp qemu_logo_no_text.svg \
+qemu-icon.bmp \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin kvmvapic.bin \
 s390-zipl.rom \
@@ -319,21 +321,13 @@ endif
 install-datadir:
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)"

-install-localstatedir:
-ifdef CONFIG_POSIX
-ifneq (,$(findstring qemu-ga,$(TOOLS)))
-	$(INSTALL_DIR) "$(DESTDIR)$(qemu_localstatedir)"/run
-endif
-endif
-
 install-confdir:
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"

 install-sysconfig: install-datadir install-confdir
 	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"

-install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
-install-datadir install-localstatedir
+install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig install-datadir
 	$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
 ifneq ($(TOOLS),)
 	$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
@@ -434,61 +428,6 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi \
 	qemu-monitor.texi qemu-img-cmds.texi

-ifdef CONFIG_WIN32
-
-INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
-
-nsisflags = -V2 -NOCD
-
-ifneq ($(wildcard $(SRC_PATH)/dll),)
-ifeq ($(ARCH),x86_64)
-# 64 bit executables
-DLL_PATH = $(SRC_PATH)/dll/w64
-nsisflags += -DW64
-else
-# 32 bit executables
-DLL_PATH = $(SRC_PATH)/dll/w32
-endif
-endif
-
-.PHONY: installer
-installer: $(INSTALLER)
-
-INSTDIR=/tmp/qemu-nsis
-
-$(INSTALLER): $(SRC_PATH)/qemu.nsi
-	make install prefix=${INSTDIR}
-ifdef SIGNCODE
-	(cd ${INSTDIR}; \
-         for i in *.exe; do \
-           $(SIGNCODE) $${i}; \
-         done \
-        )
-endif # SIGNCODE
-	(cd ${INSTDIR}; \
-         for i in qemu-system-*.exe; do \
-           arch=$${i%.exe}; \
-           arch=$${arch#qemu-system-}; \
-           echo Section \"$$arch\" Section_$$arch; \
-           echo SetOutPath \"\$$INSTDIR\"; \
-           echo File \"\$${BINDIR}\\$$i\"; \
-           echo SectionEnd; \
-         done \
-        ) >${INSTDIR}/system-emulations.nsh
-	makensis $(nsisflags) \
-                $(if $(BUILD_DOCS),-DCONFIG_DOCUMENTATION="y") \
-                $(if $(CONFIG_GTK),-DCONFIG_GTK="y") \
-                -DBINDIR="${INSTDIR}" \
-                $(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
-                -DSRCDIR="$(SRC_PATH)" \
-                -DOUTFILE="$(INSTALLER)" \
-                $(SRC_PATH)/qemu.nsi
-	rm -r ${INSTDIR}
-ifdef SIGNCODE
-	$(SIGNCODE) $(INSTALLER)
-endif # SIGNCODE
-endif # CONFIG_WIN
-
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
 ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -13,7 +13,6 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qapi-types.o qapi-visit.o
-block-obj-y += qemu-io-cmds.o

 block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
 block-obj-y += qemu-coroutine-sleep.o
@@ -51,7 +50,6 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration.o migration-tcp.o
-common-obj-$(CONFIG_RDMA) += migration-rdma.o
 common-obj-y += qemu-char.o #aio.o
 common-obj-y += block-migration.o
 common-obj-y += page_cache.o xbzrle.o
@@ -98,11 +96,6 @@ common-obj-y += hw/
 common-obj-y += qom/
 common-obj-y += disas/

-######################################################################
-# Resource file for Windows executables
-version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
-version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
-
 ######################################################################
 # guest agent

--- a/Makefile.target
+++ b/Makefile.target
@@ -15,14 +15,14 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/include

 ifdef CONFIG_USER_ONLY
 # user emulator name
-QEMU_PROG=qemu-$(TARGET_NAME)
+QEMU_PROG=qemu-$(TARGET_ARCH2)
 else
 # system emulator name
 ifneq (,$(findstring -mwindows,$(libs_softmmu)))
 # Terminate program name with a 'w' because the linker builds a windows executable.
-QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF)
+QEMU_PROGW=qemu-system-$(TARGET_ARCH2)w$(EXESUF)
 endif # windows executable
-QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
+QEMU_PROG=qemu-system-$(TARGET_ARCH2)$(EXESUF)
 endif

 PROGS=$(QEMU_PROG)
@@ -35,7 +35,7 @@ config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak

 ifdef CONFIG_TRACE_SYSTEMTAP
-stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp
+stap: $(QEMU_PROG).stp

 ifdef CONFIG_USER_ONLY
 TARGET_TYPE=user
@@ -43,24 +43,14 @@ else
 TARGET_TYPE=system
 endif

-$(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
-	$(call quiet-command,$(TRACETOOL) \
-		--format=stap \
-		--backend=$(TRACE_BACKEND) \
-		--binary=$(bindir)/$(QEMU_PROG) \
-		--target-name=$(TARGET_NAME) \
-		--target-type=$(TARGET_TYPE) \
-		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG).stp-installed")
-
 $(QEMU_PROG).stp: $(SRC_PATH)/trace-events
 	$(call quiet-command,$(TRACETOOL) \
 		--format=stap \
 		--backend=$(TRACE_BACKEND) \
-		--binary=$(realpath .)/$(QEMU_PROG) \
-		--target-name=$(TARGET_NAME) \
+		--binary=$(bindir)/$(QEMU_PROG) \
+		--target-arch=$(TARGET_ARCH) \
 		--target-type=$(TARGET_TYPE) \
 		< $< > $@,"  GEN   $(TARGET_DIR)$(QEMU_PROG).stp")
-
 else
 stap:
 endif
@@ -73,6 +63,8 @@ all: $(PROGS) stap
 CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y)
 CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y)
 CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
+CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y)
+CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y)

 #########################################################
 # cpu emulator library
@@ -103,7 +95,7 @@ endif #CONFIG_LINUX_USER

 ifdef CONFIG_BSD_USER

-QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR)
+QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)

 obj-y += bsd-user/
 obj-y += gdbstub.o user-exec.o
@@ -119,8 +111,10 @@ obj-y += hw/
 obj-$(CONFIG_FDT) += device_tree.o
 obj-$(CONFIG_KVM) += kvm-all.o
 obj-y += memory.o savevm.o cputlb.o
-obj-y += memory_mapping.o
-obj-y += dump.o
+obj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += memory_mapping.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += dump.o
+obj-$(CONFIG_NO_GET_MEMORY_MAPPING) += memory_mapping-stub.o
+obj-$(CONFIG_NO_CORE_DUMP) += dump-stub.o
 LIBS+=$(libs_softmmu)

 # xen support
@@ -128,7 +122,7 @@ obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
 obj-$(CONFIG_NO_XEN) += xen-stub.o

 # Hardware support
-ifeq ($(TARGET_NAME), sparc64)
+ifeq ($(TARGET_ARCH), sparc64)
 obj-y += hw/sparc64/
 else
 obj-y += hw/$(TARGET_BASE_ARCH)/
@@ -192,7 +186,7 @@ endif
 endif
 ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
-	$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
+	$(INSTALL_DATA) $(QEMU_PROG).stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
 endif

 GENERATED_HEADERS += config-target.h
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -172,23 +172,6 @@ Data:
  },
  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }

-NIC_RX_FILTER_CHANGED
-----------------
-
-The event is emitted once until the query command is executed,
-the first event will always be emitted.
-
-Data:
-
- "name": net client name (json-string)
- "path": device path (json-string)
-
-{ "event": "NIC_RX_FILTER_CHANGED",
-  "data": { "name": "vnet0",
-            "path": "/machine/peripheral/vnet0/virtio-backend" },
-  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
-}
-
 RESET
 -----

@@ -220,8 +203,7 @@ Emitted when the guest changes the RTC time.

 Data:

- "offset": Offset between base RTC clock (as specified by -rtc base), and
-new RTC clock value (json-number)
+- "offset": delta against the host UTC in seconds (json-number)

 Example:

--- a/2
+++ b/2
@@ -1 +1 @@
-1.6.50
+1.5.2
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -23,6 +23,7 @@ struct AioHandler
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
+    AioFlushHandler *io_flush;
    int deleted;
    int pollfds_idx;
    void *opaque;
@@ -46,6 +47,7 @@ void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
+                        AioFlushHandler *io_flush,
                        void *opaque)
 {
    AioHandler *node;
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
+        node->io_flush = io_flush;
        node->opaque = opaque;
        node->pollfds_idx = -1;

@@ -94,10 +97,12 @@ void aio_set_fd_handler(AioContext *ctx,

 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
-                            EventNotifierHandler *io_read)
+                            EventNotifierHandler *io_read,
+                            AioFlushEventNotifierHandler *io_flush)
 {
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-                       (IOHandler *)io_read, NULL, notifier);
+                       (IOHandler *)io_read, NULL,
+                       (AioFlushHandler *)io_flush, notifier);
 }

 bool aio_pending(AioContext *ctx)
@@ -142,11 +147,7 @@ static bool aio_dispatch(AioContext *ctx)
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            node->io_read) {
            node->io_read(node->opaque);
-
-            /* aio_notify() does not count as progress */
-            if (node->opaque != &ctx->notifier) {
-                progress = true;
-            }
+            progress = true;
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
@@ -172,7 +173,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
    int ret;
-    bool progress;
+    bool busy, progress;

    progress = false;

@@ -199,8 +200,20 @@ bool aio_poll(AioContext *ctx, bool blocking)
    g_array_set_size(ctx->pollfds, 0);

    /* fill pollfds */
+    busy = false;
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        node->pollfds_idx = -1;
+
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->opaque) == 0) {
+                continue;
+            }
+            busy = true;
+        }
        if (!node->deleted && node->pfd.events) {
            GPollFD pfd = {
                .fd = node->pfd.fd,
@@ -213,8 +226,8 @@ bool aio_poll(AioContext *ctx, bool blocking)

    ctx->walking_handlers--;

-    /* early return if we only have the aio_notify() fd */
-    if (ctx->pollfds->len == 1) {
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
        return progress;
    }

@@ -237,5 +250,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
        }
    }

-    return progress;
+    assert(progress || busy);
+    return true;
 }
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -23,6 +23,7 @@
 struct AioHandler {
    EventNotifier *e;
    EventNotifierHandler *io_notify;
+    AioFlushEventNotifierHandler *io_flush;
    GPollFD pfd;
    int deleted;
    QLIST_ENTRY(AioHandler) node;
@@ -30,7 +31,8 @@ struct AioHandler {

 void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
-                            EventNotifierHandler *io_notify)
+                            EventNotifierHandler *io_notify,
+                            AioFlushEventNotifierHandler *io_flush)
 {
    AioHandler *node;

@@ -71,6 +73,7 @@ void aio_set_event_notifier(AioContext *ctx,
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
+        node->io_flush = io_flush;
    }

    aio_notify(ctx);
@@ -93,7 +96,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 {
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    bool progress;
+    bool busy, progress;
    int count;

    progress = false;
@@ -123,11 +126,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
        if (node->pfd.revents && node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);
-
-            /* aio_notify() does not count as progress */
-            if (node->opaque != &ctx->notifier) {
-                progress = true;
-            }
+            progress = true;
        }

        tmp = node;
@@ -148,8 +147,19 @@ bool aio_poll(AioContext *ctx, bool blocking)
    ctx->walking_handlers++;

    /* fill fd sets */
+    busy = false;
    count = 0;
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->e) == 0) {
+                continue;
+            }
+            busy = true;
+        }
        if (!node->deleted && node->io_notify) {
            events[count++] = event_notifier_get_handle(node->e);
        }
@@ -157,8 +167,8 @@ bool aio_poll(AioContext *ctx, bool blocking)

    ctx->walking_handlers--;

-    /* early return if we only have the aio_notify() fd */
-    if (count == 1) {
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
        return progress;
    }

@@ -186,11 +196,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
                node->io_notify) {
                node->io_notify(node->e);
-
-                /* aio_notify() does not count as progress */
-                if (node->opaque != &ctx->notifier) {
-                    progress = true;
-                }
+                progress = true;
            }

            tmp = node;
@@ -208,5 +214,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
        events[ret - WAIT_OBJECT_0] = events[--count];
    }

-    return progress;
+    assert(progress || busy);
+    return true;
 }
--- a/arch_init.c
+++ b/arch_init.c
@@ -65,7 +65,7 @@ int graphic_depth = 8;
 #else
 int graphic_width = 800;
 int graphic_height = 600;
-int graphic_depth = 32;
+int graphic_depth = 15;
 #endif


@@ -104,9 +104,6 @@ int graphic_depth = 32;
 #endif

 const uint32_t arch_type = QEMU_ARCH;
-static bool mig_throttle_on;
-static int dirty_rate_high_cnt;
-static void check_guest_throttling(void);

 /***********************************************************/
 /* ram save/restore */
@@ -118,7 +115,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */


 static struct defconfig_file {
@@ -127,7 +123,7 @@ static struct defconfig_file {
    bool userconfig;
 } default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
-    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
+    { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
    { NULL }, /* end of list */
 };

@@ -342,8 +338,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
 {
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
-    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);

    unsigned long next;

@@ -383,21 +378,15 @@ static void migration_bitmap_sync(void)
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
-    static int64_t bytes_xfer_prev;
    static int64_t num_dirty_pages_period;
    int64_t end_time;
-    int64_t bytes_xfer_now;
-
-    if (!bytes_xfer_prev) {
-        bytes_xfer_prev = ram_bytes_transferred();
-    }

    if (!start_time) {
        start_time = qemu_get_clock_ms(rt_clock);
    }

    trace_migration_bitmap_sync_start();
-    address_space_sync_dirty_bitmap(&address_space_memory);
+    memory_global_sync_dirty_bitmap(get_system_memory());

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -415,25 +404,6 @@ static void migration_bitmap_sync(void)

    /* more than 1 second = 1000 millisecons */
    if (end_time > start_time + 1000) {
-        if (migrate_auto_converge()) {
-            /* The following detection logic can be refined later. For now:
-               Check to see if the dirtied bytes is 50% more than the approx.
-               amount of bytes that just got transferred since the last time we
-               were in this routine. If that happens >N times (for now N==4)
-               we turn on the throttle down logic */
-            bytes_xfer_now = ram_bytes_transferred();
-            if (s->dirty_pages_rate &&
-               (num_dirty_pages_period * TARGET_PAGE_SIZE >
-                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
-               (dirty_rate_high_cnt++ > 4)) {
-                    trace_migration_throttle();
-                    mig_throttle_on = true;
-                    dirty_rate_high_cnt = 0;
-             }
-             bytes_xfer_prev = bytes_xfer_now;
-        } else {
-             mig_throttle_on = false;
-        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@@ -477,7 +447,6 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                ram_bulk_stage = false;
            }
        } else {
-            int ret;
            uint8_t *p;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;
@@ -486,23 +455,17 @@ static int ram_save_block(QEMUFile *f, bool last_stage)

            /* In doubt sent page as normal */
            bytes_sent = -1;
-            ret = ram_control_save_page(f, block->offset,
-                               offset, TARGET_PAGE_SIZE, &bytes_sent);
-
-            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
-                if (ret != RAM_SAVE_CONTROL_DELAYED) {
-                    if (bytes_sent > 0) {
-                        acct_info.norm_pages++;
-                    } else if (bytes_sent == 0) {
-                        acct_info.dup_pages++;
-                    }
-                }
-            } else if (is_zero_page(p)) {
+            if (is_zero_page(p)) {
                acct_info.dup_pages++;
-                bytes_sent = save_block_hdr(f, block, offset, cont,
-                                            RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, 0);
-                bytes_sent++;
+                if (!ram_bulk_stage) {
+                    bytes_sent = save_block_hdr(f, block, offset, cont,
+                                                RAM_SAVE_FLAG_COMPRESS);
+                    qemu_put_byte(f, 0);
+                    bytes_sent++;
+                } else {
+                    acct_info.skipped_pages++;
+                    bytes_sent = 0;
+                }
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
@@ -535,18 +498,6 @@ static int ram_save_block(QEMUFile *f, bool last_stage)

 static uint64_t bytes_transferred;

-void acct_update_position(QEMUFile *f, size_t size, bool zero)
-{
-    uint64_t pages = size / TARGET_PAGE_SIZE;
-    if (zero) {
-        acct_info.dup_pages += pages;
-    } else {
-        acct_info.norm_pages += pages;
-        bytes_transferred += size;
-        qemu_update_position(f, size);
-    }
-}
-
 static ram_addr_t ram_save_remaining(void)
 {
    return migration_dirty_pages;
@@ -615,8 +566,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    migration_bitmap = bitmap_new(ram_pages);
    bitmap_set(migration_bitmap, 0, ram_pages);
    migration_dirty_pages = ram_pages;
-    mig_throttle_on = false;
-    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -649,10 +598,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
    }

    qemu_mutex_unlock_ramlist();
-
-    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
-    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
-
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
@@ -671,8 +616,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
        reset_ram_globals();
    }

-    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
-
    t0 = qemu_get_clock_ns(rt_clock);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
@@ -685,7 +628,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
        }
        total_sent += bytes_sent;
        acct_info.iterations++;
-        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -704,12 +646,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)

    qemu_mutex_unlock_ramlist();

-    /*
-     * Must occur before EOS (or any QEMUFile operation)
-     * because of RDMA protocol.
-     */
-    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
-
    if (ret < 0) {
        bytes_transferred += total_sent;
        return ret;
@@ -727,8 +663,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
    qemu_mutex_lock_ramlist();
    migration_bitmap_sync();

-    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
-
    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
@@ -742,8 +676,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
        }
        bytes_transferred += bytes_sent;
    }
-
-    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    qemu_mutex_unlock_ramlist();
@@ -838,24 +770,6 @@ static inline void *host_from_stream_offset(QEMUFile *f,
    return NULL;
 }

-/*
- * If a page (or a whole RDMA chunk) has been
- * determined to be zero, then zap it.
- */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
-{
-    if (ch != 0 || !is_zero_page(host)) {
-        memset(host, ch, size);
-#ifndef _WIN32
-        if (ch == 0 &&
-            (!kvm_enabled() || kvm_has_sync_mmu()) &&
-            getpagesize() <= TARGET_PAGE_SIZE) {
-            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
-        }
-#endif
-    }
-}
-
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
    ram_addr_t addr;
@@ -894,10 +808,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                        if (!strncmp(id, block->idstr, sizeof(id))) {
                            if (block->length != length) {
-                                fprintf(stderr,
-                                        "Length mismatch: %s: " RAM_ADDR_FMT
-                                        " in != " RAM_ADDR_FMT "\n", id, length,
-                                        block->length);
                                ret =  -EINVAL;
                                goto done;
                            }
@@ -927,7 +837,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
            }

            ch = qemu_get_byte(f);
-            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            memset(host, ch, TARGET_PAGE_SIZE);
+#ifndef _WIN32
+            if (ch == 0 &&
+                (!kvm_enabled() || kvm_has_sync_mmu()) &&
+                getpagesize() <= TARGET_PAGE_SIZE) {
+                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
+            }
+#endif
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
            void *host;

@@ -947,8 +864,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                ret = -EINVAL;
                goto done;
            }
-        } else if (flags & RAM_SAVE_FLAG_HOOK) {
-            ram_control_load_hook(f, flags);
        }
        error = qemu_file_get_error(f);
        if (error) {
@@ -1178,57 +1093,7 @@ TargetInfo *qmp_query_target(Error **errp)
 {
    TargetInfo *info = g_malloc0(sizeof(*info));

-    info->arch = g_strdup(TARGET_NAME);
+    info->arch = TARGET_TYPE;

    return info;
 }
-
-/* Stub function that's gets run on the vcpu when its brought out of the
-   VM to run inside qemu via async_run_on_cpu()*/
-static void mig_sleep_cpu(void *opq)
-{
-    qemu_mutex_unlock_iothread();
-    g_usleep(30*1000);
-    qemu_mutex_lock_iothread();
-}
-
-/* To reduce the dirty rate explicitly disallow the VCPUs from spending
-   much time in the VM. The migration thread will try to catchup.
-   Workload will experience a performance drop.
-*/
-static void mig_throttle_cpu_down(CPUState *cpu, void *data)
-{
-    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
-}
-
-static void mig_throttle_guest_down(void)
-{
-    qemu_mutex_lock_iothread();
-    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
-    qemu_mutex_unlock_iothread();
-}
-
-static void check_guest_throttling(void)
-{
-    static int64_t t0;
-    int64_t        t1;
-
-    if (!mig_throttle_on) {
-        return;
-    }
-
-    if (!t0)  {
-        t0 = qemu_get_clock_ns(rt_clock);
-        return;
-    }
-
-    t1 = qemu_get_clock_ns(rt_clock);
-
-    /* If it has been more than 40 ms since the last time the guest
-     * was throttled then do it again.
-     */
-    if (40 < (t1-t0)/1000000) {
-        mig_throttle_guest_down();
-        t0 = t1;
-    }
-}
--- a/async.c
+++ b/async.c
@@ -47,16 +47,11 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
    bh->ctx = ctx;
    bh->cb = cb;
    bh->opaque = opaque;
-    qemu_mutex_lock(&ctx->bh_lock);
    bh->next = ctx->first_bh;
-    /* Make sure that the members are ready before putting bh into list */
-    smp_wmb();
    ctx->first_bh = bh;
-    qemu_mutex_unlock(&ctx->bh_lock);
    return bh;
 }

-/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
 int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
@@ -66,15 +61,9 @@ int aio_bh_poll(AioContext *ctx)

    ret = 0;
    for (bh = ctx->first_bh; bh; bh = next) {
-        /* Make sure that fetching bh happens before accessing its members */
-        smp_read_barrier_depends();
        next = bh->next;
        if (!bh->deleted && bh->scheduled) {
            bh->scheduled = 0;
-            /* Paired with write barrier in bh schedule to ensure reading for
-             * idle & callbacks coming after bh's scheduling.
-             */
-            smp_rmb();
            if (!bh->idle)
                ret = 1;
            bh->idle = 0;
@@ -86,7 +75,6 @@ int aio_bh_poll(AioContext *ctx)

    /* remove deleted bhs */
    if (!ctx->walking_bh) {
-        qemu_mutex_lock(&ctx->bh_lock);
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
@@ -97,7 +85,6 @@ int aio_bh_poll(AioContext *ctx)
                bhp = &bh->next;
            }
        }
-        qemu_mutex_unlock(&ctx->bh_lock);
    }

    return ret;
@@ -107,38 +94,24 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
 {
    if (bh->scheduled)
        return;
-    bh->idle = 1;
-    /* Make sure that idle & any writes needed by the callback are done
-     * before the locations are read in the aio_bh_poll.
-     */
-    smp_wmb();
    bh->scheduled = 1;
+    bh->idle = 1;
 }

 void qemu_bh_schedule(QEMUBH *bh)
 {
    if (bh->scheduled)
        return;
-    bh->idle = 0;
-    /* Make sure that idle & any writes needed by the callback are done
-     * before the locations are read in the aio_bh_poll.
-     */
-    smp_wmb();
    bh->scheduled = 1;
+    bh->idle = 0;
    aio_notify(bh->ctx);
 }

-
-/* This func is async.
- */
 void qemu_bh_cancel(QEMUBH *bh)
 {
    bh->scheduled = 0;
 }

-/* This func is async.The bottom half will do the delete action at the finial
- * end.
- */
 void qemu_bh_delete(QEMUBH *bh)
 {
    bh->scheduled = 0;
@@ -201,9 +174,8 @@ aio_ctx_finalize(GSource     *source)
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);
-    aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
-    qemu_mutex_destroy(&ctx->bh_lock);
    g_array_free(ctx->pollfds, TRUE);
 }

@@ -239,11 +211,10 @@ AioContext *aio_context_new(void)
    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
    ctx->thread_pool = NULL;
-    qemu_mutex_init(&ctx->bh_lock);
    event_notifier_init(&ctx->notifier, false);
    aio_set_event_notifier(ctx, &ctx->notifier, 
                           (EventNotifierHandler *)
-                           event_notifier_test_and_clear);
+                           event_notifier_test_and_clear, NULL);

    return ctx;
 }
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -243,13 +243,38 @@ static inline int audio_ring_dist (int dst, int src, int len)
    return (dst >= src) ? (dst - src) : (len - src + dst);
 }

-#define dolog(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
+static void GCC_ATTR dolog (const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start (ap, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, ap);
+    va_end (ap);
+}

 #ifdef DEBUG
-#define ldebug(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
+static void GCC_ATTR ldebug (const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start (ap, fmt);
+    AUD_vlog (AUDIO_CAP, fmt, ap);
+    va_end (ap);
+}
 #else
-#define ldebug(fmt, ...) (void)0
+#if defined NDEBUG && defined __GNUC__
+#define ldebug(...)
+#elif defined NDEBUG && defined _MSC_VER
+#define ldebug __noop
+#else
+static void GCC_ATTR ldebug (const char *fmt, ...)
+{
+    (void) fmt;
+}
 #endif
+#endif
+
+#undef GCC_ATTR

 #define AUDIO_STRINGIFY_(n) #n
 #define AUDIO_STRINGIFY(n) AUDIO_STRINGIFY_(n)
--- a/audio/audio_win_int.c
+++ b/audio/audio_win_int.c
@@ -1,6 +1,7 @@
 /* public domain */

 #include "qemu-common.h"
+#include "audio.h"

 #define AUDIO_CAP "win-int"
 #include <windows.h>
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -78,8 +78,9 @@ static void rng_random_opened(RngBackend *b, Error **errp)
                  "filename", "a valid filename");
    } else {
        s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
+
        if (s->fd == -1) {
-            error_setg_file_open(errp, errno, s->filename);
+            error_set(errp, QERR_OPEN_FILE_FAILED, s->filename);
        }
    }
 }
--- a/block-migration.c
+++ b/block-migration.c
@@ -29,7 +29,6 @@
 #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
 #define BLK_MIG_FLAG_EOS                0x02
 #define BLK_MIG_FLAG_PROGRESS           0x04
-#define BLK_MIG_FLAG_ZERO_BLOCK         0x08

 #define MAX_IS_ALLOCATED_SEARCH 65536

@@ -81,7 +80,6 @@ typedef struct BlkMigState {
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;
-    bool zero_blocks;

    /* Protected by lock.  */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
@@ -116,30 +114,16 @@ static void blk_mig_unlock(void)
 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
 {
    int len;
-    uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
-
-    if (block_mig_state.zero_blocks &&
-        buffer_is_zero(blk->buf, BLOCK_SIZE)) {
-        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
-    }

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
-                     | flags);
+                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

-    /* if a block is zero we need to flush here since the network
-     * bandwidth is now a lot higher than the storage device bandwidth.
-     * thus if we queue zero blocks we slow down the migration */
-    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
-        qemu_fflush(f);
-        return;
-    }
-
    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
 }

@@ -360,7 +344,6 @@ static void init_blk_migration(QEMUFile *f)
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
-    block_mig_state.zero_blocks = migrate_zero_blocks();

    bdrv_iterate(init_blk_migration_it, NULL);
 }
@@ -779,15 +762,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

-            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
-                ret = bdrv_write_zeroes(bs, addr, nr_sectors);
-            } else {
-                buf = g_malloc(BLOCK_SIZE);
-                qemu_get_buffer(f, buf, BLOCK_SIZE);
-                ret = bdrv_write(bs, addr, buf, nr_sectors);
-                g_free(buf);
-            }
+            buf = g_malloc(BLOCK_SIZE);

+            qemu_get_buffer(f, buf, BLOCK_SIZE);
+            ret = bdrv_write(bs, addr, buf, nr_sectors);
+
+            g_free(buf);
            if (ret < 0) {
                return ret;
            }
--- a/block.c
+++ b/block.c
@@ -99,6 +99,9 @@ static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

+/* The device to use for VM snapshots */
+static BlockDriverState *bs_snapshots;
+
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;

@@ -127,7 +130,7 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 {
    bs->io_limits_enabled = false;

-    do {} while (qemu_co_enter_next(&bs->throttled_reqs));
+    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
@@ -143,11 +146,12 @@ static void bdrv_block_timer(void *opaque)
 {
    BlockDriverState *bs = opaque;

-    qemu_co_enter_next(&bs->throttled_reqs);
+    qemu_co_queue_next(&bs->throttled_reqs);
 }

 void bdrv_io_limits_enable(BlockDriverState *bs)
 {
+    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->io_limits_enabled = true;
 }
@@ -304,8 +308,6 @@ BlockDriverState *bdrv_new(const char *device_name)
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
-    notifier_with_return_list_init(&bs->before_write_notifiers);
-    qemu_co_queue_init(&bs->throttled_reqs);

    return bs;
 }
@@ -326,40 +328,28 @@ BlockDriver *bdrv_find_format(const char *format_name)
    return NULL;
 }

-static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
+static int bdrv_is_whitelisted(BlockDriver *drv)
 {
-    static const char *whitelist_rw[] = {
-        CONFIG_BDRV_RW_WHITELIST
-    };
-    static const char *whitelist_ro[] = {
-        CONFIG_BDRV_RO_WHITELIST
+    static const char *whitelist[] = {
+        CONFIG_BDRV_WHITELIST
    };
    const char **p;

-    if (!whitelist_rw[0] && !whitelist_ro[0]) {
+    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */
-    }

-    for (p = whitelist_rw; *p; p++) {
+    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
-    if (read_only) {
-        for (p = whitelist_ro; *p; p++) {
-            if (!strcmp(drv->format_name, *p)) {
-                return 1;
-            }
-        }
-    }
    return 0;
 }

-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
-                                          bool read_only)
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
 {
    BlockDriver *drv = bdrv_find_format(format_name);
-    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
+    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
 }

 typedef struct CreateCo {
@@ -417,7 +407,7 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
 {
    BlockDriver *drv;

-    drv = bdrv_find_protocol(filename, true);
+    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }
@@ -482,8 +472,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
    return drv;
 }

-BlockDriver *bdrv_find_protocol(const char *filename,
-                                bool allow_protocol_prefix)
+BlockDriver *bdrv_find_protocol(const char *filename)
 {
    BlockDriver *drv1;
    char protocol[128];
@@ -504,10 +493,9 @@ BlockDriver *bdrv_find_protocol(const char *filename,
        return drv1;
    }

-    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
+    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
-
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
@@ -696,6 +684,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

+    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
+        return -ENOTSUP;
+    }
+
    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
@@ -706,15 +698,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,

    bs->open_flags = flags;
    bs->buffer_alignment = 512;
-    open_flags = bdrv_open_flags(bs, flags);
-    bs->read_only = !(open_flags & BDRV_O_RDWR);
-
-    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
-        return -ENOTSUP;
-    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
-    if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
+    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

@@ -728,6 +714,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
+    open_flags = bdrv_open_flags(bs, flags);
+
+    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
@@ -786,7 +775,6 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
    BlockDriverState *bs;
    BlockDriver *drv;
    const char *drvname;
-    bool allow_protocol_prefix = false;
    int ret;

    /* NULL means an empty set of options */
@@ -803,7 +791,6 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
        filename = qdict_get_try_str(options, "filename");
    } else if (filename && !qdict_haskey(options, "filename")) {
        qdict_put(options, "filename", qstring_from_str(filename));
-        allow_protocol_prefix = true;
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                      "'filename' options at the same time");
@@ -814,13 +801,10 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
    /* Find the right block driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
-        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
+        drv = bdrv_find_whitelisted_format(drvname);
        qdict_del(options, "driver");
    } else if (filename) {
-        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
-        if (!drv) {
-            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
-        }
+        drv = bdrv_find_protocol(filename);
    } else {
        qerror_report(ERROR_CLASS_GENERIC_ERROR,
                      "Must specify either driver or file");
@@ -970,7 +954,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
    char tmp_filename[PATH_MAX + 1];
    BlockDriverState *file = NULL;
    QDict *file_options = NULL;
-    const char *drvname;

    /* NULL means an empty set of options */
    if (options == NULL) {
@@ -1054,18 +1037,12 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
    extract_subqdict(options, &file_options, "file.");

    ret = bdrv_file_open(&file, filename, file_options,
-                         bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
+                         bdrv_open_flags(bs, flags));
    if (ret < 0) {
        goto fail;
    }

    /* Find the right image format driver */
-    drvname = qdict_get_try_str(options, "driver");
-    if (drvname) {
-        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
-        qdict_del(options, "driver");
-    }
-
    if (!drv) {
        ret = find_image_format(file, filename, &drv);
    }
@@ -1306,8 +1283,8 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
-                error_setg(errp, "failed while preparing to reopen image '%s'",
-                           reopen_state->bs->filename);
+                error_set(errp, QERR_OPEN_FILE_FAILED,
+                          reopen_state->bs->filename);
            }
            goto error;
        }
@@ -1372,15 +1349,17 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)

 void bdrv_close(BlockDriverState *bs)
 {
+    bdrv_flush(bs);
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
-    bdrv_drain_all(); /* complete I/O */
-    bdrv_flush(bs);
-    bdrv_drain_all(); /* in case flush left pending I/O */
+    bdrv_drain_all();
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
+        if (bs == bs_snapshots) {
+            bs_snapshots = NULL;
+        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
@@ -1428,35 +1407,6 @@ void bdrv_close_all(void)
    }
 }

-/* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
-{
-    if (!QLIST_EMPTY(&bs->tracked_requests)) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
-        return true;
-    }
-    if (bs->file && bdrv_requests_pending(bs->file)) {
-        return true;
-    }
-    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
-        return true;
-    }
-    return false;
-}
-
-static bool bdrv_requests_pending_all(void)
-{
-    BlockDriverState *bs;
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        if (bdrv_requests_pending(bs)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /*
 * Wait for pending requests to complete across all BlockDriverStates
 *
@@ -1471,22 +1421,27 @@ static bool bdrv_requests_pending_all(void)
 */
 void bdrv_drain_all(void)
 {
-    /* Always run first iteration so any pending completion BHs run */
-    bool busy = true;
    BlockDriverState *bs;
+    bool busy;
+
+    do {
+        busy = qemu_aio_wait();

-    while (busy) {
        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
-            while (qemu_co_enter_next(&bs->throttled_reqs)) {
+            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
+    } while (busy);

-        busy = bdrv_requests_pending_all();
-        busy |= aio_poll(qemu_get_aio_context(), busy);
+    /* If requests are still pending there is a bug somewhere */
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        assert(QLIST_EMPTY(&bs->tracked_requests));
+        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
 }

@@ -1631,11 +1586,12 @@ void bdrv_delete(BlockDriverState *bs)
    assert(!bs->job);
    assert(!bs->in_use);

-    bdrv_close(bs);
-
    /* remove from list, if necessary */
    bdrv_make_anon(bs);

+    bdrv_close(bs);
+
+    assert(bs != bs_snapshots);
    g_free(bs);
 }

@@ -1679,6 +1635,9 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
 {
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
+    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
+        bs_snapshots = NULL;
+    }
 }

 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
@@ -1880,6 +1839,16 @@ int bdrv_commit_all(void)
    return 0;
 }

+struct BdrvTrackedRequest {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    int nb_sectors;
+    bool is_write;
+    QLIST_ENTRY(BdrvTrackedRequest) list;
+    Coroutine *co; /* owner, used for deadlock detection */
+    CoQueue wait_queue; /* coroutines blocked on this request */
+};
+
 /**
 * Remove an active request from the tracked requests list
 *
@@ -2193,7 +2162,6 @@ typedef struct RwCo {
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
-    BdrvRequestFlags flags;
 } RwCo;

 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@@ -2202,12 +2170,10 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
-                                     rwco->nb_sectors, rwco->qiov,
-                                     rwco->flags);
+                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
-                                      rwco->nb_sectors, rwco->qiov,
-                                      rwco->flags);
+                                      rwco->nb_sectors, rwco->qiov, 0);
    }
 }

@@ -2215,8 +2181,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 * Process a vectored synchronous request using coroutines
 */
 static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
-                       QEMUIOVector *qiov, bool is_write,
-                       BdrvRequestFlags flags)
+                       QEMUIOVector *qiov, bool is_write)
 {
    Coroutine *co;
    RwCo rwco = {
@@ -2226,7 +2191,6 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
-        .flags = flags,
    };
    assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);

@@ -2258,7 +2222,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
 * Process a synchronous request using coroutines
 */
 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
-                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
+                      int nb_sectors, bool is_write)
 {
    QEMUIOVector qiov;
    struct iovec iov = {
@@ -2267,14 +2231,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
+    return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
 }

 /* return < 0 if error. See bdrv_write() for the return codes */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
 {
-    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
 }

 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@@ -2286,7 +2250,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,

    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    ret = bdrv_read(bs, 0, buf, 1);
    bs->io_limits_enabled = enabled;
    return ret;
 }
@@ -2300,18 +2264,12 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
 {
-    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
 }

 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
 {
-    return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
-}
-
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
-    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
-                      BDRV_REQ_ZERO_WRITE);
+    return bdrv_rwv_co(bs, sector_num, qiov, true);
 }

 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2661,11 +2619,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

-    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
-
-    if (ret < 0) {
-        /* Do nothing, write notifier decided to fail this request */
-    } else if (flags & BDRV_REQ_ZERO_WRITE) {
+    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
@@ -2952,24 +2906,13 @@ int bdrv_get_flags(BlockDriverState *bs)
    return bs->open_flags;
 }

-int bdrv_flush_all(void)
+void bdrv_flush_all(void)
 {
    BlockDriverState *bs;
-    int result = 0;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        int ret = bdrv_flush(bs);
-        if (ret < 0 && !result) {
-            result = ret;
-        }
+        bdrv_flush(bs);
    }
-
-    return result;
-}
-
-int bdrv_has_zero_init_1(BlockDriverState *bs)
-{
-    return 1;
 }

 int bdrv_has_zero_init(BlockDriverState *bs)
@@ -2980,8 +2923,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
        return bs->drv->bdrv_has_zero_init(bs);
    }

-    /* safe default */
-    return 0;
+    return 1;
 }

 typedef struct BdrvCoIsAllocatedData {
@@ -3157,6 +3099,128 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
    return data.ret;
 }

+BlockInfo *bdrv_query_info(BlockDriverState *bs)
+{
+    BlockInfo *info = g_malloc0(sizeof(*info));
+    info->device = g_strdup(bs->device_name);
+    info->type = g_strdup("unknown");
+    info->locked = bdrv_dev_is_medium_locked(bs);
+    info->removable = bdrv_dev_has_removable_media(bs);
+
+    if (bdrv_dev_has_removable_media(bs)) {
+        info->has_tray_open = true;
+        info->tray_open = bdrv_dev_is_tray_open(bs);
+    }
+
+    if (bdrv_iostatus_is_enabled(bs)) {
+        info->has_io_status = true;
+        info->io_status = bs->iostatus;
+    }
+
+    if (bs->dirty_bitmap) {
+        info->has_dirty = true;
+        info->dirty = g_malloc0(sizeof(*info->dirty));
+        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+        info->dirty->granularity =
+            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
+    }
+
+    if (bs->drv) {
+        info->has_inserted = true;
+        info->inserted = g_malloc0(sizeof(*info->inserted));
+        info->inserted->file = g_strdup(bs->filename);
+        info->inserted->ro = bs->read_only;
+        info->inserted->drv = g_strdup(bs->drv->format_name);
+        info->inserted->encrypted = bs->encrypted;
+        info->inserted->encryption_key_missing = bdrv_key_required(bs);
+
+        if (bs->backing_file[0]) {
+            info->inserted->has_backing_file = true;
+            info->inserted->backing_file = g_strdup(bs->backing_file);
+        }
+
+        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+        if (bs->io_limits_enabled) {
+            info->inserted->bps =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+            info->inserted->bps_rd =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
+            info->inserted->bps_wr =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
+            info->inserted->iops =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+            info->inserted->iops_rd =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
+            info->inserted->iops_wr =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
+        }
+    }
+    return info;
+}
+
+BlockInfoList *qmp_query_block(Error **errp)
+{
+    BlockInfoList *head = NULL, **p_next = &head;
+    BlockDriverState *bs;
+
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        BlockInfoList *info = g_malloc0(sizeof(*info));
+        info->value = bdrv_query_info(bs);
+
+        *p_next = info;
+        p_next = &info->next;
+    }
+
+    return head;
+}
+
+BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+{
+    BlockStats *s;
+
+    s = g_malloc0(sizeof(*s));
+
+    if (bs->device_name[0]) {
+        s->has_device = true;
+        s->device = g_strdup(bs->device_name);
+    }
+
+    s->stats = g_malloc0(sizeof(*s->stats));
+    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
+    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
+    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
+    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
+    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
+    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
+    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
+    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
+    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
+
+    if (bs->file) {
+        s->has_parent = true;
+        s->parent = bdrv_query_stats(bs->file);
+    }
+
+    return s;
+}
+
+BlockStatsList *qmp_query_blockstats(Error **errp)
+{
+    BlockStatsList *head = NULL, **p_next = &head;
+    BlockDriverState *bs;
+
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        BlockStatsList *info = g_malloc0(sizeof(*info));
+        info->value = bdrv_query_stats(bs);
+
+        *p_next = info;
+        p_next = &info->next;
+    }
+
+    return head;
+}
+
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
    if (bs->backing_hd && bs->backing_hd->encrypted)
@@ -3243,11 +3307,13 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,

 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
-    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
+    BlockDriver *drv = bs->drv;
+
+    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

-    bs->drv->bdrv_debug_event(bs, event);
+    drv->bdrv_debug_event(bs, event);
 }

 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
@@ -3290,11 +3356,129 @@ bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
    return false;
 }

+/**************************************************************/
+/* handling of snapshots */
+
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+        return 0;
+    }
+
+    if (!drv->bdrv_snapshot_create) {
+        if (bs->file != NULL) {
+            return bdrv_can_snapshot(bs->file);
+        }
+        return 0;
+    }
+
+    return 1;
+}
+
 int bdrv_is_snapshot(BlockDriverState *bs)
 {
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
 }

+BlockDriverState *bdrv_snapshots(void)
+{
+    BlockDriverState *bs;
+
+    if (bs_snapshots) {
+        return bs_snapshots;
+    }
+
+    bs = NULL;
+    while ((bs = bdrv_next(bs))) {
+        if (bdrv_can_snapshot(bs)) {
+            bs_snapshots = bs;
+            return bs;
+        }
+    }
+    return NULL;
+}
+
+int bdrv_snapshot_create(BlockDriverState *bs,
+                         QEMUSnapshotInfo *sn_info)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOMEDIUM;
+    if (drv->bdrv_snapshot_create)
+        return drv->bdrv_snapshot_create(bs, sn_info);
+    if (bs->file)
+        return bdrv_snapshot_create(bs->file, sn_info);
+    return -ENOTSUP;
+}
+
+int bdrv_snapshot_goto(BlockDriverState *bs,
+                       const char *snapshot_id)
+{
+    BlockDriver *drv = bs->drv;
+    int ret, open_ret;
+
+    if (!drv)
+        return -ENOMEDIUM;
+    if (drv->bdrv_snapshot_goto)
+        return drv->bdrv_snapshot_goto(bs, snapshot_id);
+
+    if (bs->file) {
+        drv->bdrv_close(bs);
+        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
+        if (open_ret < 0) {
+            bdrv_delete(bs->file);
+            bs->drv = NULL;
+            return open_ret;
+        }
+        return ret;
+    }
+
+    return -ENOTSUP;
+}
+
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOMEDIUM;
+    if (drv->bdrv_snapshot_delete)
+        return drv->bdrv_snapshot_delete(bs, snapshot_id);
+    if (bs->file)
+        return bdrv_snapshot_delete(bs->file, snapshot_id);
+    return -ENOTSUP;
+}
+
+int bdrv_snapshot_list(BlockDriverState *bs,
+                       QEMUSnapshotInfo **psn_info)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOMEDIUM;
+    if (drv->bdrv_snapshot_list)
+        return drv->bdrv_snapshot_list(bs, psn_info);
+    if (bs->file)
+        return bdrv_snapshot_list(bs->file, psn_info);
+    return -ENOTSUP;
+}
+
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+        const char *snapshot_name)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (!bs->read_only) {
+        return -EINVAL;
+    }
+    if (drv->bdrv_snapshot_load_tmp) {
+        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
+    }
+    return -ENOTSUP;
+}
+
 /* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
@@ -3390,6 +3574,69 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
    return curr_bs;
 }

+#define NB_SUFFIXES 4
+
+char *get_human_readable_size(char *buf, int buf_size, int64_t size)
+{
+    static const char suffixes[NB_SUFFIXES] = "KMGT";
+    int64_t base;
+    int i;
+
+    if (size <= 999) {
+        snprintf(buf, buf_size, "%" PRId64, size);
+    } else {
+        base = 1024;
+        for(i = 0; i < NB_SUFFIXES; i++) {
+            if (size < (10 * base)) {
+                snprintf(buf, buf_size, "%0.1f%c",
+                         (double)size / base,
+                         suffixes[i]);
+                break;
+            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
+                snprintf(buf, buf_size, "%" PRId64 "%c",
+                         ((size + (base >> 1)) / base),
+                         suffixes[i]);
+                break;
+            }
+            base = base * 1024;
+        }
+    }
+    return buf;
+}
+
+char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
+{
+    char buf1[128], date_buf[128], clock_buf[128];
+    struct tm tm;
+    time_t ti;
+    int64_t secs;
+
+    if (!sn) {
+        snprintf(buf, buf_size,
+                 "%-10s%-20s%7s%20s%15s",
+                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
+    } else {
+        ti = sn->date_sec;
+        localtime_r(&ti, &tm);
+        strftime(date_buf, sizeof(date_buf),
+                 "%Y-%m-%d %H:%M:%S", &tm);
+        secs = sn->vm_clock_nsec / 1000000000;
+        snprintf(clock_buf, sizeof(clock_buf),
+                 "%02d:%02d:%02d.%03d",
+                 (int)(secs / 3600),
+                 (int)((secs / 60) % 60),
+                 (int)(secs % 60),
+                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
+        snprintf(buf, buf_size,
+                 "%-10s%-20s%7s%20s%15s",
+                 sn->id_str, sn->name,
+                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
+                 date_buf,
+                 clock_buf);
+    }
+    return buf;
+}
+
 /**************************************************************/
 /* async I/Os */

@@ -4079,7 +4326,6 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
    }

    /* Write back cached data to the OS even with cache=unsafe */
-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
@@ -4092,7 +4338,6 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
        goto flush_parent;
    }

-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
@@ -4507,7 +4752,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
        return;
    }

-    proto_drv = bdrv_find_protocol(filename, true);
+    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
@@ -4638,9 +4883,3 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
    /* Currently BlockDriverState always uses the main loop AioContext */
    return qemu_get_aio_context();
 }
-
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier)
-{
-    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,6 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -21,6 +20,5 @@ endif
 common-obj-y += stream.o
 common-obj-y += commit.o
 common-obj-y += mirror.o
-common-obj-y += backup.o

 $(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
--- a/block/backup.c
+++ b/block/backup.c
@@ -1,386 +0,0 @@
-/*
- * QEMU backup
- *
- * Copyright (C) 2013 Proxmox Server Solutions
- *
- * Authors:
- *  Dietmar Maurer (dietmar@proxmox.com)
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "trace.h"
-#include "block/block.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qemu/ratelimit.h"
-
-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
-#define SLICE_TIME 100000000ULL /* ns */
-
-typedef struct CowRequest {
-    int64_t start;
-    int64_t end;
-    QLIST_ENTRY(CowRequest) list;
-    CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
-typedef struct BackupBlockJob {
-    BlockJob common;
-    BlockDriverState *target;
-    MirrorSyncMode sync_mode;
-    RateLimit limit;
-    BlockdevOnError on_source_error;
-    BlockdevOnError on_target_error;
-    CoRwlock flush_rwlock;
-    uint64_t sectors_read;
-    HBitmap *bitmap;
-    QLIST_HEAD(, CowRequest) inflight_reqs;
-} BackupBlockJob;
-
-/* See if in-flight requests overlap and wait for them to complete */
-static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
-                                                       int64_t start,
-                                                       int64_t end)
-{
-    CowRequest *req;
-    bool retry;
-
-    do {
-        retry = false;
-        QLIST_FOREACH(req, &job->inflight_reqs, list) {
-            if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue);
-                retry = true;
-                break;
-            }
-        }
-    } while (retry);
-}
-
-/* Keep track of an in-flight request */
-static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
-                                     int64_t start, int64_t end)
-{
-    req->start = start;
-    req->end = end;
-    qemu_co_queue_init(&req->wait_queue);
-    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
-}
-
-/* Forget about a completed request */
-static void cow_request_end(CowRequest *req)
-{
-    QLIST_REMOVE(req, list);
-    qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      bool *error_is_read)
-{
-    BackupBlockJob *job = (BackupBlockJob *)bs->job;
-    CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
-    int ret = 0;
-    int64_t start, end;
-    int n;
-
-    qemu_co_rwlock_rdlock(&job->flush_rwlock);
-
-    start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
-
-    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
-
-    wait_for_overlapping_requests(job, start, end);
-    cow_request_begin(&cow_request, job, start, end);
-
-    for (; start < end; start++) {
-        if (hbitmap_get(job->bitmap, start)) {
-            trace_backup_do_cow_skip(job, start);
-            continue; /* already copied */
-        }
-
-        trace_backup_do_cow_process(job, start);
-
-        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
-                job->common.len / BDRV_SECTOR_SIZE -
-                start * BACKUP_SECTORS_PER_CLUSTER);
-
-        if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n * BDRV_SECTOR_SIZE;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
-                            &bounce_qiov);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
-            }
-            goto out;
-        }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = bdrv_co_write_zeroes(job->target,
-                                       start * BACKUP_SECTORS_PER_CLUSTER, n);
-        } else {
-            ret = bdrv_co_writev(job->target,
-                                 start * BACKUP_SECTORS_PER_CLUSTER, n,
-                                 &bounce_qiov);
-        }
-        if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            goto out;
-        }
-
-        hbitmap_set(job->bitmap, start, 1);
-
-        /* Publish progress, guest I/O counts as progress too.  Note that the
-         * offset field is an opaque progress value, it is not a disk offset.
-         */
-        job->sectors_read += n;
-        job->common.offset += n * BDRV_SECTOR_SIZE;
-    }
-
-out:
-    if (bounce_buffer) {
-        qemu_vfree(bounce_buffer);
-    }
-
-    cow_request_end(&cow_request);
-
-    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
-
-    qemu_co_rwlock_unlock(&job->flush_rwlock);
-
-    return ret;
-}
-
-static int coroutine_fn backup_before_write_notify(
-        NotifierWithReturn *notifier,
-        void *opaque)
-{
-    BdrvTrackedRequest *req = opaque;
-
-    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
-}
-
-static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    if (speed < 0) {
-        error_set(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
-}
-
-static void backup_iostatus_reset(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    bdrv_iostatus_reset(s->target);
-}
-
-static const BlockJobType backup_job_type = {
-    .instance_size  = sizeof(BackupBlockJob),
-    .job_type       = "backup",
-    .set_speed      = backup_set_speed,
-    .iostatus_reset = backup_iostatus_reset,
-};
-
-static BlockErrorAction backup_error_action(BackupBlockJob *job,
-                                            bool read, int error)
-{
-    if (read) {
-        return block_job_error_action(&job->common, job->common.bs,
-                                      job->on_source_error, true, error);
-    } else {
-        return block_job_error_action(&job->common, job->target,
-                                      job->on_target_error, false, error);
-    }
-}
-
-static void coroutine_fn backup_run(void *opaque)
-{
-    BackupBlockJob *job = opaque;
-    BlockDriverState *bs = job->common.bs;
-    BlockDriverState *target = job->target;
-    BlockdevOnError on_target_error = job->on_target_error;
-    NotifierWithReturn before_write = {
-        .notify = backup_before_write_notify,
-    };
-    int64_t start, end;
-    int ret = 0;
-
-    QLIST_INIT(&job->inflight_reqs);
-    qemu_co_rwlock_init(&job->flush_rwlock);
-
-    start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
-
-    job->bitmap = hbitmap_alloc(end, 0);
-
-    bdrv_set_enable_write_cache(target, true);
-    bdrv_set_on_error(target, on_target_error, on_target_error);
-    bdrv_iostatus_enable(target);
-
-    bdrv_add_before_write_notifier(bs, &before_write);
-
-    if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
-        while (!block_job_is_cancelled(&job->common)) {
-            /* Yield until the job is cancelled.  We just let our before_write
-             * notify callback service CoW requests. */
-            job->common.busy = false;
-            qemu_coroutine_yield();
-            job->common.busy = true;
-        }
-    } else {
-        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (; start < end; start++) {
-            bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, rt_clock, delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, rt_clock, 0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i, n;
-                int alloced = 0;
-
-                /* Check to see if these blocks are already in the
-                 * backing file. */
-
-                for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
-                    /* bdrv_co_is_allocated() only returns true/false based
-                     * on the first set of sectors it comes accross that
-                     * are are all in the same state.
-                     * For that reason we must verify each sector in the
-                     * backup cluster length.  We end up copying more than
-                     * needed but at some point that is always the case. */
-                    alloced =
-                        bdrv_co_is_allocated(bs,
-                                start * BACKUP_SECTORS_PER_CLUSTER + i,
-                                BACKUP_SECTORS_PER_CLUSTER - i, &n);
-                    i += n;
-
-                    if (alloced == 1) {
-                        break;
-                    }
-                }
-
-                /* If the above loop never found any sectors that are in
-                 * the topmost image, skip this backup. */
-                if (alloced == 0) {
-                    continue;
-                }
-            }
-            /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
-                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
-            if (ret < 0) {
-                /* Depending on error action, fail now or retry cluster */
-                BlockErrorAction action =
-                    backup_error_action(job, error_is_read, -ret);
-                if (action == BDRV_ACTION_REPORT) {
-                    break;
-                } else {
-                    start--;
-                    continue;
-                }
-            }
-        }
-    }
-
-    notifier_with_return_remove(&before_write);
-
-    /* wait until pending backup_do_cow() calls have completed */
-    qemu_co_rwlock_wrlock(&job->flush_rwlock);
-    qemu_co_rwlock_unlock(&job->flush_rwlock);
-
-    hbitmap_free(job->bitmap);
-
-    bdrv_iostatus_disable(target);
-    bdrv_delete(target);
-
-    block_job_completed(&job->common, ret);
-}
-
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode sync_mode,
-                  BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  BlockDriverCompletionFunc *cb, void *opaque,
-                  Error **errp)
-{
-    int64_t len;
-
-    assert(bs);
-    assert(target);
-    assert(cb);
-
-    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        !bdrv_iostatus_is_enabled(bs)) {
-        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
-    len = bdrv_getlength(bs);
-    if (len < 0) {
-        error_setg_errno(errp, -len, "unable to get length for '%s'",
-                         bdrv_get_device_name(bs));
-        return;
-    }
-
-    BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed,
-                                           cb, opaque, errp);
-    if (!job) {
-        return;
-    }
-
-    job->on_source_error = on_source_error;
-    job->on_target_error = on_target_error;
-    job->target = target;
-    job->sync_mode = sync_mode;
-    job->common.len = len;
-    job->common.co = qemu_coroutine_create(backup_run);
-    qemu_coroutine_enter(job->common.co, job);
-}
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -182,9 +182,6 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
    [BLKDBG_CLUSTER_ALLOC]                  = "cluster_alloc",
    [BLKDBG_CLUSTER_ALLOC_BYTES]            = "cluster_alloc_bytes",
    [BLKDBG_CLUSTER_FREE]                   = "cluster_free",
-
-    [BLKDBG_FLUSH_TO_OS]                    = "flush_to_os",
-    [BLKDBG_FLUSH_TO_DISK]                  = "flush_to_disk",
 };

 static int get_event_by_name(const char *name, BlkDebugEvent *event)
--- a/block/commit.c
+++ b/block/commit.c
@@ -173,7 +173,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }

-static const BlockJobType commit_job_type = {
+static BlockJobType commit_job_type = {
    .instance_size = sizeof(CommitBlockJob),
    .job_type      = "commit",
    .set_speed     = commit_set_speed,
--- a/block/cow.c
+++ b/block/cow.c
@@ -340,7 +340,6 @@ static BlockDriver bdrv_cow = {
    .bdrv_open      = cow_open,
    .bdrv_close     = cow_close,
    .bdrv_create    = cow_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,

    .bdrv_read              = cow_co_read,
    .bdrv_write             = cow_co_write,
--- a/block/curl.c
+++ b/block/curl.c
@@ -81,11 +81,11 @@ typedef struct BDRVCURLState {
    CURLState states[CURL_NUM_STATES];
    char *url;
    size_t readahead_size;
-    bool accept_range;
 } BDRVCURLState;

 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);
+static int curl_aio_flush(void *opaque);

 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                        void *s, void *sp)
@@ -93,31 +93,31 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
    switch (action) {
        case CURL_POLL_IN:
-            qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
+            qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s);
            break;
        case CURL_POLL_OUT:
-            qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
+            qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s);
            break;
        case CURL_POLL_INOUT:
-            qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
+            qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do,
+                                    curl_aio_flush, s);
            break;
        case CURL_POLL_REMOVE:
-            qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
+            qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL);
            break;
    }

    return 0;
 }

-static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
+static size_t curl_size_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
-    BDRVCURLState *s = opaque;
+    CURLState *s = ((CURLState*)opaque);
    size_t realsize = size * nmemb;
-    const char *accept_line = "Accept-Ranges: bytes";
+    size_t fsize;

-    if (realsize >= strlen(accept_line)
-        && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
-        s->accept_range = true;
+    if(sscanf(ptr, "Content-Length: %zd", &fsize) == 1) {
+        s->s->len = fsize;
    }

    return realsize;
@@ -406,12 +406,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags)

    static int inited = 0;

-    if (flags & BDRV_O_RDWR) {
-        qerror_report(ERROR_CLASS_GENERIC_ERROR,
-                      "curl block device does not support writes");
-        return -EROFS;
-    }
-
    opts = qemu_opts_create_nofail(&runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (error_is_set(&local_err)) {
@@ -447,25 +441,17 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags)

    // Get file size

-    s->accept_range = false;
    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
-                     curl_header_cb);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
+    curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_size_cb);
    if (curl_easy_perform(state->curl))
        goto out;
    curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d);
+    curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb);
+    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 0);
    if (d)
        s->len = (size_t)d;
    else if(!s->len)
        goto out;
-    if ((!strncasecmp(s->url, "http://", strlen("http://"))
-        || !strncasecmp(s->url, "https://", strlen("https://")))
-        && !s->accept_range) {
-        pstrcpy(state->errmsg, CURL_ERROR_SIZE,
-                "Server does not support 'range' (byte ranges).");
-        goto out;
-    }
    DPRINTF("CURL: Size = %zd\n", s->len);

    curl_clean_state(state);
@@ -476,8 +462,8 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags)
    // initialize the multi interface!

    s->multi = curl_multi_init();
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+    curl_multi_setopt( s->multi, CURLMOPT_SOCKETDATA, s); 
+    curl_multi_setopt( s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb ); 
    curl_multi_do(s);

    qemu_opts_del(opts);
@@ -493,6 +479,21 @@ out_noclean:
    return -EINVAL;
 }

+static int curl_aio_flush(void *opaque)
+{
+    BDRVCURLState *s = opaque;
+    int i, j;
+
+    for (i=0; i < CURL_NUM_STATES; i++) {
+        for(j=0; j < CURL_NUM_ACB; j++) {
+            if (s->states[i].acb[j]) {
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
 static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    // Do we have to implement canceling? Seems to work without...
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -32,6 +32,7 @@ typedef struct BDRVGlusterState {
    struct glfs *glfs;
    int fds[2];
    struct glfs_fd *fd;
+    int qemu_aio_count;
    int event_reader_pos;
    GlusterAIOCB *event_acb;
 } BDRVGlusterState;
@@ -246,6 +247,7 @@ static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
        ret = -EIO; /* Partial read/write - fail it */
    }

+    s->qemu_aio_count--;
    qemu_aio_release(acb);
    cb(opaque, ret);
    if (finished) {
@@ -273,6 +275,13 @@ static void qemu_gluster_aio_event_reader(void *opaque)
    } while (ret < 0 && errno == EINTR);
 }

+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+
+    return (s->qemu_aio_count > 0);
+}
+
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
    .name = "gluster",
@@ -339,7 +348,7 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
    }
    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
-        qemu_gluster_aio_event_reader, NULL, s);
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);

 out:
    qemu_opts_del(opts);
@@ -436,9 +445,11 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
        acb->common.cb(acb->common.opaque, -EIO);
        qemu_aio_release(acb);
+        s->qemu_aio_count--;
        close(s->fds[GLUSTER_FD_READ]);
        close(s->fds[GLUSTER_FD_WRITE]);
-        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+            NULL);
        bs->drv = NULL; /* Make the disk inaccessible */
        qemu_mutex_unlock_iothread();
    }
@@ -456,6 +467,7 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,

    offset = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;

    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = size;
@@ -476,23 +488,11 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
    return &acb->common;

 out:
+    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
 }

-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
-{
-    int ret;
-    BDRVGlusterState *s = bs->opaque;
-
-    ret = glfs_ftruncate(s->fd, offset);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    return 0;
-}
-
 static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
@@ -518,6 +518,7 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
    acb->size = 0;
    acb->ret = 0;
    acb->finished = NULL;
+    s->qemu_aio_count++;

    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
    if (ret < 0) {
@@ -526,41 +527,11 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
    return &acb->common;

 out:
+    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
 }

-#ifdef CONFIG_GLUSTERFS_DISCARD
-static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
-        void *opaque)
-{
-    int ret;
-    GlusterAIOCB *acb;
-    BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
-
-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
-    acb->size = 0;
-    acb->ret = 0;
-    acb->finished = NULL;
-
-    ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
-    if (ret < 0) {
-        goto out;
-    }
-    return &acb->common;
-
-out:
-    qemu_aio_release(acb);
-    return NULL;
-}
-#endif
-
 static int64_t qemu_gluster_getlength(BlockDriverState *bs)
 {
    BDRVGlusterState *s = bs->opaque;
@@ -594,7 +565,7 @@ static void qemu_gluster_close(BlockDriverState *bs)

    close(s->fds[GLUSTER_FD_READ]);
    close(s->fds[GLUSTER_FD_WRITE]);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);

    if (s->fd) {
        glfs_close(s->fd);
@@ -603,12 +574,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
    glfs_fini(s->glfs);
 }

-static int qemu_gluster_has_zero_init(BlockDriverState *bs)
-{
-    /* GlusterFS volume could be backed by a block device */
-    return 0;
-}
-
 static QEMUOptionParameter qemu_gluster_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
@@ -627,14 +592,9 @@ static BlockDriver bdrv_gluster = {
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
-#endif
    .create_options               = qemu_gluster_create_options,
 };

@@ -647,14 +607,9 @@ static BlockDriver bdrv_gluster_tcp = {
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
-#endif
    .create_options               = qemu_gluster_create_options,
 };

@@ -667,14 +622,9 @@ static BlockDriver bdrv_gluster_unix = {
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
-#endif
    .create_options               = qemu_gluster_create_options,
 };

@@ -687,14 +637,9 @@ static BlockDriver bdrv_gluster_rdma = {
    .bdrv_create                  = qemu_gluster_create,
    .bdrv_getlength               = qemu_gluster_getlength,
    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
    .bdrv_aio_readv               = qemu_gluster_aio_readv,
    .bdrv_aio_writev              = qemu_gluster_aio_writev,
    .bdrv_aio_flush               = qemu_gluster_aio_flush,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
-#endif
    .create_options               = qemu_gluster_create_options,
 };

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -32,7 +32,6 @@
 #include "block/block_int.h"
 #include "trace.h"
 #include "block/scsi.h"
-#include "qemu/iov.h"

 #include <iscsi/iscsi.h>
 #include <iscsi/scsi-lowlevel.h>
@@ -62,6 +61,8 @@ typedef struct IscsiAIOCB {
    int status;
    int canceled;
    int retries;
+    size_t read_size;
+    size_t read_offset;
    int64_t sector_num;
    int nb_sectors;
 #ifdef __linux__
@@ -146,6 +147,13 @@ static const AIOCBInfo iscsi_aiocb_info = {
 static void iscsi_process_read(void *arg);
 static void iscsi_process_write(void *arg);

+static int iscsi_process_flush(void *arg)
+{
+    IscsiLun *iscsilun = arg;
+
+    return iscsi_queue_length(iscsilun->iscsi) > 0;
+}
+
 static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
@@ -159,6 +167,7 @@ iscsi_set_events(IscsiLun *iscsilun)
        qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
                      iscsi_process_read,
                      (ev & POLLOUT) ? iscsi_process_write : NULL,
+                      iscsi_process_flush,
                      iscsilun);

    }
@@ -224,30 +233,11 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
    iscsi_schedule_bh(acb);
 }

-static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
-    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
-}
-
 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
 {
    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 }

-static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
-                                      IscsiLun *iscsilun)
-{
-    if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
-        (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
-            error_report("iSCSI misaligned request: "
-                         "iscsilun->block_size %u, sector_num %" PRIi64
-                         ", nb_sectors %d",
-                         iscsilun->block_size, sector_num, nb_sectors);
-            return 0;
-    }
-    return 1;
-}
-
 static int
 iscsi_aio_writev_acb(IscsiAIOCB *acb)
 {
@@ -295,7 +285,7 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
    lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
    *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
    *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
-    num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
+    num_sectors = size / acb->iscsilun->block_size;
    *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
    acb->task->expxferlen = size;

@@ -332,10 +322,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
    IscsiLun *iscsilun = bs->opaque;
    IscsiAIOCB *acb;

-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return NULL;
-    }
-
    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
    trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);

@@ -393,7 +379,6 @@ static int
 iscsi_aio_readv_acb(IscsiAIOCB *acb)
 {
    struct iscsi_context *iscsi = acb->iscsilun->iscsi;
-    size_t size;
    uint64_t lba;
    uint32_t num_sectors;
    int ret;
@@ -406,7 +391,20 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
    acb->status      = -EINPROGRESS;
    acb->buf         = NULL;

-    size = acb->nb_sectors * BDRV_SECTOR_SIZE;
+    /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
+     * may be misaligned to the LUN, so we may need to read some extra
+     * data.
+     */
+    acb->read_offset = 0;
+    if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) {
+        uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num;
+
+        acb->read_offset  = bdrv_offset % acb->iscsilun->block_size;
+    }
+
+    num_sectors  = (acb->read_size + acb->iscsilun->block_size
+                    + acb->read_offset - 1)
+                    / acb->iscsilun->block_size;

    acb->task = malloc(sizeof(struct scsi_task));
    if (acb->task == NULL) {
@@ -417,9 +415,8 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
    memset(acb->task, 0, sizeof(struct scsi_task));

    acb->task->xfer_dir = SCSI_XFER_READ;
-    acb->task->expxferlen = size;
    lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
-    num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
+    acb->task->expxferlen = acb->read_size;

    switch (acb->iscsilun->type) {
    case TYPE_DISK:
@@ -467,10 +464,6 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
    IscsiLun *iscsilun = bs->opaque;
    IscsiAIOCB *acb;

-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return NULL;
-    }
-
    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
    trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);

@@ -478,6 +471,7 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
    acb->sector_num  = sector_num;
    acb->iscsilun    = iscsilun;
    acb->qiov        = qiov;
+    acb->read_size   = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors;
    acb->retries     = ISCSI_CMD_RETRIES;

    if (iscsi_aio_readv_acb(acb) != 0) {
@@ -657,9 +651,6 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
 {
    IscsiAIOCB *acb = opaque;

-    g_free(acb->buf);
-    acb->buf = NULL;
-
    if (acb->canceled != 0) {
        return;
    }
@@ -736,30 +727,14 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
    acb->task->expxferlen = acb->ioh->dxfer_len;

-    data.size = 0;
    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
-        if (acb->ioh->iovec_count == 0) {
-            data.data = acb->ioh->dxferp;
-            data.size = acb->ioh->dxfer_len;
-        } else {
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
-            scsi_task_set_iov_out(acb->task,
-                                 (struct scsi_iovec *) acb->ioh->dxferp,
-                                 acb->ioh->iovec_count);
-#else
-            struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
-
-            acb->buf = g_malloc(acb->ioh->dxfer_len);
-            data.data = acb->buf;
-            data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
-                                   acb->buf, acb->ioh->dxfer_len);
-#endif
-        }
+        data.data = acb->ioh->dxferp;
+        data.size = acb->ioh->dxfer_len;
    }
-
    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
                                 iscsi_aio_ioctl_cb,
-                                 (data.size > 0) ? &data : NULL,
+                                 (acb->task->xfer_dir == SCSI_XFER_WRITE) ?
+                                     &data : NULL,
                                 acb) != 0) {
        scsi_free_scsi_task(acb->task);
        qemu_aio_release(acb);
@@ -768,26 +743,9 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,

    /* tell libiscsi to read straight into the buffer we got from ioctl */
    if (acb->task->xfer_dir == SCSI_XFER_READ) {
-        if (acb->ioh->iovec_count == 0) {
-            scsi_task_add_data_in_buffer(acb->task,
-                                         acb->ioh->dxfer_len,
-                                         acb->ioh->dxferp);
-        } else {
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
-            scsi_task_set_iov_in(acb->task,
-                                 (struct scsi_iovec *) acb->ioh->dxferp,
-                                 acb->ioh->iovec_count);
-#else
-            int i;
-            for (i = 0; i < acb->ioh->iovec_count; i++) {
-                struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
-
-                scsi_task_add_data_in_buffer(acb->task,
-                    iov[i].iov_len,
-                    iov[i].iov_base);
-            }
-#endif
-        }
+        scsi_task_add_data_in_buffer(acb->task,
+                                     acb->ioh->dxfer_len,
+                                     acb->ioh->dxferp);
    }

    iscsi_set_events(iscsilun);
@@ -1160,7 +1118,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
    if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
        goto out;
    }
-    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
+    bs->total_sectors    = iscsilun->num_blocks *
+                           iscsilun->block_size / BDRV_SECTOR_SIZE ;

    /* Medium changer or tape. We dont have any emulation for this so this must
     * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
@@ -1207,7 +1166,7 @@ static void iscsi_close(BlockDriverState *bs)
        qemu_del_timer(iscsilun->nop_timer);
        qemu_free_timer(iscsilun->nop_timer);
    }
-    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
    iscsi_destroy_context(iscsi);
    memset(iscsilun, 0, sizeof(IscsiLun));
 }
@@ -1276,7 +1235,6 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
    }
    if (bs.total_sectors < total_size) {
        ret = -ENOSPC;
-        goto out;
    }

    ret = 0;
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -39,6 +39,7 @@ struct qemu_laiocb {
 struct qemu_laio_state {
    io_context_t ctx;
    EventNotifier e;
+    int count;
 };

 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -54,6 +55,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
 {
    int ret;

+    s->count--;
+
    ret = laiocb->ret;
    if (ret != -ECANCELED) {
        if (ret == laiocb->nbytes) {
@@ -98,6 +101,13 @@ static void qemu_laio_completion_cb(EventNotifier *e)
    }
 }

+static int qemu_laio_flush_cb(EventNotifier *e)
+{
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+
+    return (s->count > 0) ? 1 : 0;
+}
+
 static void laio_cancel(BlockDriverAIOCB *blockacb)
 {
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -167,11 +177,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        goto out_free_aiocb;
    }
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
+    s->count++;

    if (io_submit(s->ctx, 1, &iocbs) < 0)
-        goto out_free_aiocb;
+        goto out_dec_count;
    return &laiocb->common;

+out_dec_count:
+    s->count--;
 out_free_aiocb:
    qemu_aio_release(laiocb);
    return NULL;
@@ -190,7 +203,8 @@ void *laio_init(void)
        goto out_close_efd;
    }

-    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
+    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
+                                qemu_laio_flush_cb);

    return s;

--- a/block/mirror.c
+++ b/block/mirror.c
@@ -512,7 +512,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
        char backing_filename[PATH_MAX];
        bdrv_get_full_backing_filename(s->target, backing_filename,
                                       sizeof(backing_filename));
-        error_setg_file_open(errp, -ret, backing_filename);
+        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
        return;
    }
    if (!s->synced) {
@@ -524,7 +524,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
    block_job_resume(job);
 }

-static const BlockJobType mirror_job_type = {
+static BlockJobType mirror_job_type = {
    .instance_size = sizeof(MirrorBlockJob),
    .job_type      = "mirror",
    .set_speed     = mirror_set_speed,
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -279,6 +279,13 @@ static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
    request->handle = INDEX_TO_HANDLE(s, i);
 }

+static int nbd_have_request(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+
+    return s->in_flight > 0;
+}
+
 static void nbd_reply_ready(void *opaque)
 {
    BDRVNBDState *s = opaque;
@@ -334,7 +341,8 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,

    qemu_co_mutex_lock(&s->send_mutex);
    s->send_coroutine = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
+                            nbd_have_request, s);
    if (qiov) {
        if (!s->is_unix) {
            socket_set_cork(s->sock, 1);
@@ -353,7 +361,8 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
    } else {
        rc = nbd_send_request(s->sock, request);
    }
-    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+    qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
+                            nbd_have_request, s);
    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
@@ -429,7 +438,8 @@ static int nbd_establish_connection(BlockDriverState *bs)
    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qemu_set_nonblock(sock);
-    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, s);
+    qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
+                            nbd_have_request, s);

    s->sock = sock;
    s->size = size;
@@ -449,7 +459,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    request.len = 0;
    nbd_send_request(s->sock, &request);

-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
    closesocket(s->sock);
 }

--- a/block/qapi.c
+++ b/block/qapi.c
@@ -1,470 +0,0 @@
-/*
- * Block layer qmp and info dump related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/qapi.h"
-#include "block/block_int.h"
-#include "qmp-commands.h"
-
-/*
- * Returns 0 on success, with *p_list either set to describe snapshot
- * information, or NULL because there are no snapshots.  Returns -errno on
- * error, with *p_list untouched.
- */
-int bdrv_query_snapshot_info_list(BlockDriverState *bs,
-                                  SnapshotInfoList **p_list,
-                                  Error **errp)
-{
-    int i, sn_count;
-    QEMUSnapshotInfo *sn_tab = NULL;
-    SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL;
-    SnapshotInfo *info;
-
-    sn_count = bdrv_snapshot_list(bs, &sn_tab);
-    if (sn_count < 0) {
-        const char *dev = bdrv_get_device_name(bs);
-        switch (sn_count) {
-        case -ENOMEDIUM:
-            error_setg(errp, "Device '%s' is not inserted", dev);
-            break;
-        case -ENOTSUP:
-            error_setg(errp,
-                       "Device '%s' does not support internal snapshots",
-                       dev);
-            break;
-        default:
-            error_setg_errno(errp, -sn_count,
-                             "Can't list snapshots of device '%s'", dev);
-            break;
-        }
-        return sn_count;
-    }
-
-    for (i = 0; i < sn_count; i++) {
-        info = g_new0(SnapshotInfo, 1);
-        info->id            = g_strdup(sn_tab[i].id_str);
-        info->name          = g_strdup(sn_tab[i].name);
-        info->vm_state_size = sn_tab[i].vm_state_size;
-        info->date_sec      = sn_tab[i].date_sec;
-        info->date_nsec     = sn_tab[i].date_nsec;
-        info->vm_clock_sec  = sn_tab[i].vm_clock_nsec / 1000000000;
-        info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000;
-
-        info_list = g_new0(SnapshotInfoList, 1);
-        info_list->value = info;
-
-        /* XXX: waiting for the qapi to support qemu-queue.h types */
-        if (!cur_item) {
-            head = cur_item = info_list;
-        } else {
-            cur_item->next = info_list;
-            cur_item = info_list;
-        }
-
-    }
-
-    g_free(sn_tab);
-    *p_list = head;
-    return 0;
-}
-
-/**
- * bdrv_query_image_info:
- * @bs: block device to examine
- * @p_info: location to store image information
- * @errp: location to store error information
- *
- * Store "flat" image information in @p_info.
- *
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_image_info(BlockDriverState *bs,
-                           ImageInfo **p_info,
-                           Error **errp)
-{
-    uint64_t total_sectors;
-    const char *backing_filename;
-    char backing_filename2[1024];
-    BlockDriverInfo bdi;
-    int ret;
-    Error *err = NULL;
-    ImageInfo *info = g_new0(ImageInfo, 1);
-
-    bdrv_get_geometry(bs, &total_sectors);
-
-    info->filename        = g_strdup(bs->filename);
-    info->format          = g_strdup(bdrv_get_format_name(bs));
-    info->virtual_size    = total_sectors * 512;
-    info->actual_size     = bdrv_get_allocated_file_size(bs);
-    info->has_actual_size = info->actual_size >= 0;
-    if (bdrv_is_encrypted(bs)) {
-        info->encrypted = true;
-        info->has_encrypted = true;
-    }
-    if (bdrv_get_info(bs, &bdi) >= 0) {
-        if (bdi.cluster_size != 0) {
-            info->cluster_size = bdi.cluster_size;
-            info->has_cluster_size = true;
-        }
-        info->dirty_flag = bdi.is_dirty;
-        info->has_dirty_flag = true;
-    }
-    backing_filename = bs->backing_file;
-    if (backing_filename[0] != '\0') {
-        info->backing_filename = g_strdup(backing_filename);
-        info->has_backing_filename = true;
-        bdrv_get_full_backing_filename(bs, backing_filename2,
-                                       sizeof(backing_filename2));
-
-        if (strcmp(backing_filename, backing_filename2) != 0) {
-            info->full_backing_filename =
-                        g_strdup(backing_filename2);
-            info->has_full_backing_filename = true;
-        }
-
-        if (bs->backing_format[0]) {
-            info->backing_filename_format = g_strdup(bs->backing_format);
-            info->has_backing_filename_format = true;
-        }
-    }
-
-    ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
-    switch (ret) {
-    case 0:
-        if (info->snapshots) {
-            info->has_snapshots = true;
-        }
-        break;
-    /* recoverable error */
-    case -ENOMEDIUM:
-    case -ENOTSUP:
-        error_free(err);
-        break;
-    default:
-        error_propagate(errp, err);
-        qapi_free_ImageInfo(info);
-        return;
-    }
-
-    *p_info = info;
-}
-
-/* @p_info will be set only on success. */
-void bdrv_query_info(BlockDriverState *bs,
-                     BlockInfo **p_info,
-                     Error **errp)
-{
-    BlockInfo *info = g_malloc0(sizeof(*info));
-    BlockDriverState *bs0;
-    ImageInfo **p_image_info;
-    Error *local_err = NULL;
-    info->device = g_strdup(bs->device_name);
-    info->type = g_strdup("unknown");
-    info->locked = bdrv_dev_is_medium_locked(bs);
-    info->removable = bdrv_dev_has_removable_media(bs);
-
-    if (bdrv_dev_has_removable_media(bs)) {
-        info->has_tray_open = true;
-        info->tray_open = bdrv_dev_is_tray_open(bs);
-    }
-
-    if (bdrv_iostatus_is_enabled(bs)) {
-        info->has_io_status = true;
-        info->io_status = bs->iostatus;
-    }
-
-    if (bs->dirty_bitmap) {
-        info->has_dirty = true;
-        info->dirty = g_malloc0(sizeof(*info->dirty));
-        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
-        info->dirty->granularity =
-         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
-    }
-
-    if (bs->drv) {
-        info->has_inserted = true;
-        info->inserted = g_malloc0(sizeof(*info->inserted));
-        info->inserted->file = g_strdup(bs->filename);
-        info->inserted->ro = bs->read_only;
-        info->inserted->drv = g_strdup(bs->drv->format_name);
-        info->inserted->encrypted = bs->encrypted;
-        info->inserted->encryption_key_missing = bdrv_key_required(bs);
-
-        if (bs->backing_file[0]) {
-            info->inserted->has_backing_file = true;
-            info->inserted->backing_file = g_strdup(bs->backing_file);
-        }
-
-        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
-
-        if (bs->io_limits_enabled) {
-            info->inserted->bps =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
-            info->inserted->bps_rd =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
-            info->inserted->bps_wr =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
-            info->inserted->iops =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
-            info->inserted->iops_rd =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
-            info->inserted->iops_wr =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
-        }
-
-        bs0 = bs;
-        p_image_info = &info->inserted->image;
-        while (1) {
-            bdrv_query_image_info(bs0, p_image_info, &local_err);
-            if (error_is_set(&local_err)) {
-                error_propagate(errp, local_err);
-                goto err;
-            }
-            if (bs0->drv && bs0->backing_hd) {
-                bs0 = bs0->backing_hd;
-                (*p_image_info)->has_backing_image = true;
-                p_image_info = &((*p_image_info)->backing_image);
-            } else {
-                break;
-            }
-        }
-    }
-
-    *p_info = info;
-    return;
-
- err:
-    qapi_free_BlockInfo(info);
-}
-
-BlockStats *bdrv_query_stats(const BlockDriverState *bs)
-{
-    BlockStats *s;
-
-    s = g_malloc0(sizeof(*s));
-
-    if (bs->device_name[0]) {
-        s->has_device = true;
-        s->device = g_strdup(bs->device_name);
-    }
-
-    s->stats = g_malloc0(sizeof(*s->stats));
-    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
-    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
-    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
-    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
-    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
-    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
-    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
-    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
-    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
-
-    if (bs->file) {
-        s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file);
-    }
-
-    return s;
-}
-
-BlockInfoList *qmp_query_block(Error **errp)
-{
-    BlockInfoList *head = NULL, **p_next = &head;
-    BlockDriverState *bs = NULL;
-    Error *local_err = NULL;
-
-     while ((bs = bdrv_next(bs))) {
-        BlockInfoList *info = g_malloc0(sizeof(*info));
-        bdrv_query_info(bs, &info->value, &local_err);
-        if (error_is_set(&local_err)) {
-            error_propagate(errp, local_err);
-            goto err;
-        }
-
-        *p_next = info;
-        p_next = &info->next;
-    }
-
-    return head;
-
- err:
-    qapi_free_BlockInfoList(head);
-    return NULL;
-}
-
-BlockStatsList *qmp_query_blockstats(Error **errp)
-{
-    BlockStatsList *head = NULL, **p_next = &head;
-    BlockDriverState *bs = NULL;
-
-     while ((bs = bdrv_next(bs))) {
-        BlockStatsList *info = g_malloc0(sizeof(*info));
-        info->value = bdrv_query_stats(bs);
-
-        *p_next = info;
-        p_next = &info->next;
-    }
-
-    return head;
-}
-
-#define NB_SUFFIXES 4
-
-static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
-{
-    static const char suffixes[NB_SUFFIXES] = "KMGT";
-    int64_t base;
-    int i;
-
-    if (size <= 999) {
-        snprintf(buf, buf_size, "%" PRId64, size);
-    } else {
-        base = 1024;
-        for (i = 0; i < NB_SUFFIXES; i++) {
-            if (size < (10 * base)) {
-                snprintf(buf, buf_size, "%0.1f%c",
-                         (double)size / base,
-                         suffixes[i]);
-                break;
-            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
-                snprintf(buf, buf_size, "%" PRId64 "%c",
-                         ((size + (base >> 1)) / base),
-                         suffixes[i]);
-                break;
-            }
-            base = base * 1024;
-        }
-    }
-    return buf;
-}
-
-void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
-                        QEMUSnapshotInfo *sn)
-{
-    char buf1[128], date_buf[128], clock_buf[128];
-    struct tm tm;
-    time_t ti;
-    int64_t secs;
-
-    if (!sn) {
-        func_fprintf(f,
-                     "%-10s%-20s%7s%20s%15s",
-                     "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
-    } else {
-        ti = sn->date_sec;
-        localtime_r(&ti, &tm);
-        strftime(date_buf, sizeof(date_buf),
-                 "%Y-%m-%d %H:%M:%S", &tm);
-        secs = sn->vm_clock_nsec / 1000000000;
-        snprintf(clock_buf, sizeof(clock_buf),
-                 "%02d:%02d:%02d.%03d",
-                 (int)(secs / 3600),
-                 (int)((secs / 60) % 60),
-                 (int)(secs % 60),
-                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
-        func_fprintf(f,
-                     "%-10s%-20s%7s%20s%15s",
-                     sn->id_str, sn->name,
-                     get_human_readable_size(buf1, sizeof(buf1),
-                                             sn->vm_state_size),
-                     date_buf,
-                     clock_buf);
-    }
-}
-
-void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
-                          ImageInfo *info)
-{
-    char size_buf[128], dsize_buf[128];
-    if (!info->has_actual_size) {
-        snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
-    } else {
-        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
-                                info->actual_size);
-    }
-    get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size);
-    func_fprintf(f,
-                 "image: %s\n"
-                 "file format: %s\n"
-                 "virtual size: %s (%" PRId64 " bytes)\n"
-                 "disk size: %s\n",
-                 info->filename, info->format, size_buf,
-                 info->virtual_size,
-                 dsize_buf);
-
-    if (info->has_encrypted && info->encrypted) {
-        func_fprintf(f, "encrypted: yes\n");
-    }
-
-    if (info->has_cluster_size) {
-        func_fprintf(f, "cluster_size: %" PRId64 "\n",
-                       info->cluster_size);
-    }
-
-    if (info->has_dirty_flag && info->dirty_flag) {
-        func_fprintf(f, "cleanly shut down: no\n");
-    }
-
-    if (info->has_backing_filename) {
-        func_fprintf(f, "backing file: %s", info->backing_filename);
-        if (info->has_full_backing_filename) {
-            func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
-        }
-        func_fprintf(f, "\n");
-        if (info->has_backing_filename_format) {
-            func_fprintf(f, "backing file format: %s\n",
-                         info->backing_filename_format);
-        }
-    }
-
-    if (info->has_snapshots) {
-        SnapshotInfoList *elem;
-
-        func_fprintf(f, "Snapshot list:\n");
-        bdrv_snapshot_dump(func_fprintf, f, NULL);
-        func_fprintf(f, "\n");
-
-        /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but
-         * we convert to the block layer's native QEMUSnapshotInfo for now.
-         */
-        for (elem = info->snapshots; elem; elem = elem->next) {
-            QEMUSnapshotInfo sn = {
-                .vm_state_size = elem->value->vm_state_size,
-                .date_sec = elem->value->date_sec,
-                .date_nsec = elem->value->date_nsec,
-                .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL +
-                                 elem->value->vm_clock_nsec,
-            };
-
-            pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
-            pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
-            bdrv_snapshot_dump(func_fprintf, f, &sn);
-            func_fprintf(f, "\n");
-        }
-    }
-}
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -892,7 +892,6 @@ static BlockDriver bdrv_qcow = {
    .bdrv_close		= qcow_close,
    .bdrv_reopen_prepare = qcow_reopen_prepare,
    .bdrv_create	= qcow_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,

    .bdrv_co_readv          = qcow_co_readv,
    .bdrv_co_writev         = qcow_co_writev,
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -98,16 +98,14 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
        goto fail;
    }
    g_free(s->l1_table);
-    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_OTHER);
+    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    s->l1_size = new_l1_size;
    return 0;
 fail:
    g_free(new_l1_table);
-    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
-                        QCOW2_DISCARD_OTHER);
+    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
    return ret;
 }

@@ -550,8 +548,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
-            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
-                                QCOW2_DISCARD_OTHER);
+            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
        }
    }

@@ -718,14 +715,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     * Also flush bs->file to get the right order for L2 and refcount update.
-     *
-     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
-     * clusters), the next write will reuse them anyway.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
-            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
-                                    QCOW2_DISCARD_NEVER);
+            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
        }
    }

@@ -1346,7 +1339,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
        l2_table[l2_index + i] = cpu_to_be64(0);

        /* Then decrease the refcount */
-        qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+        qcow2_free_any_clusters(bs, old_offset, 1);
    }

    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
@@ -1377,25 +1370,18 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    nb_clusters = size_to_clusters(s, end_offset - offset);

-    s->cache_discards = true;
-
    /* Each L2 table is handled by its own loop iteration */
    while (nb_clusters > 0) {
        ret = discard_single_l2(bs, offset, nb_clusters);
        if (ret < 0) {
-            goto fail;
+            return ret;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
    }

-    ret = 0;
-fail:
-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
-    return ret;
+    return 0;
 }

 /*
@@ -1429,7 +1415,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
        if (old_offset & QCOW_OFLAG_COMPRESSED) {
            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
-            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+            qcow2_free_any_clusters(bs, old_offset, 1);
        } else {
            l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
        }
@@ -1457,22 +1443,15 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
    /* Each L2 table is handled by its own loop iteration */
    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);

-    s->cache_discards = true;
-
    while (nb_clusters > 0) {
        ret = zero_single_l2(bs, offset, nb_clusters);
        if (ret < 0) {
-            goto fail;
+            return ret;
        }

        nb_clusters -= ret;
        offset += (ret * s->cluster_size);
    }

-    ret = 0;
-fail:
-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
-    return ret;
+    return 0;
 }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -29,7 +29,7 @@
 static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
-                            int addend, enum qcow2_discard_type type);
+                            int addend);


 /*********************************************************/
@@ -235,8 +235,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    } else {
        /* Described somewhere else. This can recurse at most twice before we
         * arrive at a block that describes itself. */
-        ret = update_refcount(bs, new_block, s->cluster_size, 1,
-                              QCOW2_DISCARD_NEVER);
+        ret = update_refcount(bs, new_block, s->cluster_size, 1);
        if (ret < 0) {
            goto fail_block;
        }
@@ -400,8 +399,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Free old table. Remember, we must not change free_cluster_index */
    uint64_t old_free_cluster_index = s->free_cluster_index;
-    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_OTHER);
+    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
    s->free_cluster_index = old_free_cluster_index;

    ret = load_refcount_block(bs, new_block, (void**) refcount_block);
@@ -420,77 +418,9 @@ fail_block:
    return ret;
 }

-void qcow2_process_discards(BlockDriverState *bs, int ret)
-{
-    BDRVQcowState *s = bs->opaque;
-    Qcow2DiscardRegion *d, *next;
-
-    QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
-        QTAILQ_REMOVE(&s->discards, d, next);
-
-        /* Discard is optional, ignore the return value */
-        if (ret >= 0) {
-            bdrv_discard(bs->file,
-                         d->offset >> BDRV_SECTOR_BITS,
-                         d->bytes >> BDRV_SECTOR_BITS);
-        }
-
-        g_free(d);
-    }
-}
-
-static void update_refcount_discard(BlockDriverState *bs,
-                                    uint64_t offset, uint64_t length)
-{
-    BDRVQcowState *s = bs->opaque;
-    Qcow2DiscardRegion *d, *p, *next;
-
-    QTAILQ_FOREACH(d, &s->discards, next) {
-        uint64_t new_start = MIN(offset, d->offset);
-        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
-
-        if (new_end - new_start <= length + d->bytes) {
-            /* There can't be any overlap, areas ending up here have no
-             * references any more and therefore shouldn't get freed another
-             * time. */
-            assert(d->bytes + length == new_end - new_start);
-            d->offset = new_start;
-            d->bytes = new_end - new_start;
-            goto found;
-        }
-    }
-
-    d = g_malloc(sizeof(*d));
-    *d = (Qcow2DiscardRegion) {
-        .bs     = bs,
-        .offset = offset,
-        .bytes  = length,
-    };
-    QTAILQ_INSERT_TAIL(&s->discards, d, next);
-
-found:
-    /* Merge discard requests if they are adjacent now */
-    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
-        if (p == d
-            || p->offset > d->offset + d->bytes
-            || d->offset > p->offset + p->bytes)
-        {
-            continue;
-        }
-
-        /* Still no overlap possible */
-        assert(p->offset == d->offset + d->bytes
-            || d->offset == p->offset + p->bytes);
-
-        QTAILQ_REMOVE(&s->discards, p, next);
-        d->offset = MIN(d->offset, p->offset);
-        d->bytes += p->bytes;
-    }
-}
-
 /* XXX: cache several refcount block clusters ? */
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-    int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
+    int64_t offset, int64_t length, int addend)
 {
    BDRVQcowState *s = bs->opaque;
    int64_t start, last, cluster_offset;
@@ -556,18 +486,10 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
            s->free_cluster_index = cluster_index;
        }
        refcount_block[block_index] = cpu_to_be16(refcount);
-
-        if (refcount == 0 && s->discard_passthrough[type]) {
-            update_refcount_discard(bs, cluster_offset, s->cluster_size);
-        }
    }

    ret = 0;
 fail:
-    if (!s->cache_discards) {
-        qcow2_process_discards(bs, ret);
-    }
-
    /* Write last changed block to disk */
    if (refcount_block) {
        int wret;
@@ -584,8 +506,7 @@ fail:
     */
    if (ret < 0) {
        int dummy;
-        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
-                                QCOW2_DISCARD_NEVER);
+        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
        (void)dummy;
    }

@@ -601,14 +522,12 @@ fail:
 */
 static int update_cluster_refcount(BlockDriverState *bs,
                                   int64_t cluster_index,
-                                   int addend,
-                                   enum qcow2_discard_type type)
+                                   int addend)
 {
    BDRVQcowState *s = bs->opaque;
    int ret;

-    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
-                          type);
+    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
    if (ret < 0) {
        return ret;
    }
@@ -660,7 +579,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
        return offset;
    }

-    ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+    ret = update_refcount(bs, offset, size, 1);
    if (ret < 0) {
        return ret;
    }
@@ -692,8 +611,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
    old_free_cluster_index = s->free_cluster_index;
    s->free_cluster_index = cluster_index + i;

-    ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
-                          QCOW2_DISCARD_NEVER);
+    ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
    if (ret < 0) {
        return ret;
    }
@@ -731,8 +649,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
        if (free_in_cluster == 0)
            s->free_byte_offset = 0;
        if ((offset & (s->cluster_size - 1)) != 0)
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
-                                    QCOW2_DISCARD_NEVER);
+            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
    } else {
        offset = qcow2_alloc_clusters(bs, s->cluster_size);
        if (offset < 0) {
@@ -742,8 +659,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
        if ((cluster_offset + s->cluster_size) == offset) {
            /* we are lucky: contiguous data */
            offset = s->free_byte_offset;
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
-                                    QCOW2_DISCARD_NEVER);
+            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
            s->free_byte_offset += size;
        } else {
            s->free_byte_offset = offset;
@@ -760,13 +676,12 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
 }

 void qcow2_free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size,
-                          enum qcow2_discard_type type)
+                          int64_t offset, int64_t size)
 {
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
-    ret = update_refcount(bs, offset, size, -1, type);
+    ret = update_refcount(bs, offset, size, -1);
    if (ret < 0) {
        fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
        /* TODO Remember the clusters to free them later and avoid leaking */
@@ -777,8 +692,8 @@ void qcow2_free_clusters(BlockDriverState *bs,
 * Free a cluster using its L2 entry (handles clusters of all types, e.g.
 * normal cluster, compressed cluster, etc.)
 */
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
-                             int nb_clusters, enum qcow2_discard_type type)
+void qcow2_free_any_clusters(BlockDriverState *bs,
+    uint64_t l2_entry, int nb_clusters)
 {
    BDRVQcowState *s = bs->opaque;

@@ -790,12 +705,12 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
                           s->csize_mask) + 1;
            qcow2_free_clusters(bs,
                (l2_entry & s->cluster_offset_mask) & ~511,
-                nb_csectors * 512, type);
+                nb_csectors * 512);
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
        qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                            nb_clusters << s->cluster_bits, type);
+                            nb_clusters << s->cluster_bits);
        break;
    case QCOW2_CLUSTER_UNALLOCATED:
    case QCOW2_CLUSTER_ZERO:
@@ -826,8 +741,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    l1_table = NULL;
    l1_size2 = l1_size * sizeof(uint64_t);

-    s->cache_discards = true;
-
    /* WARNING: qcow2_snapshot_goto relies on this function not using the
     * l1_table_offset when it is the current s->l1_table_offset! Be careful
     * when changing this! */
@@ -872,8 +785,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                            int ret;
                            ret = update_refcount(bs,
                                (offset & s->cluster_offset_mask) & ~511,
-                                nb_csectors * 512, addend,
-                                QCOW2_DISCARD_SNAPSHOT);
+                                nb_csectors * 512, addend);
                            if (ret < 0) {
                                goto fail;
                            }
@@ -883,8 +795,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                    } else {
                        uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
                        if (addend != 0) {
-                            refcount = update_cluster_refcount(bs, cluster_index, addend,
-                                                               QCOW2_DISCARD_SNAPSHOT);
+                            refcount = update_cluster_refcount(bs, cluster_index, addend);
                        } else {
                            refcount = get_refcount(bs, cluster_index);
                        }
@@ -916,8 +827,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,


            if (addend != 0) {
-                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
-                                                   QCOW2_DISCARD_SNAPSHOT);
+                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
            } else {
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
            }
@@ -940,9 +850,6 @@ fail:
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    }

-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
    /* Update L1 only if it isn't deleted anyway (addend = -1) */
    if (ret == 0 && addend >= 0 && l1_modified) {
        for (i = 0; i < l1_size; i++) {
@@ -1346,8 +1253,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,

            if (num_fixed) {
                ret = update_refcount(bs, i << s->cluster_bits, 1,
-                                      refcount2 - refcount1,
-                                      QCOW2_DISCARD_ALWAYS);
+                                      refcount2 - refcount1);
                if (ret >= 0) {
                    (*num_fixed)++;
                    continue;
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -262,8 +262,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
    }

    /* free the old snapshot table */
-    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
-                        QCOW2_DISCARD_SNAPSHOT);
+    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;
@@ -570,8 +569,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
    if (ret < 0) {
        return ret;
    }
-    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_SNAPSHOT);
+    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));

    /* must update the copied flag on the current cluster offsets */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -291,26 +291,10 @@ static QemuOptsList qcow2_runtime_opts = {
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
-            .name = QCOW2_OPT_LAZY_REFCOUNTS,
+            .name = "lazy_refcounts",
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
-        {
-            .name = QCOW2_OPT_DISCARD_REQUEST,
-            .type = QEMU_OPT_BOOL,
-            .help = "Pass guest discard requests to the layer below",
-        },
-        {
-            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
-            .type = QEMU_OPT_BOOL,
-            .help = "Generate discard requests when snapshot related space "
-                    "is freed",
-        },
-        {
-            .name = QCOW2_OPT_DISCARD_OTHER,
-            .type = QEMU_OPT_BOOL,
-            .help = "Generate discard requests when other clusters are freed",
-        },
        { /* end of list */ }
    },
 };
@@ -486,7 +470,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
    }

    QLIST_INIT(&s->cluster_allocs);
-    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
@@ -549,16 +532,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
    s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));

-    s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
-    s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
-    s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
-                          flags & BDRV_O_UNMAP);
-    s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
-    s->discard_passthrough[QCOW2_DISCARD_OTHER] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
    qemu_opts_del(opts);

    if (s->use_lazy_refcounts && s->qcow_version < 3) {
@@ -1223,8 +1196,7 @@ static int preallocate(BlockDriverState *bs)

        ret = qcow2_alloc_cluster_link_l2(bs, meta);
        if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
-                                    QCOW2_DISCARD_NEVER);
+            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
            return ret;
        }

@@ -1785,7 +1757,6 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_close         = qcow2_close,
    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
    .bdrv_create        = qcow2_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_is_allocated = qcow2_co_is_allocated,
    .bdrv_set_key       = qcow2_set_key,
    .bdrv_make_empty    = qcow2_make_empty,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -59,10 +59,7 @@
 #define DEFAULT_CLUSTER_SIZE 65536


-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
+#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"

 typedef struct QCowHeader {
    uint32_t magic;
@@ -132,28 +129,12 @@ enum {
    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
 };

-enum qcow2_discard_type {
-    QCOW2_DISCARD_NEVER = 0,
-    QCOW2_DISCARD_ALWAYS,
-    QCOW2_DISCARD_REQUEST,
-    QCOW2_DISCARD_SNAPSHOT,
-    QCOW2_DISCARD_OTHER,
-    QCOW2_DISCARD_MAX
-};
-
 typedef struct Qcow2Feature {
    uint8_t type;
    uint8_t bit;
    char    name[46];
 } QEMU_PACKED Qcow2Feature;

-typedef struct Qcow2DiscardRegion {
-    BlockDriverState *bs;
-    uint64_t offset;
-    uint64_t bytes;
-    QTAILQ_ENTRY(Qcow2DiscardRegion) next;
-} Qcow2DiscardRegion;
-
 typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
@@ -197,8 +178,6 @@ typedef struct BDRVQcowState {
    int qcow_version;
    bool use_lazy_refcounts;

-    bool discard_passthrough[QCOW2_DISCARD_MAX];
-
    uint64_t incompatible_features;
    uint64_t compatible_features;
    uint64_t autoclear_features;
@@ -206,8 +185,6 @@ typedef struct BDRVQcowState {
    size_t unknown_header_fields_size;
    void* unknown_header_fields;
    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
-    QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
-    bool cache_discards;
 } BDRVQcowState;

 /* XXX: use std qcow open function ? */
@@ -372,10 +349,9 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
    int nb_clusters);
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
 void qcow2_free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size,
-                          enum qcow2_discard_type type);
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
-                             int nb_clusters, enum qcow2_discard_type type);
+    int64_t offset, int64_t size);
+void qcow2_free_any_clusters(BlockDriverState *bs,
+    uint64_t cluster_offset, int nb_clusters);

 int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend);
@@ -383,8 +359,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix);

-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size);
--- a/block/qed.c
+++ b/block/qed.c
@@ -1574,7 +1574,6 @@ static BlockDriver bdrv_qed = {
    .bdrv_close               = bdrv_qed_close,
    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
    .bdrv_create              = bdrv_qed_create,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_is_allocated     = bdrv_qed_co_is_allocated,
    .bdrv_make_empty          = bdrv_qed_make_empty,
    .bdrv_aio_readv           = bdrv_qed_aio_readv,
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1199,7 +1199,6 @@ static BlockDriver bdrv_file = {
    .bdrv_reopen_abort = raw_reopen_abort,
    .bdrv_close = raw_close,
    .bdrv_create = raw_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_is_allocated = raw_co_is_allocated,

    .bdrv_aio_readv = raw_aio_readv,
@@ -1351,7 +1350,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
                qemu_close(fd);
            }
            filename = bsdPath;
-            qdict_put(options, "filename", qstring_from_str(filename));
        }

        if ( mediaIterator )
@@ -1528,6 +1526,11 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options)
    return ret;
 }

+static int hdev_has_zero_init(BlockDriverState *bs)
+{
+    return 0;
+}
+
 static BlockDriver bdrv_host_device = {
    .format_name        = "host_device",
    .protocol_name        = "host_device",
@@ -1540,6 +1543,7 @@ static BlockDriver bdrv_host_device = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
+    .bdrv_has_zero_init = hdev_has_zero_init,

    .bdrv_aio_readv	= raw_aio_readv,
    .bdrv_aio_writev	= raw_aio_writev,
@@ -1664,6 +1668,7 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
+    .bdrv_has_zero_init = hdev_has_zero_init,

    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
@@ -1765,6 +1770,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
+    .bdrv_has_zero_init = hdev_has_zero_init,

    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
@@ -1886,6 +1892,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
+    .bdrv_has_zero_init = hdev_has_zero_init,

    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -459,7 +459,6 @@ static BlockDriver bdrv_file = {
    .bdrv_file_open	= raw_open,
    .bdrv_close		= raw_close,
    .bdrv_create	= raw_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,

    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
@@ -571,6 +570,11 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
    return 0;
 }

+static int hdev_has_zero_init(BlockDriverState *bs)
+{
+    return 0;
+}
+
 static BlockDriver bdrv_host_device = {
    .format_name	= "host_device",
    .protocol_name	= "host_device",
@@ -578,6 +582,7 @@ static BlockDriver bdrv_host_device = {
    .bdrv_probe_device	= hdev_probe_device,
    .bdrv_file_open	= hdev_open,
    .bdrv_close		= raw_close,
+    .bdrv_has_zero_init = hdev_has_zero_init,

    .bdrv_aio_readv     = raw_aio_readv,
    .bdrv_aio_writev    = raw_aio_writev,
--- a/block/raw.c
+++ b/block/raw.c
@@ -1,26 +1,3 @@
-/*
- * Block driver for RAW format
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */

 #include "qemu-common.h"
 #include "block/block_int.h"
@@ -65,13 +42,6 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
    return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
 }

-static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            int nb_sectors)
-{
-    return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
-}
-
 static int64_t raw_getlength(BlockDriverState *bs)
 {
    return bdrv_getlength(bs->file);
@@ -144,11 +114,6 @@ static int raw_has_zero_init(BlockDriverState *bs)
    return bdrv_has_zero_init(bs->file);
 }

-static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    return bdrv_get_info(bs->file, bdi);
-}
-
 static BlockDriver bdrv_raw = {
    .format_name        = "raw",

@@ -163,12 +128,10 @@ static BlockDriver bdrv_raw = {
    .bdrv_co_readv          = raw_co_readv,
    .bdrv_co_writev         = raw_co_writev,
    .bdrv_co_is_allocated   = raw_co_is_allocated,
-    .bdrv_co_write_zeroes   = raw_co_write_zeroes,
    .bdrv_co_discard        = raw_co_discard,

    .bdrv_probe         = raw_probe,
    .bdrv_getlength     = raw_getlength,
-    .bdrv_get_info      = raw_get_info,
    .bdrv_truncate      = raw_truncate,

    .bdrv_is_inserted   = raw_is_inserted,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -100,6 +100,7 @@ typedef struct BDRVRBDState {
    rados_ioctx_t io_ctx;
    rbd_image_t image;
    char name[RBD_MAX_IMAGE_NAME_SIZE];
+    int qemu_aio_count;
    char *snap;
    int event_reader_pos;
    RADOSCB *event_rcb;
@@ -427,11 +428,19 @@ static void qemu_rbd_aio_event_reader(void *opaque)
            if (s->event_reader_pos == sizeof(s->event_rcb)) {
                s->event_reader_pos = 0;
                qemu_rbd_complete_aio(s->event_rcb);
+                s->qemu_aio_count--;
            }
        }
    } while (ret < 0 && errno == EINTR);
 }

+static int qemu_rbd_aio_flush_cb(void *opaque)
+{
+    BDRVRBDState *s = opaque;
+    /* Nonzero while qemu_aio_count still records started requests. */
+    return (s->qemu_aio_count > 0);
+}
+
 /* TODO Convert to fine grained options */
 static QemuOptsList runtime_opts = {
    .name = "rbd",
@@ -545,7 +554,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags)
    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
-                            NULL, s);
+                            NULL, qemu_rbd_aio_flush_cb, s);


    qemu_opts_del(opts);
@@ -569,7 +578,7 @@ static void qemu_rbd_close(BlockDriverState *bs)

    close(s->fds[0]);
    close(s->fds[1]);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL);

    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
@@ -732,6 +741,8 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

+    s->qemu_aio_count++; /* All the RADOSCB */
+
    rcb = g_malloc(sizeof(RADOSCB));
    rcb->done = 0;
    rcb->acb = acb;
@@ -768,6 +779,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,

 failed:
    g_free(rcb);
+    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
 }
@@ -984,7 +996,6 @@ static BlockDriver bdrv_rbd = {
    .bdrv_file_open     = qemu_rbd_open,
    .bdrv_close         = qemu_rbd_close,
    .bdrv_create        = qemu_rbd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_get_info      = qemu_rbd_getinfo,
    .create_options     = qemu_rbd_create_options,
    .bdrv_getlength     = qemu_rbd_getlength,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -242,14 +242,14 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
    return !!inode->snap_ctime;
 }

-#undef DPRINTF
+#undef dprintf
 #ifdef DEBUG_SDOG
-#define DPRINTF(fmt, args...)                                       \
+#define dprintf(fmt, args...)                                       \
    do {                                                            \
        fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
    } while (0)
 #else
-#define DPRINTF(fmt, args...)
+#define dprintf(fmt, args...)
 #endif

 typedef struct SheepdogAIOCB SheepdogAIOCB;
@@ -509,6 +509,13 @@ static void restart_co_req(void *opaque)
    qemu_coroutine_enter(co, NULL);
 }

+static int have_co_req(void *opaque)
+{
+    /* This handler is registered only while a request is pending, so it
+     * always returns 1. */
+    return 1;
+}
+
 typedef struct SheepdogReqCo {
    int sockfd;
    SheepdogReq *hdr;
@@ -531,14 +538,14 @@ static coroutine_fn void do_co_req(void *opaque)
    unsigned int *rlen = srco->rlen;

    co = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
+    qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co);

    ret = send_co_req(sockfd, hdr, data, wlen);
    if (ret < 0) {
        goto out;
    }

-    qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
+    qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co);

    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
    if (ret < sizeof(*hdr)) {
@@ -563,7 +570,7 @@ static coroutine_fn void do_co_req(void *opaque)
 out:
    /* there is at most one request for this sockfd, so it is safe to
     * set each handler to NULL. */
-    qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);

    srco->ret = ret;
    srco->finished = true;
@@ -722,7 +729,7 @@ static void coroutine_fn aio_read_response(void *opaque)
        break;
    case AIOCB_FLUSH_CACHE:
        if (rsp.result == SD_RES_INVALID_PARMS) {
-            DPRINTF("disable cache since the server doesn't support it\n");
+            dprintf("disable cache since the server doesn't support it\n");
            s->cache_flags = SD_FLAG_CMD_DIRECT;
            rsp.result = SD_RES_SUCCESS;
        }
@@ -789,6 +796,14 @@ static void co_write_request(void *opaque)
    qemu_coroutine_enter(s->co_send, NULL);
 }

+static int aio_flush_request(void *opaque)
+{
+    BDRVSheepdogState *s = opaque;
+    /* Nonzero while the in-flight or pending request list is non-empty. */
+    return !QLIST_EMPTY(&s->inflight_aio_head) ||
+        !QLIST_EMPTY(&s->pending_aio_head);
+}
+
 /*
 * Return a socket discriptor to read/write objects.
 *
@@ -804,7 +819,7 @@ static int get_sheep_fd(BDRVSheepdogState *s)
        return fd;
    }

-    qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
+    qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
    return fd;
 }

@@ -1054,7 +1069,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,

    qemu_co_mutex_lock(&s->lock);
    s->co_send = qemu_coroutine_self();
-    qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
+    qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request,
+                            aio_flush_request, s);
    socket_set_cork(s->fd, 1);

    /* send a header */
@@ -1075,7 +1091,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    }

    socket_set_cork(s->fd, 0);
-    qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+    qemu_aio_set_fd_handler(s->fd, co_read_response, NULL,
+                            aio_flush_request, s);
    qemu_co_mutex_unlock(&s->lock);

    return 0;
@@ -1212,7 +1229,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
         * the same object */
        QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
            if (areq != aio_req && areq->oid == aio_req->oid) {
-                DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
+                dprintf("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
                QLIST_REMOVE(aio_req, aio_siblings);
                QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
                return SD_RES_SUCCESS;
@@ -1302,7 +1319,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags)
    s->discard_supported = true;

    if (snapid || tag[0] != '\0') {
-        DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
+        dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
        s->is_snapshot = true;
    }

@@ -1333,7 +1350,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags)
    g_free(buf);
    return 0;
 out:
-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
    if (s->fd >= 0) {
        closesocket(s->fd);
    }
@@ -1493,7 +1510,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
        BlockDriver *drv;

        /* Currently, only Sheepdog backing image is supported. */
-        drv = bdrv_find_protocol(backing_file, true);
+        drv = bdrv_find_protocol(backing_file);
        if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
            error_report("backing_file must be a sheepdog image");
            ret = -EINVAL;
@@ -1537,7 +1554,7 @@ static void sd_close(BlockDriverState *bs)
    unsigned int wlen, rlen = 0;
    int fd, ret;

-    DPRINTF("%s\n", s->name);
+    dprintf("%s\n", s->name);

    fd = connect_to_sdog(s);
    if (fd < 0) {
@@ -1561,7 +1578,7 @@ static void sd_close(BlockDriverState *bs)
        error_report("%s, %s", sd_strerror(rsp->result), s->name);
    }

-    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
    closesocket(s->fd);
    g_free(s->host_spec);
 }
@@ -1697,7 +1714,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
    char *buf;
    bool deleted;

-    DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
+    dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);

    buf = g_malloc(SD_INODE_SIZE);

@@ -1713,7 +1730,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
        goto out;
    }

-    DPRINTF("%" PRIx32 " is created.\n", vid);
+    dprintf("%" PRIx32 " is created.\n", vid);

    fd = connect_to_sdog(s);
    if (fd < 0) {
@@ -1734,7 +1751,7 @@ static int sd_create_branch(BDRVSheepdogState *s)

    s->is_snapshot = false;
    ret = 0;
-    DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
+    dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);

 out:
    g_free(buf);
@@ -1824,11 +1841,11 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        }

        if (create) {
-            DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
+            dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
                    inode->vdi_id, oid,
                    vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
            oid = vid_to_data_oid(inode->vdi_id, idx);
-            DPRINTF("new oid %" PRIx64 "\n", oid);
+            dprintf("new oid %" PRIx64 "\n", oid);
        }

        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
@@ -1961,7 +1978,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    SheepdogInode *inode;
    unsigned int datalen;

-    DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
+    dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
            "is_snapshot %d\n", sn_info->name, sn_info->id_str,
            s->name, sn_info->vm_state_size, s->is_snapshot);

@@ -1972,7 +1989,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        return -EINVAL;
    }

-    DPRINTF("%s %s\n", sn_info->name, sn_info->id_str);
+    dprintf("%s %s\n", sn_info->name, sn_info->id_str);

    s->inode.vm_state_size = sn_info->vm_state_size;
    s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
@@ -2016,7 +2033,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    }

    memcpy(&s->inode, inode, datalen);
-    DPRINTF("s->inode: name %s snap_id %x oid %x\n",
+    dprintf("s->inode: name %s snap_id %x oid %x\n",
            s->inode.name, s->inode.snap_id, s->inode.vdi_id);

 cleanup:
@@ -2046,7 +2063,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
    if (snapid) {
        tag[0] = 0;
    } else {
-        pstrcpy(tag, sizeof(tag), snapshot_id);
+        pstrcpy(tag, sizeof(tag), s->name);
    }

    ret = reload_inode(s, snapid, tag);
@@ -2054,11 +2071,14 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto out;
    }

-    ret = sd_create_branch(s);
-    if (ret) {
+    if (!s->inode.vm_state_size) {
+        error_report("Invalid snapshot");
+        ret = -ENOENT;
        goto out;
    }

+    s->is_snapshot = true;
+
    g_free(old_s);

    return 0;
@@ -2176,9 +2196,8 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
    int fd, ret = 0, remaining = size;
    unsigned int data_len;
    uint64_t vmstate_oid;
-    uint64_t offset;
    uint32_t vdi_index;
-    uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
+    uint64_t offset;

    fd = connect_to_sdog(s);
    if (fd < 0) {
@@ -2191,7 +2210,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,

        data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);

-        vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);
+        vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);

        create = (offset == 0);
        if (load) {
@@ -2330,7 +2349,6 @@ static BlockDriver bdrv_sheepdog = {
    .bdrv_file_open = sd_open,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_getlength = sd_getlength,
    .bdrv_truncate  = sd_truncate,

@@ -2358,7 +2376,6 @@ static BlockDriver bdrv_sheepdog_tcp = {
    .bdrv_file_open = sd_open,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_getlength = sd_getlength,
    .bdrv_truncate  = sd_truncate,

@@ -2386,7 +2403,6 @@ static BlockDriver bdrv_sheepdog_unix = {
    .bdrv_file_open = sd_open,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_getlength = sd_getlength,
    .bdrv_truncate  = sd_truncate,

--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -1,157 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/snapshot.h"
-#include "block/block_int.h"
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
-                       const char *name)
-{
-    QEMUSnapshotInfo *sn_tab, *sn;
-    int nb_sns, i, ret;
-
-    ret = -ENOENT;
-    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
-    if (nb_sns < 0) {
-        return ret;
-    }
-    for (i = 0; i < nb_sns; i++) {
-        sn = &sn_tab[i];
-        if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
-            *sn_info = *sn;
-            ret = 0;
-            break;
-        }
-    }
-    g_free(sn_tab);
-    return ret;
-}
-
-int bdrv_can_snapshot(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-        return 0;
-    }
-
-    if (!drv->bdrv_snapshot_create) {
-        if (bs->file != NULL) {
-            return bdrv_can_snapshot(bs->file);
-        }
-        return 0;
-    }
-
-    return 1;
-}
-
-int bdrv_snapshot_create(BlockDriverState *bs,
-                         QEMUSnapshotInfo *sn_info)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_create) {
-        return drv->bdrv_snapshot_create(bs, sn_info);
-    }
-    if (bs->file) {
-        return bdrv_snapshot_create(bs->file, sn_info);
-    }
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_goto(BlockDriverState *bs,
-                       const char *snapshot_id)
-{
-    BlockDriver *drv = bs->drv;
-    int ret, open_ret;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_goto) {
-        return drv->bdrv_snapshot_goto(bs, snapshot_id);
-    }
-
-    if (bs->file) {
-        drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
-        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
-        if (open_ret < 0) {
-            bdrv_delete(bs->file);
-            bs->drv = NULL;
-            return open_ret;
-        }
-        return ret;
-    }
-
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_delete) {
-        return drv->bdrv_snapshot_delete(bs, snapshot_id);
-    }
-    if (bs->file) {
-        return bdrv_snapshot_delete(bs->file, snapshot_id);
-    }
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_list(BlockDriverState *bs,
-                       QEMUSnapshotInfo **psn_info)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_list) {
-        return drv->bdrv_snapshot_list(bs, psn_info);
-    }
-    if (bs->file) {
-        return bdrv_snapshot_list(bs->file, psn_info);
-    }
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
-        const char *snapshot_name)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (!bs->read_only) {
-        return -EINVAL;
-    }
-    if (drv->bdrv_snapshot_load_tmp) {
-        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
-    }
-    return -ENOTSUP;
-}
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -716,21 +716,6 @@ static void ssh_close(BlockDriverState *bs)
    ssh_state_free(s);
 }

-static int ssh_has_zero_init(BlockDriverState *bs)
-{
-    BDRVSSHState *s = bs->opaque;
-    /* Assume false, unless we can positively prove it's true. */
-    int has_zero_init = 0;
-
-    if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
-        if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
-            has_zero_init = 1;
-        }
-    }
-
-    return has_zero_init;
-}
-
 static void restart_coroutine(void *opaque)
 {
    Coroutine *co = opaque;
@@ -740,6 +725,14 @@ static void restart_coroutine(void *opaque)
    qemu_coroutine_enter(co, NULL);
 }

+/* Unconditionally returns 1 and ignores opaque: whenever set_fd_handler
+ * has been called there is always a request being processed.
+ */
+static int return_true(void *opaque)
+{
+    return 1;
+}
+
 static coroutine_fn void set_fd_handler(BDRVSSHState *s)
 {
    int r;
@@ -758,13 +751,13 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
    DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
            rd_handler, wr_handler);

-    qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
+    qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, return_true, co);
 }

 static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
 {
    DPRINTF("s->sock=%d", s->sock);
-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
 }

 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -1044,7 +1037,6 @@ static BlockDriver bdrv_ssh = {
    .bdrv_file_open               = ssh_file_open,
    .bdrv_create                  = ssh_create,
    .bdrv_close                   = ssh_close,
-    .bdrv_has_zero_init           = ssh_has_zero_init,
    .bdrv_co_readv                = ssh_co_readv,
    .bdrv_co_writev               = ssh_co_writev,
    .bdrv_getlength               = ssh_getlength,
--- a/block/stream.c
+++ b/block/stream.c
@@ -57,11 +57,6 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
    BlockDriverState *intermediate;
    intermediate = top->backing_hd;

-    /* Must assign before bdrv_delete() to prevent traversing dangling pointer
-     * while we delete backing image instances.
-     */
-    top->backing_hd = base;
-
    while (intermediate) {
        BlockDriverState *unused;

@@ -75,6 +70,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
        unused->backing_hd = NULL;
        bdrv_delete(unused);
    }
+    top->backing_hd = base;
 }

 static void coroutine_fn stream_run(void *opaque)
@@ -202,7 +198,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }

-static const BlockJobType stream_job_type = {
+static BlockJobType stream_job_type = {
    .instance_size = sizeof(StreamBlockJob),
    .job_type      = "stream",
    .set_speed     = stream_set_speed,
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -779,7 +779,6 @@ static BlockDriver bdrv_vdi = {
    .bdrv_close = vdi_close,
    .bdrv_reopen_prepare = vdi_reopen_prepare,
    .bdrv_create = vdi_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_is_allocated = vdi_co_is_allocated,
    .bdrv_make_empty = vdi_make_empty,

--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -168,7 +168,7 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader {
                                           vhdx_header.  If not found in
                                           vhdx_header, it is invalid */
    uint64_t    flushed_file_offset;    /* see spec for full details - this
-                                           should be vhdx file size in bytes */
+                                           should be vhdx file size in bytes */
    uint64_t    last_file_offset;       /* size in bytes that all allocated
                                           file structures fit into */
 } VHDXLogEntryHeader;
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -62,20 +62,19 @@ typedef struct {
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
-} QEMU_PACKED VMDK3Header;
+} VMDK3Header;

 typedef struct {
    uint32_t version;
    uint32_t flags;
-    uint64_t capacity;
-    uint64_t granularity;
-    uint64_t desc_offset;
-    uint64_t desc_size;
-    /* Number of GrainTableEntries per GrainTable */
-    uint32_t num_gtes_per_gt;
-    uint64_t rgd_offset;
-    uint64_t gd_offset;
-    uint64_t grain_offset;
+    int64_t capacity;
+    int64_t granularity;
+    int64_t desc_offset;
+    int64_t desc_size;
+    int32_t num_gtes_per_gte;
+    int64_t rgd_offset;
+    int64_t gd_offset;
+    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
    uint16_t compressAlgorithm;
@@ -110,7 +109,7 @@ typedef struct VmdkExtent {

 typedef struct BDRVVmdkState {
    CoMutex lock;
-    uint64_t desc_offset;
+    int desc_offset;
    bool cid_updated;
    uint32_t parent_cid;
    int num_extents;
@@ -132,7 +131,7 @@ typedef struct VmdkGrainMarker {
    uint64_t lba;
    uint32_t size;
    uint8_t  data[0];
-} QEMU_PACKED VmdkGrainMarker;
+} VmdkGrainMarker;

 enum {
    MARKER_END_OF_STREAM    = 0,
@@ -386,22 +385,15 @@ static int vmdk_parent_open(BlockDriverState *bs)

 /* Create and append extent to the extent array. Return the added VmdkExtent
 * address. return NULL if allocation failed. */
-static int vmdk_add_extent(BlockDriverState *bs,
+static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
                           BlockDriverState *file, bool flat, int64_t sectors,
                           int64_t l1_offset, int64_t l1_backup_offset,
                           uint32_t l1_size,
-                           int l2_size, uint64_t cluster_sectors,
-                           VmdkExtent **new_extent)
+                           int l2_size, unsigned int cluster_sectors)
 {
    VmdkExtent *extent;
    BDRVVmdkState *s = bs->opaque;

-    if (cluster_sectors > 0x200000) {
-        /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
-        error_report("invalid granularity, image may be corrupt");
-        return -EINVAL;
-    }
-
    s->extents = g_realloc(s->extents,
                              (s->num_extents + 1) * sizeof(VmdkExtent));
    extent = &s->extents[s->num_extents];
@@ -424,10 +416,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
        extent->end_sector = extent->sectors;
    }
    bs->total_sectors = extent->end_sector;
-    if (new_extent) {
-        *new_extent = extent;
-    }
-    return 0;
+    return extent;
 }

 static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
@@ -486,17 +475,12 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
    if (ret < 0) {
        return ret;
    }
-
-    ret = vmdk_add_extent(bs,
+    extent = vmdk_add_extent(bs,
                             bs->file, false,
                             le32_to_cpu(header.disk_sectors),
                             le32_to_cpu(header.l1dir_offset) << 9,
                             0, 1 << 6, 1 << 9,
-                             le32_to_cpu(header.granularity),
-                             &extent);
-    if (ret < 0) {
-        return ret;
-    }
+                             le32_to_cpu(header.granularity));
    ret = vmdk_init_tables(bs, extent);
    if (ret) {
        /* free extent allocated by vmdk_add_extent */
@@ -506,7 +490,7 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
 }

 static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset);
+                               int64_t desc_offset);

 static int vmdk_open_vmdk4(BlockDriverState *bs,
                           BlockDriverState *file,
@@ -524,7 +508,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        return ret;
    }
    if (header.capacity == 0) {
-        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
+        int64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
            return vmdk_open_desc_file(bs, flags, desc_offset << 9);
        }
@@ -577,49 +561,23 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
        header = footer.header;
    }

-    if (le32_to_cpu(header.version) >= 3) {
-        char buf[64];
-        snprintf(buf, sizeof(buf), "VMDK version %d",
-                 le32_to_cpu(header.version));
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                bs->device_name, "vmdk", buf);
-        return -ENOTSUP;
-    }
-
-    if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
-        error_report("L2 table size too big");
-        return -EINVAL;
-    }
-
-    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
+    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                        * le64_to_cpu(header.granularity);
    if (l1_entry_sectors == 0) {
        return -EINVAL;
    }
    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                / l1_entry_sectors;
-    if (l1_size > 512 * 1024 * 1024) {
-        /* although with big capacity and small l1_entry_sectors, we can get a
-         * big l1_size, we don't want unbounded value to allocate the table.
-         * Limit it to 512M, which is 16PB for default cluster and L2 table
-         * size */
-        error_report("L1 size too big");
-        return -EFBIG;
-    }
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
-    ret = vmdk_add_extent(bs, file, false,
+    extent = vmdk_add_extent(bs, file, false,
                          le64_to_cpu(header.capacity),
                          le64_to_cpu(header.gd_offset) << 9,
                          l1_backup_offset,
                          l1_size,
-                          le32_to_cpu(header.num_gtes_per_gt),
-                          le64_to_cpu(header.granularity),
-                          &extent);
-    if (ret < 0) {
-        return ret;
-    }
+                          le32_to_cpu(header.num_gtes_per_gte),
+                          le64_to_cpu(header.granularity));
    extent->compressed =
        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
@@ -735,11 +693,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            /* FLAT extent */
            VmdkExtent *extent;

-            ret = vmdk_add_extent(bs, extent_file, true, sectors,
-                            0, 0, 0, 0, sectors, &extent);
-            if (ret < 0) {
-                return ret;
-            }
+            extent = vmdk_add_extent(bs, extent_file, true, sectors,
+                            0, 0, 0, 0, sectors);
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE")) {
            /* SPARSE extent */
@@ -764,43 +719,30 @@ next_line:
 }

 static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
-                               uint64_t desc_offset)
+                               int64_t desc_offset)
 {
    int ret;
-    char *buf = NULL;
+    char buf[2048];
    char ct[128];
    BDRVVmdkState *s = bs->opaque;
-    int64_t size;

-    size = bdrv_getlength(bs->file);
-    if (size < 0) {
-        return -EINVAL;
-    }
-
-    size = MIN(size, 1 << 20);  /* avoid unbounded allocation */
-    buf = g_malloc0(size + 1);
-
-    ret = bdrv_pread(bs->file, desc_offset, buf, size);
+    ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
    if (ret < 0) {
-        goto exit;
+        return ret;
    }
+    buf[2047] = '\0';
    if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        ret = -EMEDIUMTYPE;
-        goto exit;
+        return -EMEDIUMTYPE;
    }
    if (strcmp(ct, "monolithicFlat") &&
        strcmp(ct, "twoGbMaxExtentSparse") &&
        strcmp(ct, "twoGbMaxExtentFlat")) {
        fprintf(stderr,
                "VMDK: Not supported image type \"%s\""".\n", ct);
-        ret = -ENOTSUP;
-        goto exit;
+        return -ENOTSUP;
    }
    s->desc_offset = 0;
-    ret = vmdk_parse_extents(buf, bs, bs->file->filename);
-exit:
-    g_free(buf);
-    return ret;
+    return vmdk_parse_extents(buf, bs, bs->file->filename);
 }

 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags)
@@ -843,17 +785,16 @@ static int get_whole_cluster(BlockDriverState *bs,
                uint64_t offset,
                bool allocate)
 {
-    int ret = VMDK_OK;
-    uint8_t *whole_grain = NULL;
+    /* 128 sectors * 512 bytes each = grain size 64KB */
+    uint8_t  whole_grain[extent->cluster_sectors * 512];

    /* we will be here if it's first write on non-exist grain(cluster).
     * try to read from parent image, if exist */
    if (bs->backing_hd) {
-        whole_grain =
-            qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
+        int ret;
+
        if (!vmdk_is_cid_valid(bs)) {
-            ret = VMDK_ERROR;
-            goto exit;
+            return VMDK_ERROR;
        }

        /* floor offset to cluster */
@@ -861,21 +802,17 @@ static int get_whole_cluster(BlockDriverState *bs,
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                extent->cluster_sectors);
        if (ret < 0) {
-            ret = VMDK_ERROR;
-            goto exit;
+            return VMDK_ERROR;
        }

        /* Write grain only into the active image */
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
                extent->cluster_sectors);
        if (ret < 0) {
-            ret = VMDK_ERROR;
-            goto exit;
+            return VMDK_ERROR;
        }
    }
-exit:
-    qemu_vfree(whole_grain);
-    return ret;
+    return VMDK_OK;
 }

 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
@@ -1241,10 +1178,8 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
 /**
 * vmdk_write:
 * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
- *                if possible, otherwise return -ENOTSUP.
- * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
- *                with each cluster. By dry run we can find if the zero write
- *                is possible without modifying image data.
+ *                if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: zeroed == true only: try each cluster, don't update L2 table
 *
 * Returns: error code with 0 for success.
 */
@@ -1371,8 +1306,6 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
    int ret;
    BDRVVmdkState *s = bs->opaque;
    qemu_co_mutex_lock(&s->lock);
-    /* write zeroes could fail if sectors not aligned to cluster, test it with
-     * dry_run == true before really updating image */
    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
    if (!ret) {
        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
@@ -1412,12 +1345,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
    header.capacity = filesize / 512;
    header.granularity = 128;
-    header.num_gtes_per_gt = 512;
+    header.num_gtes_per_gte = 512;

    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
-    gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
+    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    gt_count =
-        (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
+        (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

    header.desc_offset = 1;
@@ -1433,7 +1366,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    header.flags = cpu_to_le32(header.flags);
    header.capacity = cpu_to_le64(header.capacity);
    header.granularity = cpu_to_le64(header.granularity);
-    header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
+    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
    header.desc_offset = cpu_to_le64(header.desc_offset);
    header.desc_size = cpu_to_le64(header.desc_size);
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
@@ -1532,6 +1465,45 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
    return VMDK_OK;
 }

+/* Write into @dest the path of @target relative to the directory of @base.
+ * An absolute @target is copied as is.  Returns VMDK_OK or VMDK_ERROR. */
+static int relative_path(char *dest, int dest_size,
+        const char *base, const char *target)
+{
+    int i, n = 0, common = 0;
+    const char *p;
+#ifdef _WIN32
+    const char *sep = "\\";
+#else
+    const char *sep = "/";
+#endif
+
+    if (!(dest && base && target) || dest_size <= 0) {
+        return VMDK_ERROR;
+    }
+    if (path_is_absolute(target)) {
+        pstrcpy(dest, dest_size, target);
+        return VMDK_OK;
+    }
+    /* Stop at the NUL so identical strings cannot be scanned past their end,
+     * and only accept whole path components as the common prefix. */
+    for (i = 0; base[i] && base[i] == target[i]; i++) {
+        if (base[i] == *sep) {
+            common = i + 1;
+        }
+    }
+    for (p = &base[common]; *p; p++) {
+        n += (*p == *sep);
+    }
+    dest[0] = '\0';
+    for (; n; n--) {
+        pstrcat(dest, dest_size, "..");
+        pstrcat(dest, dest_size, sep);
+    }
+    pstrcat(dest, dest_size, &target[common]);
+    return VMDK_OK;
+}
+
 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
 {
    int fd, idx = 0;
@@ -1631,6 +1603,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
        return -ENOTSUP;
    }
    if (backing_file) {
+        char parent_filename[PATH_MAX];
        BlockDriverState *bs = bdrv_new("");
        ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
        if (ret != 0) {
@@ -1643,8 +1616,10 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
        }
        parent_cid = vmdk_read_cid(bs, 0);
        bdrv_delete(bs);
+        relative_path(parent_filename, sizeof(parent_filename),
+                      filename, backing_file);
        snprintf(parent_desc_line, sizeof(parent_desc_line),
-                "parentFileNameHint=\"%s\"", backing_file);
+                "parentFileNameHint=\"%s\"", parent_filename);
    }

    /* Create extents */
@@ -1769,23 +1744,6 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
    return ret;
 }

-static int vmdk_has_zero_init(BlockDriverState *bs)
-{
-    int i;
-    BDRVVmdkState *s = bs->opaque;
-
-    /* If has a flat extent and its underlying storage doesn't have zero init,
-     * return 0. */
-    for (i = 0; i < s->num_extents; i++) {
-        if (s->extents[i].flat) {
-            if (!bdrv_has_zero_init(s->extents[i].file)) {
-                return 0;
-            }
-        }
-    }
-    return 1;
-}
-
 static QEMUOptionParameter vmdk_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
@@ -1824,22 +1782,21 @@ static QEMUOptionParameter vmdk_create_options[] = {
 };

 static BlockDriver bdrv_vmdk = {
-    .format_name                  = "vmdk",
-    .instance_size                = sizeof(BDRVVmdkState),
-    .bdrv_probe                   = vmdk_probe,
-    .bdrv_open                    = vmdk_open,
-    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
-    .bdrv_read                    = vmdk_co_read,
-    .bdrv_write                   = vmdk_co_write,
-    .bdrv_co_write_zeroes         = vmdk_co_write_zeroes,
-    .bdrv_close                   = vmdk_close,
-    .bdrv_create                  = vmdk_create,
-    .bdrv_co_flush_to_disk        = vmdk_co_flush,
-    .bdrv_co_is_allocated         = vmdk_co_is_allocated,
-    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
-    .bdrv_has_zero_init           = vmdk_has_zero_init,
+    .format_name    = "vmdk",
+    .instance_size  = sizeof(BDRVVmdkState),
+    .bdrv_probe     = vmdk_probe,
+    .bdrv_open      = vmdk_open,
+    .bdrv_reopen_prepare = vmdk_reopen_prepare,
+    .bdrv_read      = vmdk_co_read,
+    .bdrv_write     = vmdk_co_write,
+    .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
+    .bdrv_close     = vmdk_close,
+    .bdrv_create    = vmdk_create,
+    .bdrv_co_flush_to_disk  = vmdk_co_flush,
+    .bdrv_co_is_allocated   = vmdk_co_is_allocated,
+    .bdrv_get_allocated_file_size  = vmdk_get_allocated_file_size,

-    .create_options               = vmdk_create_options,
+    .create_options = vmdk_create_options,
 };

 static void bdrv_vmdk_init(void)
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -786,18 +786,6 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
    return ret;
 }

-static int vpc_has_zero_init(BlockDriverState *bs)
-{
-    BDRVVPCState *s = bs->opaque;
-    struct vhd_footer *footer =  (struct vhd_footer *) s->footer_buf;
-
-    if (cpu_to_be32(footer->type) == VHD_FIXED) {
-        return bdrv_has_zero_init(bs->file);
-    } else {
-        return 1;
-    }
-}
-
 static void vpc_close(BlockDriverState *bs)
 {
    BDRVVPCState *s = bs->opaque;
@@ -830,17 +818,16 @@ static BlockDriver bdrv_vpc = {
    .format_name    = "vpc",
    .instance_size  = sizeof(BDRVVPCState),

-    .bdrv_probe             = vpc_probe,
-    .bdrv_open              = vpc_open,
-    .bdrv_close             = vpc_close,
-    .bdrv_reopen_prepare    = vpc_reopen_prepare,
-    .bdrv_create            = vpc_create,
+    .bdrv_probe     = vpc_probe,
+    .bdrv_open      = vpc_open,
+    .bdrv_close     = vpc_close,
+    .bdrv_reopen_prepare = vpc_reopen_prepare,
+    .bdrv_create    = vpc_create,

    .bdrv_read              = vpc_co_read,
    .bdrv_write             = vpc_co_write,

-    .create_options         = vpc_create_options,
-    .bdrv_has_zero_init     = vpc_has_zero_init,
+    .create_options = vpc_create_options,
 };

 static void bdrv_vpc_init(void)
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1164,8 +1164,8 @@ DLOG(if (stderr == NULL) {
    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);

    if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(s);
-        if (ret < 0) {
+        if (enable_write_target(s)) {
+            ret = -EIO;
            goto fail;
        }
        bs->read_only = 0;
@@ -2917,7 +2917,9 @@ static int enable_write_target(BDRVVVFATState *s)
    s->qcow_filename = g_malloc(1024);
    ret = get_tmp_filename(s->qcow_filename, 1024);
    if (ret < 0) {
-        goto err;
+        g_free(s->qcow_filename);
+        s->qcow_filename = NULL;
+        return ret;
    }

    bdrv_qcow = bdrv_find_format("qcow");
@@ -2925,18 +2927,18 @@ static int enable_write_target(BDRVVVFATState *s)
    set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
    set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");

-    ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
-    if (ret < 0) {
-        goto err;
-    }
+    if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
+	return -1;

    s->qcow = bdrv_new("");
+    if (s->qcow == NULL) {
+        return -1;
+    }

    ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
            BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
    if (ret < 0) {
-        bdrv_delete(s->qcow);
-        goto err;
+	return ret;
    }

 #ifndef _WIN32
@@ -2949,11 +2951,6 @@ static int enable_write_target(BDRVVVFATState *s)
    *(void**)s->bs->backing_hd->opaque = s;

    return 0;
-
-err:
-    g_free(s->qcow_filename);
-    s->qcow_filename = NULL;
-    return ret;
 }

 static void vvfat_close(BlockDriverState *bs)
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -25,6 +25,7 @@
 #include "qemu/timer.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qemu-common.h"
 #include "block/aio.h"
 #include "raw-aio.h"
 #include "qemu/event_notifier.h"
--- a/blockdev.c
+++ b/blockdev.c
@@ -46,7 +46,6 @@

 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
 extern QemuOptsList qemu_common_drive_opts;
-extern QemuOptsList qemu_old_drive_opts;

 static const char *const if_name[IF_COUNT] = {
    [IF_NONE] = "none",
@@ -313,8 +312,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp)
    return true;
 }

-static DriveInfo *blockdev_init(QemuOpts *all_opts,
-                                BlockInterfaceType block_default_type)
+DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
 {
    const char *buf;
    const char *file = NULL;
@@ -324,6 +322,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    enum { MEDIA_DISK, MEDIA_CDROM } media;
    int bus_id, unit_id;
    int cyls, heads, secs, translation;
+    BlockDriver *drv = NULL;
    int max_devs;
    int index;
    int ro = 0;
@@ -339,8 +338,6 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    QemuOpts *opts;
    QDict *bs_opts;
    const char *id;
-    bool has_driver_specific_opts;
-    BlockDriver *drv = NULL;

    translation = BIOS_ATA_TRANSLATION_AUTO;
    media = MEDIA_DISK;
@@ -368,8 +365,6 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
        qdict_del(bs_opts, "id");
    }

-    has_driver_specific_opts = !!qdict_size(bs_opts);
-
    /* extract parameters */
    bus_id  = qemu_opt_get_number(opts, "bus", 0);
    unit_id = qemu_opt_get_number(opts, "unit", -1);
@@ -380,7 +375,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    secs  = qemu_opt_get_number(opts, "secs", 0);

    snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
-    ro = qemu_opt_get_bool(opts, "read-only", 0);
+    ro = qemu_opt_get_bool(opts, "readonly", 0);
    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);

    file = qemu_opt_get(opts, "file");
@@ -454,14 +449,12 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
        }
    }

-    if (qemu_opt_get_bool(opts, "cache.writeback", true)) {
-        bdrv_flags |= BDRV_O_CACHE_WB;
-    }
-    if (qemu_opt_get_bool(opts, "cache.direct", false)) {
-        bdrv_flags |= BDRV_O_NOCACHE;
-    }
-    if (qemu_opt_get_bool(opts, "cache.no-flush", true)) {
-        bdrv_flags |= BDRV_O_NO_FLUSH;
+    bdrv_flags |= BDRV_O_CACHE_WB;
+    if ((buf = qemu_opt_get(opts, "cache")) != NULL) {
+        if (bdrv_parse_cache_flags(buf, &bdrv_flags) != 0) {
+            error_report("invalid cache option");
+            return NULL;
+        }
    }

 #ifdef CONFIG_LINUX_AIO
@@ -484,8 +477,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
            error_printf("\n");
            return NULL;
        }
-
-        drv = bdrv_find_whitelisted_format(buf, ro);
+        drv = bdrv_find_whitelisted_format(buf);
        if (!drv) {
            error_report("'%s' invalid format", buf);
            return NULL;
@@ -494,17 +486,17 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,

    /* disk I/O throttling */
    io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  =
-        qemu_opt_get_number(opts, "throttling.bps-total", 0);
+                           qemu_opt_get_number(opts, "bps", 0);
    io_limits.bps[BLOCK_IO_LIMIT_READ]   =
-        qemu_opt_get_number(opts, "throttling.bps-read", 0);
+                           qemu_opt_get_number(opts, "bps_rd", 0);
    io_limits.bps[BLOCK_IO_LIMIT_WRITE]  =
-        qemu_opt_get_number(opts, "throttling.bps-write", 0);
+                           qemu_opt_get_number(opts, "bps_wr", 0);
    io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
-        qemu_opt_get_number(opts, "throttling.iops-total", 0);
+                           qemu_opt_get_number(opts, "iops", 0);
    io_limits.iops[BLOCK_IO_LIMIT_READ]  =
-        qemu_opt_get_number(opts, "throttling.iops-read", 0);
+                           qemu_opt_get_number(opts, "iops_rd", 0);
    io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
-        qemu_opt_get_number(opts, "throttling.iops-write", 0);
+                           qemu_opt_get_number(opts, "iops_wr", 0);

    if (!do_check_io_limits(&io_limits, &error)) {
        error_report("%s", error_get_pretty(error));
@@ -666,7 +658,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
        abort();
    }
    if (!file || !*file) {
-        if (has_driver_specific_opts) {
+        if (qdict_size(bs_opts)) {
            file = NULL;
        } else {
            return dinfo;
@@ -692,7 +684,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    } else if (ro == 1) {
        if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY &&
            type != IF_NONE && type != IF_PFLASH) {
-            error_report("read-only not supported by this bus type");
+            error_report("readonly not supported by this bus type");
            goto err;
        }
    }
@@ -700,17 +692,16 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

    if (ro && copy_on_read) {
-        error_report("warning: disabling copy_on_read on read-only drive");
+        error_report("warning: disabling copy_on_read on readonly drive");
    }

-    QINCREF(bs_opts);
    ret = bdrv_open(dinfo->bdrv, file, bs_opts, bdrv_flags, drv);
+    bs_opts = NULL;

    if (ret < 0) {
        if (ret == -EMEDIUMTYPE) {
            error_report("could not open disk image %s: not in %s format",
-                         file ?: dinfo->id, drv ? drv->format_name :
-                         qdict_get_str(bs_opts, "driver"));
+                         file ?: dinfo->id, drv->format_name);
        } else {
            error_report("could not open disk image %s: %s",
                         file ?: dinfo->id, strerror(-ret));
@@ -721,7 +712,6 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
    if (bdrv_key_required(dinfo->bdrv))
        autostart = 0;

-    QDECREF(bs_opts);
    qemu_opts_del(opts);

    return dinfo;
@@ -736,80 +726,6 @@ err:
    return NULL;
 }

-static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to)
-{
-    const char *value;
-
-    value = qemu_opt_get(opts, from);
-    if (value) {
-        qemu_opt_set(opts, to, value);
-        qemu_opt_unset(opts, from);
-    }
-}
-
-DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
-{
-    const char *value;
-
-    /*
-     * Check that only old options are used by copying into a QemuOpts with
-     * stricter checks. Going through a QDict seems to be the easiest way to
-     * achieve this...
-     */
-    QemuOpts* check_opts;
-    QDict *qdict;
-    Error *local_err = NULL;
-
-    qdict = qemu_opts_to_qdict(all_opts, NULL);
-    check_opts = qemu_opts_from_qdict(&qemu_old_drive_opts, qdict, &local_err);
-    QDECREF(qdict);
-
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
-        return NULL;
-    }
-    qemu_opts_del(check_opts);
-
-    /* Change legacy command line options into QMP ones */
-    qemu_opt_rename(all_opts, "iops", "throttling.iops-total");
-    qemu_opt_rename(all_opts, "iops_rd", "throttling.iops-read");
-    qemu_opt_rename(all_opts, "iops_wr", "throttling.iops-write");
-
-    qemu_opt_rename(all_opts, "bps", "throttling.bps-total");
-    qemu_opt_rename(all_opts, "bps_rd", "throttling.bps-read");
-    qemu_opt_rename(all_opts, "bps_wr", "throttling.bps-write");
-
-    qemu_opt_rename(all_opts, "readonly", "read-only");
-
-    value = qemu_opt_get(all_opts, "cache");
-    if (value) {
-        int flags = 0;
-
-        if (bdrv_parse_cache_flags(value, &flags) != 0) {
-            error_report("invalid cache option");
-            return NULL;
-        }
-
-        /* Specific options take precedence */
-        if (!qemu_opt_get(all_opts, "cache.writeback")) {
-            qemu_opt_set_bool(all_opts, "cache.writeback",
-                              !!(flags & BDRV_O_CACHE_WB));
-        }
-        if (!qemu_opt_get(all_opts, "cache.direct")) {
-            qemu_opt_set_bool(all_opts, "cache.direct",
-                              !!(flags & BDRV_O_NOCACHE));
-        }
-        if (!qemu_opt_get(all_opts, "cache.no-flush")) {
-            qemu_opt_set_bool(all_opts, "cache.no-flush",
-                              !!(flags & BDRV_O_NO_FLUSH));
-        }
-        qemu_opt_unset(all_opts, "cache");
-    }
-
-    return blockdev_init(all_opts, block_default_type);
-}
-
 void do_commit(Monitor *mon, const QDict *qdict)
 {
    const char *device = qdict_get_str(qdict, "device");
@@ -834,8 +750,8 @@ void do_commit(Monitor *mon, const QDict *qdict)

 static void blockdev_do_action(int kind, void *data, Error **errp)
 {
-    TransactionAction action;
-    TransactionActionList list;
+    BlockdevAction action;
+    BlockdevActionList list;

    action.kind = kind;
    action.data = data;
@@ -857,238 +773,31 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
        .has_mode = has_mode,
        .mode = mode,
    };
-    blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
-                       &snapshot, errp);
+    blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
+                       errp);
 }


 /* New and old BlockDriverState structs for group snapshots */
-
-typedef struct BlkTransactionState BlkTransactionState;
-
-/* Only prepare() may fail. In a single transaction, only one of commit() or
-   abort() will be called, clean() will always be called if it present. */
-typedef struct BdrvActionOps {
-    /* Size of state struct, in bytes. */
-    size_t instance_size;
-    /* Prepare the work, must NOT be NULL. */
-    void (*prepare)(BlkTransactionState *common, Error **errp);
-    /* Commit the changes, can be NULL. */
-    void (*commit)(BlkTransactionState *common);
-    /* Abort the changes on fail, can be NULL. */
-    void (*abort)(BlkTransactionState *common);
-    /* Clean up resource in the end, can be NULL. */
-    void (*clean)(BlkTransactionState *common);
-} BdrvActionOps;
-
-/*
- * This structure must be arranged as first member in child type, assuming
- * that compiler will also arrange it to the same address with parent instance.
- * Later it will be used in free().
- */
-struct BlkTransactionState {
-    TransactionAction *action;
-    const BdrvActionOps *ops;
-    QSIMPLEQ_ENTRY(BlkTransactionState) entry;
-};
-
-/* external snapshot private data */
-typedef struct ExternalSnapshotState {
-    BlkTransactionState common;
+typedef struct BlkTransactionStates {
    BlockDriverState *old_bs;
    BlockDriverState *new_bs;
-} ExternalSnapshotState;
-
-static void external_snapshot_prepare(BlkTransactionState *common,
-                                      Error **errp)
-{
-    BlockDriver *drv;
-    int flags, ret;
-    Error *local_err = NULL;
-    const char *device;
-    const char *new_image_file;
-    const char *format = "qcow2";
-    enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
-    TransactionAction *action = common->action;
-
-    /* get parameters */
-    g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
-
-    device = action->blockdev_snapshot_sync->device;
-    new_image_file = action->blockdev_snapshot_sync->snapshot_file;
-    if (action->blockdev_snapshot_sync->has_format) {
-        format = action->blockdev_snapshot_sync->format;
-    }
-    if (action->blockdev_snapshot_sync->has_mode) {
-        mode = action->blockdev_snapshot_sync->mode;
-    }
-
-    /* start processing */
-    drv = bdrv_find_format(format);
-    if (!drv) {
-        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-        return;
-    }
-
-    state->old_bs = bdrv_find(device);
-    if (!state->old_bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
-        return;
-    }
-
-    if (!bdrv_is_inserted(state->old_bs)) {
-        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-        return;
-    }
-
-    if (bdrv_in_use(state->old_bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, device);
-        return;
-    }
-
-    if (!bdrv_is_read_only(state->old_bs)) {
-        if (bdrv_flush(state->old_bs)) {
-            error_set(errp, QERR_IO_ERROR);
-            return;
-        }
-    }
-
-    flags = state->old_bs->open_flags;
-
-    /* create new image w/backing file */
-    if (mode != NEW_IMAGE_MODE_EXISTING) {
-        bdrv_img_create(new_image_file, format,
-                        state->old_bs->filename,
-                        state->old_bs->drv->format_name,
-                        NULL, -1, flags, &local_err, false);
-        if (error_is_set(&local_err)) {
-            error_propagate(errp, local_err);
-            return;
-        }
-    }
-
-    /* We will manually add the backing_hd field to the bs later */
-    state->new_bs = bdrv_new("");
-    /* TODO Inherit bs->options or only take explicit options with an
-     * extended QMP command? */
-    ret = bdrv_open(state->new_bs, new_image_file, NULL,
-                    flags | BDRV_O_NO_BACKING, drv);
-    if (ret != 0) {
-        error_setg_file_open(errp, -ret, new_image_file);
-    }
-}
-
-static void external_snapshot_commit(BlkTransactionState *common)
-{
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
-
-    /* This removes our old bs and adds the new bs */
-    bdrv_append(state->new_bs, state->old_bs);
-    /* We don't need (or want) to use the transactional
-     * bdrv_reopen_multiple() across all the entries at once, because we
-     * don't want to abort all of them if one of them fails the reopen */
-    bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR,
-                NULL);
-}
-
-static void external_snapshot_abort(BlkTransactionState *common)
-{
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
-    if (state->new_bs) {
-        bdrv_delete(state->new_bs);
-    }
-}
-
-typedef struct DriveBackupState {
-    BlkTransactionState common;
-    BlockDriverState *bs;
-    BlockJob *job;
-} DriveBackupState;
-
-static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
-{
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    DriveBackup *backup;
-    Error *local_err = NULL;
-
-    assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
-    backup = common->action->drive_backup;
-
-    qmp_drive_backup(backup->device, backup->target,
-                     backup->has_format, backup->format,
-                     backup->sync,
-                     backup->has_mode, backup->mode,
-                     backup->has_speed, backup->speed,
-                     backup->has_on_source_error, backup->on_source_error,
-                     backup->has_on_target_error, backup->on_target_error,
-                     &local_err);
-    if (error_is_set(&local_err)) {
-        error_propagate(errp, local_err);
-        state->bs = NULL;
-        state->job = NULL;
-        return;
-    }
-
-    state->bs = bdrv_find(backup->device);
-    state->job = state->bs->job;
-}
-
-static void drive_backup_abort(BlkTransactionState *common)
-{
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    BlockDriverState *bs = state->bs;
-
-    /* Only cancel if it's the job we started */
-    if (bs && bs->job && bs->job == state->job) {
-        block_job_cancel_sync(bs->job);
-    }
-}
-
-static void abort_prepare(BlkTransactionState *common, Error **errp)
-{
-    error_setg(errp, "Transaction aborted using Abort action");
-}
-
-static void abort_commit(BlkTransactionState *common)
-{
-    g_assert_not_reached(); /* this action never succeeds */
-}
-
-static const BdrvActionOps actions[] = {
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
-        .instance_size = sizeof(ExternalSnapshotState),
-        .prepare  = external_snapshot_prepare,
-        .commit   = external_snapshot_commit,
-        .abort = external_snapshot_abort,
-    },
-    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
-        .instance_size = sizeof(DriveBackupState),
-        .prepare = drive_backup_prepare,
-        .abort = drive_backup_abort,
-    },
-    [TRANSACTION_ACTION_KIND_ABORT] = {
-        .instance_size = sizeof(BlkTransactionState),
-        .prepare = abort_prepare,
-        .commit = abort_commit,
-    },
-};
+    QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
+} BlkTransactionStates;

 /*
 * 'Atomic' group snapshots.  The snapshots are taken as a set, and if any fail
 *  then we do not pivot any of the devices in the group, and abandon the
 *  snapshots
 */
-void qmp_transaction(TransactionActionList *dev_list, Error **errp)
+void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 {
-    TransactionActionList *dev_entry = dev_list;
-    BlkTransactionState *state, *next;
+    int ret = 0;
+    BlockdevActionList *dev_entry = dev_list;
+    BlkTransactionStates *states, *next;
    Error *local_err = NULL;

-    QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionState) snap_bdrv_states;
+    QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
    QSIMPLEQ_INIT(&snap_bdrv_states);

    /* drain all i/o before any snapshots */
@@ -1096,31 +805,109 @@ void qmp_transaction(TransactionActionList *dev_list, Error **errp)

    /* We don't do anything in this loop that commits us to the snapshot */
    while (NULL != dev_entry) {
-        TransactionAction *dev_info = NULL;
-        const BdrvActionOps *ops;
+        BlockdevAction *dev_info = NULL;
+        BlockDriver *proto_drv;
+        BlockDriver *drv;
+        int flags;
+        enum NewImageMode mode;
+        const char *new_image_file;
+        const char *device;
+        const char *format = "qcow2";

        dev_info = dev_entry->value;
        dev_entry = dev_entry->next;

-        assert(dev_info->kind < ARRAY_SIZE(actions));
+        states = g_malloc0(sizeof(BlkTransactionStates));
+        QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);

-        ops = &actions[dev_info->kind];
-        state = g_malloc0(ops->instance_size);
-        state->ops = ops;
-        state->action = dev_info;
-        QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
+        switch (dev_info->kind) {
+        case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+            device = dev_info->blockdev_snapshot_sync->device;
+            if (!dev_info->blockdev_snapshot_sync->has_mode) {
+                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+            }
+            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
+            if (dev_info->blockdev_snapshot_sync->has_format) {
+                format = dev_info->blockdev_snapshot_sync->format;
+            }
+            mode = dev_info->blockdev_snapshot_sync->mode;
+            break;
+        default:
+            abort();
+        }

-        state->ops->prepare(state, &local_err);
-        if (error_is_set(&local_err)) {
-            error_propagate(errp, local_err);
+        drv = bdrv_find_format(format);
+        if (!drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            goto delete_and_fail;
+        }
+
+        states->old_bs = bdrv_find(device);
+        if (!states->old_bs) {
+            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+            goto delete_and_fail;
+        }
+
+        if (!bdrv_is_inserted(states->old_bs)) {
+            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+            goto delete_and_fail;
+        }
+
+        if (bdrv_in_use(states->old_bs)) {
+            error_set(errp, QERR_DEVICE_IN_USE, device);
+            goto delete_and_fail;
+        }
+
+        if (!bdrv_is_read_only(states->old_bs)) {
+            if (bdrv_flush(states->old_bs)) {
+                error_set(errp, QERR_IO_ERROR);
+                goto delete_and_fail;
+            }
+        }
+
+        flags = states->old_bs->open_flags;
+
+        proto_drv = bdrv_find_protocol(new_image_file);
+        if (!proto_drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            goto delete_and_fail;
+        }
+
+        /* create new image w/backing file */
+        if (mode != NEW_IMAGE_MODE_EXISTING) {
+            bdrv_img_create(new_image_file, format,
+                            states->old_bs->filename,
+                            states->old_bs->drv->format_name,
+                            NULL, -1, flags, &local_err, false);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
+                goto delete_and_fail;
+            }
+        }
+
+        /* We will manually add the backing_hd field to the bs later */
+        states->new_bs = bdrv_new("");
+        /* TODO Inherit bs->options or only take explicit options with an
+         * extended QMP command? */
+        ret = bdrv_open(states->new_bs, new_image_file, NULL,
+                        flags | BDRV_O_NO_BACKING, drv);
+        if (ret != 0) {
+            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
            goto delete_and_fail;
        }
    }

-    QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) {
-        if (state->ops->commit) {
-            state->ops->commit(state);
-        }
+
+    /* Now we are going to do the actual pivot.  Everything up to this point
+     * is reversible, but we are committed at this point */
+    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
+        /* This removes our old bs from the bdrv_states, and adds the new bs */
+        bdrv_append(states->new_bs, states->old_bs);
+        /* We don't need (or want) to use the transactional
+         * bdrv_reopen_multiple() across all the entries at once, because we
+         * don't want to abort all of them if one of them fails the reopen */
+        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
    }

    /* success */
@@ -1131,17 +918,14 @@ delete_and_fail:
    * failure, and it is all-or-none; abandon each new bs, and keep using
    * the original bs for all images
    */
-    QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) {
-        if (state->ops->abort) {
-            state->ops->abort(state);
+    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
+        if (states->new_bs) {
+             bdrv_delete(states->new_bs);
        }
    }
 exit:
-    QSIMPLEQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
-        if (state->ops->clean) {
-            state->ops->clean(state);
-        }
-        g_free(state);
+    QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
+        g_free(states);
    }
 }

@@ -1206,11 +990,8 @@ static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
                                    int bdrv_flags, BlockDriver *drv,
                                    const char *password, Error **errp)
 {
-    int ret;
-
-    ret = bdrv_open(bs, filename, NULL, bdrv_flags, drv);
-    if (ret < 0) {
-        error_setg_file_open(errp, -ret, filename);
+    if (bdrv_open(bs, filename, NULL, bdrv_flags, drv) < 0) {
+        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

@@ -1243,7 +1024,7 @@ void qmp_change_blockdev(const char *device, const char *filename,
    }

    if (format) {
-        drv = bdrv_find_whitelisted_format(format, bs->read_only);
+        drv = bdrv_find_whitelisted_format(format);
        if (!drv) {
            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
            return;
@@ -1504,123 +1285,6 @@ void qmp_block_commit(const char *device,
    drive_get_ref(drive_get_by_blockdev(bs));
 }

-void qmp_drive_backup(const char *device, const char *target,
-                      bool has_format, const char *format,
-                      enum MirrorSyncMode sync,
-                      bool has_mode, enum NewImageMode mode,
-                      bool has_speed, int64_t speed,
-                      bool has_on_source_error, BlockdevOnError on_source_error,
-                      bool has_on_target_error, BlockdevOnError on_target_error,
-                      Error **errp)
-{
-    BlockDriverState *bs;
-    BlockDriverState *target_bs;
-    BlockDriverState *source = NULL;
-    BlockDriver *drv = NULL;
-    Error *local_err = NULL;
-    int flags;
-    int64_t size;
-    int ret;
-
-    if (!has_speed) {
-        speed = 0;
-    }
-    if (!has_on_source_error) {
-        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
-    }
-    if (!has_on_target_error) {
-        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
-    }
-    if (!has_mode) {
-        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    }
-
-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
-        return;
-    }
-
-    if (!bdrv_is_inserted(bs)) {
-        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-        return;
-    }
-
-    if (!has_format) {
-        format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
-    }
-    if (format) {
-        drv = bdrv_find_format(format);
-        if (!drv) {
-            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-            return;
-        }
-    }
-
-    if (bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, device);
-        return;
-    }
-
-    flags = bs->open_flags | BDRV_O_RDWR;
-
-    /* See if we have a backing HD we can use to create our new image
-     * on top of. */
-    if (sync == MIRROR_SYNC_MODE_TOP) {
-        source = bs->backing_hd;
-        if (!source) {
-            sync = MIRROR_SYNC_MODE_FULL;
-        }
-    }
-    if (sync == MIRROR_SYNC_MODE_NONE) {
-        source = bs;
-    }
-
-    size = bdrv_getlength(bs);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "bdrv_getlength failed");
-        return;
-    }
-
-    if (mode != NEW_IMAGE_MODE_EXISTING) {
-        assert(format && drv);
-        if (source) {
-            bdrv_img_create(target, format, source->filename,
-                            source->drv->format_name, NULL,
-                            size, flags, &local_err, false);
-        } else {
-            bdrv_img_create(target, format, NULL, NULL, NULL,
-                            size, flags, &local_err, false);
-        }
-    }
-
-    if (error_is_set(&local_err)) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    target_bs = bdrv_new("");
-    ret = bdrv_open(target_bs, target, NULL, flags, drv);
-    if (ret < 0) {
-        bdrv_delete(target_bs);
-        error_setg_file_open(errp, -ret, target);
-        return;
-    }
-
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
-                 block_job_cb, bs, &local_err);
-    if (local_err != NULL) {
-        bdrv_delete(target_bs);
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    /* Grab a reference so hotplug does not delete the BlockDriverState from
-     * underneath us.
-     */
-    drive_get_ref(drive_get_by_blockdev(bs));
-}
-
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)

 void qmp_drive_mirror(const char *device, const char *target,
@@ -1636,10 +1300,11 @@ void qmp_drive_mirror(const char *device, const char *target,
 {
    BlockDriverState *bs;
    BlockDriverState *source, *target_bs;
+    BlockDriver *proto_drv;
    BlockDriver *drv = NULL;
    Error *local_err = NULL;
    int flags;
-    int64_t size;
+    uint64_t size;
    int ret;

    if (!has_speed) {
@@ -1703,12 +1368,14 @@ void qmp_drive_mirror(const char *device, const char *target,
        sync = MIRROR_SYNC_MODE_FULL;
    }

-    size = bdrv_getlength(bs);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "bdrv_getlength failed");
+    proto_drv = bdrv_find_protocol(target);
+    if (!proto_drv) {
+        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
        return;
    }

+    bdrv_get_geometry(bs, &size);
+    size *= 512;
    if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
        /* create new image w/o backing file */
        assert(format && drv);
@@ -1741,9 +1408,10 @@ void qmp_drive_mirror(const char *device, const char *target,
     */
    target_bs = bdrv_new("");
    ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv);
+
    if (ret < 0) {
        bdrv_delete(target_bs);
-        error_setg_file_open(errp, -ret, target);
+        error_set(errp, QERR_OPEN_FILE_FAILED, target);
        return;
    }

@@ -1921,17 +1589,10 @@ QemuOptsList qemu_common_drive_opts = {
            .type = QEMU_OPT_STRING,
            .help = "discard operation (ignore/off, unmap/on)",
        },{
-            .name = "cache.writeback",
-            .type = QEMU_OPT_BOOL,
-            .help = "enables writeback mode for any caches",
-        },{
-            .name = "cache.direct",
-            .type = QEMU_OPT_BOOL,
-            .help = "enables use of O_DIRECT (bypass the host page cache)",
-        },{
-            .name = "cache.no-flush",
-            .type = QEMU_OPT_BOOL,
-            .help = "ignore any flush requests for the device",
+            .name = "cache",
+            .type = QEMU_OPT_STRING,
+            .help = "host cache usage (none, writeback, writethrough, "
+                    "directsync, unsafe)",
        },{
            .name = "aio",
            .type = QEMU_OPT_STRING,
@@ -1957,31 +1618,31 @@ QemuOptsList qemu_common_drive_opts = {
            .type = QEMU_OPT_STRING,
            .help = "pci address (virtio only)",
        },{
-            .name = "read-only",
+            .name = "readonly",
            .type = QEMU_OPT_BOOL,
            .help = "open drive file as read-only",
        },{
-            .name = "throttling.iops-total",
+            .name = "iops",
            .type = QEMU_OPT_NUMBER,
            .help = "limit total I/O operations per second",
        },{
-            .name = "throttling.iops-read",
+            .name = "iops_rd",
            .type = QEMU_OPT_NUMBER,
            .help = "limit read operations per second",
        },{
-            .name = "throttling.iops-write",
+            .name = "iops_wr",
            .type = QEMU_OPT_NUMBER,
            .help = "limit write operations per second",
        },{
-            .name = "throttling.bps-total",
+            .name = "bps",
            .type = QEMU_OPT_NUMBER,
            .help = "limit total bytes per second",
        },{
-            .name = "throttling.bps-read",
+            .name = "bps_rd",
            .type = QEMU_OPT_NUMBER,
            .help = "limit read bytes per second",
        },{
-            .name = "throttling.bps-write",
+            .name = "bps_wr",
            .type = QEMU_OPT_NUMBER,
            .help = "limit write bytes per second",
        },{
@@ -1997,9 +1658,9 @@ QemuOptsList qemu_common_drive_opts = {
    },
 };

-QemuOptsList qemu_old_drive_opts = {
+QemuOptsList qemu_drive_opts = {
    .name = "drive",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_old_drive_opts.head),
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
    .desc = {
        {
            .name = "bus",
@@ -2118,15 +1779,3 @@ QemuOptsList qemu_old_drive_opts = {
        { /* end of list */ }
    },
 };
-
-QemuOptsList qemu_drive_opts = {
-    .name = "drive",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
-    .desc = {
-        /*
-         * no elements => accept any params
-         * validation will happen later
-         */
-        { /* end of list */ }
-    },
-};
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -98,7 +98,7 @@ enum {
 static const char *get_elf_platform(void)
 {
    static char elf_platform[] = "i386";
-    int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL);
+    int family = (thread_env->cpuid_version >> 8) & 0xff;
    if (family > 6)
        family = 6;
    if (family >= 3)
@@ -110,9 +110,7 @@ static const char *get_elf_platform(void)

 static uint32_t get_elf_hwcap(void)
 {
-    X86CPU *cpu = X86_CPU(thread_cpu);
-
-    return cpu->env.features[FEAT_1_EDX];
+    return thread_env->features[FEAT_1_EDX];
 }

 #ifdef TARGET_X86_64
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -92,7 +92,7 @@ void fork_start(void)
 void fork_end(int child)
 {
    if (child) {
-        gdbserver_fork((CPUArchState *)thread_cpu->env_ptr);
+        gdbserver_fork(thread_env);
    }
 }

@@ -511,7 +511,6 @@ static void flush_windows(CPUSPARCState *env)

 void cpu_loop(CPUSPARCState *env)
 {
-    CPUState *cs = CPU(sparc_env_get_cpu(env));
    int trapnr, ret, syscall_nr;
    //target_siginfo_t info;

@@ -643,7 +642,7 @@ void cpu_loop(CPUSPARCState *env)
            {
                int sig;

-                sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+                sig = gdb_handlesig (env, TARGET_SIGTRAP);
 #if 0
                if (sig)
                  {
@@ -660,7 +659,7 @@ void cpu_loop(CPUSPARCState *env)
        badtrap:
 #endif
            printf ("Unhandled trap: 0x%x\n", trapnr);
-            cpu_dump_state(cs, stderr, fprintf, 0);
+            cpu_dump_state(env, stderr, fprintf, 0);
            exit (1);
        }
        process_pending_signals (env);
@@ -671,8 +670,8 @@ void cpu_loop(CPUSPARCState *env)

 static void usage(void)
 {
-    printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
-           "usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
+    printf("qemu-" TARGET_ARCH " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
+           "usage: qemu-" TARGET_ARCH " [options] program [arguments...]\n"
           "BSD CPU emulator (compiled for %s emulation)\n"
           "\n"
           "Standard options:\n"
@@ -707,13 +706,13 @@ static void usage(void)
           "Note that if you provide several changes to single variable\n"
           "last change will stay in effect.\n"
           ,
-           TARGET_NAME,
+           TARGET_ARCH,
           interp_prefix,
           x86_stack_size);
    exit(1);
 }

-THREAD CPUState *thread_cpu;
+THREAD CPUArchState *thread_env;

 /* Assumes contents are already zeroed.  */
 void init_task_state(TaskState *ts)
@@ -738,7 +737,6 @@ int main(int argc, char **argv)
    struct image_info info1, *info = &info1;
    TaskState ts1, *ts = &ts1;
    CPUArchState *env;
-    CPUState *cpu;
    int optind;
    const char *r;
    int gdbstub_port = 0;
@@ -913,11 +911,10 @@ int main(int argc, char **argv)
        fprintf(stderr, "Unable to find CPU definition\n");
        exit(1);
    }
-    cpu = ENV_GET_CPU(env);
 #if defined(TARGET_SPARC) || defined(TARGET_PPC)
-    cpu_reset(cpu);
+    cpu_reset(ENV_GET_CPU(env));
 #endif
-    thread_cpu = cpu;
+    thread_env = env;

    if (getenv("QEMU_STRACE")) {
        do_strace = 1;
@@ -1136,7 +1133,7 @@ int main(int argc, char **argv)

    if (gdbstub_port) {
        gdbserver_start (gdbstub_port);
-        gdb_handlesig(cpu, 0);
+        gdb_handlesig(env, 0);
    }
    cpu_loop(env);
    /* never exits */
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -139,7 +139,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
                            abi_long arg2, abi_long arg3, abi_long arg4,
                            abi_long arg5, abi_long arg6);
 void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
-extern THREAD CPUState *thread_cpu;
+extern THREAD CPUArchState *thread_env;
 void cpu_loop(CPUArchState *env);
 char *target_strerror(int err);
 int get_osversion(void);
--- a/cmd.c
+++ b/cmd.c
@@ -0,0 +1,612 @@
+/*
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#include "cmd.h"
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+
+#define _(x)	x	/* not gettext support yet */
+
+/* from libxcmd/command.c */
+
+cmdinfo_t	*cmdtab;
+int		ncmds;
+
+static argsfunc_t	args_func;
+static checkfunc_t	check_func;
+static int		ncmdline;
+static char		**cmdline;
+
+/* qsort() comparator: orders two cmdinfo_t entries by strcmp() of ->name. */
+static int compare(const void *a, const void *b)
+{
+	const cmdinfo_t	*lhs = a, *rhs = b;
+	return strcmp(lhs->name, rhs->name);
+}
+
+void add_command(const cmdinfo_t *ci)
+{
+    cmdtab = g_realloc(cmdtab, (ncmds + 1) * sizeof(*cmdtab));
+    cmdtab[ncmds++] = *ci;          /* append a copy of the caller's entry */
+    qsort(cmdtab, ncmds, sizeof(*cmdtab), compare);  /* re-sort via compare() */
+}
+
+/*
+ * Ask the registered check callback whether ci may run and return its
+ * answer; with no callback installed the result is 1.
+ */
+static int check_command(const cmdinfo_t *ci)
+{
+	return check_func ? check_func(ci) : 1;
+}
+
+/* Install cf as the callback consulted by check_command(); replaces any
+ * previously registered callback. */
+void add_check_command(checkfunc_t cf)
+{
+	check_func = cf;
+}
+
+/* Print "name args -- oneline"; args is optional (may be NULL). Returns 0. */
+int command_usage(const cmdinfo_t *ci)
+{
+	printf("%s %s -- %s\n", ci->name,
+	       ci->args ? ci->args : "", ci->oneline);
+	return 0;
+}
+
+/* Run ct's handler on argv (argv[0] is the name as typed) and return its
+ * result.  Returns 0 without running it if check_command() refuses ct or the
+ * argument count is outside [argmin, argmax]; argmax == -1 means no limit. */
+int command(const cmdinfo_t *ct, int argc, char **argv)
+{
+	const char	*cmd = argv[0];
+	int		nargs = argc - 1;
+
+	if (!check_command(ct))
+		return 0;
+
+	if (nargs < ct->argmin || (ct->argmax != -1 && nargs > ct->argmax)) {
+		if (ct->argmax == -1)
+			fprintf(stderr,
+	_("bad argument count %d to %s, expected at least %d arguments\n"),
+				nargs, cmd, ct->argmin);
+		else if (ct->argmin == ct->argmax)
+			fprintf(stderr,
+	_("bad argument count %d to %s, expected %d arguments\n"),
+				nargs, cmd, ct->argmin);
+		else
+			fprintf(stderr,
+	_("bad argument count %d to %s, expected between %d and %d arguments\n"),
+				nargs, cmd, ct->argmin, ct->argmax);
+		return 0;
+	}
+	optind = 0;	/* restart getopt() scanning for the handler */
+	return ct->cfunc(argc, argv);
+}
+
+/* Return the entry whose name or (optional) altname equals cmd, else NULL. */
+const cmdinfo_t *find_command(const char *cmd)
+{
+	int	i;
+	for (i = 0; i < ncmds; i++) {
+		const cmdinfo_t	*ct = &cmdtab[i];
+		if (strcmp(ct->name, cmd) == 0)
+			return ct;
+		if (ct->altname && strcmp(ct->altname, cmd) == 0)
+			return ct;
+	}
+	return NULL;
+}
+
+void add_user_command(char *optarg)
+{
+    cmdline = g_realloc(cmdline, (ncmdline + 1) * sizeof(*cmdline));
+    cmdline[ncmdline++] = optarg;   /* the pointer is stored, not copied */
+}
+
+/*
+ * Forward index to the registered args callback and return its result;
+ * returns 0 when no callback has been installed.
+ */
+static int args_command(int index)
+{
+	return args_func ? args_func(index) : 0;
+}
+
+/* Install af as the callback used by args_command(); replaces any
+ * previously registered callback. */
+void add_args_command(argsfunc_t af)
+{
+	args_func = af;
+}
+
+/* Handler that command_loop() registers on stdin: unregister all stdin
+ * handlers again and set the int flag that opaque points to. */
+static void prep_fetchline(void *opaque)
+{
+    qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
+    *(int *)opaque = 1;
+}
+
+static char *get_prompt(void);
+
+void command_loop(void)
+{
+    int c, i, j = 0, done = 0, fetchable = 0, prompted = 0;
+    char *input;
+    char **v;
+    const cmdinfo_t *ct;
+    /* Phase 1: run every command line queued with add_user_command(). */
+    for (i = 0; !done && i < ncmdline; i++) {
+        input = strdup(cmdline[i]);     /* breakline() edits its input */
+        if (!input) {
+            fprintf(stderr, _("cannot strdup command '%s': %s\n"),
+                    cmdline[i], strerror(errno));
+            exit(1);
+        }
+        v = breakline(input, &c);
+        if (c) {
+            ct = find_command(v[0]);
+            if (ct) {
+                if (ct->flags & CMD_FLAG_GLOBAL) {  /* run exactly once */
+                    done = command(ct, c, v);
+                } else {
+                    j = 0;
+                    /* repeat until the args callback returns 0 */
+                    while (!done && (j = args_command(j))) {
+                        done = command(ct, c, v);
+                    }
+                }
+            } else {
+                fprintf(stderr, _("command \"%s\" not found\n"), v[0]);
+            }
+	}
+        doneline(input, v);
+    }
+    if (cmdline) {      /* commands were queued: do not go interactive */
+        g_free(cmdline);
+        return;
+    }
+    /* Phase 2: interactive loop, until a handler returns non-zero or EOF. */
+    while (!done) {
+        if (!prompted) {
+            printf("%s", get_prompt());
+            fflush(stdout);
+            /* prep_fetchline() sets fetchable when it is invoked */
+            qemu_set_fd_handler(STDIN_FILENO, prep_fetchline, NULL, &fetchable);
+            prompted = 1;
+        }
+
+        main_loop_wait(false);  /* presumably dispatches the stdin handler */
+
+        if (!fetchable) {
+            continue;
+        }
+        input = fetchline();
+        if (input == NULL) {    /* fetchline() returned no line */
+            break;
+        }
+        v = breakline(input, &c);
+        if (c) {
+            ct = find_command(v[0]);
+            if (ct) {
+                done = command(ct, c, v);
+            } else {
+                fprintf(stderr, _("command \"%s\" not found\n"), v[0]);
+            }
+        }
+        doneline(input, v);
+
+        prompted = 0;
+        fetchable = 0;
+    }
+    qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
+}
+
+/* from libxcmd/input.c */
+
+#if defined(ENABLE_READLINE)
+# include <readline/history.h>
+# include <readline/readline.h>
+#elif defined(ENABLE_EDITLINE)
+# include <histedit.h>
+#endif
+
+/* Return the "<progname>> " prompt, built on first use in a static buffer. */
+static char *get_prompt(void)
+{
+	static char	prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ];
+
+	if (prompt[0] == '\0')
+		snprintf(prompt, sizeof(prompt), "%s> ", progname);
+	return prompt;
+}
+
+#if defined(ENABLE_READLINE)
+/* readline build: read one line using the prompt; a non-empty line is added
+ * to the history.  Returns readline()'s result unchanged. */
+char *fetchline(void)
+{
+	char	*line = readline(get_prompt());
+
+	if (line != NULL && line[0] != '\0')
+		add_history(line);
+	return line;
+}
+#elif defined(ENABLE_EDITLINE)
+static char *el_get_prompt(EditLine *e) { return get_prompt(); }
+/* editline build: returns a malloc'ed line (last char stripped) or NULL. */
+char *fetchline(void)
+{
+	static EditLine	*el;
+	static History	*hist;
+	HistEvent	hevent;
+	const char	*raw;
+	char		*line;
+	int		count;
+
+	if (!el) {
+		hist = history_init();
+		history(hist, &hevent, H_SETSIZE, 100);
+		el = el_init(progname, stdin, stdout, stderr);
+		el_source(el, NULL);
+		el_set(el, EL_SIGNAL, 1);
+		el_set(el, EL_PROMPT, el_get_prompt);
+		el_set(el, EL_HIST, history, (const char *)hist);
+	}
+	raw = el_gets(el, &count);		/* NULL at EOF or on error */
+	line = raw ? strdup(raw) : NULL;	/* strdup(NULL) is undefined */
+	if (line && count > 0)
+		line[count-1] = '\0';
+	if (line && *line)
+		history(hist, &hevent, H_ENTER, line);
+	return line;
+}
+#else
+# define MAXREADLINESZ	1024
+/* Fallback: read one line (at most MAXREADLINESZ - 1 bytes) from stdin into
+ * a malloc'ed buffer, dropping a trailing newline; NULL on EOF/error/OOM. */
+char *fetchline(void)
+{
+	char	*line = malloc(MAXREADLINESZ);
+	size_t	len;
+
+	if (line == NULL || fgets(line, MAXREADLINESZ, stdin) == NULL) {
+		free(line);
+		return NULL;
+	}
+	len = strlen(line);
+	if (len > 0 && line[len - 1] == '\n')
+		line[len - 1] = '\0';
+	return line;
+}
+#endif
+
+/* Local strsep() work-alike: return the token at *input, cut it at the first
+ * delim character and advance *input past it; *input becomes NULL after the
+ * last token, and a NULL *input yields NULL. */
+static char *qemu_strsep(char **input, const char *delim)
+{
+    char *token = *input;
+    char *end;
+
+    if (token == NULL) {
+        return NULL;
+    }
+
+    end = token + strcspn(token, delim);
+    if (*end != '\0') {
+        *end = '\0';
+        *input = end + 1;
+    } else {
+        *input = NULL;
+    }
+    return token;
+}
+
+/* Split input in place on spaces into a malloc'ed, NULL-terminated vector of
+ * pointers into input, skipping empty tokens.  *count gets the token count.
+ * Returns NULL with *count == 0 if an allocation fails. */
+char **breakline(char *input, int *count)
+{
+    char **vec = calloc(1, sizeof(*vec));
+    char *tok, **grown;
+    int n = 0;
+
+    while (vec != NULL && (tok = qemu_strsep(&input, " ")) != NULL) {
+        if (*tok == '\0') {
+            continue;
+        }
+        grown = realloc(vec, sizeof(*vec) * (n + 2));
+        if (grown == NULL) {
+            free(vec);
+            vec = NULL;
+            n = 0;
+            break;
+        }
+        vec = grown;
+        vec[n++] = tok;
+        vec[n] = NULL;
+    }
+    *count = n;
+    return vec;
+}
+
+/*
+ * Free a line buffer and the token vector built from it (see breakline()).
+ * Either argument may be NULL.
+ */
+void doneline(char *input, char **vec)
+{
+	free(vec);
+	free(input);
+}
+
+#define EXABYTES(x)	((long long)(x) << 60)
+#define PETABYTES(x)	((long long)(x) << 50)
+#define TERABYTES(x)	((long long)(x) << 40)
+#define GIGABYTES(x)	((long long)(x) << 30)
+#define MEGABYTES(x)	((long long)(x) << 20)
+#define KILOBYTES(x)	((long long)(x) << 10)
+
+/*
+ * Parse s with strtoll() (base 0) plus an optional one-letter binary suffix
+ * k/m/g/t/p/e in either case.  Returns the scaled value, or -1 if s has no
+ * digits, has more than one trailing character, or the scaled value would
+ * not fit in a long long.  Any other single trailing character is ignored.
+ */
+long long cvtnum(char *s)
+{
+	const long long	max = (long long)(~0ULL >> 1);	/* == LLONG_MAX */
+	long long	i, unit;
+	char		*sp;
+
+	i = strtoll(s, &sp, 0);
+	if (i == 0 && sp == s)
+		return -1LL;
+	if (*sp == '\0')
+		return i;
+	if (sp[1] != '\0')
+		return -1LL;
+
+	switch (*sp) {
+	case 'k': case 'K': unit = KILOBYTES(1); break;
+	case 'm': case 'M': unit = MEGABYTES(1); break;
+	case 'g': case 'G': unit = GIGABYTES(1); break;
+	case 't': case 'T': unit = TERABYTES(1); break;
+	case 'p': case 'P': unit = PETABYTES(1); break;
+	case 'e': case 'E': unit = EXABYTES(1); break;
+	default:
+		return i;
+	}
+	/* Scale by multiplying with a range check: the old "i << n" was
+	 * undefined for negative i and on overflow. */
+	if (i > max / unit || i < -max / unit)
+		return -1LL;
+	return i * unit;
+}
+
+#define TO_EXABYTES(x)	((x) / EXABYTES(1))
+#define TO_PETABYTES(x)	((x) / PETABYTES(1))
+#define TO_TERABYTES(x)	((x) / TERABYTES(1))
+#define TO_GIGABYTES(x)	((x) / GIGABYTES(1))
+#define TO_MEGABYTES(x)	((x) / MEGABYTES(1))
+#define TO_KILOBYTES(x)	((x) / KILOBYTES(1))
+
+/*
+ * Format value as a human-readable size into str (capacity size bytes),
+ * e.g. "1.500 KiB", "2 MiB" or "512 bytes": the largest binary unit not
+ * exceeding value is used with three decimals, and a ".000" fraction is
+ * dropped.  str is set to "" if size cannot hold a digit plus the suffix.
+ */
+void cvtstr(double value, char *str, size_t size)
+{
+	static const struct { int shift; const char *suffix; } units[] = {
+		{ 60, " EiB" }, { 50, " PiB" }, { 40, " TiB" },
+		{ 30, " GiB" }, { 20, " MiB" }, { 10, " KiB" },
+	};
+	const char	*suffix = " bytes";
+	int		precision = 6;	/* same as plain "%f" for byte counts */
+	char		*trim;
+	size_t		i;
+
+	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
+		if (value >= (double)(1LL << units[i].shift)) {
+			value /= (double)(1LL << units[i].shift);
+			suffix = units[i].suffix;
+			precision = 3;
+			break;
+		}
+	}
+	/* size - strlen(suffix) would wrap around for tiny buffers. */
+	if (size <= strlen(suffix)) {
+		if (size)
+			str[0] = '\0';
+		return;
+	}
+	snprintf(str, size - strlen(suffix), "%.*f", precision, value);
+	trim = strstr(str, ".000");
+	if (trim) {
+		strcpy(trim, suffix);
+	} else {
+		strcat(str, suffix);
+	}
+}
+
+/* Return t1 - t2, borrowing a second if the microsecond difference is < 0. */
+struct timeval tsub(struct timeval t1, struct timeval t2)
+{
+	struct timeval	diff = { .tv_sec = t1.tv_sec - t2.tv_sec,
+				 .tv_usec = t1.tv_usec - t2.tv_usec };
+	if (diff.tv_usec < 0) {
+		diff.tv_usec += 1000000;
+		diff.tv_sec--;
+	}
+	return diff;
+}
+
+double tdiv(double value, struct timeval tv)
+{
+	double	seconds = (double)tv.tv_sec + ((double)tv.tv_usec / 1000000.0);
+	return value / seconds;	/* value per second of tv; no zero check */
+}
+
+#define HOURS(sec)	((sec) / (60 * 60))
+#define MINUTES(sec)	(((sec) % (60 * 60)) / 60)
+#define SECONDS(sec)	((sec) % 60)
+
+/*
+ * Format the time in *tv into ts (capacity size bytes):
+ *   TERSE_FIXED_TIME and less than an hour        -> "M:SS.hh"
+ *   VERBOSE_FIXED_TIME, terse with hours, or a
+ *   non-zero seconds field                        -> "H:MM:SS.hh"
+ *   otherwise                                     -> "0.NNNN sec"
+ * where hh is hundredths and NNNN ten-thousandths of a second.
+ */
+void timestr(struct timeval *tv, char *ts, size_t size, int format)
+{
+	double		frac = (double)tv->tv_usec / 1000000.0;
+	unsigned int	mins = (unsigned int) MINUTES(tv->tv_sec);
+	unsigned int	secs = (unsigned int) SECONDS(tv->tv_sec);
+
+	if ((format & TERSE_FIXED_TIME) && !HOURS(tv->tv_sec)) {
+		snprintf(ts, size, "%u:%02u.%02u",
+			 mins, secs, (unsigned int) (frac * 100));
+		return;
+	}
+	if (format & TERSE_FIXED_TIME)
+		format |= VERBOSE_FIXED_TIME;	/* fallback if hours needed */
+
+	if (!(format & VERBOSE_FIXED_TIME) && !tv->tv_sec) {
+		snprintf(ts, size, "0.%04u sec", (unsigned int) (frac * 10000));
+		return;
+	}
+	snprintf(ts, size, "%u:%02u:%02u.%02u",
+		 (unsigned int) HOURS(tv->tv_sec), mins, secs,
+		 (unsigned int) (frac * 100));
+}
+
+
+/* from libxcmd/quit.c */
+
+static cmdinfo_t quit_cmd;
+
+/* ARGSUSED */
+/* Handler for "quit": ignores its arguments and returns 1, the non-zero
+ * result that makes command_loop() stop. */
+static int quit_f(int argc, char **argv)
+{
+	(void)argc;
+	(void)argv;
+	return 1;
+}
+
+/* Fill in the static "quit"/"q" command entry and register it. */
+void quit_init(void)
+{
+	quit_cmd = (cmdinfo_t) {
+		.name = _("quit"),
+		.altname = _("q"),
+		.cfunc = quit_f,
+		.argmin = -1, .argmax = -1,
+		.flags = CMD_FLAG_GLOBAL,
+		.oneline = _("exit the program"),
+	};
+	add_command(&quit_cmd);
+}
+
+/* from libxcmd/help.c */
+
+static cmdinfo_t help_cmd;
+static void help_onecmd(const char *cmd, const cmdinfo_t *ct);
+static void help_oneline(const char *cmd, const cmdinfo_t *ct);
+
+/* Print the one-line summary of every registered command, then a hint. */
+static void help_all(void)
+{
+	int	i;
+
+	for (i = 0; i < ncmds; i++)
+		help_oneline(cmdtab[i].name, &cmdtab[i]);
+	printf(_("\nUse 'help commandname' for extended help.\n"));
+}
+
+/*
+ * Handler for "help": with no argument list every command, otherwise print
+ * the help for argv[1] or report that it is unknown.  Always returns 0.
+ */
+static int help_f(int argc, char **argv)
+{
+	const cmdinfo_t	*ct;
+
+	if (argc == 1) {
+		help_all();
+		return 0;
+	}
+	ct = find_command(argv[1]);
+	if (ct != NULL)
+		help_onecmd(argv[1], ct);
+	else
+		printf(_("command %s not found\n"), argv[1]);
+	return 0;
+}
+
+/*
+ * Print ct's summary line under the name cmd, then its extended help, if any.
+ */
+static void help_onecmd(const char *cmd, const cmdinfo_t *ct)
+{
+	help_oneline(cmd, ct);
+	if (ct->help != NULL)
+		ct->help();
+}
+
+/*
+ * Print "<name> [<args>] -- <oneline>" for ct.  With cmd == NULL the primary
+ * name is shown, followed by "(or <altname>)" when ct has an alias.
+ */
+static void help_oneline(const char *cmd, const cmdinfo_t *ct)
+{
+	const char	*shown = cmd ? cmd : ct->name;
+
+	printf("%s ", shown);
+	if (cmd == NULL && ct->altname != NULL)
+		printf("(or %s) ", ct->altname);
+
+	if (ct->args)
+		printf("%s ", ct->args);
+	printf("-- %s\n", ct->oneline);
+}
+
+/* Fill in the static "help"/"?" command entry and register it. */
+void help_init(void)
+{
+	help_cmd = (cmdinfo_t) {
+		.name = _("help"),
+		.altname = _("?"),
+		.cfunc = help_f,
+		.argmin = 0, .argmax = 1,
+		.flags = CMD_FLAG_GLOBAL,
+		.args = _("[command]"),
+		.oneline = _("help for one or all commands"),
+	};
+	add_command(&help_cmd);
+}
--- a/cmd.h
+++ b/cmd.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __COMMAND_H__
+#define __COMMAND_H__
+
+#define CMD_FLAG_GLOBAL	((int)0x80000000)	/* don't iterate "args" */
+
+typedef int (*cfunc_t)(int argc, char **argv);
+typedef void (*helpfunc_t)(void);
+
+typedef struct cmdinfo {
+	const char	*name;		/* primary command name */
+	const char	*altname;	/* optional alias; may be NULL */
+	cfunc_t		cfunc;		/* handler; non-zero result ends command_loop() */
+	int		argmin;		/* minimum argument count, not counting argv[0] */
+	int		argmax;		/* maximum argument count; -1 means no limit */
+	int		canpush;	/* NOTE(review): not referenced by cmd.c */
+	int		flags;		/* CMD_FLAG_* bits */
+	const char	*args;		/* argument synopsis for help; may be NULL */
+	const char	*oneline;	/* one-line description for help */
+	helpfunc_t      help;		/* optional extended-help printer; may be NULL */
+} cmdinfo_t;
+
+extern cmdinfo_t	*cmdtab;
+extern int		ncmds;
+
+void help_init(void);
+void quit_init(void);
+
+typedef int (*argsfunc_t)(int index);
+typedef int (*checkfunc_t)(const cmdinfo_t *ci);
+
+void add_command(const cmdinfo_t *ci);
+void add_user_command(char *optarg);
+void add_args_command(argsfunc_t af);
+void add_check_command(checkfunc_t cf);
+
+const cmdinfo_t *find_command(const char *cmd);
+
+void command_loop(void);
+int command_usage(const cmdinfo_t *ci);
+int command(const cmdinfo_t *ci, int argc, char **argv);
+
+/* from input.h */
+char **breakline(char *input, int *count);
+void doneline(char *input, char **vec);
+char *fetchline(void);
+
+long long cvtnum(char *s);
+void cvtstr(double value, char *str, size_t sz);
+
+struct timeval tsub(struct timeval t1, struct timeval t2);
+double tdiv(double value, struct timeval tv);
+
+enum {
+	DEFAULT_TIME		= 0x0,
+	TERSE_FIXED_TIME	= 0x1,
+	VERBOSE_FIXED_TIME	= 0x2
+};
+
+void timestr(struct timeval *tv, char *str, size_t sz, int flags);
+
+extern char *progname;
+
+#endif	/* __COMMAND_H__ */
--- a/453
+++ b/453
@@ -123,8 +123,7 @@ interp_prefix="/usr/gnemul/qemu-%M"
 static="no"
 cross_prefix=""
 audio_drv_list=""
-block_drv_rw_whitelist=""
-block_drv_ro_whitelist=""
+block_drv_whitelist=""
 host_cc="cc"
 libs_softmmu=""
 libs_tools=""
@@ -155,6 +154,7 @@ curl=""
 curses=""
 docs=""
 fdt=""
+nptl=""
 pixman=""
 sdl=""
 virtfs=""
@@ -179,7 +179,6 @@ xfs=""
 vhost_net="no"
 vhost_scsi="no"
 kvm="no"
-rdma=""
 gprof="no"
 debug_tcg="no"
 debug="no"
@@ -231,13 +230,12 @@ libusb=""
 usb_redir=""
 glx=""
 zlib="yes"
-guest_agent=""
+guest_agent="yes"
 want_tools="yes"
 libiscsi=""
 coroutine=""
 seccomp=""
 glusterfs=""
-glusterfs_discard="no"
 virtio_blk_data_plane=""
 gtk=""
 gtkabi="2.0"
@@ -387,8 +385,6 @@ elif check_define __s390__ ; then
  fi
 elif check_define __arm__ ; then
  cpu="arm"
-elif check_define __aarch64__ ; then
-  cpu="aarch64"
 elif check_define __hppa__ ; then
  cpu="hppa"
 else
@@ -411,9 +407,6 @@ case "$cpu" in
  armv*b|armv*l|arm)
    cpu="arm"
  ;;
-  aarch64)
-    cpu="aarch64"
-  ;;
  hppa|parisc|parisc64)
    cpu="hppa"
  ;;
@@ -553,15 +546,13 @@ Haiku)
  if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
    audio_possible_drivers="$audio_possible_drivers fmod"
  fi
-  QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES"
+  QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers $QEMU_INCLUDES"
 ;;
 esac

 if [ "$bsd" = "yes" ] ; then
  if [ "$darwin" != "yes" ] ; then
-    if [ "$targetos" != "FreeBSD" ]; then
-      usb="bsd"
-    fi
+    usb="bsd"
    bsd_user="yes"
  fi
 fi
@@ -596,7 +587,7 @@ EOF
  qemu_docdir="\${prefix}"
  bindir="\${prefix}"
  sysconfdir="\${prefix}"
-  local_statedir=
+  local_statedir="\${prefix}"
  confsuffix=""
  libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
 fi
@@ -717,9 +708,7 @@ for opt do
  ;;
  --audio-drv-list=*) audio_drv_list="$optarg"
  ;;
-  --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=`echo "$optarg" | sed -e 's/,/ /g'`
-  ;;
-  --block-drv-ro-whitelist=*) block_drv_ro_whitelist=`echo "$optarg" | sed -e 's/,/ /g'`
+  --block-drv-whitelist=*) block_drv_whitelist=`echo "$optarg" | sed -e 's/,/ /g'`
  ;;
  --enable-debug-tcg) debug_tcg="yes"
  ;;
@@ -855,6 +844,10 @@ for opt do
  ;;
  --enable-fdt) fdt="yes"
  ;;
+  --disable-nptl) nptl="no"
+  ;;
+  --enable-nptl) nptl="yes"
+  ;;
  --enable-mixemu) mixemu="yes"
  ;;
  --disable-linux-aio) linux_aio="no"
@@ -933,10 +926,6 @@ for opt do
  ;;
  --enable-gtk) gtk="yes"
  ;;
-  --enable-rdma) rdma="yes"
-  ;;
-  --disable-rdma) rdma="no"
-  ;;
  --with-gtkabi=*) gtkabi="$optarg"
  ;;
  --enable-tpm) tpm="yes"
@@ -985,22 +974,78 @@ EXTRA_CFLAGS="$CPU_CFLAGS $EXTRA_CFLAGS"

 default_target_list=""

-mak_wilds=""
-
-if [ "$softmmu" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/*-softmmu.mak"
+# these targets are portable
+if [ "$softmmu" = "yes" ] ; then
+    default_target_list="\
+i386-softmmu \
+x86_64-softmmu \
+alpha-softmmu \
+arm-softmmu \
+cris-softmmu \
+lm32-softmmu \
+m68k-softmmu \
+microblaze-softmmu \
+microblazeel-softmmu \
+mips-softmmu \
+mipsel-softmmu \
+mips64-softmmu \
+mips64el-softmmu \
+moxie-softmmu \
+or32-softmmu \
+ppc-softmmu \
+ppcemb-softmmu \
+ppc64-softmmu \
+sh4-softmmu \
+sh4eb-softmmu \
+sparc-softmmu \
+sparc64-softmmu \
+s390x-softmmu \
+xtensa-softmmu \
+xtensaeb-softmmu \
+unicore32-softmmu \
+"
 fi
-if [ "$linux_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/*-linux-user.mak"
+# the following are Linux specific
+if [ "$linux_user" = "yes" ] ; then
+    default_target_list="${default_target_list}\
+i386-linux-user \
+x86_64-linux-user \
+alpha-linux-user \
+arm-linux-user \
+armeb-linux-user \
+cris-linux-user \
+m68k-linux-user \
+microblaze-linux-user \
+microblazeel-linux-user \
+mips-linux-user \
+mipsel-linux-user \
+mips64-linux-user \
+mips64el-linux-user \
+mipsn32-linux-user \
+mipsn32el-linux-user \
+or32-linux-user \
+ppc-linux-user \
+ppc64-linux-user \
+ppc64abi32-linux-user \
+sh4-linux-user \
+sh4eb-linux-user \
+sparc-linux-user \
+sparc64-linux-user \
+sparc32plus-linux-user \
+unicore32-linux-user \
+s390x-linux-user \
+"
 fi
-if [ "$bsd_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/*-bsd-user.mak"
+# the following are BSD specific
+if [ "$bsd_user" = "yes" ] ; then
+    default_target_list="${default_target_list}\
+i386-bsd-user \
+x86_64-bsd-user \
+sparc-bsd-user \
+sparc64-bsd-user \
+"
 fi

-for config in $mak_wilds; do
-    default_target_list="${default_target_list} $(basename "$config" .mak)"
-done
-
 if test x"$show_help" = x"yes" ; then
 cat << EOF

@@ -1037,7 +1082,7 @@ echo "  --docdir=PATH            install documentation in PATH$confsuffix"
 echo "  --bindir=PATH            install binaries in PATH"
 echo "  --libdir=PATH            install libraries in PATH"
 echo "  --sysconfdir=PATH        install config in PATH$confsuffix"
-echo "  --localstatedir=PATH     install local state in PATH (set at runtime on win32)"
+echo "  --localstatedir=PATH     install local state in PATH"
 echo "  --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix]"
 echo "  --enable-debug-tcg       enable TCG debugging"
 echo "  --disable-debug-tcg      disable TCG debugging (default)"
@@ -1060,12 +1105,7 @@ echo "  --disable-cocoa          disable Cocoa (Mac OS X only)"
 echo "  --enable-cocoa           enable Cocoa (default on Mac OS X)"
 echo "  --audio-drv-list=LIST    set audio drivers list:"
 echo "                           Available drivers: $audio_possible_drivers"
-echo "  --block-drv-whitelist=L  Same as --block-drv-rw-whitelist=L"
-echo "  --block-drv-rw-whitelist=L"
-echo "                           set block driver read-write whitelist"
-echo "                           (affects only QEMU, not qemu-img)"
-echo "  --block-drv-ro-whitelist=L"
-echo "                           set block driver read-only whitelist"
+echo "  --block-drv-whitelist=L  set block driver whitelist"
 echo "                           (affects only QEMU, not qemu-img)"
 echo "  --enable-mixemu          enable mixer emulation"
 echo "  --disable-xen            disable xen backend driver support"
@@ -1095,9 +1135,9 @@ echo "  --enable-bluez           enable bluez stack connectivity"
 echo "  --disable-slirp          disable SLIRP userspace network connectivity"
 echo "  --disable-kvm            disable KVM acceleration support"
 echo "  --enable-kvm             enable KVM acceleration support"
-echo "  --disable-rdma           disable RDMA-based migration support"
-echo "  --enable-rdma            enable RDMA-based migration support"
 echo "  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
+echo "  --disable-nptl           disable usermode NPTL support"
+echo "  --enable-nptl            enable usermode NPTL support"
 echo "  --enable-system          enable all system emulation targets"
 echo "  --disable-system         disable all system emulation targets"
 echo "  --enable-user            enable supported user emulation targets"
@@ -1359,19 +1399,6 @@ if test -z "${target_list+xxx}" ; then
 else
    target_list=`echo "$target_list" | sed -e 's/,/ /g'`
 fi
-
-# Check that we recognised the target name; this allows a more
-# friendly error message than if we let it fall through.
-for target in $target_list; do
-    case " $default_target_list " in
-        *" $target "*)
-            ;;
-        *)
-            error_exit "Unknown target name '$target'"
-            ;;
-    esac
-done
-
 # see if system emulation was really requested
 case " $target_list " in
  *"-softmmu "*) softmmu=yes
@@ -1432,7 +1459,7 @@ fi
 ##########################################
 # NPTL probe

-if test "$linux_user" = "yes"; then
+if test "$nptl" != "no" ; then
  cat > $TMPC <<EOF
 #include <sched.h>
 #include <linux/futex.h>
@@ -1443,8 +1470,14 @@ int main(void) {
  return 0;
 }
 EOF
-  if ! compile_object ; then
-    feature_not_found "nptl"
+
+  if compile_object ; then
+    nptl=yes
+  else
+    if test "$nptl" = "yes" ; then
+      feature_not_found "nptl"
+    fi
+    nptl=no
  fi
 fi

@@ -1469,7 +1502,7 @@ libs_softmmu="$libs_softmmu -lz"
 # libseccomp check

 if test "$seccomp" != "no" ; then
-    if $pkg_config --atleast-version=2.1.0 libseccomp --modversion >/dev/null 2>&1; then
+    if $pkg_config --atleast-version=1.0.0 libseccomp --modversion >/dev/null 2>&1; then
        libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`"
        QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`"
 	seccomp="yes"
@@ -1692,23 +1725,19 @@ if test "$gtk" != "no"; then
      vtepackage="vte"
      vteversion="0.24.0"
    fi
-    if ! $pkg_config --exists "$gtkpackage >= $gtkversion"; then
-        if test "$gtk" = "yes" ; then
-            feature_not_found "gtk"
-        fi
-        gtk="no"
-    elif ! $pkg_config --exists "$vtepackage >= $vteversion"; then
-        if test "$gtk" = "yes" ; then
-            error_exit "libvte not found (required for gtk support)"
-        fi
-        gtk="no"
-    else
+    if $pkg_config --exists "$gtkpackage >= $gtkversion" && \
+       $pkg_config --exists "$vtepackage >= $vteversion"; then
 	gtk_cflags=`$pkg_config --cflags $gtkpackage 2>/dev/null`
 	gtk_libs=`$pkg_config --libs $gtkpackage 2>/dev/null`
 	vte_cflags=`$pkg_config --cflags $vtepackage 2>/dev/null`
 	vte_libs=`$pkg_config --libs $vtepackage 2>/dev/null`
 	libs_softmmu="$gtk_libs $vte_libs $libs_softmmu"
 	gtk="yes"
+    else
+	if test "$gtk" = "yes" ; then
+	    feature_not_found "gtk"
+	fi
+	gtk="no"
    fi
 fi

@@ -1795,30 +1824,6 @@ EOF
  libs_softmmu="$sdl_libs $libs_softmmu"
 fi

-##########################################
-# RDMA needs OpenFabrics libraries
-if test "$rdma" != "no" ; then
-  cat > $TMPC <<EOF
-#include <rdma/rdma_cma.h>
-int main(void) { return 0; }
-EOF
-  rdma_libs="-lrdmacm -libverbs"
-  if compile_prog "" "$rdma_libs" ; then
-    rdma="yes"
-    libs_softmmu="$libs_softmmu $rdma_libs"
-  else
-    if test "$rdma" = "yes" ; then
-        error_exit \
-            " OpenFabrics librdmacm/libibverbs not present." \
-            " Your options:" \
-            "  (1) Fast: Install infiniband packages from your distro." \
-            "  (2) Cleanest: Install libraries from www.openfabrics.org" \
-            "  (3) Also: Install softiwarp if you don't have RDMA hardware"
-    fi
-    rdma="no"
-  fi
-fi
-
 ##########################################
 # VNC TLS/WS detection
 if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
@@ -2148,12 +2153,13 @@ fi

 ##########################################
 # curses probe
-if test "$curses" != "no" ; then
-  if test "$mingw32" = "yes" ; then
+if test "$mingw32" = "yes" ; then
    curses_list="-lpdcurses"
-  else
-    curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses"
-  fi
+else
+    curses_list="-lncurses:-lcurses:$($pkg_config --libs ncurses 2>/dev/null)"
+fi
+
+if test "$curses" != "no" ; then
  curses_found=no
  cat > $TMPC << EOF
 #include <curses.h>
@@ -2185,12 +2191,14 @@ fi

 ##########################################
 # curl probe
+
+if $pkg_config libcurl --modversion >/dev/null 2>&1; then
+  curlconfig="$pkg_config libcurl"
+else
+  curlconfig=curl-config
+fi
+
 if test "$curl" != "no" ; then
-  if $pkg_config libcurl --modversion >/dev/null 2>&1; then
-    curlconfig="$pkg_config libcurl"
-  else
-    curlconfig=curl-config
-  fi
  cat > $TMPC << EOF
 #include <curl/curl.h>
 int main(void) { curl_easy_init(); curl_multi_setopt(0, 0, 0); return 0; }
@@ -2509,31 +2517,9 @@ fi

 ##########################################
 # fdt probe
-# fdt support is mandatory for at least some target architectures,
-# so insist on it if we're building those system emulators.
-fdt_required=no
-for target in $target_list; do
-  case $target in
-    arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
-      fdt_required=yes
-    ;;
-  esac
-done
-
-if test "$fdt_required" = "yes"; then
-  if test "$fdt" = "no"; then
-    error_exit "fdt disabled but some requested targets require it." \
-      "You can turn off fdt only if you also disable all the system emulation" \
-      "targets which need it (by specifying a cut down --target-list)."
-  fi
-  fdt=yes
-fi
-
 if test "$fdt" != "no" ; then
  fdt_libs="-lfdt"
-  # explicitly check for libfdt_env.h as it is missing in some stable installs
  cat > $TMPC << EOF
-#include <libfdt_env.h>
 int main(void) { return 0; }
 EOF
  if compile_prog "" "$fdt_libs" ; then
@@ -2552,7 +2538,7 @@ EOF
    fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
  elif test "$fdt" = "yes" ; then
    # have neither and want - prompt for system/submodule install
-    error_exit "DTC not present. Your options:" \
+    error_exit "ERROR: DTC not present. Your options:" \
        "  (1) Preferred: Install the DTC devel package" \
        "  (2) Fetch the DTC submodule, using:" \
        "      git submodule update --init dtc"
@@ -2589,24 +2575,49 @@ fi
 ##########################################
 # glusterfs probe
 if test "$glusterfs" != "no" ; then
-  if $pkg_config --atleast-version=3 glusterfs-api >/dev/null 2>&1; then
-    glusterfs="yes"
-    glusterfs_cflags=`$pkg_config --cflags glusterfs-api 2>/dev/null`
-    glusterfs_libs=`$pkg_config --libs glusterfs-api 2>/dev/null`
-    CFLAGS="$CFLAGS $glusterfs_cflags"
+  cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+    (void) glfs_new("volume");
+    return 0;
+}
+EOF
+  glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+  if compile_prog "" "$glusterfs_libs" ; then
+    glusterfs=yes
    libs_tools="$glusterfs_libs $libs_tools"
    libs_softmmu="$glusterfs_libs $libs_softmmu"
-    if $pkg_config --atleast-version=5 glusterfs-api >/dev/null 2>&1; then
-      glusterfs_discard="yes"
-    fi
  else
    if test "$glusterfs" = "yes" ; then
      feature_not_found "GlusterFS backend support"
    fi
-    glusterfs="no"
+    glusterfs=no
  fi
 fi

+#
+# Check for xxxat() functions when we are building linux-user
+# emulator.  This is done because older glibc versions don't
+# have syscall stubs for these implemented.
+#
+atfile=no
+cat > $TMPC << EOF
+#define _ATFILE_SOURCE
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int
+main(void)
+{
+	/* try to unlink nonexisting file */
+	return (unlinkat(AT_FDCWD, "nonexistent_file", 0));
+}
+EOF
+if compile_prog "" "" ; then
+  atfile=yes
+fi
+
 # Check for inotify functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
 # have syscall stubs for these implemented.  In that case we
@@ -3350,7 +3361,6 @@ __uint128_t b;
 int main (void) {
  a = a + b;
  b = a * b;
-  a = a * a;
  return 0;
 }
 EOF
@@ -3444,15 +3454,10 @@ if test "$softmmu" = yes ; then
      virtfs=no
    fi
  fi
-fi
-if [ "$guest_agent" != "no" ]; then
  if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
+    if [ "$guest_agent" = "yes" ]; then
      tools="qemu-ga\$(EXESUF) $tools"
-      guest_agent=yes
-  elif [ "$guest_agent" != yes ]; then
-      guest_agent=no
-  else
-      error_exit "Guest agent is not supported on this platform"
+    fi
  fi
 fi

@@ -3471,36 +3476,6 @@ if test "$cpu" = "s390x" ; then
  roms="$roms s390-ccw"
 fi

-# Probe for the need for relocating the user-only binary.
-if test "$pie" = "no" ; then
-  textseg_addr=
-  case "$cpu" in
-    arm | hppa | i386 | m68k | ppc | ppc64 | s390* | sparc | sparc64 | x86_64)
-      textseg_addr=0x60000000
-      ;;
-    mips)
-      textseg_addr=0x400000
-      ;;
-  esac
-  if [ -n "$textseg_addr" ]; then
-    cat > $TMPC <<EOF
-    int main(void) { return 0; }
-EOF
-    textseg_ldflags="-Wl,-Ttext-segment=$textseg_addr"
-    if ! compile_prog "" "$textseg_ldflags"; then
-      # In case ld does not support -Ttext-segment, edit the default linker
-      # script via sed to set the .text start addr.  This is needed on FreeBSD
-      # at least.
-      $ld --verbose | sed \
-        -e '1,/==================================================/d' \
-        -e '/==================================================/,$d' \
-        -e "s/[.] = [0-9a-fx]* [+] SIZEOF_HEADERS/. = $textseg_addr + SIZEOF_HEADERS/" \
-        -e "s/__executable_start = [0-9a-fx]*/__executable_start = $textseg_addr/" > config-host.ld
-      textseg_ldflags="-Wl,-T../config-host.ld"
-    fi
-  fi
-fi
-
 # add pixman flags after all config tests are done
 QEMU_CFLAGS="$QEMU_CFLAGS $pixman_cflags $fdt_cflags"
 libs_softmmu="$libs_softmmu $pixman_libs"
@@ -3512,12 +3487,10 @@ echo "library directory `eval echo $libdir`"
 echo "libexec directory `eval echo $libexecdir`"
 echo "include directory `eval echo $includedir`"
 echo "config directory  `eval echo $sysconfdir`"
-if test "$mingw32" = "no" ; then
 echo "local state directory   `eval echo $local_statedir`"
+if test "$mingw32" = "no" ; then
 echo "Manual directory  `eval echo $mandir`"
 echo "ELF interp prefix $interp_prefix"
-else
-echo "local state directory   queried at runtime"
 fi
 echo "Source path       $source_path"
 echo "C compiler        $cc"
@@ -3552,8 +3525,7 @@ echo "curses support    $curses"
 echo "curl support      $curl"
 echo "mingw32 support   $mingw32"
 echo "Audio drivers     $audio_drv_list"
-echo "Block whitelist (rw) $block_drv_rw_whitelist"
-echo "Block whitelist (ro) $block_drv_ro_whitelist"
+echo "Block whitelist   $block_drv_whitelist"
 echo "Mixer emulation   $mixemu"
 echo "VirtFS support    $virtfs"
 echo "VNC support       $vnc"
@@ -3573,6 +3545,7 @@ echo "bluez  support    $bluez"
 echo "Documentation     $docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
+echo "NPTL support      $nptl"
 echo "GUEST_BASE        $guest_base"
 echo "PIE               $pie"
 echo "vde support       $vde"
@@ -3580,7 +3553,6 @@ echo "Linux AIO support $linux_aio"
 echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
-echo "RDMA support      $rdma"
 echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
 echo "preadv support    $preadv"
@@ -3619,6 +3591,7 @@ echo "-> Your SDL version is too old - please upgrade to have SDL support"
 fi

 config_host_mak="config-host.mak"
+config_host_ld="config-host.ld"

 echo "# Automatically generated by configure - do not modify" >config-all-disas.mak

@@ -3638,9 +3611,7 @@ echo "sysconfdir=$sysconfdir" >> $config_host_mak
 echo "qemu_confdir=$qemu_confdir" >> $config_host_mak
 echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
 echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
-if test "$mingw32" = "no" ; then
-  echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
-fi
+echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
 echo "qemu_helperdir=$libexecdir" >> $config_host_mak
 echo "extra_cflags=$EXTRA_CFLAGS" >> $config_host_mak
 echo "extra_ldflags=$EXTRA_LDFLAGS" >> $config_host_mak
@@ -3650,7 +3621,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
 echo "ARCH=$ARCH" >> $config_host_mak

 case "$cpu" in
-  arm|i386|x86_64|ppc|aarch64)
+  arm|i386|x86_64|ppc)
    # The TCG interpreter currently does not support ld/st optimization.
    if test "$tcg_interpreter" = "no" ; then
        echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak
@@ -3733,8 +3704,7 @@ fi
 if test "$audio_win_int" = "yes" ; then
  echo "CONFIG_AUDIO_WIN_INT=y" >> $config_host_mak
 fi
-echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak
-echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak
+echo "CONFIG_BDRV_WHITELIST=$block_drv_whitelist" >> $config_host_mak
 if test "$mixemu" = "yes" ; then
  echo "CONFIG_MIXEMU=y" >> $config_host_mak
 fi
@@ -3784,6 +3754,9 @@ fi
 if test "$curses" = "yes" ; then
  echo "CONFIG_CURSES=y" >> $config_host_mak
 fi
+if test "$atfile" = "yes" ; then
+  echo "CONFIG_ATFILE=y" >> $config_host_mak
+fi
 if test "$utimens" = "yes" ; then
  echo "CONFIG_UTIMENSAT=y" >> $config_host_mak
 fi
@@ -3991,10 +3964,6 @@ if test "$glusterfs" = "yes" ; then
  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
 fi

-if test "$glusterfs_discard" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
-fi
-
 if test "$libssh2" = "yes" ; then
  echo "CONFIG_LIBSSH2=y" >> $config_host_mak
 fi
@@ -4069,10 +4038,6 @@ if test "$trace_default" = "yes"; then
  echo "CONFIG_TRACE_DEFAULT=y" >> $config_host_mak
 fi

-if test "$rdma" = "yes" ; then
-  echo "CONFIG_RDMA=y" >> $config_host_mak
-fi
-
 if test "$tcg_interpreter" = "yes"; then
  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
@@ -4137,6 +4102,18 @@ if test "$gcov" = "yes" ; then
  echo "GCOV=$gcov_tool" >> $config_host_mak
 fi

+# generate list of library paths for linker script
+
+$ld --verbose -v 2> /dev/null | grep SEARCH_DIR > ${config_host_ld}
+
+if test -f ${config_host_ld}~ ; then
+  if cmp -s $config_host_ld ${config_host_ld}~ ; then
+    mv ${config_host_ld}~ $config_host_ld
+  else
+    rm ${config_host_ld}~
+  fi
+fi
+
 # use included Linux headers
 if test "$linux" = "yes" ; then
  mkdir -p linux-headers
@@ -4150,9 +4127,6 @@ if test "$linux" = "yes" ; then
  s390x)
    linux_arch=s390
    ;;
-  aarch64)
-    linux_arch=arm64
-    ;;
  *)
    # For most CPUs the kernel architecture name and QEMU CPU name match.
    linux_arch="$cpu"
@@ -4167,10 +4141,10 @@ fi
 for target in $target_list; do
 target_dir="$target"
 config_target_mak=$target_dir/config-target.mak
-target_name=`echo $target | cut -d '-' -f 1`
+target_arch2=`echo $target | cut -d '-' -f 1`
 target_bigendian="no"

-case "$target_name" in
+case "$target_arch2" in
  armeb|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
  target_bigendian=yes
  ;;
@@ -4180,17 +4154,17 @@ target_user_only="no"
 target_linux_user="no"
 target_bsd_user="no"
 case "$target" in
-  ${target_name}-softmmu)
+  ${target_arch2}-softmmu)
    target_softmmu="yes"
    ;;
-  ${target_name}-linux-user)
+  ${target_arch2}-linux-user)
    if test "$linux" != "yes" ; then
      error_exit "Target '$target' is only available on a Linux host"
    fi
    target_user_only="yes"
    target_linux_user="yes"
    ;;
-  ${target_name}-bsd-user)
+  ${target_arch2}-bsd-user)
    if test "$bsd" != "yes" ; then
      error_exit "Target '$target' is only available on a BSD host"
    fi
@@ -4207,27 +4181,31 @@ mkdir -p $target_dir
 echo "# Automatically generated by configure - do not modify" > $config_target_mak

 bflt="no"
-interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_name/g"`
+target_nptl="no"
+interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_arch2/g"`
 gdb_xml_files=""

-TARGET_ARCH="$target_name"
+TARGET_ARCH="$target_arch2"
 TARGET_BASE_ARCH=""
 TARGET_ABI_DIR=""

-case "$target_name" in
+case "$target_arch2" in
  i386)
  ;;
  x86_64)
    TARGET_BASE_ARCH=i386
  ;;
  alpha)
+    target_nptl="yes"
  ;;
  arm|armeb)
    TARGET_ARCH=arm
    bflt="yes"
+    target_nptl="yes"
    gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
  ;;
  cris)
+    target_nptl="yes"
  ;;
  lm32)
  ;;
@@ -4238,10 +4216,12 @@ case "$target_name" in
  microblaze|microblazeel)
    TARGET_ARCH=microblaze
    bflt="yes"
+    target_nptl="yes"
  ;;
  mips|mipsel)
    TARGET_ARCH=mips
    echo "TARGET_ABI_MIPSO32=y" >> $config_target_mak
+    target_nptl="yes"
  ;;
  mipsn32|mipsn32el)
    TARGET_ARCH=mips64
@@ -4262,11 +4242,13 @@ case "$target_name" in
  ;;
  ppc)
    gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
+    target_nptl="yes"
  ;;
  ppcemb)
    TARGET_BASE_ARCH=ppc
    TARGET_ABI_DIR=ppc
    gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
+    target_nptl="yes"
  ;;
  ppc64)
    TARGET_BASE_ARCH=ppc
@@ -4283,6 +4265,7 @@ case "$target_name" in
  sh4|sh4eb)
    TARGET_ARCH=sh4
    bflt="yes"
+    target_nptl="yes"
  ;;
  sparc)
  ;;
@@ -4296,6 +4279,7 @@ case "$target_name" in
    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  s390x)
+    target_nptl="yes"
  ;;
  unicore32)
  ;;
@@ -4317,15 +4301,17 @@ upper() {
    echo "$@"| LC_ALL=C tr '[a-z]' '[A-Z]'
 }

+echo "TARGET_ARCH=$TARGET_ARCH" >> $config_target_mak
 target_arch_name="`upper $TARGET_ARCH`"
 echo "TARGET_$target_arch_name=y" >> $config_target_mak
-echo "TARGET_NAME=$target_name" >> $config_target_mak
+echo "TARGET_ARCH2=$target_arch2" >> $config_target_mak
+echo "TARGET_TYPE=TARGET_TYPE_`upper $target_arch2`" >> $config_target_mak
 echo "TARGET_BASE_ARCH=$TARGET_BASE_ARCH" >> $config_target_mak
 if [ "$TARGET_ABI_DIR" = "" ]; then
  TARGET_ABI_DIR=$TARGET_ARCH
 fi
 echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak
-case "$target_name" in
+case "$target_arch2" in
  i386|x86_64)
    if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
      echo "CONFIG_XEN=y" >> $config_target_mak
@@ -4336,28 +4322,36 @@ case "$target_name" in
    ;;
  *)
 esac
-case "$target_name" in
+case "$target_arch2" in
  arm|i386|x86_64|ppcemb|ppc|ppc64|s390x)
    # Make sure the target and host cpus are compatible
    if test "$kvm" = "yes" -a "$target_softmmu" = "yes" -a \
-      \( "$target_name" = "$cpu" -o \
-      \( "$target_name" = "ppcemb" -a "$cpu" = "ppc" \) -o \
-      \( "$target_name" = "ppc64"  -a "$cpu" = "ppc" \) -o \
-      \( "$target_name" = "ppc"    -a "$cpu" = "ppc64" \) -o \
-      \( "$target_name" = "ppcemb" -a "$cpu" = "ppc64" \) -o \
-      \( "$target_name" = "x86_64" -a "$cpu" = "i386"   \) -o \
-      \( "$target_name" = "i386"   -a "$cpu" = "x86_64" \) \) ; then
+      \( "$target_arch2" = "$cpu" -o \
+      \( "$target_arch2" = "ppcemb" -a "$cpu" = "ppc" \) -o \
+      \( "$target_arch2" = "ppc64"  -a "$cpu" = "ppc" \) -o \
+      \( "$target_arch2" = "ppc"    -a "$cpu" = "ppc64" \) -o \
+      \( "$target_arch2" = "ppcemb" -a "$cpu" = "ppc64" \) -o \
+      \( "$target_arch2" = "x86_64" -a "$cpu" = "i386"   \) -o \
+      \( "$target_arch2" = "i386"   -a "$cpu" = "x86_64" \) \) ; then
      echo "CONFIG_KVM=y" >> $config_target_mak
      if test "$vhost_net" = "yes" ; then
        echo "CONFIG_VHOST_NET=y" >> $config_target_mak
      fi
    fi
 esac
+case "$target_arch2" in
+  i386|x86_64)
+    echo "CONFIG_HAVE_GET_MEMORY_MAPPING=y" >> $config_target_mak
+esac
 if test "$target_bigendian" = "yes" ; then
  echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
 if test "$target_softmmu" = "yes" ; then
  echo "CONFIG_SOFTMMU=y" >> $config_target_mak
+  case "$target_arch2" in
+    i386|x86_64)
+      echo "CONFIG_HAVE_CORE_DUMP=y" >> $config_target_mak
+  esac
 fi
 if test "$target_user_only" = "yes" ; then
  echo "CONFIG_USER_ONLY=y" >> $config_target_mak
@@ -4377,6 +4371,10 @@ fi
 if test "$target_user_only" = "yes" -a "$bflt" = "yes"; then
  echo "TARGET_HAS_BFLT=y" >> $config_target_mak
 fi
+if test "$target_user_only" = "yes" \
+        -a "$nptl" = "yes" -a "$target_nptl" = "yes"; then
+  echo "CONFIG_USE_NPTL=y" >> $config_target_mak
+fi
 if test "$target_user_only" = "yes" -a "$guest_base" = "yes"; then
  echo "CONFIG_USE_GUEST_BASE=y" >> $config_target_mak
 fi
@@ -4485,8 +4483,21 @@ if test "$gprof" = "yes" ; then
  fi
 fi

+if test "$ARCH" = "tci"; then
+  linker_script=""
+else
+  linker_script="-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/ldscripts/\$(ARCH).ld"
+fi
+
 if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
-  ldflags="$ldflags $textseg_ldflags"
+  case "$ARCH" in
+  alpha | s390x)
+    # The default placement of the application is fine.
+    ;;
+  *)
+    ldflags="$linker_script $ldflags"
+    ;;
+  esac
 fi

 echo "LDFLAGS+=$ldflags" >> $config_target_mak
@@ -4498,22 +4509,18 @@ if [ "$pixman" = "internal" ]; then
  echo "config-host.h: subdir-pixman" >> $config_host_mak
 fi

-if test "$rdma" = "yes" ; then
-echo "CONFIG_RDMA=y" >> $config_host_mak
-fi
-
 if [ "$dtc_internal" = "yes" ]; then
  echo "config-host.h: subdir-dtc" >> $config_host_mak
 fi

 # build tree in object directory in case the source is not in the current directory
-DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa"
+DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 DIRS="$DIRS qapi-generated"
 FILES="Makefile tests/tcg/Makefile qdict-test-data.txt"
 FILES="$FILES tests/tcg/cris/Makefile tests/tcg/cris/.gdbinit"
-FILES="$FILES tests/tcg/lm32/Makefile tests/tcg/xtensa/Makefile po/Makefile"
+FILES="$FILES tests/tcg/lm32/Makefile po/Makefile"
 FILES="$FILES pc-bios/optionrom/Makefile pc-bios/keymaps"
 FILES="$FILES pc-bios/spapr-rtas/Makefile"
 FILES="$FILES pc-bios/s390-ccw/Makefile"
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -59,14 +59,8 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
-        CPUClass *cc = CPU_GET_CLASS(cpu);
        TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
-        if (cc->synchronize_from_tb) {
-            cc->synchronize_from_tb(cpu, tb);
-        } else {
-            assert(cc->set_pc);
-            cc->set_pc(cpu, tb->pc);
-        }
+        cpu_pc_from_tb(env, tb);
    }
    if ((next_tb & TB_EXIT_MASK) == TB_EXIT_REQUESTED) {
        /* We were asked to stop executing TBs (probably a pending
@@ -219,12 +213,12 @@ int cpu_exec(CPUArchState *env)
        cpu->halted = 0;
    }

-    current_cpu = cpu;
+    cpu_single_env = env;

-    /* As long as current_cpu is null, up to the assignment just above,
+    /* As long as cpu_single_env is null, up to the assignment just above,
     * requests by other threads to exit the execution loop are expected to
     * be issued using the exit_request global. We must make sure that our
-     * evaluation of the global value is performed past the current_cpu
+     * evaluation of the global value is performed past the cpu_single_env
     * value transition point, which requires a memory barrier as well as
     * an instruction scheduling constraint on modern architectures.  */
    smp_mb();
@@ -236,7 +230,7 @@ int cpu_exec(CPUArchState *env)
 #if defined(TARGET_I386)
    /* put eflags in CPU temporary format */
    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
-    env->df = 1 - (2 * ((env->eflags >> 10) & 1));
+    DF = 1 - (2 * ((env->eflags >> 10) & 1));
    CC_OP = CC_OP_EFLAGS;
    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_SPARC)
@@ -297,7 +291,7 @@ int cpu_exec(CPUArchState *env)
            for(;;) {
                interrupt_request = cpu->interrupt_request;
                if (unlikely(interrupt_request)) {
-                    if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
+                    if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) {
                        /* Mask out external interrupts for this step. */
                        interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
                    }
@@ -337,7 +331,7 @@ int cpu_exec(CPUArchState *env)
                            cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
                                                          0);
                            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
-                            do_smm_enter(x86_env_get_cpu(env));
+                            do_smm_enter(env);
                            next_tb = 0;
                        } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
                                   !(env->hflags2 & HF2_NMI_MASK)) {
@@ -583,15 +577,15 @@ int cpu_exec(CPUArchState *env)
                if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
                    /* restore flags in standard format */
 #if defined(TARGET_I386)
-                    log_cpu_state(cpu, CPU_DUMP_CCOP);
+                    log_cpu_state(env, CPU_DUMP_CCOP);
 #elif defined(TARGET_M68K)
                    cpu_m68k_flush_flags(env, env->cc_op);
                    env->cc_op = CC_OP_FLAGS;
                    env->sr = (env->sr & 0xffe0)
                              | env->cc_dest | (env->cc_x << 4);
-                    log_cpu_state(cpu, 0);
+                    log_cpu_state(env, 0);
 #else
-                    log_cpu_state(cpu, 0);
+                    log_cpu_state(env, 0);
 #endif
                }
 #endif /* DEBUG_DISAS */
@@ -679,8 +673,7 @@ int cpu_exec(CPUArchState *env)
        } else {
            /* Reload env after longjmp - the compiler may have smashed all
             * local variables as longjmp is marked 'noreturn'. */
-            cpu = current_cpu;
-            env = cpu->env_ptr;
+            env = cpu_single_env;
        }
    } /* for(;;) */

@@ -688,7 +681,7 @@ int cpu_exec(CPUArchState *env)
 #if defined(TARGET_I386)
    /* restore flags in standard format */
    env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
-        | (env->df & DF_MASK);
+        | (DF & DF_MASK);
 #elif defined(TARGET_ARM)
    /* XXX: Save/restore host fpu exception state?.  */
 #elif defined(TARGET_UNICORE32)
@@ -714,7 +707,7 @@ int cpu_exec(CPUArchState *env)
 #error unsupported target CPU
 #endif

-    /* fail safe : never use current_cpu outside cpu_exec() */
-    current_cpu = NULL;
+    /* fail safe : never use cpu_single_env outside cpu_exec() */
+    cpu_single_env = NULL;
    return ret;
 }
--- a/cpus.c
+++ b/cpus.c
@@ -60,23 +60,20 @@

 #endif /* CONFIG_LINUX */

-static CPUState *next_cpu;
+static CPUArchState *next_cpu;

-bool cpu_is_stopped(CPUState *cpu)
+static bool cpu_thread_is_idle(CPUArchState *env)
 {
-    return cpu->stopped || !runstate_is_running();
-}
+    CPUState *cpu = ENV_GET_CPU(env);

-static bool cpu_thread_is_idle(CPUState *cpu)
-{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
-    if (cpu_is_stopped(cpu)) {
+    if (cpu->stopped || !runstate_is_running()) {
        return true;
    }
    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
-        kvm_halt_in_kernel()) {
+        kvm_async_interrupts_enabled()) {
        return false;
    }
    return true;
@@ -84,10 +81,10 @@ static bool cpu_thread_is_idle(CPUState *cpu)

 static bool all_cpu_threads_idle(void)
 {
-    CPUState *cpu;
+    CPUArchState *env;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
-        if (!cpu_thread_is_idle(cpu)) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (!cpu_thread_is_idle(env)) {
            return false;
        }
    }
@@ -117,17 +114,16 @@ typedef struct TimersState {
    int64_t dummy;
 } TimersState;

-static TimersState timers_state;
+TimersState timers_state;

 /* Return the virtual CPU time, based on the instruction counter.  */
 int64_t cpu_get_icount(void)
 {
    int64_t icount;
-    CPUState *cpu = current_cpu;
+    CPUArchState *env = cpu_single_env;

    icount = qemu_icount;
-    if (cpu) {
-        CPUArchState *env = cpu->env_ptr;
+    if (env) {
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
@@ -393,15 +389,17 @@ void configure_icount(const char *option)
 void hw_error(const char *fmt, ...)
 {
    va_list ap;
+    CPUArchState *env;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu = ENV_GET_CPU(env);
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
-        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
+        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
@@ -409,7 +407,7 @@ void hw_error(const char *fmt, ...)

 void cpu_synchronize_all_states(void)
 {
-    CPUState *cpu;
+    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_state(cpu);
@@ -418,38 +416,38 @@ void cpu_synchronize_all_states(void)

 void cpu_synchronize_all_post_reset(void)
 {
-    CPUState *cpu;
+    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
-        cpu_synchronize_post_reset(cpu);
+        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
    }
 }

 void cpu_synchronize_all_post_init(void)
 {
-    CPUState *cpu;
+    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
-        cpu_synchronize_post_init(cpu);
+        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
    }
 }

-static int do_vm_stop(RunState state)
+bool cpu_is_stopped(CPUState *cpu)
 {
-    int ret = 0;
+    return !runstate_is_running() || cpu->stopped;
+}

+static void do_vm_stop(RunState state)
+{
    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
+        bdrv_drain_all();
+        bdrv_flush_all();
        monitor_protocol_event(QEVENT_STOP, NULL);
    }
-
-    bdrv_drain_all();
-    ret = bdrv_flush_all();
-
-    return ret;
 }

 static bool cpu_can_run(CPUState *cpu)
@@ -457,23 +455,25 @@ static bool cpu_can_run(CPUState *cpu)
    if (cpu->stop) {
        return false;
    }
-    if (cpu_is_stopped(cpu)) {
+    if (cpu->stopped || !runstate_is_running()) {
        return false;
    }
    return true;
 }

-static void cpu_handle_guest_debug(CPUState *cpu)
+static void cpu_handle_guest_debug(CPUArchState *env)
 {
-    gdb_set_stop_cpu(cpu);
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    cpu->stopped = true;
 }

 static void cpu_signal(int sig)
 {
-    if (current_cpu) {
-        cpu_exit(current_cpu);
+    if (cpu_single_env) {
+        cpu_exit(cpu_single_env);
    }
    exit_request = 1;
 }
@@ -570,7 +570,7 @@ static void dummy_signal(int sig)
 {
 }

-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
+static void qemu_kvm_init_cpu_signals(CPUArchState *env)
 {
    int r;
    sigset_t set;
@@ -583,7 +583,7 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
-    r = kvm_set_signal_mask(cpu, &set);
+    r = kvm_set_signal_mask(env, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
@@ -605,7 +605,7 @@ static void qemu_tcg_init_cpu_signals(void)
 }

 #else /* _WIN32 */
-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
+static void qemu_kvm_init_cpu_signals(CPUArchState *env)
 {
    abort();
 }
@@ -653,7 +653,6 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)

    wi.func = func;
    wi.data = data;
-    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
@@ -665,38 +664,13 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)

    qemu_cpu_kick(cpu);
    while (!wi.done) {
-        CPUState *self_cpu = current_cpu;
+        CPUArchState *self_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
-        current_cpu = self_cpu;
+        cpu_single_env = self_env;
    }
 }

-void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
-{
-    struct qemu_work_item *wi;
-
-    if (qemu_cpu_is_self(cpu)) {
-        func(data);
-        return;
-    }
-
-    wi = g_malloc0(sizeof(struct qemu_work_item));
-    wi->func = func;
-    wi->data = data;
-    wi->free = true;
-    if (cpu->queued_work_first == NULL) {
-        cpu->queued_work_first = wi;
-    } else {
-        cpu->queued_work_last->next = wi;
-    }
-    cpu->queued_work_last = wi;
-    wi->next = NULL;
-    wi->done = false;
-
-    qemu_cpu_kick(cpu);
-}
-
 static void flush_queued_work(CPUState *cpu)
 {
    struct qemu_work_item *wi;
@@ -709,9 +683,6 @@ static void flush_queued_work(CPUState *cpu)
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
-        if (wi->free) {
-            g_free(wi);
-        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
@@ -730,7 +701,7 @@ static void qemu_wait_io_event_common(CPUState *cpu)

 static void qemu_tcg_wait_io_event(void)
 {
-    CPUState *cpu;
+    CPUArchState *env;

    while (all_cpu_threads_idle()) {
       /* Start accounting real time to the virtual clock if the CPUs
@@ -743,14 +714,16 @@ static void qemu_tcg_wait_io_event(void)
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
-        qemu_wait_io_event_common(cpu);
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        qemu_wait_io_event_common(ENV_GET_CPU(env));
    }
 }

-static void qemu_kvm_wait_io_event(CPUState *cpu)
+static void qemu_kvm_wait_io_event(CPUArchState *env)
 {
-    while (cpu_thread_is_idle(cpu)) {
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    while (cpu_thread_is_idle(env)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

@@ -760,13 +733,14 @@ static void qemu_kvm_wait_io_event(CPUState *cpu)

 static void *qemu_kvm_cpu_thread_fn(void *arg)
 {
-    CPUState *cpu = arg;
+    CPUArchState *env = arg;
+    CPUState *cpu = ENV_GET_CPU(env);
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
-    current_cpu = cpu;
+    cpu_single_env = env;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
@@ -774,7 +748,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
        exit(1);
    }

-    qemu_kvm_init_cpu_signals(cpu);
+    qemu_kvm_init_cpu_signals(env);

    /* signal CPU creation */
    cpu->created = true;
@@ -782,12 +756,12 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)

    while (1) {
        if (cpu_can_run(cpu)) {
-            r = kvm_cpu_exec(cpu);
+            r = kvm_cpu_exec(env);
            if (r == EXCP_DEBUG) {
-                cpu_handle_guest_debug(cpu);
+                cpu_handle_guest_debug(env);
            }
        }
-        qemu_kvm_wait_io_event(cpu);
+        qemu_kvm_wait_io_event(env);
    }

    return NULL;
@@ -799,7 +773,8 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
 #else
-    CPUState *cpu = arg;
+    CPUArchState *env = arg;
+    CPUState *cpu = ENV_GET_CPU(env);
    sigset_t waitset;
    int r;

@@ -814,9 +789,9 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

-    current_cpu = cpu;
+    cpu_single_env = env;
    while (1) {
-        current_cpu = NULL;
+        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
@@ -827,7 +802,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
            exit(1);
        }
        qemu_mutex_lock_iothread();
-        current_cpu = cpu;
+        cpu_single_env = env;
        qemu_wait_io_event_common(cpu);
    }

@@ -846,6 +821,7 @@ static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
 static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
    CPUState *cpu = arg;
+    CPUArchState *env;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);
@@ -855,12 +831,12 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
-    while (first_cpu->stopped) {
+    while (ENV_GET_CPU(first_cpu)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
-        for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
-            qemu_wait_io_event_common(cpu);
+        for (env = first_cpu; env != NULL; env = env->next_cpu) {
+            qemu_wait_io_event_common(ENV_GET_CPU(env));
        }
    }

@@ -926,11 +902,12 @@ void qemu_cpu_kick(CPUState *cpu)
 void qemu_cpu_kick_self(void)
 {
 #ifndef _WIN32
-    assert(current_cpu);
+    assert(cpu_single_env);
+    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);

-    if (!current_cpu->thread_kicked) {
-        qemu_cpu_kick_thread(current_cpu);
-        current_cpu->thread_kicked = true;
+    if (!cpu_single_cpu->thread_kicked) {
+        qemu_cpu_kick_thread(cpu_single_cpu);
+        cpu_single_cpu->thread_kicked = true;
    }
 #else
    abort();
@@ -944,7 +921,7 @@ bool qemu_cpu_is_self(CPUState *cpu)

 static bool qemu_in_vcpu_thread(void)
 {
-    return current_cpu && qemu_cpu_is_self(current_cpu);
+    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
 }

 void qemu_mutex_lock_iothread(void)
@@ -954,7 +931,7 @@ void qemu_mutex_lock_iothread(void)
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_thread(first_cpu);
+            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
@@ -969,13 +946,14 @@ void qemu_mutex_unlock_iothread(void)

 static int all_vcpus_paused(void)
 {
-    CPUState *cpu = first_cpu;
+    CPUArchState *penv = first_cpu;

-    while (cpu) {
-        if (!cpu->stopped) {
+    while (penv) {
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        if (!pcpu->stopped) {
            return 0;
        }
-        cpu = cpu->next_cpu;
+        penv = penv->next_cpu;
    }

    return 1;
@@ -983,23 +961,25 @@ static int all_vcpus_paused(void)

 void pause_all_vcpus(void)
 {
-    CPUState *cpu = first_cpu;
+    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, false);
-    while (cpu) {
-        cpu->stop = true;
-        qemu_cpu_kick(cpu);
-        cpu = cpu->next_cpu;
+    while (penv) {
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        pcpu->stop = true;
+        qemu_cpu_kick(pcpu);
+        penv = penv->next_cpu;
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
-            cpu = first_cpu;
-            while (cpu) {
-                cpu->stop = false;
-                cpu->stopped = true;
-                cpu = cpu->next_cpu;
+            penv = first_cpu;
+            while (penv) {
+                CPUState *pcpu = ENV_GET_CPU(penv);
+                pcpu->stop = false;
+                pcpu->stopped = true;
+                penv = penv->next_cpu;
            }
            return;
        }
@@ -1007,10 +987,10 @@ void pause_all_vcpus(void)

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
-        cpu = first_cpu;
-        while (cpu) {
-            qemu_cpu_kick(cpu);
-            cpu = cpu->next_cpu;
+        penv = first_cpu;
+        while (penv) {
+            qemu_cpu_kick(ENV_GET_CPU(penv));
+            penv = penv->next_cpu;
        }
    }
 }
@@ -1024,12 +1004,13 @@ void cpu_resume(CPUState *cpu)

 void resume_all_vcpus(void)
 {
-    CPUState *cpu = first_cpu;
+    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, true);
-    while (cpu) {
-        cpu_resume(cpu);
-        cpu = cpu->next_cpu;
+    while (penv) {
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        cpu_resume(pcpu);
+        penv = penv->next_cpu;
    }
 }

@@ -1056,55 +1037,63 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
    }
 }

-static void qemu_kvm_start_vcpu(CPUState *cpu)
+static void qemu_kvm_start_vcpu(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
-    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
+    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
 }

-static void qemu_dummy_start_vcpu(CPUState *cpu)
+static void qemu_dummy_start_vcpu(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
-    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
+    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
 }

-void qemu_init_vcpu(CPUState *cpu)
+void qemu_init_vcpu(void *_env)
 {
+    CPUArchState *env = _env;
+    CPUState *cpu = ENV_GET_CPU(env);
+
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
-        qemu_kvm_start_vcpu(cpu);
+        qemu_kvm_start_vcpu(env);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
-        qemu_dummy_start_vcpu(cpu);
+        qemu_dummy_start_vcpu(env);
    }
 }

 void cpu_stop_current(void)
 {
-    if (current_cpu) {
-        current_cpu->stop = false;
-        current_cpu->stopped = true;
-        cpu_exit(current_cpu);
+    if (cpu_single_env) {
+        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
+        cpu_single_cpu->stop = false;
+        cpu_single_cpu->stopped = true;
+        cpu_exit(cpu_single_env);
        qemu_cond_signal(&qemu_pause_cond);
    }
 }

-int vm_stop(RunState state)
+void vm_stop(RunState state)
 {
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
@@ -1113,23 +1102,19 @@ int vm_stop(RunState state)
         * vm_stop() has been requested.
         */
        cpu_stop_current();
-        return 0;
+        return;
    }
-
-    return do_vm_stop(state);
+    do_vm_stop(state);
 }

 /* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
-int vm_stop_force_state(RunState state)
+void vm_stop_force_state(RunState state)
 {
    if (runstate_is_running()) {
-        return vm_stop(state);
+        vm_stop(state);
    } else {
        runstate_set(state);
-        /* Make sure to return an error if the flush in a previous vm_stop()
-         * failed. */
-        return bdrv_flush_all();
    }
 }

@@ -1182,16 +1167,16 @@ static void tcg_exec_all(void)
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
-        CPUState *cpu = next_cpu;
-        CPUArchState *env = cpu->env_ptr;
+        CPUArchState *env = next_cpu;
+        CPUState *cpu = ENV_GET_CPU(env);

        qemu_clock_enable(vm_clock,
-                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
-                cpu_handle_guest_debug(cpu);
+                cpu_handle_guest_debug(env);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
@@ -1203,10 +1188,12 @@ static void tcg_exec_all(void)

 void set_numa_modes(void)
 {
+    CPUArchState *env;
    CPUState *cpu;
    int i;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu = ENV_GET_CPU(env);
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
                cpu->numa_node = i;
@@ -1226,30 +1213,18 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 CpuInfoList *qmp_query_cpus(Error **errp)
 {
    CpuInfoList *head = NULL, *cur_item = NULL;
-    CPUState *cpu;
+    CPUArchState *env;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *info;
-#if defined(TARGET_I386)
-        X86CPU *x86_cpu = X86_CPU(cpu);
-        CPUX86State *env = &x86_cpu->env;
-#elif defined(TARGET_PPC)
-        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
-        CPUPPCState *env = &ppc_cpu->env;
-#elif defined(TARGET_SPARC)
-        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
-        CPUSPARCState *env = &sparc_cpu->env;
-#elif defined(TARGET_MIPS)
-        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
-        CPUMIPSState *env = &mips_cpu->env;
-#endif

-        cpu_synchronize_state(cpu);
+        cpu_synchronize_state(env);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
-        info->value->current = (cpu == first_cpu);
+        info->value->current = (env == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
 #if defined(TARGET_I386)
@@ -1285,6 +1260,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
 {
    FILE *f;
    uint32_t l;
+    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

@@ -1298,10 +1274,11 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                  "a CPU number");
        return;
    }
+    env = cpu->env_ptr;

    f = fopen(filename, "wb");
    if (!f) {
-        error_setg_file_open(errp, errno, filename);
+        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

@@ -1309,7 +1286,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
        l = sizeof(buf);
        if (l > size)
            l = size;
-        cpu_memory_rw_debug(cpu, addr, buf, l, 0);
+        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
@@ -1331,7 +1308,7 @@ void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,

    f = fopen(filename, "wb");
    if (!f) {
-        error_setg_file_open(errp, errno, filename);
+        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

@@ -1355,14 +1332,11 @@ exit:
 void qmp_inject_nmi(Error **errp)
 {
 #if defined(TARGET_I386)
-    CPUState *cs;
-
-    for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
-        X86CPU *cpu = X86_CPU(cs);
-        CPUX86State *env = &cpu->env;
+    CPUArchState *env;

+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!env->apic_state) {
-            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
+            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
--- a/cputlb.c
+++ b/cputlb.c
@@ -158,17 +158,6 @@ void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
    }
 }

-static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
-{
-    ram_addr_t ram_addr;
-
-    if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
-        fprintf(stderr, "Bad ram pointer %p\n", ptr);
-        abort();
-    }
-    return ram_addr;
-}
-
 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
 {
    ram_addr_t ram_addr;
@@ -186,13 +175,11 @@ static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)

 void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
 {
-    CPUState *cpu;
    CPUArchState *env;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int mmu_idx;

-        env = cpu->env_ptr;
        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
            unsigned int i;

@@ -261,37 +248,36 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te;
-    hwaddr iotlb, xlat, sz;
+    hwaddr iotlb;

    assert(size >= TARGET_PAGE_SIZE);
    if (size != TARGET_PAGE_SIZE) {
        tlb_add_large_page(env, vaddr, size);
    }
-
-    sz = size;
-    section = address_space_translate_for_iotlb(&address_space_memory, paddr,
-                                                &xlat, &sz);
-    assert(sz >= TARGET_PAGE_SIZE);
-
+    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
 #if defined(DEBUG_TLB)
    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
-           " prot=%x idx=%d\n",
-           vaddr, paddr, prot, mmu_idx);
+           " prot=%x idx=%d pd=0x%08lx\n",
+           vaddr, paddr, prot, mmu_idx, pd);
 #endif

    address = vaddr;
-    if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) {
-        /* IO memory case */
+    if (!(memory_region_is_ram(section->mr) ||
+          memory_region_is_romd(section->mr))) {
+        /* IO memory case (romd handled later) */
        address |= TLB_MMIO;
-        addend = 0;
+    }
+    if (memory_region_is_ram(section->mr) ||
+        memory_region_is_romd(section->mr)) {
+        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr)
+        + memory_region_section_addr(section, paddr);
    } else {
-        /* TLB_MMIO for rom/romd handled below */
-        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
+        addend = 0;
    }

    code_address = address;
-    iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, xlat,
-                                            prot, &address);
+    iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, prot,
+                                            &address);

    index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    env->iotlb[mmu_idx][index] = iotlb - vaddr;
@@ -314,7 +300,9 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
            /* Write access calls the I/O callback.  */
            te->addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
-                   && !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
+                   && !cpu_physical_memory_is_dirty(
+                           section->mr->ram_addr
+                           + memory_region_section_addr(section, paddr))) {
            te->addr_write = address | TLB_NOTDIRTY;
        } else {
            te->addr_write = address;
@@ -344,15 +332,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(pd);
    if (memory_region_is_unassigned(mr)) {
-        CPUState *cpu = ENV_GET_CPU(env1);
-        CPUClass *cc = CPU_GET_CLASS(cpu);
-
-        if (cc->do_unassigned_access) {
-            cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
-        } else {
-            cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
-                      TARGET_FMT_lx "\n", addr);
-        }
+#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
+        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
+#else
+        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
+                  TARGET_FMT_lx "\n", addr);
+#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
--- a/default-configs/alpha-softmmu.mak
+++ b/default-configs/alpha-softmmu.mak
@@ -14,4 +14,3 @@ CONFIG_VMWARE_VGA=y
 CONFIG_IDE_CMD646=y
 CONFIG_I8259=y
 CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -34,9 +34,9 @@ CONFIG_PFLASH_CFI02=y
 CONFIG_MICRODRIVE=y
 CONFIG_USB_MUSB=y

+CONFIG_ARM9MPCORE=y
 CONFIG_ARM11MPCORE=y
-CONFIG_A9MPCORE=y
-CONFIG_A15MPCORE=y
+CONFIG_ARM15MPCORE=y

 CONFIG_ARM_GIC=y
 CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -28,11 +28,13 @@ CONFIG_APPLESMC=y
 CONFIG_I8259=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_TPM_TIS=$(CONFIG_TPM)
-CONFIG_PCI_HOTPLUG_OLD=y
+CONFIG_PCI_HOTPLUG=y
 CONFIG_MC146818RTC=y
 CONFIG_PAM=y
 CONFIG_PCI_PIIX=y
+CONFIG_PCI_HOTPLUG=y
 CONFIG_WDT_IB700=y
+CONFIG_PC_SYSFW=y
 CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -3,6 +3,7 @@
 include pci.mak
 include sound.mak
 include usb.mak
+CONFIG_ISA_MMIO=y
 CONFIG_ESP=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
@@ -33,5 +34,3 @@ CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_VT82C686=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_EMPTY_SLOT=y
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -3,6 +3,7 @@
 include pci.mak
 include sound.mak
 include usb.mak
+CONFIG_ISA_MMIO=y
 CONFIG_ESP=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
@@ -33,5 +34,3 @@ CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_VT82C686=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_EMPTY_SLOT=y
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -3,6 +3,7 @@
 include pci.mak
 include sound.mak
 include usb.mak
+CONFIG_ISA_MMIO=y
 CONFIG_ESP=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
@@ -35,5 +36,3 @@ CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_VT82C686=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_EMPTY_SLOT=y
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -3,6 +3,7 @@
 include pci.mak
 include sound.mak
 include usb.mak
+CONFIG_ISA_MMIO=y
 CONFIG_ESP=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
@@ -33,5 +34,3 @@ CONFIG_I8259=y
 CONFIG_JAZZ_LED=y
 CONFIG_MC146818RTC=y
 CONFIG_VT82C686=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_EMPTY_SLOT=y
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -29,4 +29,3 @@ CONFIG_SERIAL_PCI=y
 CONFIG_IPACK=y
 CONFIG_WDT_IB6300ESB=y
 CONFIG_PCI_TESTDEV=y
-CONFIG_NVME_PCI=y
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -42,8 +42,6 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
-CONFIG_E500=y
-CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_E500=$(CONFIG_FDT)
 # For PReP
 CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -42,17 +42,9 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
-CONFIG_PSERIES=y
-CONFIG_E500=y
-CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_PSERIES=$(CONFIG_FDT)
+CONFIG_E500=$(CONFIG_FDT)
 # For pSeries
-CONFIG_XICS=$(CONFIG_PSERIES)
+CONFIG_PCI_HOTPLUG=y
 # For PReP
-CONFIG_I82378=y
-CONFIG_I8259=y
-CONFIG_I8254=y
-CONFIG_PCSPK=y
-CONFIG_I82374=y
-CONFIG_I8257=y
 CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -37,8 +37,6 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_OPENPIC=y
-CONFIG_E500=y
-CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_E500=$(CONFIG_FDT)
 # For PReP
 CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/sh4-softmmu.mak
+++ b/default-configs/sh4-softmmu.mak
@@ -5,14 +5,7 @@ include usb.mak
 CONFIG_SERIAL=y
 CONFIG_PTIMER=y
 CONFIG_PFLASH_CFI02=y
+CONFIG_ISA_MMIO=y
 CONFIG_SH4=y
 CONFIG_IDE_MMIO=y
 CONFIG_SM501=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_I82378=y
-CONFIG_I8259=y
-CONFIG_I8254=y
-CONFIG_PCSPK=y
-CONFIG_I82374=y
-CONFIG_I8257=y
-CONFIG_MC146818RTC=y
--- a/default-configs/sh4eb-softmmu.mak
+++ b/default-configs/sh4eb-softmmu.mak
@@ -5,14 +5,7 @@ include usb.mak
 CONFIG_SERIAL=y
 CONFIG_PTIMER=y
 CONFIG_PFLASH_CFI02=y
+CONFIG_ISA_MMIO=y
 CONFIG_SH4=y
 CONFIG_IDE_MMIO=y
 CONFIG_SM501=y
-CONFIG_ISA_TESTDEV=y
-CONFIG_I82378=y
-CONFIG_I8259=y
-CONFIG_I8254=y
-CONFIG_PCSPK=y
-CONFIG_I82374=y
-CONFIG_I8257=y
-CONFIG_MC146818RTC=y
--- a/default-configs/sparc64-softmmu.mak
+++ b/default-configs/sparc64-softmmu.mak
@@ -15,4 +15,3 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_CMD646=y
 CONFIG_PCI_APB=y
 CONFIG_MC146818RTC=y
-CONFIG_ISA_TESTDEV=y
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -28,11 +28,13 @@ CONFIG_APPLESMC=y
 CONFIG_I8259=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_TPM_TIS=$(CONFIG_TPM)
-CONFIG_PCI_HOTPLUG_OLD=y
+CONFIG_PCI_HOTPLUG=y
 CONFIG_MC146818RTC=y
 CONFIG_PAM=y
 CONFIG_PCI_PIIX=y
+CONFIG_PCI_HOTPLUG=y
 CONFIG_WDT_IB700=y
+CONFIG_PC_SYSFW=y
 CONFIG_XEN_I386=$(CONFIG_XEN)
 CONFIG_ISA_DEBUG=y
 CONFIG_ISA_TESTDEV=y
--- a/device_tree.c
+++ b/device_tree.c
@@ -21,7 +21,6 @@
 #include "config.h"
 #include "qemu-common.h"
 #include "sysemu/device_tree.h"
-#include "sysemu/sysemu.h"
 #include "hw/loader.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
@@ -214,7 +213,7 @@ uint32_t qemu_devtree_get_phandle(void *fdt, const char *path)
    uint32_t r;

    r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
-    if (r == 0) {
+    if (r <= 0) {
        fprintf(stderr, "%s: Couldn't get phandle for %s: %s\n", __func__,
                path, fdt_strerror(r));
        exit(1);
@@ -240,8 +239,15 @@ uint32_t qemu_devtree_alloc_phandle(void *fdt)
     * which phandle id to start allocting phandles.
     */
    if (!phandle) {
-        phandle = qemu_opt_get_number(qemu_get_machine_opts(),
-                                      "phandle_start", 0);
+        QemuOpts *machine_opts;
+        machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+        if (machine_opts) {
+            const char *phandle_start;
+            phandle_start = qemu_opt_get(machine_opts, "phandle_start");
+            if (phandle_start) {
+                phandle = strtoul(phandle_start, NULL, 0);
+            }
+        }
    }

    if (!phandle) {
@@ -301,43 +307,15 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)

 void qemu_devtree_dumpdtb(void *fdt, int size)
 {
-    const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb");
+    QemuOpts *machine_opts;

-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
-    }
-}
-
-int qemu_devtree_setprop_sized_cells_from_array(void *fdt,
-                                                const char *node_path,
-                                                const char *property,
-                                                int numvalues,
-                                                uint64_t *values)
-{
-    uint32_t *propcells;
-    uint64_t value;
-    int cellnum, vnum, ncells;
-    uint32_t hival;
-
-    propcells = g_new0(uint32_t, numvalues * 2);
-
-    cellnum = 0;
-    for (vnum = 0; vnum < numvalues; vnum++) {
-        ncells = values[vnum * 2];
-        if (ncells != 1 && ncells != 2) {
-            return -1;
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
+        if (dumpdtb) {
+            /* Dump the dtb to a file and quit */
+            exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
        }
-        value = values[vnum * 2 + 1];
-        hival = cpu_to_be32(value >> 32);
-        if (ncells > 1) {
-            propcells[cellnum++] = hival;
-        } else if (hival != 0) {
-            return -1;
-        }
-        propcells[cellnum++] = cpu_to_be32(value);
    }

-    return qemu_devtree_setprop(fdt, node_path, property, propcells,
-                                cellnum * sizeof(uint32_t));
 }
--- a/disas.c
+++ b/disas.c
@@ -39,7 +39,7 @@ target_read_memory (bfd_vma memaddr,
 {
    CPUDebug *s = container_of(info, CPUDebug, info);

-    cpu_memory_rw_debug(ENV_GET_CPU(s->env), memaddr, myaddr, length, 0);
+    cpu_memory_rw_debug(s->env, memaddr, myaddr, length, 0);
    return 0;
 }

@@ -392,7 +392,7 @@ monitor_read_memory (bfd_vma memaddr, bfd_byte *myaddr, int length,
    if (monitor_disas_is_physical) {
        cpu_physical_memory_read(memaddr, myaddr, length);
    } else {
-        cpu_memory_rw_debug(ENV_GET_CPU(s->env), memaddr, myaddr, length, 0);
+        cpu_memory_rw_debug(s->env, memaddr,myaddr, length, 0);
    }
    return 0;
 }
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,36 +14,41 @@

 /* #define DEBUG_IOMMU */

-int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len)
+static void do_dma_memory_set(AddressSpace *as,
+                              dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
-    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
-
 #define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    int l;
-    bool error = false;

    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-        error |= address_space_rw(as, addr, fillbuf, l, true);
+        address_space_rw(as, addr, fillbuf, l, true);
        len -= l;
        addr += l;
    }
-
-    return error;
 }

-void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
-                      AddressSpace *as)
+int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
+{
+    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+
+    if (dma_has_iommu(dma)) {
+        return iommu_dma_memory_set(dma, addr, c, len);
+    }
+    do_dma_memory_set(dma->as, addr, c, len);
+
+    return 0;
+}
+
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
    qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
-    qsg->as = as;
-    qsg->dev = dev;
-    object_ref(OBJECT(dev));
+    qsg->dma = dma;
 }

 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -60,7 +65,6 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)

 void qemu_sglist_destroy(QEMUSGList *qsg)
 {
-    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
 }
@@ -104,7 +108,7 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
-        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
+        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
@@ -152,7 +156,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
+        mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -249,7 +253,7 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        int32_t xfer = MIN(len, entry.len);
-        dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
+        dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
        ptr += xfer;
        len -= xfer;
        resid -= xfer;
@@ -273,3 +277,158 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
    bdrv_acct_start(bs, cookie, sg->size, type);
 }
+
+bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                            DMADirection dir)
+{
+    hwaddr paddr, plen;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
+            return false;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return true;
+}
+
+int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                        void *buf, dma_addr_t len, DMADirection dir)
+{
+    hwaddr paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
+            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen, dir);
+        if (err) {
+	    /*
+             * In case of failure on reads from the guest, we clean the
+             * destination buffer so that a device that doesn't test
+             * for errors will not expose qemu internal memory.
+	     */
+	    memset(buf, 0, len);
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        address_space_rw(dma->as, paddr, buf, plen, dir == DMA_DIRECTION_FROM_DEVICE);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
+                         dma_addr_t len)
+{
+    hwaddr paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_set context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen,
+                             DMA_DIRECTION_FROM_DEVICE);
+        if (err) {
+            return err;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        do_dma_memory_set(dma->as, paddr, c, plen);
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
+                      DMAMapFunc map, DMAUnmapFunc unmap)
+{
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
+            dma, translate, map, unmap);
+#endif
+    dma->as = as;
+    dma->translate = translate;
+    dma->map = map;
+    dma->unmap = unmap;
+}
+
+void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
+                           DMADirection dir)
+{
+    int err;
+    hwaddr paddr, plen;
+    void *buf;
+
+    if (dma->map) {
+        return dma->map(dma, addr, len, dir);
+    }
+
+    plen = *len;
+    err = dma->translate(dma, addr, &paddr, &plen, dir);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like address_space_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    buf = address_space_map(dma->as, paddr, &plen, dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = plen;
+
+    return buf;
+}
+
+void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
+                            DMADirection dir, dma_addr_t access_len)
+{
+    if (dma->unmap) {
+        dma->unmap(dma, buffer, len, dir, access_len);
+        return;
+    }
+
+    address_space_unmap(dma->as, buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
+                        access_len);
+
+}
--- a/docs/atomics.txt
+++ b/docs/atomics.txt
@@ -1,352 +0,0 @@
-CPUs perform independent memory operations effectively in random order.
-but this can be a problem for CPU-CPU interaction (including interactions
-between QEMU and the guest).  Multi-threaded programs use various tools
-to instruct the compiler and the CPU to restrict the order to something
-that is consistent with the expectations of the programmer.
-
-The most basic tool is locking.  Mutexes, condition variables and
-semaphores are used in QEMU, and should be the default approach to
-synchronization.  Anything else is considerably harder, but it's
-also justified more often than one would like.  The two tools that
-are provided by qemu/atomic.h are memory barriers and atomic operations.
-
-Macros defined by qemu/atomic.h fall in three camps:
-
- compiler barriers: barrier();
-
- weak atomic access and manual memory barriers: atomic_read(),
-  atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_read_barrier_depends();
-
- sequentially consistent atomic access: everything else.
-
-
-COMPILER MEMORY BARRIER
-=======================
-
-barrier() prevents the compiler from moving the memory accesses either
-side of it to the other side.  The compiler barrier has no direct effect
-on the CPU, which may then reorder things however it wishes.
-
-barrier() is mostly used within qemu/atomic.h itself.  On some
-architectures, CPU guarantees are strong enough that blocking compiler
-optimizations already ensures the correct order of execution.  In this
-case, qemu/atomic.h will reduce stronger memory barriers to simple
-compiler barriers.
-
-Still, barrier() can be useful when writing code that can be interrupted
-by signal handlers.
-
-
-SEQUENTIALLY CONSISTENT ATOMIC ACCESS
-=====================================
-
-Most of the operations in the qemu/atomic.h header ensure *sequential
-consistency*, where "the result of any execution is the same as if the
-operations of all the processors were executed in some sequential order,
-and the operations of each individual processor appear in this sequence
-in the order specified by its program".
-
-qemu/atomic.h provides the following set of atomic read-modify-write
-operations:
-
-    void atomic_inc(ptr)
-    void atomic_dec(ptr)
-    void atomic_add(ptr, val)
-    void atomic_sub(ptr, val)
-    void atomic_and(ptr, val)
-    void atomic_or(ptr, val)
-
-    typeof(*ptr) atomic_fetch_inc(ptr)
-    typeof(*ptr) atomic_fetch_dec(ptr)
-    typeof(*ptr) atomic_fetch_add(ptr, val)
-    typeof(*ptr) atomic_fetch_sub(ptr, val)
-    typeof(*ptr) atomic_fetch_and(ptr, val)
-    typeof(*ptr) atomic_fetch_or(ptr, val)
-    typeof(*ptr) atomic_xchg(ptr, val
-    typeof(*ptr) atomic_cmpxchg(ptr, old, new)
-
-all of which return the old value of *ptr.  These operations are
-polymorphic; they operate on any type that is as wide as an int.
-
-Sequentially consistent loads and stores can be done using:
-
-    atomic_fetch_add(ptr, 0) for loads
-    atomic_xchg(ptr, val) for stores
-
-However, they are quite expensive on some platforms, notably POWER and
-ARM.  Therefore, qemu/atomic.h provides two primitives with slightly
-weaker constraints:
-
-    typeof(*ptr) atomic_mb_read(ptr)
-    void         atomic_mb_set(ptr, val)
-
-The semantics of these primitives map to Java volatile variables,
-and are strongly related to memory barriers as used in the Linux
-kernel (see below).
-
-As long as you use atomic_mb_read and atomic_mb_set, accesses cannot
-be reordered with each other, and it is also not possible to reorder
-"normal" accesses around them.
-
-However, and this is the important difference between
-atomic_mb_read/atomic_mb_set and sequential consistency, it is important
-for both threads to access the same volatile variable.  It is not the
-case that everything visible to thread A when it writes volatile field f
-becomes visible to thread B after it reads volatile field g. The store
-and load have to "match" (i.e., be performed on the same volatile
-field) to achieve the right semantics.
-
-
-These operations operate on any type that is as wide as an int or smaller.
-
-
-WEAK ATOMIC ACCESS AND MANUAL MEMORY BARRIERS
-=============================================
-
-Compared to sequentially consistent atomic access, programming with
-weaker consistency models can be considerably more complicated.
-In general, if the algorithm you are writing includes both writes
-and reads on the same side, it is generally simpler to use sequentially
-consistent primitives.
-
-When using this model, variables are accessed with atomic_read() and
-atomic_set(), and restrictions to the ordering of accesses is enforced
-using the smp_rmb(), smp_wmb(), smp_mb() and smp_read_barrier_depends()
-memory barriers.
-
-atomic_read() and atomic_set() prevents the compiler from using
-optimizations that might otherwise optimize accesses out of existence
-on the one hand, or that might create unsolicited accesses on the other.
-In general this should not have any effect, because the same compiler
-barriers are already implied by memory barriers.  However, it is useful
-to do so, because it tells readers which variables are shared with
-other threads, and which are local to the current thread or protected
-by other, more mundane means.
-
-Memory barriers control the order of references to shared memory.
-They come in four kinds:
-
- smp_rmb() guarantees that all the LOAD operations specified before
-  the barrier will appear to happen before all the LOAD operations
-  specified after the barrier with respect to the other components of
-  the system.
-
-  In other words, smp_rmb() puts a partial ordering on loads, but is not
-  required to have any effect on stores.
-
- smp_wmb() guarantees that all the STORE operations specified before
-  the barrier will appear to happen before all the STORE operations
-  specified after the barrier with respect to the other components of
-  the system.
-
-  In other words, smp_wmb() puts a partial ordering on stores, but is not
-  required to have any effect on loads.
-
- smp_mb() guarantees that all the LOAD and STORE operations specified
-  before the barrier will appear to happen before all the LOAD and
-  STORE operations specified after the barrier with respect to the other
-  components of the system.
-
-  smp_mb() puts a partial ordering on both loads and stores.  It is
-  stronger than both a read and a write memory barrier; it implies both
-  smp_rmb() and smp_wmb(), but it also prevents STOREs coming before the
-  barrier from overtaking LOADs coming after the barrier and vice versa.
-
- smp_read_barrier_depends() is a weaker kind of read barrier.  On
-  most processors, whenever two loads are performed such that the
-  second depends on the result of the first (e.g., the first load
-  retrieves the address to which the second load will be directed),
-  the processor will guarantee that the first LOAD will appear to happen
-  before the second with respect to the other components of the system.
-  However, this is not always true---for example, it was not true on
-  Alpha processors.  Whenever this kind of access happens to shared
-  memory (that is not protected by a lock), a read barrier is needed,
-  and smp_read_barrier_depends() can be used instead of smp_rmb().
-
-  Note that the first load really has to have a _data_ dependency and not
-  a control dependency.  If the address for the second load is dependent
-  on the first load, but the dependency is through a conditional rather
-  than actually loading the address itself, then it's a _control_
-  dependency and a full read barrier or better is required.
-
-
-This is the set of barriers that is required *between* two atomic_read()
-and atomic_set() operations to achieve sequential consistency:
-
-                    |               2nd operation             |
-                    |-----------------------------------------|
-     1st operation  | (after last) | atomic_read | atomic_set |
-     ---------------+--------------+-------------+------------|
-     (before first) |              | none        | smp_wmb()  |
-     ---------------+--------------+-------------+------------|
-     atomic_read    | smp_rmb()    | smp_rmb()*  | **         |
-     ---------------+--------------+-------------+------------|
-     atomic_set     | none         | smp_mb()*** | smp_wmb()  |
-     ---------------+--------------+-------------+------------|
-
-       * Or smp_read_barrier_depends().
-
-      ** This requires a load-store barrier.  How to achieve this varies
-         depending on the machine, but in practice smp_rmb()+smp_wmb()
-         should have the desired effect.  For example, on PowerPC the
-         lwsync instruction is a combined load-load, load-store and
-         store-store barrier.
-
-     *** This requires a store-load barrier.  On most machines, the only
-         way to achieve this is a full barrier.
-
-
-You can see that the two possible definitions of atomic_mb_read()
-and atomic_mb_set() are the following:
-
-    1) atomic_mb_read(p)   = atomic_read(p); smp_rmb()
-       atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v); smp_mb()
-
-    2) atomic_mb_read(p)   = smp_mb() atomic_read(p); smp_rmb()
-       atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v);
-
-Usually the former is used, because smp_mb() is expensive and a program
-normally has more reads than writes.  Therefore it makes more sense to
-make atomic_mb_set() the more expensive operation.
-
-There are two common cases in which atomic_mb_read and atomic_mb_set
-generate too many memory barriers, and thus it can be useful to manually
-place barriers instead:
-
- when a data structure has one thread that is always a writer
-  and one thread that is always a reader, manual placement of
-  memory barriers makes the write side faster.  Furthermore,
-  correctness is easy to check for in this case using the "pairing"
-  trick that is explained below:
-
-     thread 1                                thread 1
-     -------------------------               ------------------------
-     (other writes)
-                                             smp_wmb()
-     atomic_mb_set(&a, x)                    atomic_set(&a, x)
-                                             smp_wmb()
-     atomic_mb_set(&b, y)                    atomic_set(&b, y)
-
-                                       =>
-     thread 2                                thread 2
-     -------------------------               ------------------------
-     y = atomic_mb_read(&b)                  y = atomic_read(&b)
-                                             smp_rmb()
-     x = atomic_mb_read(&a)                  x = atomic_read(&a)
-                                             smp_rmb()
-
- sometimes, a thread is accessing many variables that are otherwise
-  unrelated to each other (for example because, apart from the current
-  thread, exactly one other thread will read or write each of these
-  variables).  In this case, it is possible to "hoist" the implicit
-  barriers provided by atomic_mb_read() and atomic_mb_set() outside
-  a loop.  For example, the above definition atomic_mb_read() gives
-  the following transformation:
-
-     n = 0;                                  n = 0;
-     for (i = 0; i < 10; i++)          =>    for (i = 0; i < 10; i++)
-       n += atomic_mb_read(&a[i]);             n += atomic_read(&a[i]);
-                                             smp_rmb();
-
-  Similarly, atomic_mb_set() can be transformed as follows:
-  smp_mb():
-
-                                             smp_wmb();
-     for (i = 0; i < 10; i++)          =>    for (i = 0; i < 10; i++)
-       atomic_mb_set(&a[i], false);            atomic_set(&a[i], false);
-                                             smp_mb();
-
-
-The two tricks can be combined.  In this case, splitting a loop in
-two lets you hoist the barriers out of the loops _and_ eliminate the
-expensive smp_mb():
-
-                                             smp_wmb();
-     for (i = 0; i < 10; i++) {        =>    for (i = 0; i < 10; i++)
-       atomic_mb_set(&a[i], false);            atomic_set(&a[i], false);
-       atomic_mb_set(&b[i], false);          smb_wmb();
-     }                                       for (i = 0; i < 10; i++)
-                                               atomic_set(&a[i], false);
-                                             smp_mb();
-
-  The other thread can still use atomic_mb_read()/atomic_mb_set()
-
-
-Memory barrier pairing
----------------------
-
-A useful rule of thumb is that memory barriers should always, or almost
-always, be paired with another barrier.  In the case of QEMU, however,
-note that the other barrier may actually be in a driver that runs in
-the guest!
-
-For the purposes of pairing, smp_read_barrier_depends() and smp_rmb()
-both count as read barriers.  A read barriers shall pair with a write
-barrier or a full barrier; a write barrier shall pair with a read
-barrier or a full barrier.  A full barrier can pair with anything.
-For example:
-
-        thread 1             thread 2
-        ===============      ===============
-        a = 1;
-        smp_wmb();
-        b = 2;               x = b;
-                             smp_rmb();
-                             y = a;
-
-Note that the "writing" thread are accessing the variables in the
-opposite order as the "reading" thread.  This is expected: stores
-before the write barrier will normally match the loads after the
-read barrier, and vice versa.  The same is true for more than 2
-access and for data dependency barriers:
-
-        thread 1             thread 2
-        ===============      ===============
-        b[2] = 1;
-        smp_wmb();
-        x->i = 2;
-        smp_wmb();
-        a = x;               x = a;
-                             smp_read_barrier_depends();
-                             y = x->i;
-                             smp_read_barrier_depends();
-                             z = b[y];
-
-smp_wmb() also pairs with atomic_mb_read(), and smp_rmb() also pairs
-with atomic_mb_set().
-
-
-COMPARISON WITH LINUX KERNEL MEMORY BARRIERS
-============================================
-
-Here is a list of differences between Linux kernel atomic operations
-and memory barriers, and the equivalents in QEMU:
-
- atomic operations in Linux are always on a 32-bit int type and
-  use a boxed atomic_t type; atomic operations in QEMU are polymorphic
-  and use normal C types.
-
- atomic_read and atomic_set in Linux give no guarantee at all;
-  atomic_read and atomic_set in QEMU include a compiler barrier
-  (similar to the ACCESS_ONCE macro in Linux).
-
- most atomic read-modify-write operations in Linux return void;
-  in QEMU, all of them return the old value of the variable.
-
- different atomic read-modify-write operations in Linux imply
-  a different set of memory barriers; in QEMU, all of them enforce
-  sequential consistency, which means they imply full memory barriers
-  before and after the operation.
-
- Linux does not have an equivalent of atomic_mb_read() and
-  atomic_mb_set().  In particular, note that set_mb() is a little
-  weaker than atomic_mb_set().
-
-
-SOURCES
-=======
-
-* Documentation/memory-barriers.txt from the Linux kernel
-
-* "The JSR-133 Cookbook for Compiler Writers", available at
-  http://g.oswego.edu/dl/jmm/cookbook.html
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -41,7 +41,7 @@ All these four migration protocols use the same infrastructure to
 save/restore state devices.  This infrastructure is shared with the
 savevm/loadvm functionality.

-=== State Live Migration ===
+=== State Live Migration ===

 This is used for RAM and block devices.  It is not yet ported to vmstate.
 <Fill more information here>
@@ -83,7 +83,7 @@ pointer that is passed to all functions.
 The important functions for us are put_buffer()/get_buffer() that
 allow to write/read a buffer into the QEMUFile.

-=== How to save the state of one device ===
+=== How to save the state of one device ===

 The state of a device is saved using intermediate buffers.  There are
 some helper functions to assist this saving.
@@ -97,7 +97,7 @@ associated with a series of fields saved.  The save_state always saves
 the state as the newer version.  But load_state sometimes is able to
 load state from an older version.

-=== Legacy way ===
+=== Legacy way ===

 This way is going to disappear as soon as all current users are ported to VMSTATE.

@@ -133,7 +133,7 @@ to interpret that definition to be able to load/save the state.  As
 the state is declared only once, it can't go out of sync in the
 save/load functions.

-An example (from hw/input/pckbd.c)
+An example (from hw/pckbd.c)

 static const VMStateDescription vmstate_kbd = {
    .name = "pckbd",
@@ -158,9 +158,9 @@ We registered this with:
 Note: talk about how vmstate <-> qdev interact, and what the instance ids mean.

 You can search for VMSTATE_* macros for lots of types used in QEMU in
-include/hw/hw.h.
+hw/hw.h.

-=== More about versions ===
+=== More about versions ===

 You can see that there are several version fields:

@@ -227,7 +227,7 @@ using a specific functionality, ....

 It is impossible to create a way to make migration from any version to
 any other version to work.  But we can do better than only allowing
-migration from older versions to newer ones.  For that fields that are
+migration from older versions to newer ones.  For that fields that are
 only needed sometimes, we add the idea of subsections.  A subsection
 is "like" a device vmstate, but with a particularity, it has a Boolean
 function that tells if that values are needed to be sent or not.  If
@@ -247,8 +247,7 @@ static bool ide_drive_pio_state_needed(void *opaque)
 {
    IDEState *s = opaque;

-    return ((s->status & DRQ_STAT) != 0)
-        || (s->bus->error_status & BM_STATUS_PIO_RETRY);
+    return (s->status & DRQ_STAT) != 0;
 }

 const VMStateDescription vmstate_ide_drive_pio_state = {
--- a/docs/q35-chipset.cfg
+++ b/docs/q35-chipset.cfg
@@ -91,29 +91,6 @@
  port = "4"
  chassis = "4"

-##
-# Example PCIe switch with two downstream ports
-#
-#[device "pcie-switch-upstream-port-1"]
-#  driver = "x3130-upstream"
-#  bus = "ich9-pcie-port-4"
-#  addr = "00.0"
-#
-#[device "pcie-switch-downstream-port-1-1"]
-#  driver = "xio3130-downstream"
-#  multifunction = "on"
-#  bus = "pcie-switch-upstream-port-1"
-#  addr = "00.0"
-#  port = "1"
-#  chassis = "5"
-#
-#[device "pcie-switch-downstream-port-1-2"]
-#  driver = "xio3130-downstream"
-#  multifunction = "on"
-#  bus = "pcie-switch-upstream-port-1"
-#  addr = "00.1"
-#  port = "1"
-#  chassis = "6"

 [device "ich9-ehci-1"]
  driver = "ich9-usb-ehci1"
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -34,15 +34,9 @@ OrderedDicts so that ordering is preserved.
 There are two basic syntaxes used, type definitions and command definitions.

 The first syntax defines a type and is represented by a dictionary.  There are
-three kinds of user-defined types that are supported: complex types,
-enumeration types and union types.
+two kinds of types that are supported: complex user-defined types, and enums.

-Generally speaking, types definitions should always use CamelCase for the type
-names. Command names should be all lower case with words separated by a hyphen.
-
-=== Complex types ===
-
-A complex type is a dictionary containing a single key whose value is a
+A complex type is a dictionary containing a single key whose value is a
 dictionary.  This corresponds to a struct in C or an Object in JSON.  An
 example of a complex type is:

@@ -53,104 +47,13 @@ The use of '*' as a prefix to the name means the member is optional.  Optional
 members should always be added to the end of the dictionary to preserve
 backwards compatibility.

-=== Enumeration types ===
-
-An enumeration type is a dictionary containing a single key whose value is a
+An enumeration type is a dictionary containing a single key whose value is a
 list of strings.  An example enumeration is:

 { 'enum': 'MyEnum', 'data': [ 'value1', 'value2', 'value3' ] }

-=== Union types ===
-
-Union types are used to let the user choose between several different data
-types.  A union type is defined using a dictionary as explained in the
-following paragraphs.
-
-
-A simple union type defines a mapping from discriminator values to data types
-like in this example:
-
- { 'type': 'FileOptions', 'data': { 'filename': 'str' } }
- { 'type': 'Qcow2Options',
-   'data': { 'backing-file': 'str', 'lazy-refcounts': 'bool' } }
-
- { 'union': 'BlockdevOptions',
-   'data': { 'file': 'FileOptions',
-             'qcow2': 'Qcow2Options' } }
-
-In the QMP wire format, a simple union is represented by a dictionary that
-contains the 'type' field as a discriminator, and a 'data' field that is of the
-specified data type corresponding to the discriminator value:
-
- { "type": "qcow2", "data" : { "backing-file": "/some/place/my-image",
-                               "lazy-refcounts": true } }
-
-
-A union definition can specify a complex type as its base. In this case, the
-fields of the complex type are included as top-level fields of the union
-dictionary in the QMP wire format. An example definition is:
-
- { 'type': 'BlockdevCommonOptions', 'data': { 'readonly': 'bool' } }
- { 'union': 'BlockdevOptions',
-   'base': 'BlockdevCommonOptions',
-   'data': { 'raw': 'RawOptions',
-             'qcow2': 'Qcow2Options' } }
-
-And it looks like this on the wire:
-
- { "type": "qcow2",
-   "readonly": false,
-   "data" : { "backing-file": "/some/place/my-image",
-              "lazy-refcounts": true } }
-
-
-Flat union types avoid the nesting on the wire. They are used whenever a
-specific field of the base type is declared as the discriminator ('type' is
-then no longer generated). The discriminator must always be a string field.
-The above example can then be modified as follows:
-
- { 'type': 'BlockdevCommonOptions',
-   'data': { 'driver': 'str', 'readonly': 'bool' } }
- { 'union': 'BlockdevOptions',
-   'base': 'BlockdevCommonOptions',
-   'discriminator': 'driver',
-   'data': { 'raw': 'RawOptions',
-             'qcow2': 'Qcow2Options' } }
-
-Resulting in this JSON object:
-
- { "driver": "qcow2",
-   "readonly": false,
-   "backing-file": "/some/place/my-image",
-   "lazy-refcounts": true }
-
-
-A special type of unions are anonymous unions. They don't form a dictionary in
-the wire format but allow the direct use of different types in their place. As
-they aren't structured, they don't have any explicit discriminator but use
-the (QObject) data type of their value as an implicit discriminator. This means
-that they are restricted to using only one discriminator value per QObject
-type. For example, you cannot have two different complex types in an anonymous
-union, or two different integer types.
-
-Anonymous unions are declared using an empty dictionary as their discriminator.
-The discriminator values never appear on the wire, they are only used in the
-generated C code. Anonymous unions cannot have a base type.
-
- { 'union': 'BlockRef',
-   'discriminator': {},
-   'data': { 'definition': 'BlockdevOptions',
-             'reference': 'str' } }
-
-This example allows using both of the following example objects:
-
- { "file": "my_existing_block_device_id" }
- { "file": { "driver": "file",
-             "readonly": false,
-             'filename': "/tmp/mydisk.qcow2" } }
-
-
-=== Commands ===
+Generally speaking, complex types and enums should always use CamelCase for
+the type names.

 Commands are defined by using a list containing three members.  The first
 member is the command name, the second member is a dictionary containing
@@ -162,6 +65,8 @@ An example command is:
   'data': { 'arg1': 'str', '*arg2': 'str' },
   'returns': 'str' }

+Command names should be all lower case with words separated by a hyphen.
+

 == Code generation ==

--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -1,424 +0,0 @@
-(RDMA: Remote Direct Memory Access)
-RDMA Live Migration Specification, Version # 1
-==============================================
-Wiki: http://wiki.qemu.org/Features/RDMALiveMigration
-Github: git@github.com:hinesmr/qemu.git, 'rdma' branch
-
-Copyright (C) 2013 Michael R. Hines <mrhines@us.ibm.com>
-
-An *exhaustive* paper (2010) shows additional performance details
-linked on the QEMU wiki above.
-
-Contents:
-=========
-* Introduction
-* Before running
-* Running
-* Performance
-* RDMA Migration Protocol Description
-* Versioning and Capabilities
-* QEMUFileRDMA Interface
-* Migration of pc.ram
-* Error handling
-* TODO
-
-Introduction:
-=============
-
-RDMA helps make your migration more deterministic under heavy load because
-of the significantly lower latency and higher throughput over TCP/IP. This is
-because the RDMA I/O architecture reduces the number of interrupts and
-data copies by bypassing the host networking stack. In particular, a TCP-based
-migration, under certain types of memory-bound workloads, may take a more
-unpredictable amount of time to complete the migration if the amount of
-memory tracked during each live migration iteration round cannot keep pace
-with the rate of dirty memory produced by the workload.
-
-RDMA currently comes in two flavors: both Ethernet based (RoCE, or RDMA
-over Converged Ethernet) as well as Infiniband-based. This implementation of
-migration using RDMA is capable of using both technologies because of
-the use of the OpenFabrics OFED software stack that abstracts out the
-programming model irrespective of the underlying hardware.
-
-Refer to openfabrics.org or your respective RDMA hardware vendor for
-an understanding on how to verify that you have the OFED software stack
-installed in your environment. You should be able to successfully link
-against the "librdmacm" and "libibverbs" libraries and development headers
-for a working build of QEMU to run successfully using RDMA Migration.
-
-BEFORE RUNNING:
-===============
-
-Use of RDMA during migration requires pinning and registering memory
-with the hardware. This means that memory must be physically resident
-before the hardware can transmit that memory to another machine.
-If this is not acceptable for your application or product, then the use
-of RDMA migration may in fact be harmful to co-located VMs or other
-software on the machine if there is not sufficient memory available to
-relocate the entire footprint of the virtual machine. If so, then the
-use of RDMA is discouraged and it is recommended to use standard TCP migration.
-
-Experimental: Next, decide if you want dynamic page registration.
-For example, if you have an 8GB RAM virtual machine, but only 1GB
-is in active use, then enabling this feature will cause all 8GB to
-be pinned and resident in memory. This feature mostly affects the
-bulk-phase round of the migration and can be enabled for extremely
-high-performance RDMA hardware using the following command:
-
-QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
-
-Performing this action will cause all 8GB to be pinned, so if that's
-not what you want, then please ignore this step altogether.
-
-On the other hand, this will also significantly speed up the bulk round
-of the migration, which can greatly reduce the "total" time of your migration.
-Example performance of this using an idle VM in the previous example
-can be found in the "Performance" section.
-
-Note: for very large virtual machines (hundreds of GBs), pinning
-*all* of the memory of your virtual machine in the kernel is very expensive
-and may extend the initial bulk iteration time by many seconds,
-thus extending the total migration time. However, this will not
-affect the determinism or predictability of your migration; you will
-still gain from the benefits of advanced pinning with RDMA.
-
-RUNNING:
-========
-
-First, set the migration speed to match your hardware's capabilities:
-
-QEMU Monitor Command:
-$ migrate_set_speed 40g # or whatever is the MAX of your RDMA device
-
-Next, on the destination machine, add the following to the QEMU command line:
-
-qemu ..... -incoming x-rdma:host:port
-
-Finally, perform the actual migration on the source machine:
-
-QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
-
-PERFORMANCE
-===========
-
-Here is a brief summary of total migration time and downtime using RDMA:
-Using a 40gbps infiniband link performing a worst-case stress test,
-using an 8GB RAM virtual machine:
-
-Using the following command:
-$ apt-get install stress
-$ stress --vm-bytes 7500M --vm 1 --vm-keep
-
-1. Migration throughput: 26 gigabits/second.
-2. Downtime (stop time) varies between 15 and 100 milliseconds.
-
-EFFECTS of memory registration on bulk phase round:
-
-For example, in the same 8GB RAM example with all 8GB of memory in
-active use and the VM itself is completely idle using the same 40 gbps
-infiniband link:
-
-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
-
-These numbers would of course scale up to whatever size virtual machine
-you have to migrate using RDMA.
-
-Enabling this feature does *not* have any measurable effect on
-migration *downtime*. This is because, without this feature, all of the
-memory will have already been registered in advance during
-the bulk round and does not need to be re-registered during the successive
-iteration rounds.
-
-RDMA Protocol Description:
-==========================
-
-Migration with RDMA is separated into two parts:
-
-1. The transmission of the pages using RDMA
-2. Everything else (a control channel is introduced)
-
-"Everything else" is transmitted using a formal
-protocol now, consisting of infiniband SEND messages.
-
-An infiniband SEND message is the standard ibverbs
-message used by applications of infiniband hardware.
-The only difference between a SEND message and an RDMA
-message is that SEND messages cause notifications
-to be posted to the completion queue (CQ) on the
-infiniband receiver side, whereas RDMA messages (used
-for pc.ram) do not (to behave like an actual DMA).
-
-Messages in infiniband require two things:
-
-1. registration of the memory that will be transmitted
-2. (SEND only) work requests to be posted on both
-   sides of the network before the actual transmission
-   can occur.
-
-RDMA messages are much easier to deal with. Once the memory
-on the receiver side is registered and pinned, we're
-basically done. All that is required is for the sender
-side to start dumping bytes onto the link.
-
-(Memory is not released from pinning until the migration
-completes, given that RDMA migrations are very fast.)
-
-SEND messages require more coordination because the
-receiver must have reserved space (using a receive
-work request) on the receive queue (RQ) before QEMUFileRDMA
-can start using them to carry all the bytes as
-a control transport for migration of device state.
-
-To begin the migration, the initial connection setup is
-as follows (migration-rdma.c):
-
-1. Receiver and Sender are started (command line or libvirt):
-2. Both sides post two RQ work requests
-3. Receiver does listen()
-4. Sender does connect()
-5. Receiver accept()
-6. Check versioning and capabilities (described later)
-
-At this point, we define a control channel on top of SEND messages
-which is described by a formal protocol. Each SEND message has a
-header portion and a data portion (but together are transmitted
-as a single SEND message).
-
-Header:
-    * Length               (of the data portion, uint32, network byte order)
-    * Type                 (what command to perform, uint32, network byte order)
-    * Repeat               (Number of commands in data portion, same type only)
-
-The 'Repeat' field is here to support future multiple page registrations
-in a single message without any need to change the protocol itself
-so that the protocol is compatible against multiple versions of QEMU.
-Version #1 requires that all server implementations of the protocol must
-check this field and register all requests found in the array of commands located
-in the data portion and return an equal number of results in the response.
-The maximum number of repeats is hard-coded to 4096. This is a conservative
-limit based on the maximum size of a SEND message along with empirical
-observations on the maximum future benefit of simultaneous page registrations.
-
-The 'type' field has 12 different command values:
-     1. Unused
-     2. Error                      (sent to the source during bad things)
-     3. Ready                      (control-channel is available)
-     4. QEMU File                  (for sending non-live device state)
-     5. RAM Blocks request         (used right after connection setup)
-     6. RAM Blocks result          (used right after connection setup)
-     7. Compress page              (zap zero page and skip registration)
-     8. Register request           (dynamic chunk registration)
-     9. Register result            ('rkey' to be used by sender)
-    10. Register finished          (registration for current iteration finished)
-    11. Unregister request         (unpin previously registered memory)
-    12. Unregister finished        (confirmation that unpin completed)
-
-A single control message, as hinted above, can contain within the data
-portion an array of many commands of the same type. If there is more than
-one command, then the 'repeat' field will be greater than 1.
-
-After connection setup, message 5 & 6 are used to exchange ram block
-information and optionally pin all the memory if requested by the user.
-
-After ram block exchange is completed, we have two protocol-level
-functions, responsible for communicating control-channel commands
-using the above list of values:
-
-Logically:
-
-qemu_rdma_exchange_recv(header, expected command type)
-
-1. We transmit a READY command to let the sender know that
-   we are *ready* to receive some data bytes on the control channel.
-2. Before attempting to receive the expected command, we post another
-   RQ work request to replace the one we just used up.
-3. Block on a CQ event channel and wait for the SEND to arrive.
-4. When the send arrives, librdmacm will unblock us.
-5. Verify that the command-type and version received matches the one we expected.
-
-qemu_rdma_exchange_send(header, data, optional response header & data):
-
-1. Block on the CQ event channel waiting for a READY command
-   from the receiver to tell us that the receiver
-   is *ready* for us to transmit some new bytes.
-2. Optionally: if we are expecting a response from the command
-   (that we have not yet transmitted), let's post an RQ
-   work request to receive that data a few moments later.
-3. When the READY arrives, librdmacm will
-   unblock us and we immediately post a RQ work request
-   to replace the one we just used up.
-4. Now, we can actually post the work request to SEND
-   the requested command type of the header we were asked for.
-5. Optionally, if we are expecting a response (as before),
-   we block again and wait for that response using the additional
-   work request we previously posted. (This is used to carry
-   'Register result' commands #9 back to the sender which
-   hold the rkey needed to perform RDMA. Note that the virtual address
-   corresponding to this rkey was already exchanged at the beginning
-   of the connection (described below).)
-
-All of the remaining command types (not including 'ready')
-described above all use the aforementioned two functions to do the hard work:
-
-1. After connection setup, RAMBlock information is exchanged using
-   this protocol before the actual migration begins. This information includes
-   a description of each RAMBlock on the server side as well as the virtual addresses
-   and lengths of each RAMBlock. This is used by the client to determine the
-   start and stop locations of chunks and how to register them dynamically
-   before performing the RDMA operations.
-2. During runtime, once a 'chunk' becomes full of pages ready to
-   be sent with RDMA, the registration commands are used to ask the
-   other side to register the memory for this chunk and respond
-   with the result (rkey) of the registration.
-3. Also, the QEMUFile interfaces also call these functions (described below)
-   when transmitting non-live state, such as devices or to send
-   its own protocol information during the migration process.
-4. Finally, zero pages are only checked if a page has not yet been registered
-   using chunk registration (or not checked at all and unconditionally
-   written if chunk registration is disabled). This is accomplished using
-   the "Compress" command listed above. If the page *has* been registered
-   then we check the entire chunk for zero. Only if the entire chunk is
-   zero, then we send a compress command to zap the page on the other side.
-
-Versioning and Capabilities
-===========================
-Current version of the protocol is version #1.
-
-The same version applies to both protocol traffic and capabilities
-negotiation. (i.e. There is only one version number that is referred to
-by all communication).
-
-librdmacm provides the user with a 'private data' area to be exchanged
-at connection-setup time before any infiniband traffic is generated.
-
-Header:
-    * Version (protocol version validated before send/recv occurs),
-                                               uint32, network byte order
-    * Flags   (bitwise OR of each capability),
-                                               uint32, network byte order
-
-There is no data portion of this header right now, so there is
-no length field. The maximum size of the 'private data' section
-is only 192 bytes per the Infiniband specification, so it's not
-very useful for data anyway. This structure needs to remain small.
-
-This private data area is a convenient place to check for protocol
-versioning because the user does not need to register memory to
-transmit a few bytes of version information.
-
-This is also a convenient place to negotiate capabilities
-(like dynamic page registration).
-
-If the version is invalid, we throw an error.
-
-If the version is new, we only negotiate the capabilities that the
-requested version is able to perform and ignore the rest.
-
-Currently there is only one capability in Version #1: dynamic page registration
-
-Finally: Negotiation happens with the Flags field: If the primary-VM
-sets a flag, but the destination does not support this capability, it
-will return a zero-bit for that flag and the primary-VM will understand
-that as not being an available capability and will thus disable that
-capability on the primary-VM side.
-
-QEMUFileRDMA Interface:
-=======================
-
-QEMUFileRDMA introduces a couple of new functions:
-
-1. qemu_rdma_get_buffer()               (QEMUFileOps rdma_read_ops)
-2. qemu_rdma_put_buffer()               (QEMUFileOps rdma_write_ops)
-
-These two functions are very short and simply use the protocol
-described above to deliver bytes without changing the upper-level
-users of QEMUFile that depend on a bytestream abstraction.
-
-Finally, how do we handoff the actual bytes to get_buffer()?
-
-Again, because we're trying to "fake" a bytestream abstraction
-using an analogy not unlike individual UDP frames, we have
-to hold on to the bytes received from control-channel's SEND
-messages in memory.
-
-Each time we receive a complete "QEMU File" control-channel
-message, the bytes from SEND are copied into a small local holding area.
-
-Then, we return the number of bytes requested by get_buffer()
-and leave the remaining bytes in the holding area until get_buffer()
-comes around for another pass.
-
-If the buffer is empty, then we follow the same steps
-listed above and issue another "QEMU File" protocol command,
-asking for a new SEND message to re-fill the buffer.
-
-Migration of pc.ram:
-====================
-
-At the beginning of the migration, (migration-rdma.c),
-the sender and the receiver populate the list of RAMBlocks
-to be registered with each other into a structure.
-Then, using the aforementioned protocol, they exchange a
-description of these blocks with each other, to be used later
-during the iteration of main memory. This description includes
-a list of all the RAMBlocks, their offsets and lengths, virtual
-addresses and possibly includes pre-registered RDMA keys in case dynamic
-page registration was disabled on the server-side, otherwise not.
-
-Main memory is not migrated with the aforementioned protocol,
-but is instead migrated with normal RDMA Write operations.
-
-Pages are migrated in "chunks" (hard-coded to 1 Megabyte right now).
-Chunk size is not dynamic, but it could be in a future implementation.
-There's nothing to indicate that this is useful right now.
-
-When a chunk is full (or a flush() occurs), the memory backed by
-the chunk is registered with librdmacm and is pinned in memory on
-both sides using the aforementioned protocol.
-After pinning, an RDMA Write is generated and transmitted
-for the entire chunk.
-
-Chunks are also transmitted in batches: This means that we
-do not request that the hardware signal the completion queue
-for the completion of *every* chunk. The current batch size
-is about 64 chunks (corresponding to 64 MB of memory).
-Only the last chunk in a batch must be signaled.
-This helps keep everything as asynchronous as possible
-and helps keep the hardware busy performing RDMA operations.
-
-Error-handling:
-===============
-
-Infiniband has what is called a "Reliable, Connected"
-link (one of 4 choices). This is the mode that
-we use for RDMA migration.
-
-If a *single* message fails,
-the decision is to abort the migration entirely and
-cleanup all the RDMA descriptors and unregister all
-the memory.
-
-After cleanup, the Virtual Machine is returned to normal
-operation the same way that would happen if the TCP
-socket is broken during a non-RDMA based migration.
-
-TODO:
-=====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
-   are not compatible with infiniband memory pinning and will result in
-   an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
-   the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
-   help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
-   requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
-   hints about application behavior.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -225,7 +225,7 @@ probes:
    scripts/tracetool --dtrace --stap \
                      --binary path/to/qemu-binary \
                      --target-type system \
-                      --target-name x86_64 \
+                      --target-arch x86_64 \
                      <trace-events >qemu.stp

 == Trace event properties ==
--- a/stubs/dump.c
+++ b/stubs/dump.c
@@ -16,8 +16,15 @@
 #include "qapi/qmp/qerror.h"
 #include "qmp-commands.h"

-int cpu_get_dump_info(ArchDumpInfo *info,
-                      const struct GuestPhysBlockList *guest_phys_blocks)
+/* we need this function in hmp.c */
+void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
+                           int64_t begin, bool has_length, int64_t length,
+                           Error **errp)
+{
+    error_set(errp, QERR_UNSUPPORTED);
+}
+
+int cpu_get_dump_info(ArchDumpInfo *info)
 {
    return -1;
 }
--- a/dump.c
+++ b/dump.c
@@ -21,7 +21,6 @@
 #include "sysemu/dump.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/memory_mapping.h"
-#include "sysemu/cpus.h"
 #include "qapi/error.h"
 #include "qmp-commands.h"

@@ -59,7 +58,6 @@ static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
 }

 typedef struct DumpState {
-    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;
    MemoryMappingList list;
    uint16_t phdr_num;
@@ -70,7 +68,7 @@ typedef struct DumpState {
    hwaddr memory_offset;
    int fd;

-    GuestPhysBlock *next_block;
+    RAMBlock *block;
    ram_addr_t start;
    bool has_filter;
    int64_t begin;
@@ -82,7 +80,6 @@ static int dump_cleanup(DumpState *s)
 {
    int ret = 0;

-    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    if (s->fd != -1) {
        close(s->fd);
@@ -189,8 +186,7 @@ static int write_elf32_header(DumpState *s)
 }

 static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, hwaddr offset,
-                            hwaddr filesz)
+                            int phdr_index, hwaddr offset)
 {
    Elf64_Phdr phdr;
    int ret;
@@ -200,12 +196,15 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target64(offset, endian);
    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
-    phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
+    if (offset == -1) {
+        /* When the memory is not stored into vmcore, offset will be -1 */
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
+    }
    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);

-    assert(memory_mapping->length >= filesz);
-
    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
@@ -216,8 +215,7 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
 }

 static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, hwaddr offset,
-                            hwaddr filesz)
+                            int phdr_index, hwaddr offset)
 {
    Elf32_Phdr phdr;
    int ret;
@@ -227,12 +225,15 @@ static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target32(offset, endian);
    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
-    phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
+    if (offset == -1) {
+        /* When the memory is not stored into vmcore, offset will be -1 */
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
+    }
    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);

-    assert(memory_mapping->length >= filesz);
-
    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
@@ -273,11 +274,13 @@ static inline int cpu_index(CPUState *cpu)

 static int write_elf64_notes(DumpState *s)
 {
+    CPUArchState *env;
    CPUState *cpu;
    int ret;
    int id;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu = ENV_GET_CPU(env);
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(fd_write_vmcore, cpu, id, s);
        if (ret < 0) {
@@ -286,7 +289,7 @@ static int write_elf64_notes(DumpState *s)
        }
    }

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        ret = cpu_write_elf64_qemunote(fd_write_vmcore, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
@@ -323,11 +326,13 @@ static int write_elf32_note(DumpState *s)

 static int write_elf32_notes(DumpState *s)
 {
+    CPUArchState *env;
    CPUState *cpu;
    int ret;
    int id;

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu = ENV_GET_CPU(env);
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(fd_write_vmcore, cpu, id, s);
        if (ret < 0) {
@@ -336,7 +341,7 @@ static int write_elf32_notes(DumpState *s)
        }
    }

-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        ret = cpu_write_elf32_qemunote(fd_write_vmcore, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
@@ -391,14 +396,14 @@ static int write_data(DumpState *s, void *buf, int length)
 }

 /* write the memroy to vmcore. 1 page per I/O. */
-static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
+static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
                        int64_t size)
 {
    int64_t i;
    int ret;

    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
-        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
+        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
                         TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
@@ -406,7 +411,7 @@ static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
    }

    if ((size % TARGET_PAGE_SIZE) != 0) {
-        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
+        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
                         size % TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
@@ -416,71 +421,57 @@ static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
    return 0;
 }

-/* get the memory's offset and size in the vmcore */
-static void get_offset_range(hwaddr phys_addr,
-                             ram_addr_t mapping_length,
-                             DumpState *s,
-                             hwaddr *p_offset,
-                             hwaddr *p_filesz)
+/* get the memory's offset in the vmcore */
+static hwaddr get_offset(hwaddr phys_addr,
+                                     DumpState *s)
 {
-    GuestPhysBlock *block;
+    RAMBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

-    /* When the memory is not stored into vmcore, offset will be -1 */
-    *p_offset = -1;
-    *p_filesz = 0;
-
    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
-            return;
+            return -1;
        }
    }

-    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (s->has_filter) {
-            if (block->target_start >= s->begin + s->length ||
-                block->target_end <= s->begin) {
+            if (block->offset >= s->begin + s->length ||
+                block->offset + block->length <= s->begin) {
                /* This block is out of the range */
                continue;
            }

-            if (s->begin <= block->target_start) {
-                start = block->target_start;
+            if (s->begin <= block->offset) {
+                start = block->offset;
            } else {
                start = s->begin;
            }

-            size_in_block = block->target_end - start;
-            if (s->begin + s->length < block->target_end) {
-                size_in_block -= block->target_end - (s->begin + s->length);
+            size_in_block = block->length - (start - block->offset);
+            if (s->begin + s->length < block->offset + block->length) {
+                size_in_block -= block->offset + block->length -
+                                 (s->begin + s->length);
            }
        } else {
-            start = block->target_start;
-            size_in_block = block->target_end - block->target_start;
+            start = block->offset;
+            size_in_block = block->length;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
-            *p_offset = phys_addr - start + offset;
-
-            /* The offset range mapped from the vmcore file must not spill over
-             * the GuestPhysBlock, clamp it. The rest of the mapping will be
-             * zero-filled in memory at load time; see
-             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
-             */
-            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
-                        mapping_length :
-                        size_in_block - (phys_addr - start);
-            return;
+            return phys_addr - start + offset;
        }

        offset += size_in_block;
    }
+
+    return -1;
 }

 static int write_elf_loads(DumpState *s)
 {
-    hwaddr offset, filesz;
+    hwaddr offset;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    int ret;
@@ -493,15 +484,11 @@ static int write_elf_loads(DumpState *s)
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
-        get_offset_range(memory_mapping->phys_addr,
-                         memory_mapping->length,
-                         s, &offset, &filesz);
+        offset = get_offset(memory_mapping->phys_addr, s);
        if (s->dump_info.d_class == ELFCLASS64) {
-            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
-                                   filesz);
+            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
        } else {
-            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
-                                   filesz);
+            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
        }

        if (ret < 0) {
@@ -612,7 +599,7 @@ static int dump_completed(DumpState *s)
    return 0;
 }

-static int get_next_block(DumpState *s, GuestPhysBlock *block)
+static int get_next_block(DumpState *s, RAMBlock *block)
 {
    while (1) {
        block = QTAILQ_NEXT(block, next);
@@ -622,16 +609,16 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block)
        }

        s->start = 0;
-        s->next_block = block;
+        s->block = block;
        if (s->has_filter) {
-            if (block->target_start >= s->begin + s->length ||
-                block->target_end <= s->begin) {
+            if (block->offset >= s->begin + s->length ||
+                block->offset + block->length <= s->begin) {
                /* This block is out of the range */
                continue;
            }

-            if (s->begin > block->target_start) {
-                s->start = s->begin - block->target_start;
+            if (s->begin > block->offset) {
+                s->start = s->begin - block->offset;
            }
        }

@@ -642,18 +629,18 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block)
 /* write all memory to vmcore */
 static int dump_iterate(DumpState *s)
 {
-    GuestPhysBlock *block;
+    RAMBlock *block;
    int64_t size;
    int ret;

    while (1) {
-        block = s->next_block;
+        block = s->block;

-        size = block->target_end - block->target_start;
+        size = block->length;
        if (s->has_filter) {
            size -= s->start;
-            if (s->begin + s->length < block->target_end) {
-                size -= block->target_end - (s->begin + s->length);
+            if (s->begin + s->length < block->offset + block->length) {
+                size -= block->offset + block->length - (s->begin + s->length);
            }
        }
        ret = write_memory(s, block, s->start, size);
@@ -688,23 +675,23 @@ static int create_vmcore(DumpState *s)

 static ram_addr_t get_start_block(DumpState *s)
 {
-    GuestPhysBlock *block;
+    RAMBlock *block;

    if (!s->has_filter) {
-        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
+        s->block = QTAILQ_FIRST(&ram_list.blocks);
        return 0;
    }

-    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
-        if (block->target_start >= s->begin + s->length ||
-            block->target_end <= s->begin) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (block->offset >= s->begin + s->length ||
+            block->offset + block->length <= s->begin) {
            /* This block is out of the range */
            continue;
        }

-        s->next_block = block;
-        if (s->begin > block->target_start) {
-            s->start = s->begin - block->target_start;
+        s->block = block;
+        if (s->begin > block->offset) {
+            s->start = s->begin - block->offset;
        } else {
            s->start = 0;
        }
@@ -717,9 +704,8 @@ static ram_addr_t get_start_block(DumpState *s)
 static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
                     int64_t begin, int64_t length, Error **errp)
 {
-    CPUState *cpu;
+    CPUArchState *env;
    int nr_cpus;
-    Error *err = NULL;
    int ret;

    if (runstate_is_running()) {
@@ -729,35 +715,32 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
        s->resume = false;
    }

-    /* If we use KVM, we should synchronize the registers before we get dump
-     * info or physmap info.
-     */
-    cpu_synchronize_all_states();
-    nr_cpus = 0;
-    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
-        nr_cpus++;
-    }
-
    s->errp = errp;
    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;
-
-    guest_phys_blocks_init(&s->guest_phys_blocks);
-    guest_phys_blocks_append(&s->guest_phys_blocks);
-
    s->start = get_start_block(s);
    if (s->start == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

-    /* get dump info: endian, class and architecture.
+    /*
+     * get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
+     *
+     * if we use kvm, we should synchronize the register before we get dump
+     * info.
     */
-    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
+    nr_cpus = 0;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu_synchronize_state(env);
+        nr_cpus++;
+    }
+
+    ret = cpu_get_dump_info(&s->dump_info);
    if (ret < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
@@ -773,13 +756,9 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
    /* get memory mapping */
    memory_mapping_list_init(&s->list);
    if (paging) {
-        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
-        if (err != NULL) {
-            error_propagate(errp, err);
-            goto cleanup;
-        }
+        qemu_get_guest_memory_mapping(&s->list);
    } else {
-        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
+        qemu_get_guest_simple_memory_mapping(&s->list);
    }

    if (s->has_filter) {
@@ -831,8 +810,6 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
    return 0;

 cleanup:
-    guest_phys_blocks_free(&s->guest_phys_blocks);
-
    if (s->resume) {
        vm_start();
    }
@@ -870,7 +847,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
    if  (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
-            error_setg_file_open(errp, errno, p);
+            error_set(errp, QERR_OPEN_FILE_FAILED, p);
            return;
        }
    }
@@ -880,7 +857,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
        return;
    }

-    s = g_malloc0(sizeof(DumpState));
+    s = g_malloc(sizeof(DumpState));

    ret = dump_init(s, fd, paging, has_begin, begin, length, errp);
    if (ret < 0) {
--- a/exec.c
+++ b/exec.c
--- a/fpu/softfloat-macros.h
+++ b/fpu/softfloat-macros.h
@@ -168,7 +168,7 @@ INLINE void
        z0 = a0>>count;
    }
    else {
-        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
+        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
        z0 = 0;
    }
    *z1Ptr = z1;
--- a/fsdev/qemu-fsdev.c
+++ b/fsdev/qemu-fsdev.c
@@ -76,8 +76,6 @@ int qemu_fsdev_add(QemuOpts *opts)

    if (fsle->fse.ops->parse_opts) {
        if (fsle->fse.ops->parse_opts(opts, &fsle->fse)) {
-            g_free(fsle->fse.fsdev_id);
-            g_free(fsle);
            return -1;
        }
    }
--- a/gdbstub.c
+++ b/gdbstub.c
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -991,6 +991,7 @@ server will ask the spice/vnc client to automatically reconnect using the
 new parameters (if specified) once the vm migration finished successfully.
 ETEXI

+#if defined(CONFIG_HAVE_CORE_DUMP)
    {
        .name       = "dump-guest-memory",
        .args_type  = "paging:-p,filename:F,begin:i?,length:i?",
@@ -1014,6 +1015,7 @@ gdb.
    length: the memory size, in bytes. It's optional, and should be specified
            with begin together.
 ETEXI
+#endif

    {
        .name       = "snapshot_blkdev",
@@ -1056,26 +1058,6 @@ STEXI
@findex drive_mirror
 Start mirroring a block device's writes to a new destination,
 using the specified target.
-ETEXI
-
-    {
-        .name       = "drive_backup",
-        .args_type  = "reuse:-n,full:-f,device:B,target:s,format:s?",
-        .params     = "[-n] [-f] device target [format]",
-        .help       = "initiates a point-in-time\n\t\t\t"
-                      "copy for a device. The device's contents are\n\t\t\t"
-                      "copied to the new image file, excluding data that\n\t\t\t"
-                      "is written after the command is started.\n\t\t\t"
-                      "The -n flag requests QEMU to reuse the image found\n\t\t\t"
-                      "in new-image-file, instead of recreating it from scratch.\n\t\t\t"
-                      "The -f flag requests QEMU to copy the whole disk,\n\t\t\t"
-                      "so that the result does not need a backing file.\n\t\t\t",
-        .mhandler.cmd = hmp_drive_backup,
-    },
-STEXI
-@item drive_backup
-@findex drive_backup
-Start a point-in-time copy of a block device to a specificed target.
 ETEXI

    {
@@ -1097,7 +1079,7 @@ STEXI
 Add drive to PCI storage controller.
 ETEXI

-#if defined(CONFIG_PCI_HOTPLUG_OLD)
+#if defined(TARGET_I386)
    {
        .name       = "pci_add",
        .args_type  = "pci_addr:s,type:s,opts:s?",
@@ -1113,7 +1095,7 @@ STEXI
 Hot-add PCI device.
 ETEXI

-#if defined(CONFIG_PCI_HOTPLUG_OLD)
+#if defined(TARGET_I386)
    {
        .name       = "pci_del",
        .args_type  = "pci_addr:s",
@@ -1568,22 +1550,6 @@ STEXI

 Removes the chardev @var{id}.

-ETEXI
-
-    {
-        .name       = "qemu-io",
-        .args_type  = "device:B,command:s",
-        .params     = "[device] \"[command]\"",
-        .help       = "run a qemu-io command on a block device",
-        .mhandler.cmd = hmp_qemu_io,
-    },
-
-STEXI
-@item qemu-io @var{device} @var{command}
-@findex qemu-io
-
-Executes a qemu-io command on the given block device.
-
 ETEXI

    {
--- a/hmp.c
+++ b/hmp.c
@@ -22,8 +22,6 @@
 #include "qemu/sockets.h"
 #include "monitor/monitor.h"
 #include "ui/console.h"
-#include "block/qapi.h"
-#include "qemu-io.h"

 static void hmp_handle_error(Monitor *mon, Error **errp)
 {
@@ -164,17 +162,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
            monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
                           info->downtime);
        }
-        if (info->has_setup_time) {
-            monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n",
-                           info->setup_time);
-        }
    }

    if (info->has_ram) {
        monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n",
                       info->ram->transferred >> 10);
-        monitor_printf(mon, "throughput: %0.2f mbps\n",
-                       info->ram->mbps);
        monitor_printf(mon, "remaining ram: %" PRIu64 " kbytes\n",
                       info->ram->remaining >> 10);
        monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n",
@@ -285,88 +277,52 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict)
 void hmp_info_block(Monitor *mon, const QDict *qdict)
 {
    BlockInfoList *block_list, *info;
-    ImageInfo *image_info;
-    const char *device = qdict_get_try_str(qdict, "device");
-    bool verbose = qdict_get_try_bool(qdict, "verbose", 0);

    block_list = qmp_query_block(NULL);

    for (info = block_list; info; info = info->next) {
-        if (device && strcmp(device, info->value->device)) {
-            continue;
+        monitor_printf(mon, "%s: removable=%d",
+                       info->value->device, info->value->removable);
+
+        if (info->value->removable) {
+            monitor_printf(mon, " locked=%d", info->value->locked);
+            monitor_printf(mon, " tray-open=%d", info->value->tray_open);
        }

-        if (info != block_list) {
-            monitor_printf(mon, "\n");
-        }
-
-        monitor_printf(mon, "%s", info->value->device);
-        if (info->value->has_inserted) {
-            monitor_printf(mon, ": %s (%s%s%s)\n",
-                           info->value->inserted->file,
-                           info->value->inserted->drv,
-                           info->value->inserted->ro ? ", read-only" : "",
-                           info->value->inserted->encrypted ? ", encrypted" : "");
-        } else {
-            monitor_printf(mon, ": [not inserted]\n");
-        }
-
-        if (info->value->has_io_status && info->value->io_status != BLOCK_DEVICE_IO_STATUS_OK) {
-            monitor_printf(mon, "    I/O status:       %s\n",
+        if (info->value->has_io_status) {
+            monitor_printf(mon, " io-status=%s",
                           BlockDeviceIoStatus_lookup[info->value->io_status]);
        }

-        if (info->value->removable) {
-            monitor_printf(mon, "    Removable device: %slocked, tray %s\n",
-                           info->value->locked ? "" : "not ",
-                           info->value->tray_open ? "open" : "closed");
-        }
+        if (info->value->has_inserted) {
+            monitor_printf(mon, " file=");
+            monitor_print_filename(mon, info->value->inserted->file);

+            if (info->value->inserted->has_backing_file) {
+                monitor_printf(mon, " backing_file=");
+                monitor_print_filename(mon, info->value->inserted->backing_file);
+                monitor_printf(mon, " backing_file_depth=%" PRId64,
+                    info->value->inserted->backing_file_depth);
+            }
+            monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
+                           info->value->inserted->ro,
+                           info->value->inserted->drv,
+                           info->value->inserted->encrypted);

-        if (!info->value->has_inserted) {
-            continue;
-        }
-
-        if (info->value->inserted->has_backing_file) {
-            monitor_printf(mon,
-                           "    Backing file:     %s "
-                           "(chain depth: %" PRId64 ")\n",
-                           info->value->inserted->backing_file,
-                           info->value->inserted->backing_file_depth);
-        }
-
-        if (info->value->inserted->bps
-            || info->value->inserted->bps_rd
-            || info->value->inserted->bps_wr
-            || info->value->inserted->iops
-            || info->value->inserted->iops_rd
-            || info->value->inserted->iops_wr)
-        {
-            monitor_printf(mon, "    I/O throttling:   bps=%" PRId64
-                            " bps_rd=%" PRId64  " bps_wr=%" PRId64
-                            " iops=%" PRId64 " iops_rd=%" PRId64
-                            " iops_wr=%" PRId64 "\n",
+            monitor_printf(mon, " bps=%" PRId64 " bps_rd=%" PRId64
+                            " bps_wr=%" PRId64 " iops=%" PRId64
+                            " iops_rd=%" PRId64 " iops_wr=%" PRId64,
                            info->value->inserted->bps,
                            info->value->inserted->bps_rd,
                            info->value->inserted->bps_wr,
                            info->value->inserted->iops,
                            info->value->inserted->iops_rd,
                            info->value->inserted->iops_wr);
+        } else {
+            monitor_printf(mon, " [not inserted]");
        }

-        if (verbose) {
-            monitor_printf(mon, "\nImages:\n");
-            image_info = info->value->inserted->image;
-            while (1) {
-                    bdrv_image_info_dump((fprintf_function)monitor_printf,
-                                         mon, image_info);
-                if (image_info->has_backing_image) {
-                    image_info = image_info->backing_image;
-                } else {
-                    break;
-                }
-            }
-        }
+        monitor_printf(mon, "\n");
    }

    qapi_free_BlockInfoList(block_list);
@@ -911,34 +867,6 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
    hmp_handle_error(mon, &errp);
 }

-void hmp_drive_backup(Monitor *mon, const QDict *qdict)
-{
-    const char *device = qdict_get_str(qdict, "device");
-    const char *filename = qdict_get_str(qdict, "target");
-    const char *format = qdict_get_try_str(qdict, "format");
-    int reuse = qdict_get_try_bool(qdict, "reuse", 0);
-    int full = qdict_get_try_bool(qdict, "full", 0);
-    enum NewImageMode mode;
-    Error *errp = NULL;
-
-    if (!filename) {
-        error_set(&errp, QERR_MISSING_PARAMETER, "target");
-        hmp_handle_error(mon, &errp);
-        return;
-    }
-
-    if (reuse) {
-        mode = NEW_IMAGE_MODE_EXISTING;
-    } else {
-        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    }
-
-    qmp_drive_backup(device, filename, !!format, format,
-                     full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &errp);
-    hmp_handle_error(mon, &errp);
-}
-
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 {
    const char *device = qdict_get_str(qdict, "device");
@@ -1497,20 +1425,3 @@ void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
    qmp_chardev_remove(qdict_get_str(qdict, "id"), &local_err);
    hmp_handle_error(mon, &local_err);
 }
-
-void hmp_qemu_io(Monitor *mon, const QDict *qdict)
-{
-    BlockDriverState *bs;
-    const char* device = qdict_get_str(qdict, "device");
-    const char* command = qdict_get_str(qdict, "command");
-    Error *err = NULL;
-
-    bs = bdrv_find(device);
-    if (bs) {
-        qemuio_command(bs, command);
-    } else {
-        error_set(&err, QERR_DEVICE_NOT_FOUND, device);
-    }
-
-    hmp_handle_error(mon, &err);
-}
--- a/hmp.h
+++ b/hmp.h
@@ -55,7 +55,6 @@ void hmp_balloon(Monitor *mon, const QDict *qdict);
 void hmp_block_resize(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
-void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
@@ -86,6 +85,5 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict);
-void hmp_qemu_io(Monitor *mon, const QDict *qdict);

 #endif
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -38,10 +38,6 @@ int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode,
            });
        v9fs_path_unlock(s);
    }
-    /* The ioctl may not be supported depending on the path */
-    if (err == -ENOTTY) {
-        err = 0;
-    }
    return err;
 }

--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -61,8 +61,6 @@ static int virtio_9p_device_init(VirtIODevice *vdev)

    s->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);

-    v9fs_path_init(&path);
-
    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);

    if (!fse) {
@@ -70,14 +68,14 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
        fprintf(stderr, "Virtio-9p device couldn't find fsdev with the "
                "id = %s\n",
                s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
-        goto out;
+        return -1;
    }

    if (!s->fsconf.tag) {
        /* we haven't specified a mount_tag */
        fprintf(stderr, "fsdev with id %s needs mount_tag arguments\n",
                s->fsconf.fsdev_id);
-        goto out;
+        return -1;
    }

    s->ctx.export_flags = fse->export_flags;
@@ -87,10 +85,10 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
    if (len > MAX_TAG_LEN - 1) {
        fprintf(stderr, "mount tag '%s' (%d bytes) is longer than "
                "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
-        goto out;
+        return -1;
    }

-    s->tag = g_strdup(s->fsconf.tag);
+    s->tag = strdup(s->fsconf.tag);
    s->ctx.uid = -1;

    s->ops = fse->ops;
@@ -101,11 +99,11 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
    if (s->ops->init(&s->ctx) < 0) {
        fprintf(stderr, "Virtio-9p Failed to initialize fs-driver with id:%s"
                " and export path:%s\n", s->fsconf.fsdev_id, s->ctx.fs_root);
-        goto out;
+        return -1;
    }
    if (v9fs_init_worker_threads() < 0) {
        fprintf(stderr, "worker thread initialization failed\n");
-        goto out;
+        return -1;
    }

    /*
@@ -113,29 +111,22 @@ static int virtio_9p_device_init(VirtIODevice *vdev)
     * call back to do that. Since we are in the init path, we don't
     * use co-routines here.
     */
+    v9fs_path_init(&path);
    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
        fprintf(stderr,
                "error in converting name to path %s", strerror(errno));
-        goto out;
+        return -1;
    }
    if (s->ops->lstat(&s->ctx, &path, &stat)) {
        fprintf(stderr, "share path %s does not exist\n", fse->path);
-        goto out;
+        return -1;
    } else if (!S_ISDIR(stat.st_mode)) {
        fprintf(stderr, "share path %s is not a directory\n", fse->path);
-        goto out;
+        return -1;
    }
    v9fs_path_free(&path);

    return 0;
-out:
-    g_free(s->ctx.fs_root);
-    g_free(s->tag);
-    virtio_cleanup(vdev);
-    v9fs_path_free(&path);
-
-    return -1;
-
 }

 /* virtio-9p device */
@@ -150,7 +141,6 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    dc->props = virtio_9p_properties;
-    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->init = virtio_9p_device_init;
    vdc->get_features = virtio_9p_get_features;
    vdc->get_config = virtio_9p_get_config;
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -59,33 +59,6 @@ static const char *local_mapped_attr_path(FsContext *ctx,
    return buffer;
 }

-static FILE *local_fopen(const char *path, const char *mode)
-{
-    int fd, o_mode = 0;
-    FILE *fp;
-    int flags = O_NOFOLLOW;
-    /*
-     * only supports two modes
-     */
-    if (mode[0] == 'r') {
-        flags |= O_RDONLY;
-    } else if (mode[0] == 'w') {
-        flags |= O_WRONLY | O_TRUNC | O_CREAT;
-        o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
-    } else {
-        return NULL;
-    }
-    fd = open(path, flags, o_mode);
-    if (fd == -1) {
-        return NULL;
-    }
-    fp = fdopen(fd, mode);
-    if (!fp) {
-        close(fd);
-    }
-    return fp;
-}
-
 #define ATTR_MAX 100
 static void local_mapped_file_attr(FsContext *ctx, const char *path,
                                   struct stat *stbuf)
@@ -95,7 +68,7 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path,
    char attr_path[PATH_MAX];

    local_mapped_attr_path(ctx, path, attr_path);
-    fp = local_fopen(attr_path, "r");
+    fp = fopen(attr_path, "r");
    if (!fp) {
        return;
    }
@@ -179,7 +152,7 @@ static int local_set_mapped_file_attr(FsContext *ctx,
    char attr_path[PATH_MAX];
    int uid = -1, gid = -1, mode = -1, rdev = -1;

-    fp = local_fopen(local_mapped_attr_path(ctx, path, attr_path), "r");
+    fp = fopen(local_mapped_attr_path(ctx, path, attr_path), "r");
    if (!fp) {
        goto create_map_file;
    }
@@ -206,7 +179,7 @@ create_map_file:
    }

 update_map_file:
-    fp = local_fopen(attr_path, "w");
+    fp = fopen(attr_path, "w");
    if (!fp) {
        ret = -1;
        goto err_out;
@@ -343,7 +316,7 @@ static int local_open(FsContext *ctx, V9fsPath *fs_path,
    char buffer[PATH_MAX];
    char *path = fs_path->data;

-    fs->fd = open(rpath(ctx, path, buffer), flags | O_NOFOLLOW);
+    fs->fd = open(rpath(ctx, path, buffer), flags);
    return fs->fd;
 }

@@ -628,11 +601,6 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
    V9fsString fullname;
    char buffer[PATH_MAX];

-    /*
-     * Mark all the open to not follow symlinks
-     */
-    flags |= O_NOFOLLOW;
-
    v9fs_string_init(&fullname);
    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
    path = fullname.data;
@@ -708,9 +676,8 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
        int fd;
        ssize_t oldpath_size, write_size;
-        fd = open(rpath(fs_ctx, newpath, buffer),
-                  O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,
-                  SM_LOCAL_MODE_BITS);
+        fd = open(rpath(fs_ctx, newpath, buffer), O_CREAT|O_EXCL|O_RDWR,
+                SM_LOCAL_MODE_BITS);
        if (fd == -1) {
            err = fd;
            goto out;
@@ -738,8 +705,7 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
        int fd;
        ssize_t oldpath_size, write_size;
-        fd = open(rpath(fs_ctx, newpath, buffer),
-                  O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW,
+        fd = open(rpath(fs_ctx, newpath, buffer), O_CREAT|O_EXCL|O_RDWR,
                  SM_LOCAL_MODE_BITS);
        if (fd == -1) {
            err = fd;
--- a/hw/9pfs/virtio-9p-proxy.c
+++ b/hw/9pfs/virtio-9p-proxy.c
@@ -1153,12 +1153,10 @@ static int proxy_init(FsContext *ctx)
        sock_id = atoi(ctx->fs_root);
        if (sock_id < 0) {
            fprintf(stderr, "socket descriptor not initialized\n");
-            g_free(proxy);
            return -1;
        }
    }
    g_free(ctx->fs_root);
-    ctx->fs_root = NULL;

    proxy->in_iovec.iov_base  = g_malloc(PROXY_MAX_IO_SZ + PROXY_HDR_SZ);
    proxy->in_iovec.iov_len   = PROXY_MAX_IO_SZ + PROXY_HDR_SZ;
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .6.50
 .5.2