update VERSION for v1.3.1

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
qxl: Fix SPICE_RING_PROD_ITEM(), SPICE_RING_CONS_ITEM() sanity check
2013-01-28 10:38:28 -06:00 · 2013-01-21 14:44:31 -06:00 · 2013-01-21 14:08:52 -06:00 · 2013-01-21 14:07:21 -06:00 · 2013-01-21 14:06:50 -06:00 · 2013-01-21 13:52:23 -06:00
1481 changed files with 89765 additions and 31247 deletions
--- a/.exrc
+++ b/.exrc
@@ -0,0 +1,7 @@
+"VIM settings to match QEMU coding style.  They are activated by adding the
+"following settings (without the " symbol) as last two lines in $HOME/.vimrc:
+"set secure
+"set exrc
+set expandtab
+set shiftwidth=4
+set smarttab
--- a/.gitignore
+++ b/.gitignore
@@ -12,8 +12,6 @@ trace-dtrace.dtrace
 *-linux-user
 *-bsd-user
 libdis*
-libhw32
-libhw64
 libuser
 linux-headers/asm
 qapi-generated
--- a/.gitmodules
+++ b/.gitmodules
@@ -19,3 +19,6 @@
 [submodule "roms/sgabios"]
 	path = roms/sgabios
 	url = git://git.qemu.org/sgabios.git
+[submodule "pixman"]
+	path = pixman
+	url = git://anongit.freedesktop.org/pixman
--- a/11
+++ b/11
@@ -32,7 +32,7 @@ mandatory for VMState fields.

 Don't use Linux kernel internal types like u32, __u32 or __le32.

-Use target_phys_addr_t for guest physical addresses except pcibus_t
+Use hwaddr for guest physical addresses except pcibus_t
 for PCI addresses.  In addition, ram_addr_t is a QEMU internal address
 space that maps guest RAM physical addresses into an intermediate
 address space that can map to host virtual address spaces.  Generally
@@ -91,10 +91,11 @@ emulators.

 4. String manipulation

-Do not use the strncpy function.  According to the man page, it does
-*not* guarantee a NULL-terminated buffer, which makes it extremely dangerous
-to use.  Instead, use functionally equivalent function:
-void pstrcpy(char *buf, int buf_size, const char *str)
+Do not use the strncpy function.  As mentioned in the man page, it does *not*
+guarantee a NULL-terminated buffer, which makes it extremely dangerous to use.
+It also zeros trailing destination bytes out to the specified length.  Instead,
+use this similar function when possible, but note its different signature:
+void pstrcpy(char *dest, int dest_buf_size, const char *src)

 Don't use strcat because it can't check for buffer overflows, but:
 char *pstrcat(char *buf, int buf_size, const char *s)
--- a/81
+++ b/81
@@ -268,6 +268,7 @@ S: Maintained
 F: hw/xilinx_zynq.c
 F: hw/zynq_slcr.c
 F: hw/cadence_*
+F: hw/xilinx_spips.c

 CRIS Machines
 -------------
@@ -349,9 +350,31 @@ PowerPC Machines
 405
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc405_boards.c

+Bamboo
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc440_bamboo.c
+
+e500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/e500.[hc]
+F: hw/ppc/e500plat.c
+
+mpc8544ds
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/mpc8544ds.c
+F: hw/mpc8544_guts.c
+
 New World
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
@@ -374,6 +397,19 @@ S: Odd Fixes
 F: hw/ppc_prep.c
 F: hw/prep_pci.[hc]

+sPAPR
+M: David Gibson <david@gibson.dropbear.id.au>
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/spapr*
+
+virtex_ml507
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/virtex_ml507.c
+
 SH4 Machines
 ------------
 R2D
@@ -398,6 +434,12 @@ M: Blue Swirl <blauwirbel@gmail.com>
 S: Maintained
 F: hw/sun4u.c

+Leon3
+M: Fabien Chouteau <chouteau@adacore.com>
+S: Maintained
+F: hw/leon3.c
+F: hw/grlib*
+
 S390 Machines
 -------------
 S390 Virtio
@@ -451,6 +493,19 @@ S: Supported
 F: hw/pci*
 F: hw/piix*

+ppc4xx
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc4xx*.[hc]
+
+ppce500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppce500_*
+
 SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
@@ -463,11 +518,22 @@ M: Paul Brook <paul@codesourcery.com>
 S: Odd Fixes
 F: hw/lsi53c895a.c

+SSI
+M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+S: Maintained
+F: hw/ssi.*
+F: hw/m25p80.c
+
 USB
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/usb*

+VFIO
+M: Alex Williamson <alex.williamson@redhat.com>
+S: Supported
+F: hw/vfio*
+
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -487,6 +553,7 @@ T: git git://github.com/kvaneesh/QEMU.git

 virtio-blk
 M: Kevin Wolf <kwolf@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
 F: hw/virtio-blk*

@@ -506,6 +573,7 @@ F: hw/xilinx_intc.c
 F: hw/xilinx_ethlite.c
 F: hw/xilinx_timer.c
 F: hw/xilinx.h
+F: hw/xilinx_spi.c

 Subsystems
 ----------
@@ -516,6 +584,7 @@ F: audio/

 Block
 M: Kevin Wolf <kwolf@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
 F: block*
 F: block/
@@ -525,6 +594,12 @@ M: Anthony Liguori <aliguori@us.ibm.com>
 S: Maintained
 F: qemu-char.c

+CPU
+M: Andreas Färber <afaerber@suse.de>
+S: Supported
+F: qom/cpu.c
+F: include/qemu/cpu.h
+
 Device Tree
 M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
 M: Alexander Graf <agraf@suse.de>
@@ -568,7 +643,7 @@ F: monitor.c

 Network device layer
 M: Anthony Liguori <aliguori@us.ibm.com>
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: net/
 T: git git://github.com/stefanha/qemu.git net
@@ -588,7 +663,7 @@ F: slirp/
 T: git git://git.kiszka.org/qemu.git queues/slirp

 Tracing
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: trace/
 F: scripts/tracetool.py
--- a/54
+++ b/54
@@ -8,15 +8,28 @@ ifneq ($(wildcard config-host.mak),)
 # Put the all: rule here so that config-host.mak can contain dependencies.
 all:
 include config-host.mak
+
+# Check that we're not trying to do an out-of-tree build from
+# a tree that's been used for an in-tree build.
+ifneq ($(realpath $(SRC_PATH)),$(realpath .))
+ifneq ($(wildcard $(SRC_PATH)/config-host.mak),)
+$(error This is an out of tree build but your source tree ($(SRC_PATH)) \
+seems to have been used for an in-tree build. You can fix this by running \
+"make distclean && rm -rf *-linux-user *-softmmu" in your source tree)
+endif
+endif
+
 include $(SRC_PATH)/rules.mak
 config-host.mak: $(SRC_PATH)/configure
 	@echo $@ is out-of-date, running configure
 	@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
 else
 config-host.mak:
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 	@echo "Please call configure before running make!"
 	@exit 1
 endif
+endif

 GENERATED_HEADERS = config-host.h trace.h qemu-options.def
 ifeq ($(TRACE_BACKEND),dtrace)
@@ -52,8 +65,13 @@ SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory) BUILD_DIR=$(BUILD_DIR)
 SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_DIRS))
 SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %/config-devices.mak.d, $(TARGET_DIRS))

+ifeq ($(SUBDIR_DEVICES_MAK),)
+config-all-devices.mak:
+	$(call quiet-command,echo '# no devices' > $@,"  GEN   $@")
+else
 config-all-devices.mak: $(SUBDIR_DEVICES_MAK)
 	$(call quiet-command,cat $(SUBDIR_DEVICES_MAK) | grep =y | sort -u > $@,"  GEN   $@")
+endif

 -include $(SUBDIR_DEVICES_MAK_DEP)

@@ -100,6 +118,17 @@ endif

 subdir-libcacard: $(oslib-obj-y) $(trace-obj-y) qemu-timer-common.o

+subdir-pixman: pixman/Makefile
+	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pixman V="$(V)" all,)
+
+pixman/Makefile: $(SRC_PATH)/pixman/configure
+	(cd pixman; CFLAGS="$(CFLAGS) -fPIC" $(SRC_PATH)/pixman/configure $(AUTOCONF_HOST) --disable-gtk --disable-shared --enable-static)
+
+$(SRC_PATH)/pixman/configure:
+	(cd $(SRC_PATH)/pixman; autoreconf -v --install)
+
+$(SUBDIR_RULES): libqemustub.a
+
 $(filter %-softmmu,$(SUBDIR_RULES)): $(universal-obj-y) $(trace-obj-y) $(common-obj-y) $(extra-obj-y) subdir-libdis

 $(filter %-user,$(SUBDIR_RULES)): $(universal-obj-y) $(trace-obj-y) subdir-libdis-user subdir-libuser
@@ -130,6 +159,12 @@ version.o: $(SRC_PATH)/version.rc config-host.h
 	$(call quiet-command,$(WINDRES) -I. -o $@ $<,"  RC    $(TARGET_DIR)$@")

 version-obj-$(CONFIG_WIN32) += version.o
+
+######################################################################
+# Build library with stubs
+
+libqemustub.a: $(stub-obj-y)
+
 ######################################################################
 # Support building shared library libcacard

@@ -153,17 +188,16 @@ endif
 qemu-img.o: qemu-img-cmds.h

 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
-	qemu-timer-common.o main-loop.o notify.o \
-	iohandler.o cutils.o iov.o async.o
+	main-loop.o iohandler.o error.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o

-qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
-qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y)
-qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
+qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y) libqemustub.a
+qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) libqemustub.a

 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o

-vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o
+vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) libcacard/vscclient.o libqemustub.a
 	$(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS),"  LINK  $@")

 fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o oslib-posix.o $(trace-obj-y)
@@ -206,9 +240,9 @@ $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)

-qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
+qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(oslib-obj-y) $(trace-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y) libqemustub.a

-QEMULIBS=libhw32 libhw64 libuser libdis libdis-user
+QEMULIBS=libuser libdis libdis-user

 clean:
 # avoid old build problems by removing potentially incorrect old files
@@ -252,6 +286,7 @@ distclean: clean
 	for d in $(TARGET_DIRS) $(QEMULIBS); do \
 	rm -rf $$d || exit 1 ; \
        done
+	if test -f pixman/config.log; then make -C pixman distclean; fi

 KEYMAPS=da     en-gb  et  fr     fr-ch  is  lt  modifiers  no  pt-br  sv \
 ar      de     en-us  fi  fr-be  hr     it  lv  nl         pl  ru     th \
@@ -297,7 +332,6 @@ install-confdir:

 install-sysconfig: install-datadir install-confdir
 	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
-	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/cpus-x86_64.conf "$(DESTDIR)$(qemu_datadir)"

 install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig install-datadir
 	$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
@@ -398,7 +432,9 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \

 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 Makefile: $(GENERATED_HEADERS)
+endif

 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
--- a/Makefile.hw
+++ b/Makefile.hw
@@ -1,23 +0,0 @@
-# Makefile for qemu target independent devices.
-
-include ../config-host.mak
-include ../config-all-devices.mak
-include config.mak
-include $(SRC_PATH)/rules.mak
-
-.PHONY: all
-
-$(call set-vpath, $(SRC_PATH))
-
-QEMU_CFLAGS+=-I..
-QEMU_CFLAGS += -I$(SRC_PATH)/include
-
-include $(SRC_PATH)/Makefile.objs
-
-all: $(hw-obj-y)
-# Dummy command so that make thinks it has done something
-	@true
-
-clean:
-	rm -f $(addsuffix *.o, $(sort $(dir $(hw-obj-y))))
-	rm -f $(addsuffix *.d, $(sort $(dir $(hw-obj-y))))
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,3 +1,7 @@
+#######################################################################
+# Stub library, linked in tools
+stub-obj-y = stubs/
+
 #######################################################################
 # Target-independent parts used in system and user emulation
 universal-obj-y =
@@ -19,7 +23,7 @@ universal-obj-y += $(qom-obj-y)

 #######################################################################
 # oslib-obj-y is code depending on the OS (win32 vs posix)
-oslib-obj-y = osdep.o
+oslib-obj-y = osdep.o cutils.o qemu-timer-common.o
 oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
 oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o

@@ -41,12 +45,14 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

-block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
+block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
+block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
-block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
-block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
+block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
+block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
 block-obj-y += block/
+block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o

 ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
 # Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
@@ -59,10 +65,11 @@ endif
 # suppress *all* target specific code in case of system emulation, i.e. a
 # single QEMU executable should support all CPUs and machines.

-common-obj-y = $(block-obj-y) blockdev.o
+common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
 common-obj-y += net.o net/
 common-obj-y += qom/
 common-obj-y += readline.o console.o cursor.o
+common-obj-y += qemu-pixman.o
 common-obj-y += $(oslib-obj-y)
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
@@ -75,7 +82,6 @@ common-obj-y += input.o
 common-obj-y += buffered_file.o migration.o migration-tcp.o
 common-obj-y += qemu-char.o #aio.o
 common-obj-y += block-migration.o iohandler.o
-common-obj-y += pflib.o
 common-obj-y += bitmap.o bitops.o
 common-obj-y += page_cache.o

@@ -89,13 +95,17 @@ common-obj-y += hw/
 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o

-common-obj-y += iov.o acl.o
+common-obj-y += dma-helpers.o
+common-obj-y += acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
-common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
+common-obj-y += qtest.o
+common-obj-y += vl.o

 common-obj-$(CONFIG_SLIRP) += slirp/

+common-obj-y += backends/
+
 ######################################################################
 # libseccomp
 ifeq ($(CONFIG_SECCOMP),y)
@@ -108,17 +118,12 @@ endif
 user-obj-y =
 user-obj-y += envlist.o path.o
 user-obj-y += tcg-runtime.o host-utils.o
-user-obj-y += cutils.o iov.o cache-utils.o
+user-obj-y += cache-utils.o
 user-obj-y += module.o
 user-obj-y += qemu-user.o
 user-obj-y += $(trace-obj-y)
 user-obj-y += qom/

-######################################################################
-# libhw
-
-hw-obj-y = vl.o dma-helpers.o qtest.o hw/
-
 ######################################################################
 # libdis
 # NOTE: the disassembler code is only needed for debugging
@@ -228,9 +233,8 @@ universal-obj-y += $(qapi-obj-y)
 ######################################################################
 # guest agent

-qga-obj-y = qga/ qemu-ga.o module.o
-qga-obj-$(CONFIG_WIN32) += oslib-win32.o
-qga-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-sockets.o qemu-option.o
+qga-obj-y = qga/ qemu-ga.o module.o qemu-tool.o
+qga-obj-$(CONFIG_POSIX) += qemu-sockets.o qemu-option.o

 vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)

@@ -239,11 +243,11 @@ vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
 QEMU_CFLAGS+=$(GLIB_CFLAGS)

 nested-vars += \
-	hw-obj-y \
+	stub-obj-y \
 	qga-obj-y \
-	block-obj-y \
 	qom-obj-y \
 	qapi-obj-y \
+	block-obj-y \
 	user-obj-y \
 	common-obj-y \
 	extra-obj-y
--- a/Makefile.target
+++ b/Makefile.target
@@ -4,9 +4,6 @@ include ../config-host.mak
 include config-devices.mak
 include config-target.mak
 include $(SRC_PATH)/rules.mak
-ifneq ($(HWDIR),)
-include $(HWDIR)/config.mak
-endif

 $(call set-vpath, $(SRC_PATH))
 ifdef CONFIG_LINUX
@@ -80,14 +77,6 @@ obj-$(CONFIG_GDBSTUB_XML) += gdbstub-xml.o

 tci-dis.o: QEMU_CFLAGS += -I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/tci

-# HELPER_CFLAGS is used for all the legacy code compiled with static register
-# variables
-user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
-# Note: this is a workaround. The real fix is to avoid compiling
-# cpu_signal_handler() in user-exec.c.
-%/signal.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
 #########################################################
 # Linux user emulator target

@@ -154,6 +143,9 @@ GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h

 endif # CONFIG_SOFTMMU

+# Workaround for http://gcc.gnu.org/PR55489, see configure.
+%/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)
+
 nested-vars += obj-y

 # This resolves all nested paths, so it must come last
@@ -165,7 +157,6 @@ all-obj-y += $(addprefix ../, $(universal-obj-y))
 ifdef CONFIG_SOFTMMU
 all-obj-y += $(addprefix ../, $(common-obj-y))
 all-obj-y += $(addprefix ../libdis/, $(libdis-y))
-all-obj-y += $(addprefix $(HWDIR)/, $(hw-obj-y))
 all-obj-y += $(addprefix ../, $(trace-obj-y))
 else
 all-obj-y += $(addprefix ../libuser/, $(user-obj-y))
@@ -174,12 +165,12 @@ endif #CONFIG_LINUX_USER

 ifdef QEMU_PROGW
 # The linker builds a windows executable. Make also a console executable.
-$(QEMU_PROGW): $(all-obj-y)
+$(QEMU_PROGW): $(all-obj-y) ../libqemustub.a
 	$(call LINK,$^)
 $(QEMU_PROG): $(QEMU_PROGW)
 	$(call quiet-command,$(OBJCOPY) --subsystem console $(QEMU_PROGW) $(QEMU_PROG),"  GEN   $(TARGET_DIR)$(QEMU_PROG)")
 else
-$(QEMU_PROG): $(all-obj-y)
+$(QEMU_PROG): $(all-obj-y) ../libqemustub.a
 	$(call LINK,$^)
 endif

--- a/QMP/qemu-ga-client
+++ b/QMP/qemu-ga-client
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+# QEMU Guest Agent Client
+#
+# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota@gmail.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Usage:
+#
+# Start QEMU with:
+#
+# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \
+#   -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
+#
+# Run the script:
+#
+# $ qemu-ga-client --address=/tmp/qga.sock <command> [args...]
+#
+# or
+#
+# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
+# $ qemu-ga-client <command> [args...]
+#
+# For example:
+#
+# $ qemu-ga-client cat /etc/resolv.conf
+# # Generated by NetworkManager
+# nameserver 10.0.2.3
+# $ qemu-ga-client fsfreeze status
+# thawed
+# $ qemu-ga-client fsfreeze freeze
+# 2 filesystems frozen
+#
+# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent
+#
+
+import base64
+import random
+
+import qmp
+
+
+class QemuGuestAgent(qmp.QEMUMonitorProtocol):
+    def __getattr__(self, name):
+        def wrapper(**kwds):
+            return self.command('guest-' + name.replace('_', '-'), **kwds)
+        return wrapper
+
+
+class QemuGuestAgentClient:
+    error = QemuGuestAgent.error
+
+    def __init__(self, address):
+        self.qga = QemuGuestAgent(address)
+        self.qga.connect(negotiate=False)
+
+    def sync(self, timeout=3):
+        # Avoid being blocked forever
+        if not self.ping(timeout):
+            raise EnvironmentError('Agent seems not alive')
+        uid = random.randint(0, (1 << 32) - 1)
+        while True:
+            ret = self.qga.sync(id=uid)
+            if isinstance(ret, int) and int(ret) == uid:
+                break
+
+    def __file_read_all(self, handle):
+        eof = False
+        data = ''
+        while not eof:
+            ret = self.qga.file_read(handle=handle, count=1024)
+            _data = base64.b64decode(ret['buf-b64'])
+            data += _data
+            eof = ret['eof']
+        return data
+
+    def read(self, path):
+        handle = self.qga.file_open(path=path)
+        try:
+            data = self.__file_read_all(handle)
+        finally:
+            self.qga.file_close(handle=handle)
+        return data
+
+    def info(self):
+        info = self.qga.info()
+
+        msgs = []
+        msgs.append('version: ' + info['version'])
+        msgs.append('supported_commands:')
+        enabled = [c['name'] for c in info['supported_commands'] if c['enabled']]
+        msgs.append('\tenabled: ' + ', '.join(enabled))
+        disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']]
+        msgs.append('\tdisabled: ' + ', '.join(disabled))
+
+        return '\n'.join(msgs)
+
+    def __gen_ipv4_netmask(self, prefixlen):
+        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
+        return '.'.join([str(mask >> 24),
+                         str((mask >> 16) & 0xff),
+                         str((mask >> 8) & 0xff),
+                         str(mask & 0xff)])
+
+    def ifconfig(self):
+        nifs = self.qga.network_get_interfaces()
+
+        msgs = []
+        for nif in nifs:
+            msgs.append(nif['name'] + ':')
+            if 'ip-addresses' in nif:
+                for ipaddr in nif['ip-addresses']:
+                    if ipaddr['ip-address-type'] == 'ipv4':
+                        addr = ipaddr['ip-address']
+                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
+                        msgs.append("\tinet %s  netmask %s" % (addr, mask))
+                    elif ipaddr['ip-address-type'] == 'ipv6':
+                        addr = ipaddr['ip-address']
+                        prefix = ipaddr['prefix']
+                        msgs.append("\tinet6 %s  prefixlen %s" % (addr, prefix))
+            if nif['hardware-address'] != '00:00:00:00:00:00':
+                msgs.append("\tether " + nif['hardware-address'])
+
+        return '\n'.join(msgs)
+
+    def ping(self, timeout):
+        self.qga.settimeout(timeout)
+        try:
+            self.qga.ping()
+        except self.qga.timeout:
+            return False
+        return True
+
+    def fsfreeze(self, cmd):
+        if cmd not in ['status', 'freeze', 'thaw']:
+            raise StandardError('Invalid command: ' + cmd)
+
+        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
+
+    def fstrim(self, minimum=0):
+        return getattr(self.qga, 'fstrim')(minimum=minimum)
+
+    def suspend(self, mode):
+        if mode not in ['disk', 'ram', 'hybrid']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            getattr(self.qga, 'suspend' + '_' + mode)()
+            # On error exception will raise
+        except self.qga.timeout:
+            # On success command will timed out
+            return
+
+    def shutdown(self, mode='powerdown'):
+        if mode not in ['powerdown', 'halt', 'reboot']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            self.qga.shutdown(mode=mode)
+        except self.qga.timeout:
+            return
+
+
+def _cmd_cat(client, args):
+    if len(args) != 1:
+        print('Invalid argument')
+        print('Usage: cat <file>')
+        sys.exit(1)
+    print(client.read(args[0]))
+
+
+def _cmd_fsfreeze(client, args):
+    usage = 'Usage: fsfreeze status|freeze|thaw'
+    if len(args) != 1:
+        print('Invalid argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['status', 'freeze', 'thaw']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    cmd = args[0]
+    ret = client.fsfreeze(cmd)
+    if cmd == 'status':
+        print(ret)
+    elif cmd == 'freeze':
+        print("%d filesystems frozen" % ret)
+    else:
+        print("%d filesystems thawed" % ret)
+
+
+def _cmd_fstrim(client, args):
+    if len(args) == 0:
+        minimum = 0
+    else:
+        minimum = int(args[0])
+    print(client.fstrim(minimum))
+
+
+def _cmd_ifconfig(client, args):
+    print(client.ifconfig())
+
+
+def _cmd_info(client, args):
+    print(client.info())
+
+
+def _cmd_ping(client, args):
+    if len(args) == 0:
+        timeout = 3
+    else:
+        timeout = float(args[0])
+    alive = client.ping(timeout)
+    if not alive:
+        print("Not responded in %s sec" % args[0])
+        sys.exit(1)
+
+
+def _cmd_suspend(client, args):
+    usage = 'Usage: suspend disk|ram|hybrid'
+    if len(args) != 1:
+        print('Less argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['disk', 'ram', 'hybrid']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    client.suspend(args[0])
+
+
+def _cmd_shutdown(client, args):
+    client.shutdown()
+_cmd_powerdown = _cmd_shutdown
+
+
+def _cmd_halt(client, args):
+    client.shutdown('halt')
+
+
+def _cmd_reboot(client, args):
+    client.shutdown('reboot')
+
+
+commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
+
+
+def main(address, cmd, args):
+    if not os.path.exists(address):
+        print('%s not found' % address)
+        sys.exit(1)
+
+    if cmd not in commands:
+        print('Invalid command: ' + cmd)
+        print('Available commands: ' + ', '.join(commands))
+        sys.exit(1)
+
+    try:
+        client = QemuGuestAgentClient(address)
+    except QemuGuestAgent.error, e:
+        import errno
+
+        print(e)
+        if e.errno == errno.ECONNREFUSED:
+            print('Hint: qemu is not running?')
+        sys.exit(1)
+
+    if cmd != 'ping':
+        client.sync()
+
+    globals()['_cmd_' + cmd](client, args)
+
+
+if __name__ == '__main__':
+    import sys
+    import os
+    import optparse
+
+    address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None
+
+    usage = "%prog [--address=<unix_path>|<ipv4_address>] <command> [args...]\n"
+    usage += '<command>: ' + ', '.join(commands)
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--address', action='store', type='string',
+                      default=address, help='Specify a ip:port pair or a unix socket path')
+    options, args = parser.parse_args()
+
+    address = options.address
+    if address is None:
+        parser.error('address is not specified')
+        sys.exit(1)
+
+    if len(args) == 0:
+        parser.error('Less argument')
+        sys.exit(1)
+
+    main(address, args[0], args[1:])
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -50,7 +50,8 @@ Emitted when a block job has been cancelled.

 Data:

- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -73,7 +74,8 @@ Emitted when a block job has completed.

 Data:

- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -94,6 +96,46 @@ Example:
               "speed": 0 },
     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }

+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored, the job may fail later
+    "report": error will be reported and the job canceled
+    "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "device": device name (json-string)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+    "data": { "device": "ide0-hd1" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
 DEVICE_TRAY_MOVED
 -----------------

--- a/QMP/qmp-shell
+++ b/QMP/qmp-shell
@@ -33,6 +33,7 @@
 import qmp
 import readline
 import sys
+import pprint

 class QMPCompleter(list):
    def complete(self, text, state):
@@ -52,10 +53,11 @@ class QMPShellBadPort(QMPShellError):
 # TODO: QMPShell's interface is a bit ugly (eg. _fill_completion() and
 #       _execute_cmd()). Let's design a better one.
 class QMPShell(qmp.QEMUMonitorProtocol):
-    def __init__(self, address):
+    def __init__(self, address, pp=None):
        qmp.QEMUMonitorProtocol.__init__(self, self.__get_address(address))
        self._greeting = None
        self._completer = None
+        self._pp = pp

    def __get_address(self, arg):
        """
@@ -114,7 +116,11 @@ class QMPShell(qmp.QEMUMonitorProtocol):
        if resp is None:
            print 'Disconnected'
            return False
-        print resp
+
+        if self._pp is not None:
+            self._pp.pprint(resp)
+        else:
+            print resp
        return True

    def connect(self):
@@ -222,22 +228,36 @@ def die(msg):
 def fail_cmdline(option=None):
    if option:
        sys.stderr.write('ERROR: bad command-line option \'%s\'\n' % option)
-    sys.stderr.write('qemu-shell [ -H ] < UNIX socket path> | < TCP address:port >\n')
+    sys.stderr.write('qemu-shell [ -p ] [ -H ] < UNIX socket path> | < TCP address:port >\n')
    sys.exit(1)

 def main():
    addr = ''
+    qemu = None
+    hmp = False
+    pp = None
+
    try:
-        if len(sys.argv) == 2:
-            qemu = QMPShell(sys.argv[1])
-            addr = sys.argv[1]
-        elif len(sys.argv) == 3:
-            if sys.argv[1] != '-H':
-                fail_cmdline(sys.argv[1])
-            qemu = HMPShell(sys.argv[2])
-            addr = sys.argv[2]
-        else:
-                fail_cmdline()
+        for arg in sys.argv[1:]:
+            if arg == "-H":
+                if qemu is not None:
+                    fail_cmdline(arg)
+                hmp = True
+            elif arg == "-p":
+                if pp is not None:
+                    fail_cmdline(arg)
+                pp = pprint.PrettyPrinter(indent=4)
+            else:
+                if qemu is not None:
+                    fail_cmdline(arg)
+                if hmp:
+                    qemu = HMPShell(arg)
+                else:
+                    qemu = QMPShell(arg, pp)
+                addr = arg
+
+        if qemu is None:
+            fail_cmdline()
    except QMPShellBadPort:
        die('bad port number in command-line')

--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -49,7 +49,6 @@ class QEMUMonitorProtocol:
        return socket.socket(family, socket.SOCK_STREAM)

    def __negotiate_capabilities(self):
-        self.__sockfile = self.__sock.makefile()
        greeting = self.__json_read()
        if greeting is None or not greeting.has_key('QMP'):
            raise QMPConnectError
@@ -73,7 +72,7 @@ class QEMUMonitorProtocol:

    error = socket.error

-    def connect(self):
+    def connect(self, negotiate=True):
        """
        Connect to the QMP Monitor and perform capabilities negotiation.

@@ -83,7 +82,9 @@ class QEMUMonitorProtocol:
        @raise QMPCapabilitiesError if fails to negotiate capabilities
        """
        self.__sock.connect(self.__address)
-        return self.__negotiate_capabilities()
+        self.__sockfile = self.__sock.makefile()
+        if negotiate:
+            return self.__negotiate_capabilities()

    def accept(self):
        """
@@ -95,6 +96,7 @@ class QEMUMonitorProtocol:
        @raise QMPCapabilitiesError if fails to negotiate capabilities
        """
        self.__sock, _ = self.__sock.accept()
+        self.__sockfile = self.__sock.makefile()
        return self.__negotiate_capabilities()

    def cmd_obj(self, qmp_cmd):
@@ -134,6 +136,26 @@ class QEMUMonitorProtocol:
            raise Exception(ret['error']['desc'])
        return ret['return']

+    def pull_event(self, wait=False):
+        """
+        Get and delete the first available QMP event.
+
+        @param wait: block until an event is available (bool)
+        """
+        self.__sock.setblocking(0)
+        try:
+            self.__json_read()
+        except socket.error, err:
+            if err[0] == errno.EAGAIN:
+                # No data available
+                pass
+        self.__sock.setblocking(1)
+        if not self.__events and wait:
+            self.__json_read(only_event=True)
+        event = self.__events[0]
+        del self.__events[0]
+        return event
+
    def get_events(self, wait=False):
        """
        Get a list of available QMP events.
@@ -161,3 +183,8 @@ class QEMUMonitorProtocol:
    def close(self):
        self.__sock.close()
        self.__sockfile.close()
+
+    timeout = socket.timeout
+
+    def settimeout(self, timeout):
+        self.__sock.settimeout(timeout)
--- a/2
+++ b/2
@@ -1 +1 @@
-1.2.0
+1.3.1
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -0,0 +1,269 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu_socket.h"
+
+struct AioHandler
+{
+    GPollFD pfd;
+    IOHandler *io_read;
+    IOHandler *io_write;
+    AioFlushHandler *io_flush;
+    int deleted;
+    void *opaque;
+    QLIST_ENTRY(AioHandler) node;
+};
+
+static AioHandler *find_aio_handler(AioContext *ctx, int fd)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.fd == fd)
+            if (!node->deleted)
+                return node;
+    }
+
+    return NULL;
+}
+
+void aio_set_fd_handler(AioContext *ctx,
+                        int fd,
+                        IOHandler *io_read,
+                        IOHandler *io_write,
+                        AioFlushHandler *io_flush,
+                        void *opaque)
+{
+    AioHandler *node;
+
+    node = find_aio_handler(ctx, fd);
+
+    /* Are we deleting the fd handler? */
+    if (!io_read && !io_write) {
+        if (node) {
+            g_source_remove_poll(&ctx->source, &node->pfd);
+
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->pfd.fd = fd;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+
+            g_source_add_poll(&ctx->source, &node->pfd);
+        }
+        /* Update handler with latest information */
+        node->io_read = io_read;
+        node->io_write = io_write;
+        node->io_flush = io_flush;
+        node->opaque = opaque;
+
+        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
+        node->pfd.events |= (io_write ? G_IO_OUT : 0);
+    }
+
+    aio_notify(ctx);
+}
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *notifier,
+                            EventNotifierHandler *io_read,
+                            AioFlushEventNotifierHandler *io_flush)
+{
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       (IOHandler *)io_read, NULL,
+                       (AioFlushHandler *)io_flush, notifier);
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        int revents;
+
+        /*
+         * FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
+         * main-loop.c is still select based (due to the slirp legacy).
+         * If main-loop.c ever switches to poll, G_IO_ERR should be
+         * tested too.  Dispatching G_IO_ERR to both handlers should be
+         * okay, since handlers need to be ready for spurious wakeups.
+         */
+        revents = node->pfd.revents & node->pfd.events;
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            return true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    static struct timeval tv0;
+    AioHandler *node;
+    fd_set rdfds, wrfds;
+    int max_fd = -1;
+    int ret;
+    bool busy, progress;
+
+    progress = false;
+
+    /*
+     * If there are callbacks left that have been queued, we need to call then.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
+        int revents;
+
+        ctx->walking_handlers++;
+
+        revents = node->pfd.revents & node->pfd.events;
+        node->pfd.revents = 0;
+
+        /* See comment in aio_pending.  */
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+            node->io_read(node->opaque);
+            progress = true;
+        }
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+            node->io_write(node->opaque);
+            progress = true;
+        }
+
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
+        }
+    }
+
+    if (progress && !blocking) {
+        return true;
+    }
+
+    ctx->walking_handlers++;
+
+    FD_ZERO(&rdfds);
+    FD_ZERO(&wrfds);
+
+    /* fill fd sets */
+    busy = false;
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->opaque) == 0) {
+                continue;
+            }
+            busy = true;
+        }
+        if (!node->deleted && node->io_read) {
+            FD_SET(node->pfd.fd, &rdfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
+        }
+        if (!node->deleted && node->io_write) {
+            FD_SET(node->pfd.fd, &wrfds);
+            max_fd = MAX(max_fd, node->pfd.fd + 1);
+        }
+    }
+
+    ctx->walking_handlers--;
+
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
+        return progress;
+    }
+
+    /* wait until next event */
+    ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
+
+    /* if we have any readable fds, dispatch event */
+    if (ret > 0) {
+        /* we have to walk very carefully in case
+         * qemu_aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;
+
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                FD_ISSET(node->pfd.fd, &rdfds) &&
+                node->io_read) {
+                node->io_read(node->opaque);
+                progress = true;
+            }
+            if (!node->deleted &&
+                FD_ISSET(node->pfd.fd, &wrfds) &&
+                node->io_write) {
+                node->io_write(node->opaque);
+                progress = true;
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+    }
+
+    assert(progress || busy);
+    return true;
+}
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -0,0 +1,219 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM Corp., 2008
+ * Copyright Red Hat Inc., 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paolo Bonzini     <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu_socket.h"
+
+struct AioHandler {
+    EventNotifier *e;
+    EventNotifierHandler *io_notify;
+    AioFlushEventNotifierHandler *io_flush;
+    GPollFD pfd;
+    int deleted;
+    QLIST_ENTRY(AioHandler) node;
+};
+
+void aio_set_event_notifier(AioContext *ctx,
+                            EventNotifier *e,
+                            EventNotifierHandler *io_notify,
+                            AioFlushEventNotifierHandler *io_flush)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->e == e && !node->deleted) {
+            break;
+        }
+    }
+
+    /* Are we deleting the fd handler? */
+    if (!io_notify) {
+        if (node) {
+            g_source_remove_poll(&ctx->source, &node->pfd);
+
+            /* If the lock is held, just mark the node as deleted */
+            if (ctx->walking_handlers) {
+                node->deleted = 1;
+                node->pfd.revents = 0;
+            } else {
+                /* Otherwise, delete it for real.  We can't just mark it as
+                 * deleted because deleted nodes are only cleaned up after
+                 * releasing the walking_handlers lock.
+                 */
+                QLIST_REMOVE(node, node);
+                g_free(node);
+            }
+        }
+    } else {
+        if (node == NULL) {
+            /* Alloc and insert if it's not already there */
+            node = g_malloc0(sizeof(AioHandler));
+            node->e = e;
+            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
+            node->pfd.events = G_IO_IN;
+            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
+
+            g_source_add_poll(&ctx->source, &node->pfd);
+        }
+        /* Update handler with latest information */
+        node->io_notify = io_notify;
+        node->io_flush = io_flush;
+    }
+
+    aio_notify(ctx);
+}
+
+bool aio_pending(AioContext *ctx)
+{
+    AioHandler *node;
+
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        if (node->pfd.revents && node->io_notify) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool aio_poll(AioContext *ctx, bool blocking)
+{
+    AioHandler *node;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    bool busy, progress;
+    int count;
+
+    progress = false;
+
+    /*
+     * If there are callbacks left that have been queued, we need to call then.
+     * Do not call select in this case, because it is possible that the caller
+     * does not need a complete flush (as is the case for qemu_aio_wait loops).
+     */
+    if (aio_bh_poll(ctx)) {
+        blocking = false;
+        progress = true;
+    }
+
+    /*
+     * Then dispatch any pending callbacks from the GSource.
+     *
+     * We have to walk very carefully in case qemu_aio_set_fd_handler is
+     * called while we're walking.
+     */
+    node = QLIST_FIRST(&ctx->aio_handlers);
+    while (node) {
+        AioHandler *tmp;
+
+        ctx->walking_handlers++;
+
+        if (node->pfd.revents && node->io_notify) {
+            node->pfd.revents = 0;
+            node->io_notify(node->e);
+            progress = true;
+        }
+
+        tmp = node;
+        node = QLIST_NEXT(node, node);
+
+        ctx->walking_handlers--;
+
+        if (!ctx->walking_handlers && tmp->deleted) {
+            QLIST_REMOVE(tmp, node);
+            g_free(tmp);
+        }
+    }
+
+    if (progress && !blocking) {
+        return true;
+    }
+
+    ctx->walking_handlers++;
+
+    /* fill fd sets */
+    busy = false;
+    count = 0;
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        /* If there aren't pending AIO operations, don't invoke callbacks.
+         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
+         * wait indefinitely.
+         */
+        if (!node->deleted && node->io_flush) {
+            if (node->io_flush(node->e) == 0) {
+                continue;
+            }
+            busy = true;
+        }
+        if (!node->deleted && node->io_notify) {
+            events[count++] = event_notifier_get_handle(node->e);
+        }
+    }
+
+    ctx->walking_handlers--;
+
+    /* No AIO operations?  Get us out of here */
+    if (!busy) {
+        return progress;
+    }
+
+    /* wait until next event */
+    while (count > 0) {
+        int timeout = blocking ? INFINITE : 0;
+        int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+
+        /* if we have any signaled events, dispatch event */
+        if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
+            break;
+        }
+
+        blocking = false;
+
+        /* we have to walk very carefully in case
+         * qemu_aio_set_fd_handler is called while we're walking */
+        node = QLIST_FIRST(&ctx->aio_handlers);
+        while (node) {
+            AioHandler *tmp;
+
+            ctx->walking_handlers++;
+
+            if (!node->deleted &&
+                event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
+                node->io_notify) {
+                node->io_notify(node->e);
+                progress = true;
+            }
+
+            tmp = node;
+            node = QLIST_NEXT(node, node);
+
+            ctx->walking_handlers--;
+
+            if (!ctx->walking_handlers && tmp->deleted) {
+                QLIST_REMOVE(tmp, node);
+                g_free(tmp);
+            }
+        }
+
+        /* Try again, but only call each handler once.  */
+        events[ret - WAIT_OBJECT_0] = events[--count];
+    }
+
+    assert(progress || busy);
+    return true;
+}
--- a/aio.c
+++ b/aio.c
@@ -1,194 +0,0 @@
-/*
- * QEMU aio implementation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu-common.h"
-#include "block.h"
-#include "qemu-queue.h"
-#include "qemu_socket.h"
-
-typedef struct AioHandler AioHandler;
-
-/* The list of registered AIO handlers */
-static QLIST_HEAD(, AioHandler) aio_handlers;
-
-/* This is a simple lock used to protect the aio_handlers list.  Specifically,
- * it's used to ensure that no callbacks are removed while we're walking and
- * dispatching callbacks.
- */
-static int walking_handlers;
-
-struct AioHandler
-{
-    int fd;
-    IOHandler *io_read;
-    IOHandler *io_write;
-    AioFlushHandler *io_flush;
-    int deleted;
-    void *opaque;
-    QLIST_ENTRY(AioHandler) node;
-};
-
-static AioHandler *find_aio_handler(int fd)
-{
-    AioHandler *node;
-
-    QLIST_FOREACH(node, &aio_handlers, node) {
-        if (node->fd == fd)
-            if (!node->deleted)
-                return node;
-    }
-
-    return NULL;
-}
-
-int qemu_aio_set_fd_handler(int fd,
-                            IOHandler *io_read,
-                            IOHandler *io_write,
-                            AioFlushHandler *io_flush,
-                            void *opaque)
-{
-    AioHandler *node;
-
-    node = find_aio_handler(fd);
-
-    /* Are we deleting the fd handler? */
-    if (!io_read && !io_write) {
-        if (node) {
-            /* If the lock is held, just mark the node as deleted */
-            if (walking_handlers)
-                node->deleted = 1;
-            else {
-                /* Otherwise, delete it for real.  We can't just mark it as
-                 * deleted because deleted nodes are only cleaned up after
-                 * releasing the walking_handlers lock.
-                 */
-                QLIST_REMOVE(node, node);
-                g_free(node);
-            }
-        }
-    } else {
-        if (node == NULL) {
-            /* Alloc and insert if it's not already there */
-            node = g_malloc0(sizeof(AioHandler));
-            node->fd = fd;
-            QLIST_INSERT_HEAD(&aio_handlers, node, node);
-        }
-        /* Update handler with latest information */
-        node->io_read = io_read;
-        node->io_write = io_write;
-        node->io_flush = io_flush;
-        node->opaque = opaque;
-    }
-
-    qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
-
-    return 0;
-}
-
-void qemu_aio_flush(void)
-{
-    while (qemu_aio_wait());
-}
-
-bool qemu_aio_wait(void)
-{
-    AioHandler *node;
-    fd_set rdfds, wrfds;
-    int max_fd = -1;
-    int ret;
-    bool busy;
-
-    /*
-     * If there are callbacks left that have been queued, we need to call then.
-     * Do not call select in this case, because it is possible that the caller
-     * does not need a complete flush (as is the case for qemu_aio_wait loops).
-     */
-    if (qemu_bh_poll()) {
-        return true;
-    }
-
-    walking_handlers = 1;
-
-    FD_ZERO(&rdfds);
-    FD_ZERO(&wrfds);
-
-    /* fill fd sets */
-    busy = false;
-    QLIST_FOREACH(node, &aio_handlers, node) {
-        /* If there aren't pending AIO operations, don't invoke callbacks.
-         * Otherwise, if there are no AIO requests, qemu_aio_wait() would
-         * wait indefinitely.
-         */
-        if (node->io_flush) {
-            if (node->io_flush(node->opaque) == 0) {
-                continue;
-            }
-            busy = true;
-        }
-        if (!node->deleted && node->io_read) {
-            FD_SET(node->fd, &rdfds);
-            max_fd = MAX(max_fd, node->fd + 1);
-        }
-        if (!node->deleted && node->io_write) {
-            FD_SET(node->fd, &wrfds);
-            max_fd = MAX(max_fd, node->fd + 1);
-        }
-    }
-
-    walking_handlers = 0;
-
-    /* No AIO operations?  Get us out of here */
-    if (!busy) {
-        return false;
-    }
-
-    /* wait until next event */
-    ret = select(max_fd, &rdfds, &wrfds, NULL, NULL);
-
-    /* if we have any readable fds, dispatch event */
-    if (ret > 0) {
-        walking_handlers = 1;
-
-        /* we have to walk very carefully in case
-         * qemu_aio_set_fd_handler is called while we're walking */
-        node = QLIST_FIRST(&aio_handlers);
-        while (node) {
-            AioHandler *tmp;
-
-            if (!node->deleted &&
-                FD_ISSET(node->fd, &rdfds) &&
-                node->io_read) {
-                node->io_read(node->opaque);
-            }
-            if (!node->deleted &&
-                FD_ISSET(node->fd, &wrfds) &&
-                node->io_write) {
-                node->io_write(node->opaque);
-            }
-
-            tmp = node;
-            node = QLIST_NEXT(node, node);
-
-            if (tmp->deleted) {
-                QLIST_REMOVE(tmp, node);
-                g_free(tmp);
-            }
-        }
-
-        walking_handlers = 0;
-    }
-
-    return true;
-}
--- a/arch_init.c
+++ b/arch_init.c
@@ -31,6 +31,8 @@
 #include "config.h"
 #include "monitor.h"
 #include "sysemu.h"
+#include "bitops.h"
+#include "bitmap.h"
 #include "arch_init.h"
 #include "audio/audio.h"
 #include "hw/pc.h"
@@ -45,6 +47,7 @@
 #include "hw/pcspk.h"
 #include "qemu/page_cache.h"
 #include "qmp-commands.h"
+#include "trace.h"

 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -136,7 +139,6 @@ static struct defconfig_file {
    /* Indicates it is an user config file (disabled by -no-user-config) */
    bool userconfig;
 } default_config_files[] = {
-    { CONFIG_QEMU_DATADIR "/cpus-" TARGET_ARCH ".conf",  false },
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
    { NULL }, /* end of list */
@@ -331,6 +333,78 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,

 static RAMBlock *last_block;
 static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+
+static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
+                                                         ram_addr_t offset)
+{
+    bool ret;
+    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+
+    ret = test_and_clear_bit(nr, migration_bitmap);
+
+    if (ret) {
+        migration_dirty_pages--;
+    }
+    return ret;
+}
+
+static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
+                                              ram_addr_t offset)
+{
+    bool ret;
+    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+
+    ret = test_and_set_bit(nr, migration_bitmap);
+
+    if (!ret) {
+        migration_dirty_pages++;
+    }
+    return ret;
+}
+
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    ram_addr_t addr;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    static int64_t start_time;
+    static int64_t num_dirty_pages_period;
+    int64_t end_time;
+
+    if (!start_time) {
+        start_time = qemu_get_clock_ms(rt_clock);
+    }
+
+    trace_migration_bitmap_sync_start();
+    memory_global_sync_dirty_bitmap(get_system_memory());
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
+            if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
+                                        DIRTY_MEMORY_MIGRATION)) {
+                migration_bitmap_set_dirty(block->mr, addr);
+            }
+        }
+        memory_region_reset_dirty(block->mr, 0, block->length,
+                                  DIRTY_MEMORY_MIGRATION);
+    }
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_get_clock_ms(rt_clock);
+
+    /* more than 1 second = 1000 millisecons */
+    if (end_time > start_time + 1000) {
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+}
+

 /*
 * ram_save_block: Writes a page of memory to the stream f
@@ -353,14 +427,10 @@ static int ram_save_block(QEMUFile *f, bool last_stage)

    do {
        mr = block->mr;
-        if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                    DIRTY_MEMORY_MIGRATION)) {
+        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
            uint8_t *p;
            int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;

-            memory_region_reset_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                      DIRTY_MEMORY_MIGRATION);
-
            p = memory_region_get_ram_ptr(mr) + offset;

            if (is_dup_page(p)) {
@@ -410,7 +480,7 @@ static uint64_t bytes_transferred;

 static ram_addr_t ram_save_remaining(void)
 {
-    return ram_list.dirty_pages;
+    return migration_dirty_pages;
 }

 uint64_t ram_bytes_remaining(void)
@@ -465,9 +535,13 @@ static void sort_ram_list(void)

 static void migration_end(void)
 {
-    memory_global_dirty_log_stop();
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;
+    }

-    if (migrate_use_xbzrle()) {
+    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
@@ -482,17 +556,27 @@ static void ram_migration_cancel(void *opaque)
    migration_end();
 }

+
+static void reset_ram_globals(void)
+{
+    last_block = NULL;
+    last_offset = 0;
+    sort_ram_list();
+}
+
 #define MAX_WAIT 50 /* ms, half buffered_file limit */

 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
-    ram_addr_t addr;
    RAMBlock *block;
+    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    migration_bitmap = bitmap_new(ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages);
+    migration_dirty_pages = ram_pages;

    bytes_transferred = 0;
-    last_block = NULL;
-    last_offset = 0;
-    sort_ram_list();
+    reset_ram_globals();

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -507,17 +591,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
        acct_clear();
    }

-    /* Make sure all dirty bits are set */
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (!memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
-                                         DIRTY_MEMORY_MIGRATION)) {
-                memory_region_set_dirty(block->mr, addr, TARGET_PAGE_SIZE);
-            }
-        }
-    }
-
    memory_global_dirty_log_start();
+    migration_bitmap_sync();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

@@ -538,7 +613,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
    double bwidth = 0;
    int ret;
    int i;
-    uint64_t expected_time;
+    uint64_t expected_downtime;
+    MigrationState *s = migrate_get_current();

    bytes_transferred_last = bytes_transferred;
    bwidth = qemu_get_clock_ns(rt_clock);
@@ -562,7 +638,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000;
            if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n",
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
@@ -577,31 +653,32 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;

-    /* if we haven't transferred anything this round, force expected_time to a
-     * a very high value, but without crashing */
+    /* if we haven't transferred anything this round, force
+     * expected_downtime to a very high value, but without
+     * crashing */
    if (bwidth == 0) {
        bwidth = 0.000001;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(" PRIu64 ")?\n",
+            expected_downtime, migrate_max_downtime());

-    DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n",
-            expected_time, migrate_max_downtime());
+    if (expected_downtime <= migrate_max_downtime()) {
+        migration_bitmap_sync();
+        expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+        s->expected_downtime = expected_downtime / 1000000; /* ns -> ms */

-    if (expected_time <= migrate_max_downtime()) {
-        memory_global_sync_dirty_bitmap(get_system_memory());
-        expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-        return expected_time <= migrate_max_downtime();
+        return expected_downtime <= migrate_max_downtime();
    }
    return 0;
 }

 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
-    memory_global_sync_dirty_bitmap(get_system_memory());
+    migration_bitmap_sync();

    /* try transferring iterative blocks of memory */

@@ -616,7 +693,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
        }
        bytes_transferred += bytes_sent;
    }
-    memory_global_dirty_log_stop();
+    migration_end();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

@@ -764,7 +841,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
            memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
            if (ch == 0 &&
-                (!kvm_enabled() || kvm_has_sync_mmu())) {
+                (!kvm_enabled() || kvm_has_sync_mmu()) &&
+                getpagesize() <= TARGET_PAGE_SIZE) {
                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
            }
 #endif
@@ -799,8 +877,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
    } while (!(flags & RAM_SAVE_FLAG_EOS));

 done:
-    DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n",
-            ret, seq_iter);
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
 }

@@ -922,11 +1000,16 @@ void select_soundhw(const char *optarg)
    if (is_help_option(optarg)) {
    show_valid_cards:

+#ifdef HAS_AUDIO_CHOICE
        printf("Valid sound card names (comma separated):\n");
        for (c = soundhw; c->name; ++c) {
            printf ("%-11s %s\n", c->name, c->descr);
        }
        printf("\n-soundhw all will enable all of the above\n");
+#else
+        printf("Machine has no user-selectable audio hardware "
+               "(it may or may not have always-present audio hardware).\n");
+#endif
        exit(!is_help_option(optarg));
    }
    else {
--- a/arch_init.h
+++ b/arch_init.h
@@ -34,6 +34,6 @@ int tcg_available(void);
 int kvm_available(void);
 int xen_available(void);

-CpuDefinitionInfoList GCC_WEAK_DECL *arch_query_cpu_definitions(Error **errp);
+CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp);

 #endif
--- a/async.c
+++ b/async.c
@@ -26,13 +26,11 @@
 #include "qemu-aio.h"
 #include "main-loop.h"

-/* Anchor of the list of Bottom Halves belonging to the context */
-static struct QEMUBH *first_bh;
-
 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */

 struct QEMUBH {
+    AioContext *ctx;
    QEMUBHFunc *cb;
    void *opaque;
    QEMUBH *next;
@@ -41,27 +39,27 @@ struct QEMUBH {
    bool deleted;
 };

-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
    QEMUBH *bh;
    bh = g_malloc0(sizeof(QEMUBH));
+    bh->ctx = ctx;
    bh->cb = cb;
    bh->opaque = opaque;
-    bh->next = first_bh;
-    first_bh = bh;
+    bh->next = ctx->first_bh;
+    ctx->first_bh = bh;
    return bh;
 }

-int qemu_bh_poll(void)
+int aio_bh_poll(AioContext *ctx)
 {
    QEMUBH *bh, **bhp, *next;
    int ret;
-    static int nesting = 0;

-    nesting++;
+    ctx->walking_bh++;

    ret = 0;
-    for (bh = first_bh; bh; bh = next) {
+    for (bh = ctx->first_bh; bh; bh = next) {
        next = bh->next;
        if (!bh->deleted && bh->scheduled) {
            bh->scheduled = 0;
@@ -72,11 +70,11 @@ int qemu_bh_poll(void)
        }
    }

-    nesting--;
+    ctx->walking_bh--;

    /* remove deleted bhs */
-    if (!nesting) {
-        bhp = &first_bh;
+    if (!ctx->walking_bh) {
+        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted) {
@@ -105,8 +103,7 @@ void qemu_bh_schedule(QEMUBH *bh)
        return;
    bh->scheduled = 1;
    bh->idle = 0;
-    /* stop the currently executing CPU to execute the BH ASAP */
-    qemu_notify_event();
+    aio_notify(bh->ctx);
 }

 void qemu_bh_cancel(QEMUBH *bh)
@@ -120,23 +117,106 @@ void qemu_bh_delete(QEMUBH *bh)
    bh->deleted = 1;
 }

-void qemu_bh_update_timeout(uint32_t *timeout)
+static gboolean
+aio_ctx_prepare(GSource *source, gint    *timeout)
 {
+    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

-    for (bh = first_bh; bh; bh = bh->next) {
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (!bh->deleted && bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
-                *timeout = MIN(10, *timeout);
+                *timeout = 10;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
                *timeout = 0;
-                break;
+                return true;
            }
        }
    }
+
+    return false;
 }

+static gboolean
+aio_ctx_check(GSource *source)
+{
+    AioContext *ctx = (AioContext *) source;
+    QEMUBH *bh;
+
+    for (bh = ctx->first_bh; bh; bh = bh->next) {
+        if (!bh->deleted && bh->scheduled) {
+            return true;
+	}
+    }
+    return aio_pending(ctx);
+}
+
+static gboolean
+aio_ctx_dispatch(GSource     *source,
+                 GSourceFunc  callback,
+                 gpointer     user_data)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    assert(callback == NULL);
+    aio_poll(ctx, false);
+    return true;
+}
+
+static void
+aio_ctx_finalize(GSource     *source)
+{
+    AioContext *ctx = (AioContext *) source;
+
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
+    event_notifier_cleanup(&ctx->notifier);
+}
+
+static GSourceFuncs aio_source_funcs = {
+    aio_ctx_prepare,
+    aio_ctx_check,
+    aio_ctx_dispatch,
+    aio_ctx_finalize
+};
+
+GSource *aio_get_g_source(AioContext *ctx)
+{
+    g_source_ref(&ctx->source);
+    return &ctx->source;
+}
+
+void aio_notify(AioContext *ctx)
+{
+    event_notifier_set(&ctx->notifier);
+}
+
+AioContext *aio_context_new(void)
+{
+    AioContext *ctx;
+    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+    event_notifier_init(&ctx->notifier, false);
+    aio_set_event_notifier(ctx, &ctx->notifier, 
+                           (EventNotifierHandler *)
+                           event_notifier_test_and_clear, NULL);
+
+    return ctx;
+}
+
+void aio_context_ref(AioContext *ctx)
+{
+    g_source_ref(&ctx->source);
+}
+
+void aio_context_unref(AioContext *ctx)
+{
+    g_source_unref(&ctx->source);
+}
+
+void aio_flush(AioContext *ctx)
+{
+    while (aio_poll(ctx, true));
+}
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -410,15 +410,15 @@ SW *glue (AUD_open_, TYPE) (
    SW *old_sw = NULL;
 #endif

-    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
-            name, as->freq, as->nchannels, as->fmt);
-
    if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) {
        dolog ("card=%p name=%p callback_fn=%p as=%p\n",
               card, name, callback_fn, as);
        goto fail;
    }

+    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
+            name, as->freq, as->nchannels, as->fmt);
+
    if (audio_bug (AUDIO_FUNC, audio_validate_settings (as))) {
        audio_print_settings (as);
        goto fail;
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -0,0 +1,2 @@
+common-obj-y += rng.o rng-egd.o
+common-obj-$(CONFIG_POSIX) += rng-random.o
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -0,0 +1,224 @@
+/*
+ * QEMU Random Number Generator Backend
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/rng.h"
+#include "qemu-char.h"
+#include "qerror.h"
+#include "hw/qdev.h" /* just for DEFINE_PROP_CHR */
+
+#define TYPE_RNG_EGD "rng-egd"
+#define RNG_EGD(obj) OBJECT_CHECK(RngEgd, (obj), TYPE_RNG_EGD)
+
+typedef struct RngEgd
+{
+    RngBackend parent;
+
+    CharDriverState *chr;
+    char *chr_name;
+
+    GSList *requests;
+} RngEgd;
+
+typedef struct RngRequest
+{
+    EntropyReceiveFunc *receive_entropy;
+    uint8_t *data;
+    void *opaque;
+    size_t offset;
+    size_t size;
+} RngRequest;
+
+static void rng_egd_request_entropy(RngBackend *b, size_t size,
+                                    EntropyReceiveFunc *receive_entropy,
+                                    void *opaque)
+{
+    RngEgd *s = RNG_EGD(b);
+    RngRequest *req;
+
+    req = g_malloc(sizeof(*req));
+
+    req->offset = 0;
+    req->size = size;
+    req->receive_entropy = receive_entropy;
+    req->opaque = opaque;
+    req->data = g_malloc(req->size);
+
+    while (size > 0) {
+        uint8_t header[2];
+        uint8_t len = MIN(size, 255);
+
+        /* synchronous entropy request */
+        header[0] = 0x02;
+        header[1] = len;
+
+        qemu_chr_fe_write(s->chr, header, sizeof(header));
+
+        size -= len;
+    }
+
+    s->requests = g_slist_append(s->requests, req);
+}
+
+static void rng_egd_free_request(RngRequest *req)
+{
+    g_free(req->data);
+    g_free(req);
+}
+
+static int rng_egd_chr_can_read(void *opaque)
+{
+    RngEgd *s = RNG_EGD(opaque);
+    GSList *i;
+    int size = 0;
+
+    for (i = s->requests; i; i = i->next) {
+        RngRequest *req = i->data;
+        size += req->size - req->offset;
+    }
+
+    return size;
+}
+
+static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    RngEgd *s = RNG_EGD(opaque);
+
+    while (size > 0 && s->requests) {
+        RngRequest *req = s->requests->data;
+        int len = MIN(size, req->size - req->offset);
+
+        memcpy(req->data + req->offset, buf, len);
+        req->offset += len;
+        size -= len;
+
+        if (req->offset == req->size) {
+            s->requests = g_slist_remove_link(s->requests, s->requests);
+
+            req->receive_entropy(req->opaque, req->data, req->size);
+
+            rng_egd_free_request(req);
+        }
+    }
+}
+
+static void rng_egd_free_requests(RngEgd *s)
+{
+    GSList *i;
+
+    for (i = s->requests; i; i = i->next) {
+        rng_egd_free_request(i->data);
+    }
+
+    g_slist_free(s->requests);
+    s->requests = NULL;
+}
+
+static void rng_egd_cancel_requests(RngBackend *b)
+{
+    RngEgd *s = RNG_EGD(b);
+
+    /* We simply delete the list of pending requests.  If there is data in the 
+     * queue waiting to be read, this is okay, because there will always be
+     * more data than we requested originally
+     */
+    rng_egd_free_requests(s);
+}
+
+static void rng_egd_opened(RngBackend *b, Error **errp)
+{
+    RngEgd *s = RNG_EGD(b);
+
+    if (s->chr_name == NULL) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "chardev", "a valid character device");
+        return;
+    }
+
+    s->chr = qemu_chr_find(s->chr_name);
+    if (s->chr == NULL) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, s->chr_name);
+        return;
+    }
+
+    /* FIXME we should resubmit pending requests when the CDS reconnects. */
+    qemu_chr_add_handlers(s->chr, rng_egd_chr_can_read, rng_egd_chr_read,
+                          NULL, s);
+}
+
+static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
+{
+    RngBackend *b = RNG_BACKEND(obj);
+    RngEgd *s = RNG_EGD(b);
+
+    if (b->opened) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+    } else {
+        g_free(s->chr_name);
+        s->chr_name = g_strdup(value);
+    }
+}
+
+static char *rng_egd_get_chardev(Object *obj, Error **errp)
+{
+    RngEgd *s = RNG_EGD(obj);
+
+    if (s->chr && s->chr->label) {
+        return g_strdup(s->chr->label);
+    }
+
+    return NULL;
+}
+
+static void rng_egd_init(Object *obj)
+{
+    object_property_add_str(obj, "chardev",
+                            rng_egd_get_chardev, rng_egd_set_chardev,
+                            NULL);
+}
+
+static void rng_egd_finalize(Object *obj)
+{
+    RngEgd *s = RNG_EGD(obj);
+
+    if (s->chr) {
+        qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL);
+    }
+
+    g_free(s->chr_name);
+
+    rng_egd_free_requests(s);
+}
+
+static void rng_egd_class_init(ObjectClass *klass, void *data)
+{
+    RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
+
+    rbc->request_entropy = rng_egd_request_entropy;
+    rbc->cancel_requests = rng_egd_cancel_requests;
+    rbc->opened = rng_egd_opened;
+}
+
+static TypeInfo rng_egd_info = {
+    .name = TYPE_RNG_EGD,
+    .parent = TYPE_RNG_BACKEND,
+    .instance_size = sizeof(RngEgd),
+    .class_init = rng_egd_class_init,
+    .instance_init = rng_egd_init,
+    .instance_finalize = rng_egd_finalize,
+};
+
+static void register_types(void)
+{
+    type_register_static(&rng_egd_info);
+}
+
+type_init(register_types);
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -0,0 +1,161 @@
+/*
+ * QEMU Random Number Generator Backend
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/rng-random.h"
+#include "qemu/rng.h"
+#include "qerror.h"
+#include "main-loop.h"
+
+struct RndRandom
+{
+    RngBackend parent;
+
+    int fd;
+    char *filename;
+
+    EntropyReceiveFunc *receive_func;
+    void *opaque;
+    size_t size;
+};
+
+/**
+ * A simple and incomplete backend to request entropy from /dev/random.
+ *
+ * This backend exposes an additional "filename" property that can be used to
+ * set the filename to use to open the backend.
+ */
+
+static void entropy_available(void *opaque)
+{
+    RndRandom *s = RNG_RANDOM(opaque);
+    uint8_t buffer[s->size];
+    ssize_t len;
+
+    len = read(s->fd, buffer, s->size);
+    g_assert(len != -1);
+
+    s->receive_func(s->opaque, buffer, len);
+    s->receive_func = NULL;
+
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+}
+
+static void rng_random_request_entropy(RngBackend *b, size_t size,
+                                        EntropyReceiveFunc *receive_entropy,
+                                        void *opaque)
+{
+    RndRandom *s = RNG_RANDOM(b);
+
+    if (s->receive_func) {
+        s->receive_func(s->opaque, NULL, 0);
+    }
+
+    s->receive_func = receive_entropy;
+    s->opaque = opaque;
+    s->size = size;
+
+    qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
+}
+
+static void rng_random_opened(RngBackend *b, Error **errp)
+{
+    RndRandom *s = RNG_RANDOM(b);
+
+    if (s->filename == NULL) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "filename", "a valid filename");
+    } else {
+        s->fd = open(s->filename, O_RDONLY | O_NONBLOCK);
+
+        if (s->fd == -1) {
+            error_set(errp, QERR_OPEN_FILE_FAILED, s->filename);
+        }
+    }
+}
+
+static char *rng_random_get_filename(Object *obj, Error **errp)
+{
+    RndRandom *s = RNG_RANDOM(obj);
+
+    if (s->filename) {
+        return g_strdup(s->filename);
+    }
+
+    return NULL;
+}
+
+static void rng_random_set_filename(Object *obj, const char *filename,
+                                 Error **errp)
+{
+    RngBackend *b = RNG_BACKEND(obj);
+    RndRandom *s = RNG_RANDOM(obj);
+
+    if (b->opened) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    if (s->filename) {
+        g_free(s->filename);
+    }
+
+    s->filename = g_strdup(filename);
+}
+
+static void rng_random_init(Object *obj)
+{
+    RndRandom *s = RNG_RANDOM(obj);
+
+    object_property_add_str(obj, "filename",
+                            rng_random_get_filename,
+                            rng_random_set_filename,
+                            NULL);
+
+    s->filename = g_strdup("/dev/random");
+}
+
+static void rng_random_finalize(Object *obj)
+{
+    RndRandom *s = RNG_RANDOM(obj);
+
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+
+    if (s->fd != -1) {
+        close(s->fd);
+    }
+
+    g_free(s->filename);
+}
+
+static void rng_random_class_init(ObjectClass *klass, void *data)
+{
+    RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
+
+    rbc->request_entropy = rng_random_request_entropy;
+    rbc->opened = rng_random_opened;
+}
+
+static TypeInfo rng_random_info = {
+    .name = TYPE_RNG_RANDOM,
+    .parent = TYPE_RNG_BACKEND,
+    .instance_size = sizeof(RndRandom),
+    .class_init = rng_random_class_init,
+    .instance_init = rng_random_init,
+    .instance_finalize = rng_random_finalize,
+};
+
+static void register_types(void)
+{
+    type_register_static(&rng_random_info);
+}
+
+type_init(register_types);
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -0,0 +1,93 @@
+/*
+ * QEMU Random Number Generator Backend
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/rng.h"
+#include "qerror.h"
+
+void rng_backend_request_entropy(RngBackend *s, size_t size,
+                                 EntropyReceiveFunc *receive_entropy,
+                                 void *opaque)
+{
+    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+
+    if (k->request_entropy) {
+        k->request_entropy(s, size, receive_entropy, opaque);
+    }
+}
+
+void rng_backend_cancel_requests(RngBackend *s)
+{
+    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+
+    if (k->cancel_requests) {
+        k->cancel_requests(s);
+    }
+}
+
+static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
+{
+    RngBackend *s = RNG_BACKEND(obj);
+
+    return s->opened;
+}
+
+void rng_backend_open(RngBackend *s, Error **errp)
+{
+    object_property_set_bool(OBJECT(s), true, "opened", errp);
+}
+
+static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp)
+{
+    RngBackend *s = RNG_BACKEND(obj);
+    RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+
+    if (value == s->opened) {
+        return;
+    }
+
+    if (!value && s->opened) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    if (k->opened) {
+        k->opened(s, errp);
+    }
+
+    if (!error_is_set(errp)) {
+        s->opened = value;
+    }
+}
+
+static void rng_backend_init(Object *obj)
+{
+    object_property_add_bool(obj, "opened",
+                             rng_backend_prop_get_opened,
+                             rng_backend_prop_set_opened,
+                             NULL);
+}
+
+static TypeInfo rng_backend_info = {
+    .name = TYPE_RNG_BACKEND,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(RngBackend),
+    .instance_init = rng_backend_init,
+    .class_size = sizeof(RngBackendClass),
+    .abstract = true,
+};
+
+static void register_types(void)
+{
+    type_register_static(&rng_backend_info);
+}
+
+type_init(register_types);
--- a/block-migration.c
+++ b/block-migration.c
@@ -423,20 +423,23 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,

 error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
-    qemu_file_set_error(f, ret);
    g_free(blk->buf);
    g_free(blk);
-    return 0;
+    return ret;
 }

+/* return value:
+ * 0: too much data for max_downtime
+ * 1: few enough data for max_downtime
+*/
 static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
 {
    BlkMigDevState *bmds;
-    int ret = 0;
+    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        if (mig_save_device_dirty(f, bmds, is_async) == 0) {
-            ret = 1;
+        ret = mig_save_device_dirty(f, bmds, is_async);
+        if (ret <= 0) {
            break;
        }
    }
@@ -444,9 +447,10 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
    return ret;
 }

-static void flush_blks(QEMUFile* f)
+static int flush_blks(QEMUFile *f)
 {
    BlkMigBlock *blk;
+    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
@@ -457,7 +461,7 @@ static void flush_blks(QEMUFile* f)
            break;
        }
        if (blk->ret < 0) {
-            qemu_file_set_error(f, blk->ret);
+            ret = blk->ret;
            break;
        }
        blk_send(f, blk);
@@ -474,6 +478,7 @@ static void flush_blks(QEMUFile* f)
    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
+    return ret;
 }

 static int64_t get_remaining_dirty(void)
@@ -519,6 +524,8 @@ static void blk_mig_cleanup(void)
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

+    bdrv_drain_all();
+
    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
@@ -553,9 +560,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
    /* start track dirty blocks */
    set_dirty_tracking(1);

-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
@@ -575,9 +580,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
@@ -596,16 +599,19 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
                block_mig_state.bulk_completed = 1;
            }
        } else {
-            if (blk_mig_save_dirty_block(f, 1) == 0) {
+            ret = blk_mig_save_dirty_block(f, 1);
+            if (ret != 0) {
                /* no more dirty blocks */
                break;
            }
        }
    }
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
+    }

-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
@@ -623,9 +629,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
@@ -637,18 +641,16 @@ static int block_save_complete(QEMUFile *f, void *opaque)
       all async read completed */
    assert(block_mig_state.submitted == 0);

-    while (blk_mig_save_dirty_block(f, 0) != 0) {
-        /* Do nothing */
-    }
+    do {
+        ret = blk_mig_save_dirty_block(f, 0);
+    } while (ret == 0);
+
    blk_mig_cleanup();
-
-    /* report completion */
-    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
-
-    ret = qemu_file_get_error(f);
    if (ret) {
        return ret;
    }
+    /* report completion */
+    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

--- a/block.c
+++ b/block.c
--- a/block.h
+++ b/block.h
@@ -6,9 +6,11 @@
 #include "qemu-option.h"
 #include "qemu-coroutine.h"
 #include "qobject.h"
+#include "qapi-types.h"

 /* block.c */
 typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;

 typedef struct BlockDriverInfo {
    /* in bytes, 0 if irrelevant */
@@ -80,6 +82,7 @@ typedef struct BlockDevOps {
 #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
 #define BDRV_O_INCOMING    0x0800  /* consistency hint for incoming migration */
 #define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */

 #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)

@@ -88,21 +91,23 @@ typedef struct BlockDevOps {
 #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)

 typedef enum {
-    BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
-    BLOCK_ERR_STOP_ANY
+    BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockErrorAction;

-typedef enum {
-    BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockQMPEventAction;
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+    BlockDriverState *bs;
+    int flags;
+    void *opaque;
+} BDRVReopenState;
+

 void bdrv_iostatus_enable(BlockDriverState *bs);
 void bdrv_iostatus_reset(BlockDriverState *bs);
 void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -128,9 +133,19 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_delete(BlockDriverState *bs);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs);
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+                        BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
 int bdrv_attach_dev(BlockDriverState *bs, void *dev);
 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
 void bdrv_detach_dev(BlockDriverState *bs, void *dev);
@@ -185,6 +200,11 @@ int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);


 typedef struct BdrvCheckResult {
@@ -259,9 +279,12 @@ int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum);

-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error);
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
@@ -292,6 +315,8 @@ void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size);
 void bdrv_get_full_backing_filename(BlockDriverState *bs,
                                    char *dest, size_t sz);
+BlockInfo *bdrv_query_info(BlockDriverState *s);
+BlockStats *bdrv_query_stats(const BlockDriverState *bs);
 int bdrv_can_snapshot(BlockDriverState *bs);
 int bdrv_is_snapshot(BlockDriverState *bs);
 BlockDriverState *bdrv_snapshots(void);
@@ -329,8 +354,9 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);

 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs);

 void bdrv_enable_copy_on_read(BlockDriverState *bs);
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,10 +2,19 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
-block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-obj-y += stream.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o
+block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
+
+ifeq ($(CONFIG_POSIX),y)
+block-obj-y += nbd.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+endif
+
+common-obj-y += stream.o
+common-obj-y += commit.o
+common-obj-y += mirror.o
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -28,6 +28,7 @@

 typedef struct BDRVBlkdebugState {
    int state;
+    int new_state;
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
 } BDRVBlkdebugState;
@@ -40,7 +41,7 @@ typedef struct BlkdebugAIOCB {

 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);

-static AIOPool blkdebug_aio_pool = {
+static const AIOCBInfo blkdebug_aiocb_info = {
    .aiocb_size = sizeof(BlkdebugAIOCB),
    .cancel     = blkdebug_aio_cancel,
 };
@@ -334,7 +335,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
        return NULL;
    }

-    acb = qemu_aio_get(&blkdebug_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
    acb->ret = -error;

    bh = qemu_bh_new(error_callback_bh, acb);
@@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs)
 }

 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    int old_state, bool injected)
+    bool injected)
 {
    BDRVBlkdebugState *s = bs->opaque;

    /* Only process rules for the current state */
-    if (rule->state && rule->state != old_state) {
+    if (rule->state && rule->state != s->state) {
        return injected;
    }

@@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
        break;

    case ACTION_SET_STATE:
-        s->state = rule->options.set_state.new_state;
+        s->new_state = rule->options.set_state.new_state;
        break;
    }
    return injected;
@@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
    BDRVBlkdebugState *s = bs->opaque;
    struct BlkdebugRule *rule;
-    int old_state = s->state;
    bool injected;

    assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);

    injected = false;
+    s->new_state = s->state;
    QLIST_FOREACH(rule, &s->rules[event], next) {
-        injected = process_rule(bs, rule, old_state, injected);
+        injected = process_rule(bs, rule, injected);
    }
+    s->state = s->new_state;
 }

 static int64_t blkdebug_getlength(BlockDriverState *bs)
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -48,7 +48,7 @@ static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
    }
 }

-static AIOPool blkverify_aio_pool = {
+static const AIOCBInfo blkverify_aiocb_info = {
    .aiocb_size         = sizeof(BlkverifyAIOCB),
    .cancel             = blkverify_aio_cancel,
 };
@@ -233,7 +233,7 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
                                         BlockDriverCompletionFunc *cb,
                                         void *opaque)
 {
-    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
+    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);

    acb->bh = NULL;
    acb->is_write = is_write;
--- a/block/commit.c
+++ b/block/commit.c
@@ -0,0 +1,259 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Jeff Cody   <jcody@redhat.com>
+ *  Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "blockjob.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CommitBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *active;
+    BlockDriverState *top;
+    BlockDriverState *base;
+    BlockdevOnError on_error;
+    int base_flags;
+    int orig_overlay_flags;
+} CommitBlockJob;
+
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+                                        BlockDriverState *base,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    int ret = 0;
+
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_write(base, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    BlockDriverState *active = s->active;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
+    BlockDriverState *overlay_bs = NULL;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n = 0;
+    void *buf;
+    int bytes_written = 0;
+    int64_t base_len;
+
+    ret = s->common.len = bdrv_getlength(top);
+
+
+    if (s->common.len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    ret = base_len = bdrv_getlength(base);
+    if (base_len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    if (base_len < s->common.len) {
+        ret = bdrv_truncate(base, s->common.len);
+        if (ret) {
+            goto exit_restore_reopen;
+        }
+    }
+
+    overlay_bs = bdrv_find_overlay(active, top);
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+        uint64_t delay_ns = 0;
+        bool copy;
+
+wait:
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+        /* Copy if allocated above the base */
+        ret = bdrv_co_is_allocated_above(top, base, sector_num,
+                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                         &n);
+        copy = (ret == 1);
+        trace_commit_one_iteration(s, sector_num, n, ret);
+        if (copy) {
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    goto wait;
+                }
+            }
+            ret = commit_populate(top, base, sector_num, n, buf);
+            bytes_written += n * BDRV_SECTOR_SIZE;
+        }
+        if (ret < 0) {
+            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
+                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
+                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+                goto exit_free_buf;
+            } else {
+                n = 0;
+                continue;
+            }
+        }
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    ret = 0;
+
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
+    qemu_vfree(buf);
+
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_completed(&s->common, ret);
+}
+
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = {
+    .instance_size = sizeof(CommitBlockJob),
+    .job_type      = "commit",
+    .set_speed     = commit_set_speed,
+};
+
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                  BlockDriverState *top, int64_t speed,
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    CommitBlockJob *s;
+    BlockReopenQueue *reopen_queue = NULL;
+    int orig_overlay_flags;
+    int orig_base_flags;
+    BlockDriverState *overlay_bs;
+    Error *local_err = NULL;
+
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
+        return;
+    }
+
+    if (top == base) {
+        error_setg(errp, "Invalid files for merge: top and base are the same");
+        return;
+    }
+
+    overlay_bs = bdrv_find_overlay(bs, top);
+
+    if (overlay_bs == NULL) {
+        error_setg(errp, "Could not find overlay image for %s:", top->filename);
+        return;
+    }
+
+    orig_base_flags    = bdrv_get_flags(base);
+    orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+    /* convert base & overlay_bs to r/w, if necessary */
+    if (!(orig_base_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+                                         orig_base_flags | BDRV_O_RDWR);
+    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
+    if (reopen_queue) {
+        bdrv_reopen_multiple(reopen_queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+
+    s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->base   = base;
+    s->top    = top;
+    s->active = bs;
+
+    s->base_flags          = orig_base_flags;
+    s->orig_overlay_flags  = orig_overlay_flags;
+
+    s->on_error = on_error;
+    s->common.co = qemu_coroutine_create(commit_run);
+
+    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
--- a/block/curl.c
+++ b/block/curl.c
@@ -438,7 +438,7 @@ static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
    // Do we have to implement canceling? Seems to work without...
 }

-static AIOPool curl_aio_pool = {
+static const AIOCBInfo curl_aiocb_info = {
    .aiocb_size         = sizeof(CURLAIOCB),
    .cancel             = curl_aio_cancel,
 };
@@ -505,7 +505,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
 {
    CURLAIOCB *acb;

-    acb = qemu_aio_get(&curl_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);

    acb->qiov = qiov;
    acb->sector_num = sector_num;
@@ -542,8 +542,7 @@ static void curl_close(BlockDriverState *bs)
    }
    if (s->multi)
        curl_multi_cleanup(s->multi);
-    if (s->url)
-        free(s->url);
+    g_free(s->url);
 }

 static int64_t curl_getlength(BlockDriverState *bs)
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -0,0 +1,624 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
+ *
+ * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
+ *                         Josh Durgin <josh.durgin@dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <glusterfs/api/glfs.h>
+#include "block_int.h"
+#include "qemu_socket.h"
+#include "uri.h"
+
+typedef struct GlusterAIOCB {
+    BlockDriverAIOCB common;
+    int64_t size;
+    int ret;
+    bool *finished;
+    QEMUBH *bh;
+} GlusterAIOCB;
+
+typedef struct BDRVGlusterState {
+    struct glfs *glfs;
+    int fds[2];
+    struct glfs_fd *fd;
+    int qemu_aio_count;
+    int event_reader_pos;
+    GlusterAIOCB *event_acb;
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ  0
+#define GLUSTER_FD_WRITE 1
+
+typedef struct GlusterConf {
+    char *server;
+    int port;
+    char *volname;
+    char *image;
+    char *transport;
+} GlusterConf;
+
+static void qemu_gluster_gconf_free(GlusterConf *gconf)
+{
+    g_free(gconf->server);
+    g_free(gconf->volname);
+    g_free(gconf->image);
+    g_free(gconf->transport);
+    g_free(gconf);
+}
+
+static int parse_volume_options(GlusterConf *gconf, char *path)
+{
+    char *p, *q;
+
+    if (!path) {
+        return -EINVAL;
+    }
+
+    /* volume */
+    p = q = path + strspn(path, "/");
+    p += strcspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->volname = g_strndup(q, p - q);
+
+    /* image */
+    p += strspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->image = g_strdup(p);
+    return 0;
+}
+
+/*
+ * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ *
+ * 'gluster' is the protocol.
+ *
+ * 'transport' specifies the transport type used to connect to gluster
+ * management daemon (glusterd). Valid transport types are
+ * tcp, unix and rdma. If a transport type isn't specified, then tcp
+ * type is assumed.
+ *
+ * 'server' specifies the server where the volume file specification for
+ * the given volume resides. This can be either hostname, ipv4 address
+ * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * If transport type is 'unix', then 'server' field should not be specifed.
+ * The 'socket' field needs to be populated with the path to unix domain
+ * socket.
+ *
+ * 'port' is the port number on which glusterd is listening. This is optional
+ * and if not specified, QEMU will send 0 which will make gluster to use the
+ * default port. If the transport type is unix, then 'port' should not be
+ * specified.
+ *
+ * 'volname' is the name of the gluster volume which contains the VM image.
+ *
+ * 'image' is the path to the actual VM image that resides on gluster volume.
+ *
+ * Examples:
+ *
+ * file=gluster://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
+ * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
+ * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
+ */
+static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    bool is_unix = false;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "gluster")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+unix")) {
+        gconf->transport = g_strdup("unix");
+        is_unix = true;
+    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
+        gconf->transport = g_strdup("rdma");
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = parse_volume_options(gconf, uri->path);
+    if (ret < 0) {
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (is_unix) {
+        if (uri->server || uri->port) {
+            ret = -EINVAL;
+            goto out;
+        }
+        if (strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        gconf->server = g_strdup(qp->p[0].value);
+    } else {
+        gconf->server = g_strdup(uri->server);
+        gconf->port = uri->port;
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+{
+    struct glfs *glfs = NULL;
+    int ret;
+    int old_errno;
+
+    ret = qemu_gluster_parseuri(gconf, filename);
+    if (ret < 0) {
+        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+            "volname/image[?socket=...]");
+        errno = -ret;
+        goto out;
+    }
+
+    glfs = glfs_new(gconf->volname);
+    if (!glfs) {
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+            gconf->port);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /*
+     * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
+     * GlusterFS makes GF_LOG_* macros available to libgfapi users.
+     */
+    ret = glfs_set_logging(glfs, "-", 4);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = glfs_init(glfs);
+    if (ret) {
+        error_report("Gluster connection failed for server=%s port=%d "
+             "volume=%s image=%s transport=%s\n", gconf->server, gconf->port,
+             gconf->volname, gconf->image, gconf->transport);
+        goto out;
+    }
+    return glfs;
+
+out:
+    if (glfs) {
+        old_errno = errno;
+        glfs_fini(glfs);
+        errno = old_errno;
+    }
+    return NULL;
+}
+
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+{
+    int ret;
+    bool *finished = acb->finished;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+
+    if (!acb->ret || acb->ret == acb->size) {
+        ret = 0; /* Success */
+    } else if (acb->ret < 0) {
+        ret = acb->ret; /* Read/Write failed */
+    } else {
+        ret = -EIO; /* Partial read/write - fail it */
+    }
+
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    cb(opaque, ret);
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_acb;
+
+        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_acb)) {
+                s->event_reader_pos = 0;
+                qemu_gluster_complete_aio(s->event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+
+    return (s->qemu_aio_count > 0);
+}
+
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int open_flags = O_BINARY;
+    int ret = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    s->glfs = qemu_gluster_init(gconf, filename);
+    if (!s->glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        open_flags |= O_DIRECT;
+    }
+
+    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    if (!s->fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (!ret) {
+        return ret;
+    }
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+    return ret;
+}
+
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+    int ret = 0;
+    int64_t total_size = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    glfs = qemu_gluster_init(gconf, filename);
+    if (!glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    fd = glfs_creat(glfs, gconf->image,
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+    if (!fd) {
+        ret = -errno;
+    } else {
+        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+            ret = -errno;
+        }
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
+    }
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (glfs) {
+        glfs_fini(glfs);
+    }
+    return ret;
+}
+
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+    bool finished = false;
+
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static const AIOCBInfo gluster_aiocb_info = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+    .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVGlusterState *s = bs->opaque;
+    int retval;
+
+    acb->ret = ret;
+    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+    if (retval != sizeof(acb)) {
+        /*
+         * Gluster AIO callback thread failed to notify the waiting
+         * QEMU thread about IO completion.
+         *
+         * Complete this IO request and make the disk inaccessible for
+         * subsequent reads and writes.
+         */
+        error_report("Gluster failed to notify QEMU about IO completion");
+
+        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_release(acb);
+        s->qemu_aio_count--;
+        close(s->fds[GLUSTER_FD_READ]);
+        close(s->fds[GLUSTER_FD_WRITE]);
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+            NULL);
+        bs->drv = NULL; /* Make the disk inaccessible */
+        qemu_mutex_unlock_iothread();
+    }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+    size_t size;
+    off_t offset;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb->size = size;
+    acb->ret = 0;
+    acb->finished = NULL;
+
+    if (write) {
+        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    } else {
+        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;
+    s->qemu_aio_count++;
+
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int64_t ret;
+
+    ret = glfs_lseek(s->fd, 0, SEEK_END);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return ret;
+    }
+}
+
+static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat st;
+    int ret;
+
+    ret = glfs_fstat(s->fd, &st);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return st.st_blocks * 512;
+    }
+}
+
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    glfs_fini(s->glfs);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
+};
+
+static BlockDriver bdrv_gluster = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_tcp = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+tcp",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_unix = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+unix",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_rdma = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+rdma",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void)
+{
+    bdrv_register(&bdrv_gluster_rdma);
+    bdrv_register(&bdrv_gluster_unix);
+    bdrv_register(&bdrv_gluster_tcp);
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -65,13 +65,6 @@ typedef struct IscsiAIOCB {
 #endif
 } IscsiAIOCB;

-struct IscsiTask {
-    IscsiLun *iscsilun;
-    BlockDriverState *bs;
-    int status;
-    int complete;
-};
-
 static void
 iscsi_bh_cb(void *p)
 {
@@ -133,7 +126,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
    }
 }

-static AIOPool iscsi_aio_pool = {
+static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
 };
@@ -167,12 +160,6 @@ iscsi_set_events(IscsiLun *iscsilun)

    }

-    /* If we just added an event, the callback might be delayed
-     * unless we call qemu_notify_event().
-     */
-    if (ev & ~iscsilun->events) {
-        qemu_notify_event();
-    }
    iscsilun->events = ev;
 }

@@ -240,7 +227,7 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
    uint64_t lba;
    struct iscsi_data data;

-    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);

    acb->iscsilun = iscsilun;
@@ -268,10 +255,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
    acb->task->xfer_dir = SCSI_XFER_WRITE;
    acb->task->cdb_size = 16;
    acb->task->cdb[0] = 0x8a;
-    if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
-        /* set FUA on writes when cache mode is write through */
-        acb->task->cdb[1] |= 0x04;
-    }
    lba = sector_qemu2lun(sector_num, iscsilun);
    *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
    *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
@@ -335,7 +318,7 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,

    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;

-    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);

    acb->iscsilun = iscsilun;
@@ -390,7 +373,7 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
        *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
        break;
    }
-    
+
    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
                                 iscsi_aio_read16_cb,
                                 NULL,
@@ -440,7 +423,7 @@ iscsi_aio_flush(BlockDriverState *bs,
    struct iscsi_context *iscsi = iscsilun->iscsi;
    IscsiAIOCB *acb;

-    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);

    acb->iscsilun = iscsilun;
    acb->canceled   = 0;
@@ -493,7 +476,7 @@ iscsi_aio_discard(BlockDriverState *bs,
    IscsiAIOCB *acb;
    struct unmap_list list[1];

-    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);

    acb->iscsilun = iscsilun;
    acb->canceled   = 0;
@@ -568,7 +551,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,

    assert(req == SG_IO);

-    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);

    acb->iscsilun = iscsilun;
    acb->canceled    = 0;
@@ -628,9 +611,17 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    return &acb->common;
 }

+
+static void ioctl_cb(void *opaque, int status)
+{
+    int *p_status = opaque;
+    *p_status = status;
+}
+
 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
    IscsiLun *iscsilun = bs->opaque;
+    int status;

    switch (req) {
    case SG_GET_VERSION_NUM:
@@ -639,6 +630,15 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
    case SG_GET_SCSI_ID:
        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
        break;
+    case SG_IO:
+        status = -EINPROGRESS;
+        iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
+
+        while (status == -EINPROGRESS) {
+            qemu_aio_wait();
+        }
+
+        return 0;
    default:
        return -1;
    }
@@ -658,163 +658,6 @@ iscsi_getlength(BlockDriverState *bs)
    return len;
 }

-static void
-iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status,
-                        void *command_data, void *opaque)
-{
-    struct IscsiTask *itask = opaque;
-    struct scsi_readcapacity16 *rc16;
-    struct scsi_task *task = command_data;
-
-    if (status != 0) {
-        error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
-                     iscsi_get_error(iscsi));
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    rc16 = scsi_datain_unmarshall(task);
-    if (rc16 == NULL) {
-        error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    itask->iscsilun->block_size = rc16->block_length;
-    itask->iscsilun->num_blocks = rc16->returned_lba + 1;
-    itask->bs->total_sectors    = itask->iscsilun->num_blocks *
-                               itask->iscsilun->block_size / BDRV_SECTOR_SIZE ;
-
-    itask->status   = 0;
-    itask->complete = 1;
-    scsi_free_scsi_task(task);
-}
-
-static void
-iscsi_readcapacity10_cb(struct iscsi_context *iscsi, int status,
-                        void *command_data, void *opaque)
-{
-    struct IscsiTask *itask = opaque;
-    struct scsi_readcapacity10 *rc10;
-    struct scsi_task *task = command_data;
-
-    if (status != 0) {
-        error_report("iSCSI: Failed to read capacity of iSCSI lun. %s",
-                     iscsi_get_error(iscsi));
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    rc10 = scsi_datain_unmarshall(task);
-    if (rc10 == NULL) {
-        error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    itask->iscsilun->block_size = rc10->block_size;
-    if (rc10->lba == 0) {
-        /* blank disk loaded */
-        itask->iscsilun->num_blocks = 0;
-    } else {
-        itask->iscsilun->num_blocks = rc10->lba + 1;
-    }
-    itask->bs->total_sectors    = itask->iscsilun->num_blocks *
-                               itask->iscsilun->block_size / BDRV_SECTOR_SIZE ;
-
-    itask->status   = 0;
-    itask->complete = 1;
-    scsi_free_scsi_task(task);
-}
-
-static void
-iscsi_inquiry_cb(struct iscsi_context *iscsi, int status, void *command_data,
-                 void *opaque)
-{
-    struct IscsiTask *itask = opaque;
-    struct scsi_task *task = command_data;
-    struct scsi_inquiry_standard *inq;
-
-    if (status != 0) {
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    inq = scsi_datain_unmarshall(task);
-    if (inq == NULL) {
-        error_report("iSCSI: Failed to unmarshall inquiry data.");
-        itask->status   = 1;
-        itask->complete = 1;
-        scsi_free_scsi_task(task);
-        return;
-    }
-
-    itask->iscsilun->type = inq->periperal_device_type;
-
-    scsi_free_scsi_task(task);
-
-    switch (itask->iscsilun->type) {
-    case TYPE_DISK:
-        task = iscsi_readcapacity16_task(iscsi, itask->iscsilun->lun,
-                                   iscsi_readcapacity16_cb, opaque);
-        if (task == NULL) {
-            error_report("iSCSI: failed to send readcapacity16 command.");
-            itask->status   = 1;
-            itask->complete = 1;
-            return;
-        }
-        break;
-    case TYPE_ROM:
-        task = iscsi_readcapacity10_task(iscsi, itask->iscsilun->lun,
-                                   0, 0,
-                                   iscsi_readcapacity10_cb, opaque);
-        if (task == NULL) {
-            error_report("iSCSI: failed to send readcapacity16 command.");
-            itask->status   = 1;
-            itask->complete = 1;
-            return;
-        }
-        break;
-    default:
-        itask->status   = 0;
-        itask->complete = 1;
-    }
-}
-
-static void
-iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
-                 void *opaque)
-{
-    struct IscsiTask *itask = opaque;
-    struct scsi_task *task;
-
-    if (status != 0) {
-        itask->status   = 1;
-        itask->complete = 1;
-        return;
-    }
-
-    task = iscsi_inquiry_task(iscsi, itask->iscsilun->lun,
-                              0, 0, 36,
-                              iscsi_inquiry_cb, opaque);
-    if (task == NULL) {
-        error_report("iSCSI: failed to send inquiry command.");
-        itask->status   = 1;
-        itask->complete = 1;
-        return;
-    }
-}
-
 static int parse_chap(struct iscsi_context *iscsi, const char *target)
 {
    QemuOptsList *list;
@@ -927,7 +770,10 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = NULL;
    struct iscsi_url *iscsi_url = NULL;
-    struct IscsiTask task;
+    struct scsi_task *task = NULL;
+    struct scsi_inquiry_standard *inq = NULL;
+    struct scsi_readcapacity10 *rc10 = NULL;
+    struct scsi_readcapacity16 *rc16 = NULL;
    char *initiator_name = NULL;
    int ret;

@@ -940,8 +786,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)

    iscsi_url = iscsi_parse_full_url(iscsi, filename);
    if (iscsi_url == NULL) {
-        error_report("Failed to parse URL : %s %s", filename,
-                     iscsi_get_error(iscsi));
+        error_report("Failed to parse URL : %s", filename);
        ret = -EINVAL;
        goto out;
    }
@@ -991,33 +836,80 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
    /* check if we got HEADER_DIGEST via the options */
    parse_header_digest(iscsi, iscsi_url->target);

-    task.iscsilun = iscsilun;
-    task.status = 0;
-    task.complete = 0;
-    task.bs = bs;
+    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
+        error_report("iSCSI: Failed to connect to LUN : %s",
+            iscsi_get_error(iscsi));
+        ret = -EINVAL;
+        goto out;
+    }

    iscsilun->iscsi = iscsi;
    iscsilun->lun   = iscsi_url->lun;

-    if (iscsi_full_connect_async(iscsi, iscsi_url->portal, iscsi_url->lun,
-                                 iscsi_connect_cb, &task)
-        != 0) {
-        error_report("iSCSI: Failed to start async connect.");
+    task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
+
+    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+        error_report("iSCSI: failed to send inquiry command.");
        ret = -EINVAL;
        goto out;
    }

-    while (!task.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_aio_wait();
-    }
-    if (task.status != 0) {
-        error_report("iSCSI: Failed to connect to LUN : %s",
-                     iscsi_get_error(iscsi));
+    inq = scsi_datain_unmarshall(task);
+    if (inq == NULL) {
+        error_report("iSCSI: Failed to unmarshall inquiry data.");
        ret = -EINVAL;
        goto out;
    }

+    iscsilun->type = inq->periperal_device_type;
+
+    scsi_free_scsi_task(task);
+
+    switch (iscsilun->type) {
+    case TYPE_DISK:
+        task = iscsi_readcapacity16_sync(iscsi, iscsilun->lun);
+        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+            error_report("iSCSI: failed to send readcapacity16 command.");
+            ret = -EINVAL;
+            goto out;
+        }
+        rc16 = scsi_datain_unmarshall(task);
+        if (rc16 == NULL) {
+            error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
+            ret = -EINVAL;
+            goto out;
+        }
+        iscsilun->block_size = rc16->block_length;
+        iscsilun->num_blocks = rc16->returned_lba + 1;
+        break;
+    case TYPE_ROM:
+        task = iscsi_readcapacity10_sync(iscsi, iscsilun->lun, 0, 0);
+        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+            error_report("iSCSI: failed to send readcapacity10 command.");
+            ret = -EINVAL;
+            goto out;
+        }
+        rc10 = scsi_datain_unmarshall(task);
+        if (rc10 == NULL) {
+            error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
+            ret = -EINVAL;
+            goto out;
+        }
+        iscsilun->block_size = rc10->block_size;
+        if (rc10->lba == 0) {
+            /* blank disk loaded */
+            iscsilun->num_blocks = 0;
+        } else {
+            iscsilun->num_blocks = rc10->lba + 1;
+        }
+        break;
+    default:
+        break;
+    }
+
+    bs->total_sectors    = iscsilun->num_blocks *
+                           iscsilun->block_size / BDRV_SECTOR_SIZE ;
+
    /* Medium changer or tape. We dont have any emulation for this so this must
     * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
     * to read from the device to guess the image format.
@@ -1036,6 +928,9 @@ out:
    if (iscsi_url != NULL) {
        iscsi_destroy_url(iscsi_url);
    }
+    if (task != NULL) {
+        scsi_free_scsi_task(task);
+    }

    if (ret) {
        if (iscsi != NULL) {
@@ -1056,6 +951,11 @@ static void iscsi_close(BlockDriverState *bs)
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

+static int iscsi_has_zero_init(BlockDriverState *bs)
+{
+    return 0;
+}
+
 static BlockDriver bdrv_iscsi = {
    .format_name     = "iscsi",
    .protocol_name   = "iscsi",
@@ -1071,6 +971,7 @@ static BlockDriver bdrv_iscsi = {
    .bdrv_aio_flush  = iscsi_aio_flush,

    .bdrv_aio_discard = iscsi_aio_discard,
+    .bdrv_has_zero_init = iscsi_has_zero_init,

 #ifdef __linux__
    .bdrv_ioctl       = iscsi_ioctl,
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -9,9 +9,10 @@
 */
 #include "qemu-common.h"
 #include "qemu-aio.h"
-#include "block/raw-posix-aio.h"
+#include "qemu-queue.h"
+#include "block/raw-aio.h"
+#include "event_notifier.h"

-#include <sys/eventfd.h>
 #include <libaio.h>

 /*
@@ -37,7 +38,7 @@ struct qemu_laiocb {

 struct qemu_laio_state {
    io_context_t ctx;
-    int efd;
+    EventNotifier e;
    int count;
 };

@@ -76,29 +77,17 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
    qemu_aio_release(laiocb);
 }

-static void qemu_laio_completion_cb(void *opaque)
+static void qemu_laio_completion_cb(EventNotifier *e)
 {
-    struct qemu_laio_state *s = opaque;
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);

-    while (1) {
+    while (event_notifier_test_and_clear(&s->e)) {
        struct io_event events[MAX_EVENTS];
-        uint64_t val;
-        ssize_t ret;
        struct timespec ts = { 0 };
        int nevents, i;

        do {
-            ret = read(s->efd, &val, sizeof(val));
-        } while (ret == -1 && errno == EINTR);
-
-        if (ret == -1 && errno == EAGAIN)
-            break;
-
-        if (ret != 8)
-            break;
-
-        do {
-            nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
+            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
        } while (nevents == -EINTR);

        for (i = 0; i < nevents; i++) {
@@ -112,9 +101,9 @@ static void qemu_laio_completion_cb(void *opaque)
    }
 }

-static int qemu_laio_flush_cb(void *opaque)
+static int qemu_laio_flush_cb(EventNotifier *e)
 {
-    struct qemu_laio_state *s = opaque;
+    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);

    return (s->count > 0) ? 1 : 0;
 }
@@ -146,11 +135,12 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
     * We might be able to do this slightly more optimal by removing the
     * O_NONBLOCK flag.
     */
-    while (laiocb->ret == -EINPROGRESS)
-        qemu_laio_completion_cb(laiocb->ctx);
+    while (laiocb->ret == -EINPROGRESS) {
+        qemu_laio_completion_cb(&laiocb->ctx->e);
+    }
 }

-static AIOPool laio_pool = {
+static const AIOCBInfo laio_aiocb_info = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
    .cancel             = laio_cancel,
 };
@@ -164,7 +154,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
    struct iocb *iocbs;
    off_t offset = sector_num * 512;

-    laiocb = qemu_aio_get(&laio_pool, bs, cb, opaque);
+    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
    laiocb->nbytes = nb_sectors * 512;
    laiocb->ctx = s;
    laiocb->ret = -EINPROGRESS;
@@ -186,7 +176,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
                        __func__, type);
        goto out_free_aiocb;
    }
-    io_set_eventfd(&laiocb->iocb, s->efd);
+    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
    s->count++;

    if (io_submit(s->ctx, 1, &iocbs) < 0)
@@ -205,21 +195,21 @@ void *laio_init(void)
    struct qemu_laio_state *s;

    s = g_malloc0(sizeof(*s));
-    s->efd = eventfd(0, 0);
-    if (s->efd == -1)
+    if (event_notifier_init(&s->e, false) < 0) {
        goto out_free_state;
-    fcntl(s->efd, F_SETFL, O_NONBLOCK);
+    }

-    if (io_setup(MAX_EVENTS, &s->ctx) != 0)
+    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
        goto out_close_efd;
+    }

-    qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
-        qemu_laio_flush_cb, s);
+    qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
+                                qemu_laio_flush_cb);

    return s;

 out_close_efd:
-    close(s->efd);
+    event_notifier_cleanup(&s->e);
 out_free_state:
    g_free(s);
    return NULL;
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -0,0 +1,322 @@
+/*
+ * Image mirroring
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Paolo Bonzini  <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct MirrorBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *target;
+    MirrorSyncMode mode;
+    BlockdevOnError on_source_error, on_target_error;
+    bool synced;
+    bool should_complete;
+    int64_t sector_num;
+    uint8_t *buf;
+} MirrorBlockJob;
+
+static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
+                                            int error)
+{
+    s->synced = false;
+    if (read) {
+        return block_job_error_action(&s->common, s->common.bs,
+                                      s->on_source_error, true, error);
+    } else {
+        return block_job_error_action(&s->common, s->target,
+                                      s->on_target_error, false, error);
+    }
+}
+
+static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
+                                         BlockErrorAction *p_action)
+{
+    BlockDriverState *source = s->common.bs;
+    BlockDriverState *target = s->target;
+    QEMUIOVector qiov;
+    int ret, nb_sectors;
+    int64_t end;
+    struct iovec iov;
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
+    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
+    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+
+    /* Copy the dirty cluster.  */
+    iov.iov_base = s->buf;
+    iov.iov_len  = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
+    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+    if (ret < 0) {
+        *p_action = mirror_error_action(s, true, -ret);
+        goto fail;
+    }
+    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+    if (ret < 0) {
+        *p_action = mirror_error_action(s, false, -ret);
+        s->synced = false;
+        goto fail;
+    }
+    return 0;
+
+fail:
+    /* Try again later.  */
+    bdrv_set_dirty(source, s->sector_num, nb_sectors);
+    return ret;
+}
+
+static void coroutine_fn mirror_run(void *opaque)
+{
+    MirrorBlockJob *s = opaque;
+    BlockDriverState *bs = s->common.bs;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n;
+
+    if (block_job_is_cancelled(&s->common)) {
+        goto immediate_exit;
+    }
+
+    s->common.len = bdrv_getlength(bs);
+    if (s->common.len < 0) {
+        block_job_completed(&s->common, s->common.len);
+        return;
+    }
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+
+    if (s->mode != MIRROR_SYNC_MODE_NONE) {
+        /* First part, loop on the sectors and initialize the dirty bitmap.  */
+        BlockDriverState *base;
+        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
+        for (sector_num = 0; sector_num < end; ) {
+            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+            ret = bdrv_co_is_allocated_above(bs, base,
+                                             sector_num, next - sector_num, &n);
+
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+
+            assert(n > 0);
+            if (ret == 1) {
+                bdrv_set_dirty(bs, sector_num, n);
+                sector_num = next;
+            } else {
+                sector_num += n;
+            }
+        }
+    }
+
+    s->sector_num = -1;
+    for (;;) {
+        uint64_t delay_ns;
+        int64_t cnt;
+        bool should_complete;
+
+        cnt = bdrv_get_dirty_count(bs);
+        if (cnt != 0) {
+            BlockErrorAction action = BDRV_ACTION_REPORT;
+            ret = mirror_iteration(s, &action);
+            if (ret < 0 && action == BDRV_ACTION_REPORT) {
+                goto immediate_exit;
+            }
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        should_complete = false;
+        if (cnt == 0) {
+            trace_mirror_before_flush(s);
+            ret = bdrv_flush(s->target);
+            if (ret < 0) {
+                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
+                    goto immediate_exit;
+                }
+            } else {
+                /* We're out of the streaming phase.  From now on, if the job
+                 * is cancelled we will actually complete all pending I/O and
+                 * report completion.  This way, block-job-cancel will leave
+                 * the target in a consistent state.
+                 */
+                s->common.offset = end * BDRV_SECTOR_SIZE;
+                if (!s->synced) {
+                    block_job_ready(&s->common);
+                    s->synced = true;
+                }
+
+                should_complete = s->should_complete ||
+                    block_job_is_cancelled(&s->common);
+                cnt = bdrv_get_dirty_count(bs);
+            }
+        }
+
+        if (cnt == 0 && should_complete) {
+            /* The dirty bitmap is not updated while operations are pending.
+             * If we're about to exit, wait for pending operations before
+             * calling bdrv_get_dirty_count(bs), or we may exit while the
+             * source has dirty data to copy!
+             *
+             * Note that I/O can be submitted by the guest while
+             * mirror_populate runs.
+             */
+            trace_mirror_before_drain(s, cnt);
+            bdrv_drain_all();
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        ret = 0;
+        trace_mirror_before_sleep(s, cnt, s->synced);
+        if (!s->synced) {
+            /* Publish progress */
+            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+            } else {
+                delay_ns = 0;
+            }
+
+            /* Note that even when no rate limit is applied we need to yield
+             * with no pending I/O here so that qemu_aio_flush() returns.
+             */
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+            if (block_job_is_cancelled(&s->common)) {
+                break;
+            }
+        } else if (!should_complete) {
+            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        } else if (cnt == 0) {
+            /* The two disks are in sync.  Exit and report successful
+             * completion.
+             */
+            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.cancelled = false;
+            break;
+        }
+    }
+
+immediate_exit:
+    g_free(s->buf);
+    bdrv_set_dirty_tracking(bs, false);
+    bdrv_iostatus_disable(s->target);
+    if (s->should_complete && ret == 0) {
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
+            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        }
+        bdrv_swap(s->target, s->common.bs);
+    }
+    bdrv_close(s->target);
+    bdrv_delete(s->target);
+    block_job_completed(&s->common, ret);
+}
+
+static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static void mirror_iostatus_reset(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    bdrv_iostatus_reset(s->target);
+}
+
+static void mirror_complete(BlockJob *job, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    int ret;
+
+    ret = bdrv_open_backing_file(s->target);
+    if (ret < 0) {
+        char backing_filename[PATH_MAX];
+        bdrv_get_full_backing_filename(s->target, backing_filename,
+                                       sizeof(backing_filename));
+        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
+        return;
+    }
+    if (!s->synced) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        return;
+    }
+
+    s->should_complete = true;
+    block_job_resume(job);
+}
+
+static BlockJobType mirror_job_type = {
+    .instance_size = sizeof(MirrorBlockJob),
+    .job_type      = "mirror",
+    .set_speed     = mirror_set_speed,
+    .iostatus_reset= mirror_iostatus_reset,
+    .complete      = mirror_complete,
+};
+
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, MirrorSyncMode mode,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    MirrorBlockJob *s;
+
+    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+        return;
+    }
+
+    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->on_source_error = on_source_error;
+    s->on_target_error = on_target_error;
+    s->target = target;
+    s->mode = mode;
+    bdrv_set_dirty_tracking(bs, true);
+    bdrv_set_enable_write_cache(s->target, true);
+    bdrv_set_on_error(s->target, on_target_error, on_target_error);
+    bdrv_iostatus_enable(s->target);
+    s->common.co = qemu_coroutine_create(mirror_run);
+    trace_mirror_start(bs, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -28,6 +28,7 @@

 #include "qemu-common.h"
 #include "nbd.h"
+#include "uri.h"
 #include "block_int.h"
 #include "module.h"
 #include "qemu_socket.h"
@@ -55,7 +56,6 @@ typedef struct BDRVNBDState {
    uint32_t nbdflags;
    off_t size;
    size_t blocksize;
-    char *export_name; /* An NBD server may export several devices */

    CoMutex send_mutex;
    CoMutex free_sema;
@@ -65,13 +65,75 @@ typedef struct BDRVNBDState {
    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    struct nbd_reply reply;

-    /* If it begins with  '/', this is a UNIX domain socket. Otherwise,
-     * it's a string of the form <hostname|ip4|\[ip6\]>:port
-     */
+    int is_unix;
    char *host_spec;
+    char *export_name; /* An NBD server may export several devices */
 } BDRVNBDState;

-static int nbd_config(BDRVNBDState *s, const char *filename, int flags)
+static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
+{
+    URI *uri;
+    const char *p;
+    QueryParams *qp = NULL;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "nbd")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "nbd+unix")) {
+        s->is_unix = true;
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    p = uri->path ? uri->path : "/";
+    p += strspn(p, "/");
+    if (p[0]) {
+        s->export_name = g_strdup(p);
+    }
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (s->is_unix) {
+        /* nbd+unix:///export?socket=path */
+        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        s->host_spec = g_strdup(qp->p[0].value);
+    } else {
+        /* nbd[+tcp]://host:port/export */
+        if (!uri->server) {
+            ret = -EINVAL;
+            goto out;
+        }
+        if (!uri->port) {
+            uri->port = NBD_DEFAULT_PORT;
+        }
+        s->host_spec = g_strdup_printf("%s:%d", uri->server, uri->port);
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
+static int nbd_config(BDRVNBDState *s, const char *filename)
 {
    char *file;
    char *export_name;
@@ -79,6 +141,10 @@ static int nbd_config(BDRVNBDState *s, const char *filename, int flags)
    const char *unixpath;
    int err = -EINVAL;

+    if (strstr(filename, "://")) {
+        return nbd_parse_uri(s, filename);
+    }
+
    file = g_strdup(filename);

    export_name = strstr(file, EN_OPTSTR);
@@ -98,11 +164,10 @@ static int nbd_config(BDRVNBDState *s, const char *filename, int flags)

    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
-        if (unixpath[0] != '/') { /* We demand  an absolute path*/
-            goto out;
-        }
+        s->is_unix = true;
        s->host_spec = g_strdup(unixpath);
    } else {
+        s->is_unix = false;
        s->host_spec = g_strdup(host_spec);
    }

@@ -262,7 +327,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
    off_t size;
    size_t blocksize;

-    if (s->host_spec[0] == '/') {
+    if (s->is_unix) {
        sock = unix_socket_outgoing(s->host_spec);
    } else {
        sock = tcp_socket_outgoing_spec(s->host_spec);
@@ -320,7 +385,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
    qemu_co_mutex_init(&s->free_sema);

    /* Pop the config into our state object. Exit if invalid. */
-    result = nbd_config(s, filename, flags);
+    result = nbd_config(s, filename);
    if (result != 0) {
        return result;
    }
@@ -498,6 +563,33 @@ static int64_t nbd_getlength(BlockDriverState *bs)

 static BlockDriver bdrv_nbd = {
    .format_name         = "nbd",
+    .protocol_name       = "nbd",
+    .instance_size       = sizeof(BDRVNBDState),
+    .bdrv_file_open      = nbd_open,
+    .bdrv_co_readv       = nbd_co_readv,
+    .bdrv_co_writev      = nbd_co_writev,
+    .bdrv_close          = nbd_close,
+    .bdrv_co_flush_to_os = nbd_co_flush,
+    .bdrv_co_discard     = nbd_co_discard,
+    .bdrv_getlength      = nbd_getlength,
+};
+
+static BlockDriver bdrv_nbd_tcp = {
+    .format_name         = "nbd",
+    .protocol_name       = "nbd+tcp",
+    .instance_size       = sizeof(BDRVNBDState),
+    .bdrv_file_open      = nbd_open,
+    .bdrv_co_readv       = nbd_co_readv,
+    .bdrv_co_writev      = nbd_co_writev,
+    .bdrv_close          = nbd_close,
+    .bdrv_co_flush_to_os = nbd_co_flush,
+    .bdrv_co_discard     = nbd_co_discard,
+    .bdrv_getlength      = nbd_getlength,
+};
+
+static BlockDriver bdrv_nbd_unix = {
+    .format_name         = "nbd",
+    .protocol_name       = "nbd+unix",
    .instance_size       = sizeof(BDRVNBDState),
    .bdrv_file_open      = nbd_open,
    .bdrv_co_readv       = nbd_co_readv,
@@ -506,12 +598,13 @@ static BlockDriver bdrv_nbd = {
    .bdrv_co_flush_to_os = nbd_co_flush,
    .bdrv_co_discard     = nbd_co_discard,
    .bdrv_getlength      = nbd_getlength,
-    .protocol_name       = "nbd",
 };

 static void bdrv_nbd_init(void)
 {
    bdrv_register(&bdrv_nbd);
+    bdrv_register(&bdrv_nbd_tcp);
+    bdrv_register(&bdrv_nbd_unix);
 }

 block_init(bdrv_nbd_init);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -197,6 +197,15 @@ static int qcow_open(BlockDriverState *bs, int flags)
    return ret;
 }

+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int qcow_set_key(BlockDriverState *bs, const char *key)
 {
    BDRVQcowState *s = bs->opaque;
@@ -868,6 +877,7 @@ static BlockDriver bdrv_qcow = {
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
+    .bdrv_reopen_prepare = qcow_reopen_prepare,
    .bdrv_create	= qcow_create,

    .bdrv_co_readv          = qcow_co_readv,
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -301,7 +301,8 @@ static int alloc_refcount_block(BlockDriverState *bs,
    uint64_t last_table_size;
    uint64_t blocks_clusters;
    do {
-        uint64_t table_clusters = size_to_clusters(s, table_size);
+        uint64_t table_clusters =
+            size_to_clusters(s, table_size * sizeof(uint64_t));
        blocks_clusters = 1 +
            ((table_clusters + refcount_block_clusters - 1)
            / refcount_block_clusters);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -52,6 +52,7 @@ typedef struct {
    uint32_t magic;
    uint32_t len;
 } QCowExtension;
+
 #define  QCOW2_EXT_MAGIC_END 0
 #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
@@ -558,6 +559,14 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
    return 0;
 }

+/* We have nothing to do for QCOW2 reopen, stubs just return
+ * success */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+                                BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
 {
@@ -1087,6 +1096,7 @@ int qcow2_update_header(BlockDriverState *bs)
            goto fail;
        }

+        /* Using strncpy is ok here, since buf is not NUL-terminated. */
        strncpy(buf, bs->backing_file, buflen);

        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
@@ -1679,6 +1689,7 @@ static BlockDriver bdrv_qcow2 = {
    .bdrv_probe         = qcow2_probe,
    .bdrv_open          = qcow2_open,
    .bdrv_close         = qcow2_close,
+    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
    .bdrv_create        = qcow2_create,
    .bdrv_co_is_allocated = qcow2_co_is_allocated,
    .bdrv_set_key       = qcow2_set_key,
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -103,7 +103,6 @@ static void qed_write_table_cb(void *opaque, int ret)
 out:
    qemu_vfree(write_table_cb->table);
    gencb_complete(&write_table_cb->gencb, ret);
-    return;
 }

 /**
--- a/block/qed.c
+++ b/block/qed.c
@@ -30,7 +30,7 @@ static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
    }
 }

-static AIOPool qed_aio_pool = {
+static const AIOCBInfo qed_aiocb_info = {
    .aiocb_size         = sizeof(QEDAIOCB),
    .cancel             = qed_aio_cancel,
 };
@@ -505,6 +505,14 @@ out:
    return ret;
 }

+/* We have nothing to do for QED reopen, stubs just return
+ * success */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static void bdrv_qed_close(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;
@@ -1303,7 +1311,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
                                       BlockDriverCompletionFunc *cb,
                                       void *opaque, int flags)
 {
-    QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque);
+    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);

    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
                        opaque, flags);
@@ -1564,6 +1572,7 @@ static BlockDriver bdrv_qed = {
    .bdrv_rebind              = bdrv_qed_rebind,
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
+    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
    .bdrv_create              = bdrv_qed_create,
    .bdrv_co_is_allocated     = bdrv_qed_co_is_allocated,
    .bdrv_make_empty          = bdrv_qed_make_empty,
--- a/block/raw-posix-aio.h
+++ b/block/raw-posix-aio.h
@@ -1,5 +1,5 @@
 /*
- * QEMU Posix block I/O backend AIO support
+ * Declarations for AIO in the raw protocol
 *
 * Copyright IBM, Corp. 2008
 *
@@ -12,8 +12,8 @@
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
-#ifndef QEMU_RAW_POSIX_AIO_H
-#define QEMU_RAW_POSIX_AIO_H
+#ifndef QEMU_RAW_AIO_H
+#define QEMU_RAW_AIO_H

 /* AIO request types */
 #define QEMU_AIO_READ         0x0001
@@ -27,19 +27,22 @@
 #define QEMU_AIO_MISALIGNED   0x1000


-/* posix-aio-compat.c - thread pool based implementation */
-int paio_init(void);
-BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int type);
-BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque);
-
 /* linux-aio.c - Linux native implementation */
+#ifdef CONFIG_LINUX_AIO
 void *laio_init(void);
 BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int type);
+#endif

-#endif /* QEMU_RAW_POSIX_AIO_H */
+#ifdef _WIN32
+typedef struct QEMUWin32AIOState QEMUWin32AIOState;
+QEMUWin32AIOState *win32_aio_init(void);
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+        QEMUWin32AIOState *aio, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type);
+#endif
+
+#endif /* QEMU_RAW_AIO_H */
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -27,7 +27,10 @@
 #include "qemu-log.h"
 #include "block_int.h"
 #include "module.h"
-#include "block/raw-posix-aio.h"
+#include "trace.h"
+#include "thread-pool.h"
+#include "iov.h"
+#include "raw-aio.h"

 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
@@ -133,16 +136,36 @@ typedef struct BDRVRawState {
    int use_aio;
    void *aio_ctx;
 #endif
-    uint8_t *aligned_buf;
-    unsigned aligned_buf_size;
 #ifdef CONFIG_XFS
    bool is_xfs : 1;
 #endif
 } BDRVRawState;

+typedef struct BDRVRawReopenState {
+    int fd;
+    int open_flags;
+#ifdef CONFIG_LINUX_AIO
+    int use_aio;
+#endif
+} BDRVRawReopenState;
+
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);

+typedef struct RawPosixAIOData {
+    BlockDriverState *bs;
+    int aio_fildes;
+    union {
+        struct iovec *aio_iov;
+        void *aio_ioctl_buf;
+    };
+    int aio_niov;
+    size_t aio_nbytes;
+#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
+    off_t aio_offset;
+    int aio_type;
+} RawPosixAIOData;
+
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int cdrom_reopen(BlockDriverState *bs);
 #endif
@@ -185,6 +208,57 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif

+static void raw_parse_flags(int bdrv_flags, int *open_flags)
+{
+    assert(open_flags != NULL);
+
+    *open_flags |= O_BINARY;
+    *open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        *open_flags |= O_RDWR;
+    } else {
+        *open_flags |= O_RDONLY;
+    }
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        *open_flags |= O_DIRECT;
+    }
+}
+
+#ifdef CONFIG_LINUX_AIO
+static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
+{
+    int ret = -1;
+    assert(aio_ctx != NULL);
+    assert(use_aio != NULL);
+    /*
+     * Currently Linux do AIO only for files opened with O_DIRECT
+     * specified so check NOCACHE flag too
+     */
+    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
+
+        /* if non-NULL, laio_init() has already been run */
+        if (*aio_ctx == NULL) {
+            *aio_ctx = laio_init();
+            if (!*aio_ctx) {
+                goto error;
+            }
+        }
+        *use_aio = 1;
+    } else {
+        *use_aio = 0;
+    }
+
+    ret = 0;
+
+error:
+    return ret;
+}
+#endif
+
 static int raw_open_common(BlockDriverState *bs, const char *filename,
                           int bdrv_flags, int open_flags)
 {
@@ -196,20 +270,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
        return ret;
    }

-    s->open_flags = open_flags | O_BINARY;
-    s->open_flags &= ~O_ACCMODE;
-    if (bdrv_flags & BDRV_O_RDWR) {
-        s->open_flags |= O_RDWR;
-    } else {
-        s->open_flags |= O_RDONLY;
-    }
-
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((bdrv_flags & BDRV_O_NOCACHE))
-        s->open_flags |= O_DIRECT;
-    if (!(bdrv_flags & BDRV_O_CACHE_WB))
-        s->open_flags |= O_DSYNC;
+    s->open_flags = open_flags;
+    raw_parse_flags(bdrv_flags, &s->open_flags);

    s->fd = -1;
    fd = qemu_open(filename, s->open_flags, 0644);
@@ -220,45 +282,13 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
        return ret;
    }
    s->fd = fd;
-    s->aligned_buf = NULL;
-
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        /*
-         * Allocate a buffer for read/modify/write cycles.  Chose the size
-         * pessimistically as we don't know the block size yet.
-         */
-        s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
-        s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
-        if (s->aligned_buf == NULL) {
-            goto out_close;
-        }
-    }
-
-    /* We're falling back to POSIX AIO in some cases so init always */
-    if (paio_init() < 0) {
-        goto out_free_buf;
-    }

 #ifdef CONFIG_LINUX_AIO
-    /*
-     * Currently Linux do AIO only for files opened with O_DIRECT
-     * specified so check NOCACHE flag too
-     */
-    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
-                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
-        s->aio_ctx = laio_init();
-        if (!s->aio_ctx) {
-            goto out_free_buf;
-        }
-        s->use_aio = 1;
-    } else
-#endif
-    {
-#ifdef CONFIG_LINUX_AIO
-        s->use_aio = 0;
-#endif
+    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
+        qemu_close(fd);
+        return -errno;
    }
+#endif

 #ifdef CONFIG_XFS
    if (platform_test_xfs_fd(s->fd)) {
@@ -267,12 +297,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
 #endif

    return 0;
-
-out_free_buf:
-    qemu_vfree(s->aligned_buf);
-out_close:
-    qemu_close(fd);
-    return -errno;
 }

 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
@@ -283,6 +307,113 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
    return raw_open_common(bs, filename, flags, 0);
 }

+static int raw_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRawState *s;
+    BDRVRawReopenState *raw_s;
+    int ret = 0;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    s = state->bs->opaque;
+
+    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
+    raw_s = state->opaque;
+
+#ifdef CONFIG_LINUX_AIO
+    raw_s->use_aio = s->use_aio;
+
+    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
+     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
+     * won't override aio_ctx if aio_ctx is non-NULL */
+    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
+        return -1;
+    }
+#endif
+
+    if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+        raw_s->open_flags |= O_NONBLOCK;
+    }
+
+    raw_parse_flags(state->flags, &raw_s->open_flags);
+
+    raw_s->fd = -1;
+
+    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
+#ifdef O_NOATIME
+    fcntl_flags |= O_NOATIME;
+#endif
+
+    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+        /* dup the original fd */
+        /* TODO: use qemu fcntl wrapper */
+#ifdef F_DUPFD_CLOEXEC
+        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
+#else
+        raw_s->fd = dup(s->fd);
+        if (raw_s->fd != -1) {
+            qemu_set_cloexec(raw_s->fd);
+        }
+#endif
+        if (raw_s->fd >= 0) {
+            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+            if (ret) {
+                qemu_close(raw_s->fd);
+                raw_s->fd = -1;
+            }
+        }
+    }
+
+    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+    if (raw_s->fd == -1) {
+        assert(!(raw_s->open_flags & O_CREAT));
+        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
+        if (raw_s->fd == -1) {
+            ret = -1;
+        }
+    }
+    return ret;
+}
+
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawState *s = state->bs->opaque;
+
+    s->open_flags = raw_s->open_flags;
+
+    qemu_close(s->fd);
+    s->fd = raw_s->fd;
+#ifdef CONFIG_LINUX_AIO
+    s->use_aio = raw_s->use_aio;
+#endif
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+
+     /* nothing to do if NULL, we didn't get far enough */
+    if (raw_s == NULL) {
+        return;
+    }
+
+    if (raw_s->fd >= 0) {
+        qemu_close(raw_s->fd);
+        raw_s->fd = -1;
+    }
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
 /* XXX: use host sector size if necessary with:
 #ifdef DIOCGSECTORSIZE
        {
@@ -316,6 +447,275 @@ static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
    return 1;
 }

+static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+    if (ret == -1) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
+{
+    int ret;
+
+    ret = qemu_fdatasync(aiocb->aio_fildes);
+    if (ret == -1) {
+        return -errno;
+    }
+    return 0;
+}
+
+#ifdef CONFIG_PREADV
+
+static bool preadv_present = true;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return preadv(fd, iov, nr_iov, offset);
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return pwritev(fd, iov, nr_iov, offset);
+}
+
+#else
+
+static bool preadv_present = false;
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+    return -ENOSYS;
+}
+
+#endif
+
+static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
+{
+    ssize_t len;
+
+    do {
+        if (aiocb->aio_type & QEMU_AIO_WRITE)
+            len = qemu_pwritev(aiocb->aio_fildes,
+                               aiocb->aio_iov,
+                               aiocb->aio_niov,
+                               aiocb->aio_offset);
+         else
+            len = qemu_preadv(aiocb->aio_fildes,
+                              aiocb->aio_iov,
+                              aiocb->aio_niov,
+                              aiocb->aio_offset);
+    } while (len == -1 && errno == EINTR);
+
+    if (len == -1) {
+        return -errno;
+    }
+    return len;
+}
+
+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
+static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
+{
+    ssize_t offset = 0;
+    ssize_t len;
+
+    while (offset < aiocb->aio_nbytes) {
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            len = pwrite(aiocb->aio_fildes,
+                         (const char *)buf + offset,
+                         aiocb->aio_nbytes - offset,
+                         aiocb->aio_offset + offset);
+        } else {
+            len = pread(aiocb->aio_fildes,
+                        buf + offset,
+                        aiocb->aio_nbytes - offset,
+                        aiocb->aio_offset + offset);
+        }
+        if (len == -1 && errno == EINTR) {
+            continue;
+        } else if (len == -1) {
+            offset = -errno;
+            break;
+        } else if (len == 0) {
+            break;
+        }
+        offset += len;
+    }
+
+    return offset;
+}
+
+static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
+{
+    ssize_t nbytes;
+    char *buf;
+
+    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
+        /*
+         * If there is just a single buffer, and it is properly aligned
+         * we can just use plain pread/pwrite without any problems.
+         */
+        if (aiocb->aio_niov == 1) {
+             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+        }
+        /*
+         * We have more than one iovec, and all are properly aligned.
+         *
+         * Try preadv/pwritev first and fall back to linearizing the
+         * buffer if it's not supported.
+         */
+        if (preadv_present) {
+            nbytes = handle_aiocb_rw_vector(aiocb);
+            if (nbytes == aiocb->aio_nbytes ||
+                (nbytes < 0 && nbytes != -ENOSYS)) {
+                return nbytes;
+            }
+            preadv_present = false;
+        }
+
+        /*
+         * XXX(hch): short read/write.  no easy way to handle the reminder
+         * using these interfaces.  For now retry using plain
+         * pread/pwrite?
+         */
+    }
+
+    /*
+     * Ok, we have to do it the hard way, copy all segments into
+     * a single aligned buffer.
+     */
+    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+    if (aiocb->aio_type & QEMU_AIO_WRITE) {
+        char *p = buf;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov; ++i) {
+            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+            p += aiocb->aio_iov[i].iov_len;
+        }
+    }
+
+    nbytes = handle_aiocb_rw_linear(aiocb, buf);
+    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
+        char *p = buf;
+        size_t count = aiocb->aio_nbytes, copy;
+        int i;
+
+        for (i = 0; i < aiocb->aio_niov && count; ++i) {
+            copy = count;
+            if (copy > aiocb->aio_iov[i].iov_len) {
+                copy = aiocb->aio_iov[i].iov_len;
+            }
+            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+            p     += copy;
+            count -= copy;
+        }
+    }
+    qemu_vfree(buf);
+
+    return nbytes;
+}
+
+static int aio_worker(void *arg)
+{
+    RawPosixAIOData *aiocb = arg;
+    ssize_t ret = 0;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
+                      0, aiocb->aio_nbytes - ret);
+
+            ret = aiocb->aio_nbytes;
+        }
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        ret = handle_aiocb_rw(aiocb);
+        if (ret == aiocb->aio_nbytes) {
+            ret = 0;
+        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        ret = handle_aiocb_flush(aiocb);
+        break;
+    case QEMU_AIO_IOCTL:
+        ret = handle_aiocb_ioctl(aiocb);
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(RawPosixAIOData, aiocb);
+    return ret;
+}
+
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = type;
+    acb->aio_fildes = fd;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
+static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+
+    acb->bs = bs;
+    acb->aio_type = QEMU_AIO_IOCTL;
+    acb->aio_fildes = fd;
+    acb->aio_offset = 0;
+    acb->aio_ioctl_buf = buf;
+    acb->aio_ioctl_cmd = req;
+
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -330,7 +730,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
     * boundary.  Check if this is the case or tell the low-level
     * driver that it needs to copy the buffer.
     */
-    if (s->aligned_buf) {
+    if ((bs->open_flags & BDRV_O_NOCACHE)) {
        if (!qiov_is_aligned(bs, qiov)) {
            type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
@@ -378,8 +778,6 @@ static void raw_close(BlockDriverState *bs)
    if (s->fd >= 0) {
        qemu_close(s->fd);
        s->fd = -1;
-        if (s->aligned_buf != NULL)
-            qemu_vfree(s->aligned_buf);
    }
 }

@@ -735,6 +1133,9 @@ static BlockDriver bdrv_file = {
    .instance_size = sizeof(BDRVRawState),
    .bdrv_probe = NULL, /* no probe for protocols */
    .bdrv_file_open = raw_open,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort = raw_reopen_abort,
    .bdrv_close = raw_close,
    .bdrv_create = raw_create,
    .bdrv_co_discard = raw_co_discard,
@@ -1004,6 +1405,9 @@ static BlockDriver bdrv_host_device = {
    .bdrv_probe_device  = hdev_probe_device,
    .bdrv_file_open     = hdev_open,
    .bdrv_close         = raw_close,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit  = raw_reopen_commit,
+    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
    .bdrv_has_zero_init = hdev_has_zero_init,
@@ -1125,6 +1529,9 @@ static BlockDriver bdrv_host_floppy = {
    .bdrv_probe_device	= floppy_probe_device,
    .bdrv_file_open     = floppy_open,
    .bdrv_close         = raw_close,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit  = raw_reopen_commit,
+    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
    .bdrv_has_zero_init = hdev_has_zero_init,
@@ -1224,6 +1631,9 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_probe_device	= cdrom_probe_device,
    .bdrv_file_open     = cdrom_open,
    .bdrv_close         = raw_close,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit  = raw_reopen_commit,
+    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
    .bdrv_has_zero_init = hdev_has_zero_init,
@@ -1343,6 +1753,9 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_probe_device	= cdrom_probe_device,
    .bdrv_file_open     = cdrom_open,
    .bdrv_close         = raw_close,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit  = raw_reopen_commit,
+    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_create        = hdev_create,
    .create_options     = raw_create_options,
    .bdrv_has_zero_init = hdev_has_zero_init,
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -25,6 +25,10 @@
 #include "qemu-timer.h"
 #include "block_int.h"
 #include "module.h"
+#include "raw-aio.h"
+#include "trace.h"
+#include "thread-pool.h"
+#include "iov.h"
 #include <windows.h>
 #include <winioctl.h>

@@ -32,12 +36,130 @@
 #define FTYPE_CD     1
 #define FTYPE_HARDDISK 2

+static QEMUWin32AIOState *aio;
+
+typedef struct RawWin32AIOData {
+    BlockDriverState *bs;
+    HANDLE hfile;
+    struct iovec *aio_iov;
+    int aio_niov;
+    size_t aio_nbytes;
+    off64_t aio_offset;
+    int aio_type;
+} RawWin32AIOData;
+
 typedef struct BDRVRawState {
    HANDLE hfile;
    int type;
    char drive_path[16]; /* format: "d:\" */
+    QEMUWin32AIOState *aio;
 } BDRVRawState;

+/*
+ * Read/writes the data to/from a given linear buffer.
+ *
+ * Returns the number of bytes handles or -errno in case of an error. Short
+ * reads are only returned if the end of the file is reached.
+ */
+static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
+{
+    size_t offset = 0;
+    int i;
+
+    for (i = 0; i < aiocb->aio_niov; i++) {
+        OVERLAPPED ov;
+        DWORD ret, ret_count, len;
+
+        memset(&ov, 0, sizeof(ov));
+        ov.Offset = (aiocb->aio_offset + offset);
+        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
+        len = aiocb->aio_iov[i].iov_len;
+        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                            len, &ret_count, &ov);
+        } else {
+            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
+                           len, &ret_count, &ov);
+        }
+        if (!ret) {
+            ret_count = 0;
+        }
+        if (ret_count != len) {
+            break;
+        }
+        offset += len;
+    }
+
+    return offset;
+}
+
+static int aio_worker(void *arg)
+{
+    RawWin32AIOData *aiocb = arg;
+    ssize_t ret = 0;
+    size_t count;
+
+    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
+    case QEMU_AIO_READ:
+        count = handle_aiocb_rw(aiocb);
+        if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
+            /* A short read means that we have reached EOF. Pad the buffer
+             * with zeros for bytes after EOF. */
+            iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
+                      0, aiocb->aio_nbytes - count);
+
+            count = aiocb->aio_nbytes;
+        }
+        if (count == aiocb->aio_nbytes) {
+            ret = 0;
+        } else {
+            ret = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_WRITE:
+        count = handle_aiocb_rw(aiocb);
+        if (count == aiocb->aio_nbytes) {
+            count = 0;
+        } else {
+            count = -EINVAL;
+        }
+        break;
+    case QEMU_AIO_FLUSH:
+        if (!FlushFileBuffers(aiocb->hfile)) {
+            return -EIO;
+        }
+        break;
+    default:
+        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+        ret = -EINVAL;
+        break;
+    }
+
+    g_slice_free(RawWin32AIOData, aiocb);
+    return ret;
+}
+
+static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+
+    acb->bs = bs;
+    acb->hfile = hfile;
+    acb->aio_type = type;
+
+    if (qiov) {
+        acb->aio_iov = qiov->iov;
+        acb->aio_niov = qiov->niov;
+    }
+    acb->aio_nbytes = nb_sectors * 512;
+    acb->aio_offset = sector_num * 512;
+
+    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
+}
+
 int qemu_ftruncate64(int fd, int64_t length)
 {
    LARGE_INTEGER li;
@@ -77,6 +199,26 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }

+static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
+{
+    assert(access_flags != NULL);
+    assert(overlapped != NULL);
+
+    if (flags & BDRV_O_RDWR) {
+        *access_flags = GENERIC_READ | GENERIC_WRITE;
+    } else {
+        *access_flags = GENERIC_READ;
+    }
+
+    *overlapped = FILE_ATTRIBUTE_NORMAL;
+    if (flags & BDRV_O_NATIVE_AIO) {
+        *overlapped |= FILE_FLAG_OVERLAPPED;
+    }
+    if (flags & BDRV_O_NOCACHE) {
+        *overlapped |= FILE_FLAG_NO_BUFFERING;
+    }
+}
+
 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 {
    BDRVRawState *s = bs->opaque;
@@ -85,17 +227,15 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)

    s->type = FTYPE_FILE;

-    if (flags & BDRV_O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
+    raw_parse_flags(flags, &access_flags, &overlapped);
+    
+    if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
+        aio = win32_aio_init();
+        if (aio == NULL) {
+            return -EINVAL;
+        }
    }

-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if (flags & BDRV_O_NOCACHE)
-        overlapped |= FILE_FLAG_NO_BUFFERING;
-    if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          OPEN_EXISTING, overlapped, NULL);
@@ -104,64 +244,53 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)

        if (err == ERROR_ACCESS_DENIED)
            return -EACCES;
-        return -1;
+        return -EINVAL;
+    }
+
+    if (flags & BDRV_O_NATIVE_AIO) {
+        int ret = win32_aio_attach(aio, s->hfile);
+        if (ret < 0) {
+            CloseHandle(s->hfile);
+            return ret;
+        }
+        s->aio = aio;
    }
    return 0;
 }

-static int raw_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                         BlockDriverCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
-}
-
-static int raw_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
-}
-
-static int raw_flush(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = FlushFileBuffers(s->hfile);
-    if (ret == 0) {
-        return -EIO;
+    if (s->aio) {
+        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+                                nb_sectors, cb, opaque, QEMU_AIO_READ); 
+    } else {
+        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                           cb, opaque, QEMU_AIO_READ);
    }
+}

-    return 0;
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+                          BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+    if (s->aio) {
+        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
+                                nb_sectors, cb, opaque, QEMU_AIO_WRITE); 
+    } else {
+        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+                           cb, opaque, QEMU_AIO_WRITE);
+    }
+}
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+                         BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
 }

 static void raw_close(BlockDriverState *bs)
@@ -282,9 +411,9 @@ static BlockDriver bdrv_file = {
    .bdrv_close		= raw_close,
    .bdrv_create	= raw_create,

-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,

    .bdrv_truncate	= raw_truncate,
    .bdrv_getlength	= raw_getlength,
@@ -374,18 +503,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
    }
    s->type = find_device_type(bs, filename);

-    if (flags & BDRV_O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
-    }
+    raw_parse_flags(flags, &access_flags, &overlapped);
+
    create_flags = OPEN_EXISTING;

-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if (flags & BDRV_O_NOCACHE)
-        overlapped |= FILE_FLAG_NO_BUFFERING;
-    if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          create_flags, overlapped, NULL);
@@ -413,9 +534,9 @@ static BlockDriver bdrv_host_device = {
    .bdrv_close		= raw_close,
    .bdrv_has_zero_init = hdev_has_zero_init,

-    .bdrv_read              = raw_read,
-    .bdrv_write             = raw_write,
-    .bdrv_co_flush_to_disk  = raw_flush,
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,

    .bdrv_getlength	= raw_getlength,
    .bdrv_get_allocated_file_size
--- a/block/raw.c
+++ b/block/raw.c
@@ -9,6 +9,14 @@ static int raw_open(BlockDriverState *bs, int flags)
    return 0;
 }

+/* We have nothing to do for raw reopen, stubs just return
+ * success */
+static int raw_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue,  Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
 {
@@ -115,6 +123,8 @@ static BlockDriver bdrv_raw = {
    .bdrv_open          = raw_open,
    .bdrv_close         = raw_close,

+    .bdrv_reopen_prepare  = raw_reopen_prepare,
+
    .bdrv_co_readv          = raw_co_readv,
    .bdrv_co_writev         = raw_co_writev,
    .bdrv_co_is_allocated   = raw_co_is_allocated,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -69,7 +69,7 @@ typedef enum {
 typedef struct RBDAIOCB {
    BlockDriverAIOCB common;
    QEMUBH *bh;
-    int ret;
+    int64_t ret;
    QEMUIOVector *qiov;
    char *bounce;
    RBDAIOCmd cmd;
@@ -86,7 +86,7 @@ typedef struct RADOSCB {
    int done;
    int64_t size;
    char *buf;
-    int ret;
+    int64_t ret;
 } RADOSCB;

 #define RBD_FD_READ 0
@@ -487,12 +487,6 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
        rados_conf_set(s->cluster, "rbd_cache", "false");
    } else {
        rados_conf_set(s->cluster, "rbd_cache", "true");
-        if (!(flags & BDRV_O_CACHE_WB)) {
-            r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0");
-            if (r < 0) {
-                rados_conf_set(s->cluster, "rbd_cache", "false");
-            }
-        }
    }

    if (strstr(conf, "conf=") == NULL) {
@@ -576,7 +570,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
    acb->cancelled = 1;
 }

-static AIOPool rbd_aio_pool = {
+static const AIOCBInfo rbd_aiocb_info = {
    .aiocb_size = sizeof(RBDAIOCB),
    .cancel = qemu_rbd_aio_cancel,
 };
@@ -678,7 +672,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,

    BDRVRBDState *s = bs->opaque;

-    acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
    acb->cmd = cmd;
    acb->qiov = qiov;
    if (cmd == RBD_AIO_DISCARD) {
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -201,12 +201,12 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
    return hval;
 }

-static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
+static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
 {
    return inode->vdi_id == inode->data_vdi_id[idx];
 }

-static inline int is_data_obj(uint64_t oid)
+static inline bool is_data_obj(uint64_t oid)
 {
    return !(VDI_BIT & oid);
 }
@@ -231,7 +231,7 @@ static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
    return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
 }

-static inline int is_snapshot(struct SheepdogInode *inode)
+static inline bool is_snapshot(struct SheepdogInode *inode)
 {
    return !!inode->snap_ctime;
 }
@@ -281,7 +281,7 @@ struct SheepdogAIOCB {
    Coroutine *coroutine;
    void (*aio_done_func)(SheepdogAIOCB *);

-    int canceled;
+    bool canceled;
    int nr_pending;
 };

@@ -292,8 +292,8 @@ typedef struct BDRVSheepdogState {
    uint32_t max_dirty_data_idx;

    char name[SD_MAX_VDI_LEN];
-    int is_snapshot;
-    uint8_t cache_enabled;
+    bool is_snapshot;
+    bool cache_enabled;

    char *addr;
    char *port;
@@ -417,10 +417,10 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
     */
    acb->ret = -EIO;
    qemu_coroutine_enter(acb->coroutine, NULL);
-    acb->canceled = 1;
+    acb->canceled = true;
 }

-static AIOPool sd_aio_pool = {
+static const AIOCBInfo sd_aiocb_info = {
    .aiocb_size = sizeof(SheepdogAIOCB),
    .cancel = sd_aio_cancel,
 };
@@ -431,7 +431,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
 {
    SheepdogAIOCB *acb;

-    acb = qemu_aio_get(&sd_aio_pool, bs, cb, opaque);
+    acb = qemu_aio_get(&sd_aiocb_info, bs, cb, opaque);

    acb->qiov = qiov;

@@ -439,7 +439,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
    acb->nb_sectors = nb_sectors;

    acb->aio_done_func = NULL;
-    acb->canceled = 0;
+    acb->canceled = false;
    acb->coroutine = qemu_coroutine_self();
    acb->ret = 0;
    acb->nr_pending = 0;
@@ -613,7 +613,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
 }

 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, int create,
+                           struct iovec *iov, int niov, bool create,
                           enum AIOCBState aiocb_type);


@@ -646,7 +646,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
        QLIST_REMOVE(aio_req, aio_siblings);
        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
        ret = add_aio_request(s, aio_req, acb->qiov->iov,
-                              acb->qiov->niov, 0, acb->aiocb_type);
+                              acb->qiov->niov, false, acb->aiocb_type);
        if (ret < 0) {
            error_report("add_aio_request is failed");
            free_aio_req(s, aio_req);
@@ -866,14 +866,14 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
        s->port = 0;
    }

-    strncpy(vdi, p, SD_MAX_VDI_LEN);
+    pstrcpy(vdi, SD_MAX_VDI_LEN, p);

    p = strchr(vdi, ':');
    if (p) {
        *p++ = '\0';
        *snapid = strtoul(p, NULL, 10);
        if (*snapid == 0) {
-            strncpy(tag, p, SD_MAX_VDI_TAG_LEN);
+            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
        }
    } else {
        *snapid = CURRENT_VDI_ID; /* search current vdi */
@@ -900,7 +900,10 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
        return fd;
    }

-    memset(buf, 0, sizeof(buf));
+    /* This pair of strncpy calls ensures that the buffer is zero-filled,
+     * which is desirable since we'll soon be sending those bytes, and
+     * don't want the send_req to read uninitialized data.
+     */
    strncpy(buf, filename, SD_MAX_VDI_LEN);
    strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);

@@ -940,7 +943,7 @@ out:
 }

 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                           struct iovec *iov, int niov, int create,
+                           struct iovec *iov, int niov, bool create,
                           enum AIOCBState aiocb_type)
 {
    int nr_copies = s->inode.nr_copies;
@@ -1019,7 +1022,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,

 static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
                             unsigned int datalen, uint64_t offset,
-                             int write, int create, uint8_t cache)
+                             bool write, bool create, bool cache)
 {
    SheepdogObjReq hdr;
    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -1068,18 +1071,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
 }

 static int read_object(int fd, char *buf, uint64_t oid, int copies,
-                       unsigned int datalen, uint64_t offset, uint8_t cache)
+                       unsigned int datalen, uint64_t offset, bool cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
-                             cache);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+                             false, cache);
 }

 static int write_object(int fd, char *buf, uint64_t oid, int copies,
-                        unsigned int datalen, uint64_t offset, int create,
-                        uint8_t cache)
+                        unsigned int datalen, uint64_t offset, bool create,
+                        bool cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
-                             cache);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+                             create, cache);
 }

 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1114,19 +1117,17 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
        goto out;
    }

-    if (flags & BDRV_O_CACHE_WB) {
-        s->cache_enabled = 1;
-        s->flush_fd = connect_to_sdog(s->addr, s->port);
-        if (s->flush_fd < 0) {
-            error_report("failed to connect");
-            ret = s->flush_fd;
-            goto out;
-        }
+    s->cache_enabled = true;
+    s->flush_fd = connect_to_sdog(s->addr, s->port);
+    if (s->flush_fd < 0) {
+        error_report("failed to connect");
+        ret = s->flush_fd;
+        goto out;
    }

    if (snapid || tag[0] != '\0') {
        dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
-        s->is_snapshot = 1;
+        s->is_snapshot = true;
    }

    fd = connect_to_sdog(s->addr, s->port);
@@ -1151,7 +1152,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
    s->max_dirty_data_idx = 0;

    bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
-    strncpy(s->name, vdi, sizeof(s->name));
+    pstrcpy(s->name, sizeof(s->name), vdi);
    qemu_co_mutex_init(&s->lock);
    g_free(buf);
    return 0;
@@ -1179,8 +1180,11 @@ static int do_sd_create(char *filename, int64_t vdi_size,
        return fd;
    }

+    /* FIXME: would it be better to fail (e.g., return -EIO) when filename
+     * does not fit in buf?  For now, just truncate and avoid buffer overrun.
+     */
    memset(buf, 0, sizeof(buf));
-    strncpy(buf, filename, SD_MAX_VDI_LEN);
+    pstrcpy(buf, sizeof(buf), filename);

    memset(&hdr, 0, sizeof(hdr));
    hdr.opcode = SD_OP_NEW_VDI;
@@ -1266,7 +1270,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
    BDRVSheepdogState *s;
    char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
    uint32_t snapid;
-    int prealloc = 0;
+    bool prealloc = false;
    const char *vdiname;

    s = g_malloc0(sizeof(BDRVSheepdogState));
@@ -1288,9 +1292,9 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
            if (!options->value.s || !strcmp(options->value.s, "off")) {
-                prealloc = 0;
+                prealloc = false;
            } else if (!strcmp(options->value.s, "full")) {
-                prealloc = 1;
+                prealloc = true;
            } else {
                error_report("Invalid preallocation mode: '%s'",
                             options->value.s);
@@ -1418,7 +1422,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
    s->inode.vdi_size = offset;
    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
+                       s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
    close(fd);

    if (ret < 0) {
@@ -1457,7 +1461,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
        aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
                                data_len, offset, 0, 0, offset);
        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA);
+        ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
        if (ret) {
            free_aio_req(s, aio_req);
            acb->ret = -EIO;
@@ -1511,7 +1515,7 @@ static int sd_create_branch(BDRVSheepdogState *s)

    memcpy(&s->inode, buf, sizeof(s->inode));

-    s->is_snapshot = 0;
+    s->is_snapshot = false;
    ret = 0;
    dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);

@@ -1566,7 +1570,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
    while (done != total) {
        uint8_t flags = 0;
        uint64_t old_oid = 0;
-        int create = 0;
+        bool create = false;

        oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);

@@ -1581,10 +1585,10 @@ static int coroutine_fn sd_co_rw_vector(void *p)
            break;
        case AIOCB_WRITE_UDATA:
            if (!inode->data_vdi_id[idx]) {
-                create = 1;
+                create = true;
            } else if (!is_data_obj_writable(inode, idx)) {
                /* Copy-On-Write */
-                create = 1;
+                create = true;
                old_oid = oid;
                flags = SD_FLAG_CMD_COW;
            }
@@ -1718,7 +1722,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
    if (rsp->result == SD_RES_INVALID_PARMS) {
        dprintf("disable write cache since the server doesn't support it\n");

-        s->cache_enabled = 0;
+        s->cache_enabled = false;
        closesocket(s->flush_fd);
        return 0;
    }
@@ -1754,6 +1758,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

    s->inode.vm_state_size = sn_info->vm_state_size;
    s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+    /* It appears that inode.tag does not require a NUL terminator,
+     * which means this use of strncpy is ok.
+     */
    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
    /* we don't need to update entire object */
    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
@@ -1766,7 +1773,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    }

    ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
+                       s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
    if (ret < 0) {
        error_report("failed to write snapshot's inode.");
        goto cleanup;
@@ -1813,13 +1820,13 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)

    memcpy(old_s, s, sizeof(BDRVSheepdogState));

-    memset(vdi, 0, sizeof(vdi));
-    strncpy(vdi, s->name, sizeof(vdi));
+    pstrcpy(vdi, sizeof(vdi), s->name);

-    memset(tag, 0, sizeof(tag));
    snapid = strtoul(snapshot_id, NULL, 10);
-    if (!snapid) {
-        strncpy(tag, s->name, sizeof(tag));
+    if (snapid) {
+        tag[0] = 0;
+    } else {
+        pstrcpy(tag, sizeof(tag), s->name);
    }

    ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1);
@@ -1853,7 +1860,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto out;
    }

-    s->is_snapshot = 1;
+    s->is_snapshot = true;

    g_free(buf);
    g_free(old_s);
@@ -1948,8 +1955,9 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)

            snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
                     inode.snap_id);
-            strncpy(sn_tab[found].name, inode.tag,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)));
+            pstrcpy(sn_tab[found].name,
+                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
+                    inode.tag);
            found++;
        }
    }
@@ -1970,8 +1978,8 @@ out:
 static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
                                int64_t pos, int size, int load)
 {
-    int fd, create;
-    int ret = 0, remaining = size;
+    bool create;
+    int fd, ret = 0, remaining = size;
    unsigned int data_len;
    uint64_t vmstate_oid;
    uint32_t vdi_index;
@@ -1986,7 +1994,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
        vdi_index = pos / SD_DATA_OBJ_SIZE;
        offset = pos % SD_DATA_OBJ_SIZE;

-        data_len = MIN(remaining, SD_DATA_OBJ_SIZE);
+        data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);

        vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);

@@ -2007,6 +2015,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
        }

        pos += data_len;
+        data += data_len;
        remaining -= data_len;
    }
    ret = size;
--- a/block/stream.c
+++ b/block/stream.c
@@ -13,6 +13,7 @@

 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"

 enum {
@@ -30,6 +31,7 @@ typedef struct StreamBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *base;
+    BlockdevOnError on_error;
    char backing_file_id[1024];
 } StreamBlockJob;

@@ -77,13 +79,14 @@ static void coroutine_fn stream_run(void *opaque)
    BlockDriverState *bs = s->common.bs;
    BlockDriverState *base = s->base;
    int64_t sector_num, end;
+    int error = 0;
    int ret = 0;
    int n = 0;
    void *buf;

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
-        block_job_complete(&s->common, s->common.len);
+        block_job_completed(&s->common, s->common.len);
        return;
    }

@@ -141,7 +144,19 @@ wait:
            ret = stream_populate(bs, sector_num, n, buf);
        }
        if (ret < 0) {
-            break;
+            BlockErrorAction action =
+                block_job_error_action(&s->common, s->common.bs, s->on_error,
+                                       true, -ret);
+            if (action == BDRV_ACTION_STOP) {
+                n = 0;
+                continue;
+            }
+            if (error == 0) {
+                error = ret;
+            }
+            if (action == BDRV_ACTION_REPORT) {
+                break;
+            }
        }
        ret = 0;

@@ -153,6 +168,9 @@ wait:
        bdrv_disable_copy_on_read(bs);
    }

+    /* Do not remove the backing file if an error was there but ignored.  */
+    ret = error;
+
    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (base) {
@@ -166,7 +184,7 @@ wait:
    }

    qemu_vfree(buf);
-    block_job_complete(&s->common, ret);
+    block_job_completed(&s->common, ret);
 }

 static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -188,11 +206,19 @@ static BlockJobType stream_job_type = {

 void stream_start(BlockDriverState *bs, BlockDriverState *base,
                  const char *base_id, int64_t speed,
+                  BlockdevOnError on_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
 {
    StreamBlockJob *s;

+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-error");
+        return;
+    }
+
    s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
@@ -203,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
        pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
    }

+    s->on_error = on_error;
    s->common.co = qemu_coroutine_create(stream_run);
    trace_stream_start(bs, base, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -60,9 +60,6 @@
 /* TODO: move uuid emulation to some central place in QEMU. */
 #include "sysemu.h"     /* UUID_FMT */
 typedef unsigned char uuid_t[16];
-void uuid_generate(uuid_t out);
-int uuid_is_null(const uuid_t uu);
-void uuid_unparse(const uuid_t uu, char *out);
 #endif

 /* Code configuration options. */
@@ -124,18 +121,18 @@ void uuid_unparse(const uuid_t uu, char *out);
 #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)

 #if !defined(CONFIG_UUID)
-void uuid_generate(uuid_t out)
+static inline void uuid_generate(uuid_t out)
 {
    memset(out, 0, sizeof(uuid_t));
 }

-int uuid_is_null(const uuid_t uu)
+static inline int uuid_is_null(const uuid_t uu)
 {
    uuid_t null_uuid = { 0 };
    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
 }

-void uuid_unparse(const uuid_t uu, char *out)
+static inline void uuid_unparse(const uuid_t uu, char *out)
 {
    snprintf(out, 37, UUID_FMT,
            uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
@@ -454,6 +451,12 @@ static int vdi_open(BlockDriverState *bs, int flags)
    return -1;
 }

+static int vdi_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
 {
@@ -628,7 +631,6 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options)
    VdiHeader header;
    size_t i;
    size_t bmap_size;
-    uint32_t *bmap;

    logout("\n");

@@ -693,21 +695,21 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options)
        result = -errno;
    }

-    bmap = NULL;
    if (bmap_size > 0) {
-        bmap = (uint32_t *)g_malloc0(bmap_size);
-    }
-    for (i = 0; i < blocks; i++) {
-        if (image_type == VDI_TYPE_STATIC) {
-            bmap[i] = i;
-        } else {
-            bmap[i] = VDI_UNALLOCATED;
+        uint32_t *bmap = g_malloc0(bmap_size);
+        for (i = 0; i < blocks; i++) {
+            if (image_type == VDI_TYPE_STATIC) {
+                bmap[i] = i;
+            } else {
+                bmap[i] = VDI_UNALLOCATED;
+            }
        }
+        if (write(fd, bmap, bmap_size) < 0) {
+            result = -errno;
+        }
+        g_free(bmap);
    }
-    if (write(fd, bmap, bmap_size) < 0) {
-        result = -errno;
-    }
-    g_free(bmap);
+
    if (image_type == VDI_TYPE_STATIC) {
        if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
            result = -errno;
@@ -762,6 +764,7 @@ static BlockDriver bdrv_vdi = {
    .bdrv_probe = vdi_probe,
    .bdrv_open = vdi_open,
    .bdrv_close = vdi_close,
+    .bdrv_reopen_prepare = vdi_reopen_prepare,
    .bdrv_create = vdi_create,
    .bdrv_co_is_allocated = vdi_co_is_allocated,
    .bdrv_make_empty = vdi_make_empty,
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -300,6 +300,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
    return 1;
 }

+/* Queue extents, if any, for reopen() */
+static int vmdk_reopen_prepare(BDRVReopenState *state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    BDRVVmdkState *s;
+    int ret = -1;
+    int i;
+    VmdkExtent *e;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    if (queue == NULL) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                 "No reopen queue for VMDK extents");
+        goto exit;
+    }
+
+    s = state->bs->opaque;
+
+    assert(s != NULL);
+
+    for (i = 0; i < s->num_extents; i++) {
+        e = &s->extents[i];
+        if (e->file != state->bs->file) {
+            bdrv_reopen_queue(queue, e->file, state->flags);
+        }
+    }
+    ret = 0;
+
+exit:
+    return ret;
+}
+
 static int vmdk_parent_open(BlockDriverState *bs)
 {
    char *p_name;
@@ -1058,6 +1092,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
    BDRVVmdkState *s = bs->opaque;
    int ret;
    uint64_t n, index_in_cluster;
+    uint64_t extent_begin_sector, extent_relative_sector_num;
    VmdkExtent *extent = NULL;
    uint64_t cluster_offset;

@@ -1069,7 +1104,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
        ret = get_cluster_offset(
                            bs, extent, NULL,
                            sector_num << 9, 0, &cluster_offset);
-        index_in_cluster = sector_num % extent->cluster_sectors;
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
@@ -1120,6 +1157,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
    VmdkExtent *extent = NULL;
    int n, ret;
    int64_t index_in_cluster;
+    uint64_t extent_begin_sector, extent_relative_sector_num;
    uint64_t cluster_offset;
    VmdkMetaData m_data;

@@ -1162,7 +1200,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
        if (ret) {
            return -EINVAL;
        }
-        index_in_cluster = sector_num % extent->cluster_sectors;
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
@@ -1374,8 +1414,7 @@ static int relative_path(char *dest, int dest_size,
        return -1;
    }
    if (path_is_absolute(target)) {
-        dest[dest_size - 1] = '\0';
-        strncpy(dest, target, dest_size - 1);
+        pstrcpy(dest, dest_size, target);
        return 0;
    }
    while (base[i] == target[i]) {
@@ -1646,6 +1685,7 @@ static BlockDriver bdrv_vmdk = {
    .instance_size  = sizeof(BDRVVmdkState),
    .bdrv_probe     = vmdk_probe,
    .bdrv_open      = vmdk_open,
+    .bdrv_reopen_prepare = vmdk_reopen_prepare,
    .bdrv_read      = vmdk_co_read,
    .bdrv_write     = vmdk_co_write,
    .bdrv_close     = vmdk_close,
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -265,6 +265,12 @@ static int vpc_open(BlockDriverState *bs, int flags)
    return err;
 }

+static int vpc_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 /*
 * Returns the absolute byte offset of the given sector in the image file.
 * If the sector is not allocated, -1 is returned instead.
@@ -783,6 +789,7 @@ static BlockDriver bdrv_vpc = {
    .bdrv_probe     = vpc_probe,
    .bdrv_open      = vpc_open,
    .bdrv_close     = vpc_close,
+    .bdrv_reopen_prepare = vpc_reopen_prepare,
    .bdrv_create    = vpc_create,

    .bdrv_read              = vpc_co_read,
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -0,0 +1,226 @@
+/*
+ * Block driver for RAW files (win32)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "block_int.h"
+#include "module.h"
+#include "qemu-common.h"
+#include "qemu-aio.h"
+#include "raw-aio.h"
+#include "event_notifier.h"
+#include <windows.h>
+#include <winioctl.h>
+
+#define FTYPE_FILE 0
+#define FTYPE_CD     1
+#define FTYPE_HARDDISK 2
+
+struct QEMUWin32AIOState {
+    HANDLE hIOCP;
+    EventNotifier e;
+    int count;
+};
+
+typedef struct QEMUWin32AIOCB {
+    BlockDriverAIOCB common;
+    struct QEMUWin32AIOState *ctx;
+    int nbytes;
+    OVERLAPPED ov;
+    QEMUIOVector *qiov;
+    void *buf;
+    bool is_read;
+    bool is_linear;
+} QEMUWin32AIOCB;
+
+/*
+ * Completes an AIO request (calls the callback and frees the ACB).
+ */
+static void win32_aio_process_completion(QEMUWin32AIOState *s,
+    QEMUWin32AIOCB *waiocb, DWORD count)
+{
+    int ret;
+    s->count--;
+
+    if (waiocb->ov.Internal != 0) {
+        ret = -EIO;
+    } else {
+        ret = 0;
+        if (count < waiocb->nbytes) {
+            /* Short reads mean EOF, pad with zeros. */
+            if (waiocb->is_read) {
+                qemu_iovec_memset(waiocb->qiov, count, 0,
+                    waiocb->qiov->size - count);
+            } else {
+                ret = -EINVAL;
+            }
+       }
+    }
+
+    if (!waiocb->is_linear) {
+        if (ret == 0 && waiocb->is_read) {
+            QEMUIOVector *qiov = waiocb->qiov;
+            char *p = waiocb->buf;
+            int i;
+
+            for (i = 0; i < qiov->niov; ++i) {
+                memcpy(qiov->iov[i].iov_base, p, qiov->iov[i].iov_len);
+                p += qiov->iov[i].iov_len;
+            }
+        }
+        qemu_vfree(waiocb->buf);
+    }
+
+
+    waiocb->common.cb(waiocb->common.opaque, ret);
+    qemu_aio_release(waiocb);
+}
+
+static void win32_aio_completion_cb(EventNotifier *e)
+{
+    QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+    DWORD count;
+    ULONG_PTR key;
+    OVERLAPPED *ov;
+
+    event_notifier_test_and_clear(&s->e);
+    while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) {
+        QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov);
+
+        win32_aio_process_completion(s, waiocb, count);
+    }
+}
+
+static int win32_aio_flush_cb(EventNotifier *e)
+{
+    QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
+
+    return (s->count > 0) ? 1 : 0;
+}
+
+static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
+
+    /*
+     * CancelIoEx is only supported in Vista and newer.  For now, just
+     * wait for completion.
+     */
+    while (!HasOverlappedIoCompleted(&waiocb->ov)) {
+        qemu_aio_wait();
+    }
+}
+
+static const AIOCBInfo win32_aiocb_info = {
+    .aiocb_size         = sizeof(QEMUWin32AIOCB),
+    .cancel             = win32_aio_cancel,
+};
+
+BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
+        QEMUWin32AIOState *aio, HANDLE hfile,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int type)
+{
+    struct QEMUWin32AIOCB *waiocb;
+    uint64_t offset = sector_num * 512;
+    DWORD rc;
+
+    waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque);
+    waiocb->nbytes = nb_sectors * 512;
+    waiocb->qiov = qiov;
+    waiocb->is_read = (type == QEMU_AIO_READ);
+
+    if (qiov->niov > 1) {
+        waiocb->buf = qemu_blockalign(bs, qiov->size);
+        if (type & QEMU_AIO_WRITE) {
+            char *p = waiocb->buf;
+            int i;
+
+            for (i = 0; i < qiov->niov; ++i) {
+                memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+                p += qiov->iov[i].iov_len;
+            }
+        }
+        waiocb->is_linear = false;
+    } else {
+        waiocb->buf = qiov->iov[0].iov_base;
+        waiocb->is_linear = true;
+    }
+
+    memset(&waiocb->ov, 0, sizeof(waiocb->ov));
+    waiocb->ov.Offset = (DWORD)offset;
+    waiocb->ov.OffsetHigh = (DWORD)(offset >> 32);
+    waiocb->ov.hEvent = event_notifier_get_handle(&aio->e);
+
+    aio->count++;
+
+    if (type & QEMU_AIO_READ) {
+        rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+    } else {
+        rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
+    }
+    if(rc == 0 && GetLastError() != ERROR_IO_PENDING) {
+        goto out_dec_count;
+    }
+    return &waiocb->common;
+
+out_dec_count:
+    aio->count--;
+    qemu_aio_release(waiocb);
+    return NULL;
+}
+
+int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
+{
+    if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) {
+        return -EINVAL;
+    } else {
+        return 0;
+    }
+}
+
+QEMUWin32AIOState *win32_aio_init(void)
+{
+    QEMUWin32AIOState *s;
+
+    s = g_malloc0(sizeof(*s));
+    if (event_notifier_init(&s->e, false) < 0) {
+        goto out_free_state;
+    }
+
+    s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+    if (s->hIOCP == NULL) {
+        goto out_close_efd;
+    }
+
+    qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
+                                win32_aio_flush_cb);
+
+    return s;
+
+out_close_efd:
+    event_notifier_cleanup(&s->e);
+out_free_state:
+    g_free(s);
+    return NULL;
+}
--- a/block_int.h
+++ b/block_int.h
@@ -31,6 +31,7 @@
 #include "qemu-timer.h"
 #include "qapi-types.h"
 #include "qerror.h"
+#include "monitor.h"

 #define BLOCK_FLAG_ENCRYPT          1
 #define BLOCK_FLAG_COMPAT6          4
@@ -67,78 +68,18 @@ typedef struct BlockIOBaseValue {
    uint64_t ios[2];
 } BlockIOBaseValue;

-typedef struct BlockJob BlockJob;
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
-    /** Derived BlockJob struct size */
-    size_t instance_size;
-
-    /** String describing the operation, part of query-block-jobs QMP API */
-    const char *job_type;
-
-    /** Optional callback for job types that support setting a speed limit */
-    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
-    /** The job type, including the job vtable.  */
-    const BlockJobType *job_type;
-
-    /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
-
-    /**
-     * The coroutine that executes the job.  If not NULL, it is
-     * reentered when busy is false and the job is cancelled.
-     */
-    Coroutine *co;
-
-    /**
-     * Set to true if the job should cancel itself.  The flag must
-     * always be tested just before toggling the busy flag from false
-     * to true.  After a job has been cancelled, it should only yield
-     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
-     */
-    bool cancelled;
-
-    /**
-     * Set to false by the job while it is in a quiescent state, where
-     * no I/O is pending and the job has yielded on any condition
-     * that is not detected by #qemu_aio_wait, such as a timer.
-     */
-    bool busy;
-
-    /** Offset that is published by the query-block-jobs QMP API */
-    int64_t offset;
-
-    /** Length that is published by the query-block-jobs QMP API */
-    int64_t len;
-
-    /** Speed that was set with @block_job_set_speed.  */
-    int64_t speed;
-
-    /** The completion function that will be called when the job completes.  */
-    BlockDriverCompletionFunc *cb;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-};
-
 struct BlockDriver {
    const char *format_name;
    int instance_size;
    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
    int (*bdrv_probe_device)(const char *filename);
+
+    /* For handling image reopen for split or non-split files */
+    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp);
+    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
    int (*bdrv_open)(BlockDriverState *bs, int flags);
    int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
    int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
@@ -268,7 +209,6 @@ struct BlockDriverState {
    int64_t total_sectors; /* if we are reading a disk image, give its
                              size in sectors */
    int read_only; /* if true, the media is read only */
-    int keep_read_only; /* if true, the media was requested to stay read only */
    int open_flags; /* flags used to open the file, re-used for re-open */
    int encrypted; /* if true, the media is encrypted */
    int valid_key; /* if true, a valid encryption key has been set */
@@ -293,6 +233,8 @@ struct BlockDriverState {
    BlockDriverState *backing_hd;
    BlockDriverState *file;

+    NotifierList close_notifiers;
+
    /* number of in-flight copy-on-read requests */
    unsigned int copy_on_read_in_flight;

@@ -323,7 +265,7 @@ struct BlockDriverState {

    /* NOTE: the following infos are only hints for real hardware
       drivers. They are not used by the block driver */
-    BlockErrorAction on_read_error, on_write_error;
+    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;
    char device_name[32];
@@ -336,6 +278,7 @@ struct BlockDriverState {

    /* long-running background operation */
    BlockJob *job;
+
 };

 int get_tmp_filename(char *filename, int size);
@@ -346,92 +289,9 @@ void bdrv_set_io_limits(BlockDriverState *bs,
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
-
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it.  The job
- * will call @cb asynchronously when the job completes.  Note that
- * @bs may have been closed at the time the @cb it is called.  If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds.  Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_complete:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_complete(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the job and wait for it to reach a quiescent
- * state.  Note that the completion callback will still be called
- * asynchronously, hence it is *not* valid to call #bdrv_delete
- * immediately after #block_job_cancel_sync.  Users of block jobs
- * will usually protect the BlockDriverState objects with a reference
- * count, should this be a concern.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read);

 /**
 * stream_start:
@@ -441,6 +301,7 @@ int block_job_cancel_sync(BlockJob *job);
 * @base_id: The file name that will be written to @bs as the new
 * backing file if the job completes.  Ignored if @base is %NULL.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @errp: Error object.
@@ -452,7 +313,47 @@ int block_job_cancel_sync(BlockJob *job);
 * @base_id in the written image and to @base in the live BlockDriverState.
 */
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *base_id, int64_t speed,
+                  const char *base_id, int64_t speed, BlockdevOnError on_error,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                 BlockDriverState *top, int64_t speed,
+                 BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                 void *opaque, Error **errp);
+
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs.  Clusters that are allocated
+ * in @bs will be written to @bs until the job is cancelled or
+ * manually completed.  At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, MirrorSyncMode mode,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp);

--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -0,0 +1,133 @@
+/*
+ * Serving QEMU block devices via NBD
+ *
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "blockdev.h"
+#include "hw/block-common.h"
+#include "monitor.h"
+#include "qerror.h"
+#include "sysemu.h"
+#include "qmp-commands.h"
+#include "trace.h"
+#include "nbd.h"
+#include "qemu_socket.h"
+
+static int server_fd = -1;
+
+static void nbd_accept(void *opaque)
+{
+    struct sockaddr_in addr;
+    socklen_t addr_len = sizeof(addr);
+
+    int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
+    if (fd >= 0) {
+        nbd_client_new(NULL, fd, nbd_client_put);
+    }
+}
+
+void qmp_nbd_server_start(SocketAddress *addr, Error **errp)
+{
+    if (server_fd != -1) {
+        error_setg(errp, "NBD server already running");
+        return;
+    }
+
+    server_fd = socket_listen(addr, errp);
+    if (server_fd != -1) {
+        qemu_set_fd_handler2(server_fd, NULL, nbd_accept, NULL, NULL);
+    }
+}
+
+/* Hook into the BlockDriverState notifiers to close the export when
+ * the file is closed.
+ */
+typedef struct NBDCloseNotifier {
+    Notifier n;
+    NBDExport *exp;
+    QTAILQ_ENTRY(NBDCloseNotifier) next;
+} NBDCloseNotifier;
+
+static QTAILQ_HEAD(, NBDCloseNotifier) close_notifiers =
+    QTAILQ_HEAD_INITIALIZER(close_notifiers);
+
+static void nbd_close_notifier(Notifier *n, void *data)
+{
+    NBDCloseNotifier *cn = DO_UPCAST(NBDCloseNotifier, n, n);
+
+    notifier_remove(&cn->n);
+    QTAILQ_REMOVE(&close_notifiers, cn, next);
+
+    nbd_export_close(cn->exp);
+    nbd_export_put(cn->exp);
+    g_free(cn);
+}
+
+static void nbd_server_put_ref(NBDExport *exp)
+{
+    BlockDriverState *bs = nbd_export_get_blockdev(exp);
+    drive_put_ref(drive_get_by_blockdev(bs));
+}
+
+void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
+                        Error **errp)
+{
+    BlockDriverState *bs;
+    NBDExport *exp;
+    NBDCloseNotifier *n;
+
+    if (server_fd == -1) {
+        error_setg(errp, "NBD server not running");
+        return;
+    }
+
+    if (nbd_export_find(device)) {
+        error_setg(errp, "NBD server already exporting device '%s'", device);
+        return;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    if (!has_writable) {
+        writable = false;
+    }
+    if (bdrv_is_read_only(bs)) {
+        writable = false;
+    }
+
+    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
+                         nbd_server_put_ref);
+
+    nbd_export_set_name(exp, device);
+    drive_get_ref(drive_get_by_blockdev(bs));
+
+    n = g_malloc0(sizeof(NBDCloseNotifier));
+    n->n.notify = nbd_close_notifier;
+    n->exp = exp;
+    bdrv_add_close_notifier(bs, &n->n);
+    QTAILQ_INSERT_TAIL(&close_notifiers, n, next);
+}
+
+void qmp_nbd_server_stop(Error **errp)
+{
+    while (!QTAILQ_EMPTY(&close_notifiers)) {
+        NBDCloseNotifier *cn = QTAILQ_FIRST(&close_notifiers);
+        nbd_close_notifier(&cn->n, nbd_export_get_blockdev(cn->exp));
+    }
+
+    if (server_fd != -1) {
+        qemu_set_fd_handler2(server_fd, NULL, NULL, NULL, NULL);
+        close(server_fd);
+        server_fd = -1;
+    }
+}
--- a/blockdev.c
+++ b/blockdev.c
@@ -9,6 +9,7 @@

 #include "blockdev.h"
 #include "hw/block-common.h"
+#include "blockjob.h"
 #include "monitor.h"
 #include "qerror.h"
 #include "qemu-option.h"
@@ -237,16 +238,16 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
    qemu_bh_schedule(s->bh);
 }

-static int parse_block_error_action(const char *buf, int is_read)
+static int parse_block_error_action(const char *buf, bool is_read)
 {
    if (!strcmp(buf, "ignore")) {
-        return BLOCK_ERR_IGNORE;
+        return BLOCKDEV_ON_ERROR_IGNORE;
    } else if (!is_read && !strcmp(buf, "enospc")) {
-        return BLOCK_ERR_STOP_ENOSPC;
+        return BLOCKDEV_ON_ERROR_ENOSPC;
    } else if (!strcmp(buf, "stop")) {
-        return BLOCK_ERR_STOP_ANY;
+        return BLOCKDEV_ON_ERROR_STOP;
    } else if (!strcmp(buf, "report")) {
-        return BLOCK_ERR_REPORT;
+        return BLOCKDEV_ON_ERROR_REPORT;
    } else {
        error_report("'%s' invalid %s error action",
                     buf, is_read ? "read" : "write");
@@ -432,7 +433,13 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
        return NULL;
    }

-    on_write_error = BLOCK_ERR_STOP_ENOSPC;
+    if (qemu_opt_get(opts, "boot") != NULL) {
+        fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
+                "ignored. Future versions will reject this parameter. Please "
+                "update your scripts.\n");
+    }
+
+    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
            error_report("werror is not supported by this bus type");
@@ -445,7 +452,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
        }
    }

-    on_read_error = BLOCK_ERR_REPORT;
+    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
            error_report("rerror is not supported by this bus type");
@@ -527,6 +534,8 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
                     if_name[type], mediastr, unit_id);
    }
    dinfo->bdrv = bdrv_new(dinfo->id);
+    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
+    dinfo->bdrv->read_only = ro;
    dinfo->devaddr = devaddr;
    dinfo->type = type;
    dinfo->bus = bus_id;
@@ -803,6 +812,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
        /* This removes our old bs from the bdrv_states, and adds the new bs */
        bdrv_append(states->new_bs, states->old_bs);
+        /* We don't need (or want) to use the transactional
+         * bdrv_reopen_multiple() across all the entries at once, because we
+         * don't want to abort all of them if one of them fails the reopen */
+        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
    }

    /* success */
@@ -822,7 +836,6 @@ exit:
    QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
        g_free(states);
    }
-    return;
 }


@@ -1049,26 +1062,12 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
    }
 }

-static QObject *qobject_from_block_job(BlockJob *job)
-{
-    return qobject_from_jsonf("{ 'type': %s,"
-                              "'device': %s,"
-                              "'len': %" PRId64 ","
-                              "'offset': %" PRId64 ","
-                              "'speed': %" PRId64 " }",
-                              job->job_type->job_type,
-                              bdrv_get_device_name(job->bs),
-                              job->len,
-                              job->offset,
-                              job->speed);
-}
-
-static void block_stream_cb(void *opaque, int ret)
+static void block_job_cb(void *opaque, int ret)
 {
    BlockDriverState *bs = opaque;
    QObject *obj;

-    trace_block_stream_cb(bs, bs->job, ret);
+    trace_block_job_cb(bs, bs->job, ret);

    assert(bs->job);
    obj = qobject_from_block_job(bs->job);
@@ -1088,13 +1087,18 @@ static void block_stream_cb(void *opaque, int ret)
 }

 void qmp_block_stream(const char *device, bool has_base,
-                      const char *base, bool has_speed,
-                      int64_t speed, Error **errp)
+                      const char *base, bool has_speed, int64_t speed,
+                      bool has_on_error, BlockdevOnError on_error,
+                      Error **errp)
 {
    BlockDriverState *bs;
    BlockDriverState *base_bs = NULL;
    Error *local_err = NULL;

+    if (!has_on_error) {
+        on_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+
    bs = bdrv_find(device);
    if (!bs) {
        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1110,7 +1114,7 @@ void qmp_block_stream(const char *device, bool has_base,
    }

    stream_start(bs, base_bs, base, has_speed ? speed : 0,
-                 block_stream_cb, bs, &local_err);
+                 on_error, block_job_cb, bs, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        return;
@@ -1124,6 +1128,199 @@ void qmp_block_stream(const char *device, bool has_base,
    trace_qmp_block_stream(bs, bs->job);
 }

+void qmp_block_commit(const char *device,
+                      bool has_base, const char *base, const char *top,
+                      bool has_speed, int64_t speed,
+                      Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *base_bs, *top_bs;
+    Error *local_err = NULL;
+    /* This will be part of the QMP command, if/when the
+     * BlockdevOnError change for blkmirror makes it in
+     */
+    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
+
+    /* drain all i/o before commits */
+    bdrv_drain_all();
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    /* default top_bs is the active layer */
+    top_bs = bs;
+
+    if (top) {
+        if (strcmp(bs->filename, top) != 0) {
+            top_bs = bdrv_find_backing_image(bs, top);
+        }
+    }
+
+    if (top_bs == NULL) {
+        error_setg(errp, "Top image file %s not found", top ? top : "NULL");
+        return;
+    }
+
+    if (has_base && base) {
+        base_bs = bdrv_find_backing_image(top_bs, base);
+    } else {
+        base_bs = bdrv_find_base(top_bs);
+    }
+
+    if (base_bs == NULL) {
+        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+        return;
+    }
+
+    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
+void qmp_drive_mirror(const char *device, const char *target,
+                      bool has_format, const char *format,
+                      enum MirrorSyncMode sync,
+                      bool has_mode, enum NewImageMode mode,
+                      bool has_speed, int64_t speed,
+                      bool has_on_source_error, BlockdevOnError on_source_error,
+                      bool has_on_target_error, BlockdevOnError on_target_error,
+                      Error **errp)
+{
+    BlockDriverInfo bdi;
+    BlockDriverState *bs;
+    BlockDriverState *source, *target_bs;
+    BlockDriver *proto_drv;
+    BlockDriver *drv = NULL;
+    Error *local_err = NULL;
+    int flags;
+    uint64_t size;
+    int ret;
+
+    if (!has_speed) {
+        speed = 0;
+    }
+    if (!has_on_source_error) {
+        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+    if (!has_on_target_error) {
+        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+    if (!has_mode) {
+        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    if (!bdrv_is_inserted(bs)) {
+        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+        return;
+    }
+
+    if (!has_format) {
+        format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
+    }
+    if (format) {
+        drv = bdrv_find_format(format);
+        if (!drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            return;
+        }
+    }
+
+    if (bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
+
+    flags = bs->open_flags | BDRV_O_RDWR;
+    source = bs->backing_hd;
+    if (!source && sync == MIRROR_SYNC_MODE_TOP) {
+        sync = MIRROR_SYNC_MODE_FULL;
+    }
+
+    proto_drv = bdrv_find_protocol(target);
+    if (!proto_drv) {
+        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+        return;
+    }
+
+    if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
+        /* create new image w/o backing file */
+        assert(format && drv);
+        bdrv_get_geometry(bs, &size);
+        size *= 512;
+        ret = bdrv_img_create(target, format,
+                              NULL, NULL, NULL, size, flags);
+    } else {
+        switch (mode) {
+        case NEW_IMAGE_MODE_EXISTING:
+            ret = 0;
+            break;
+        case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
+            /* create new image with backing file */
+            ret = bdrv_img_create(target, format,
+                                  source->filename,
+                                  source->drv->format_name,
+                                  NULL, -1, flags);
+            break;
+        default:
+            abort();
+        }
+    }
+
+    if (ret) {
+        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+        return;
+    }
+
+    target_bs = bdrv_new("");
+    ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
+
+    if (ret < 0) {
+        bdrv_delete(target_bs);
+        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+        return;
+    }
+
+    /* We need a backing file if we will copy parts of a cluster.  */
+    if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
+        bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
+        ret = bdrv_open_backing_file(target_bs);
+        if (ret < 0) {
+            bdrv_delete(target_bs);
+            error_set(errp, QERR_OPEN_FILE_FAILED, target);
+            return;
+        }
+    }
+
+    mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+                 block_job_cb, bs, &local_err);
+    if (local_err != NULL) {
+        bdrv_delete(target_bs);
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
 static BlockJob *find_block_job(const char *device)
 {
    BlockDriverState *bs;
@@ -1140,19 +1337,28 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
    BlockJob *job = find_block_job(device);

    if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
        return;
    }

    block_job_set_speed(job, speed, errp);
 }

-void qmp_block_job_cancel(const char *device, Error **errp)
+void qmp_block_job_cancel(const char *device,
+                          bool has_force, bool force, Error **errp)
 {
    BlockJob *job = find_block_job(device);

+    if (!has_force) {
+        force = false;
+    }
+
    if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+    if (job->paused && !force) {
+        error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
        return;
    }

@@ -1160,25 +1366,53 @@ void qmp_block_job_cancel(const char *device, Error **errp)
    block_job_cancel(job);
 }

+void qmp_block_job_pause(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_pause(job);
+    block_job_pause(job);
+}
+
+void qmp_block_job_resume(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_resume(job);
+    block_job_resume(job);
+}
+
+void qmp_block_job_complete(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_complete(job);
+    block_job_complete(job, errp);
+}
+
 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
 {
    BlockJobInfoList **prev = opaque;
    BlockJob *job = bs->job;

    if (job) {
-        BlockJobInfoList *elem;
-        BlockJobInfo *info = g_new(BlockJobInfo, 1);
-        *info = (BlockJobInfo){
-            .type   = g_strdup(job->job_type->job_type),
-            .device = g_strdup(bdrv_get_device_name(bs)),
-            .len    = job->len,
-            .offset = job->offset,
-            .speed  = job->speed,
-        };
-
-        elem = g_new0(BlockJobInfoList, 1);
-        elem->value = info;
-
+        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        elem->value = block_job_query(bs->job);
        (*prev)->next = elem;
        *prev = elem;
    }
--- a/blockjob.c
+++ b/blockjob.c
@@ -0,0 +1,283 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor.h"
+#include "block.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qjson.h"
+#include "qemu-coroutine.h"
+#include "qmp-commands.h"
+#include "qemu-timer.h"
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp)
+{
+    BlockJob *job;
+
+    if (bs->job || bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+        return NULL;
+    }
+    bdrv_set_in_use(bs, 1);
+
+    job = g_malloc0(job_type->instance_size);
+    job->job_type      = job_type;
+    job->bs            = bs;
+    job->cb            = cb;
+    job->opaque        = opaque;
+    job->busy          = true;
+    bs->job = job;
+
+    /* Only set speed when necessary to avoid NotSupported error */
+    if (speed != 0) {
+        Error *local_err = NULL;
+
+        block_job_set_speed(job, speed, &local_err);
+        if (error_is_set(&local_err)) {
+            bs->job = NULL;
+            g_free(job);
+            bdrv_set_in_use(bs, 0);
+            error_propagate(errp, local_err);
+            return NULL;
+        }
+    }
+    return job;
+}
+
+void block_job_completed(BlockJob *job, int ret)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    job->cb(job->opaque, ret);
+    bs->job = NULL;
+    g_free(job);
+    bdrv_set_in_use(bs, 0);
+}
+
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!job->job_type->set_speed) {
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+    job->job_type->set_speed(job, speed, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    job->speed = speed;
+}
+
+void block_job_complete(BlockJob *job, Error **errp)
+{
+    if (job->paused || job->cancelled || !job->job_type->complete) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        return;
+    }
+
+    job->job_type->complete(job, errp);
+}
+
+void block_job_pause(BlockJob *job)
+{
+    job->paused = true;
+}
+
+bool block_job_is_paused(BlockJob *job)
+{
+    return job->paused;
+}
+
+void block_job_resume(BlockJob *job)
+{
+    job->paused = false;
+    block_job_iostatus_reset(job);
+    if (job->co && !job->busy) {
+        qemu_coroutine_enter(job->co, NULL);
+    }
+}
+
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    block_job_resume(job);
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+    return job->cancelled;
+}
+
+void block_job_iostatus_reset(BlockJob *job)
+{
+    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+    if (job->job_type->iostatus_reset) {
+        job->job_type->iostatus_reset(job);
+    }
+}
+
+struct BlockCancelData {
+    BlockJob *job;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+    bool cancelled;
+    int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+    struct BlockCancelData *data = opaque;
+
+    data->cancelled = block_job_is_cancelled(data->job);
+    data->ret = ret;
+    data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+    struct BlockCancelData data;
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+
+    /* Set up our own callback to store the result and chain to
+     * the original callback.
+     */
+    data.job = job;
+    data.cb = job->cb;
+    data.opaque = job->opaque;
+    data.ret = -EINPROGRESS;
+    job->cb = block_job_cancel_cb;
+    job->opaque = &data;
+    block_job_cancel(job);
+    while (data.ret == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+}
+
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
+{
+    assert(job->busy);
+
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    if (block_job_is_paused(job)) {
+        qemu_coroutine_yield();
+    } else {
+        co_sleep_ns(clock, ns);
+    }
+    job->busy = true;
+}
+
+BlockJobInfo *block_job_query(BlockJob *job)
+{
+    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    info->type      = g_strdup(job->job_type->job_type);
+    info->device    = g_strdup(bdrv_get_device_name(job->bs));
+    info->len       = job->len;
+    info->busy      = job->busy;
+    info->paused    = job->paused;
+    info->offset    = job->offset;
+    info->speed     = job->speed;
+    info->io_status = job->iostatus;
+    return info;
+}
+
+static void block_job_iostatus_set_err(BlockJob *job, int error)
+{
+    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
+}
+
+
+QObject *qobject_from_block_job(BlockJob *job)
+{
+    return qobject_from_jsonf("{ 'type': %s,"
+                              "'device': %s,"
+                              "'len': %" PRId64 ","
+                              "'offset': %" PRId64 ","
+                              "'speed': %" PRId64 " }",
+                              job->job_type->job_type,
+                              bdrv_get_device_name(job->bs),
+                              job->len,
+                              job->offset,
+                              job->speed);
+}
+
+void block_job_ready(BlockJob *job)
+{
+    QObject *data = qobject_from_block_job(job);
+    monitor_protocol_event(QEVENT_BLOCK_JOB_READY, data);
+    qobject_decref(data);
+}
+
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error)
+{
+    BlockErrorAction action;
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_STOP:
+        action = BDRV_ACTION_STOP;
+        break;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        action = BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        action = BDRV_ACTION_IGNORE;
+        break;
+    default:
+        abort();
+    }
+    bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read);
+    if (action == BDRV_ACTION_STOP) {
+        block_job_pause(job);
+        block_job_iostatus_set_err(job, error);
+        if (bs != job->bs) {
+            bdrv_iostatus_set_err(bs, error);
+        }
+    }
+    return action;
+}
--- a/blockjob.h
+++ b/blockjob.h
@@ -0,0 +1,278 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+    /** Derived BlockJob struct size */
+    size_t instance_size;
+
+    /** String describing the operation, part of query-block-jobs QMP API */
+    const char *job_type;
+
+    /** Optional callback for job types that support setting a speed limit */
+    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+    /** Optional callback for job types that need to forward I/O status reset */
+    void (*iostatus_reset)(BlockJob *job);
+
+    /**
+     * Optional callback for job types whose completion must be triggered
+     * manually.
+     */
+    void (*complete)(BlockJob *job, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+    /** The job type, including the job vtable.  */
+    const BlockJobType *job_type;
+
+    /** The block device on which the job is operating.  */
+    BlockDriverState *bs;
+
+    /**
+     * The coroutine that executes the job.  If not NULL, it is
+     * reentered when busy is false and the job is cancelled.
+     */
+    Coroutine *co;
+
+    /**
+     * Set to true if the job should cancel itself.  The flag must
+     * always be tested just before toggling the busy flag from false
+     * to true.  After a job has been cancelled, it should only yield
+     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+     */
+    bool cancelled;
+
+    /**
+     * Set to true if the job is either paused, or will pause itself
+     * as soon as possible (if busy == true).
+     */
+    bool paused;
+
+    /**
+     * Set to false by the job while it is in a quiescent state, where
+     * no I/O is pending and the job has yielded on any condition
+     * that is not detected by #qemu_aio_wait, such as a timer.
+     */
+    bool busy;
+
+    /** Status that is published by the query-block-jobs QMP API */
+    BlockDeviceIoStatus iostatus;
+
+    /** Offset that is published by the query-block-jobs QMP API */
+    int64_t offset;
+
+    /** Length that is published by the query-block-jobs QMP API */
+    int64_t len;
+
+    /** Speed that was set with @block_job_set_speed.  */
+    int64_t speed;
+
+    /** The completion function that will be called when the job completes.  */
+    BlockDriverCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it.  The job
+ * will call @cb asynchronously when the job completes.  Note that
+ * @bs may have been closed at the time the @cb it is called.  If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds.  Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM.  Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error);
+#endif
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -23,11 +23,7 @@

 typedef struct QEMUFileBuffered
 {
-    BufferedPutFunc *put_buffer;
-    BufferedPutReadyFunc *put_ready;
-    BufferedWaitForUnfreezeFunc *wait_for_unfreeze;
-    BufferedCloseFunc *close;
-    void *opaque;
+    MigrationState *migration_state;
    QEMUFile *file;
    int freeze_output;
    size_t bytes_xfer;
@@ -50,70 +46,60 @@ static void buffered_append(QEMUFileBuffered *s,
                            const uint8_t *buf, size_t size)
 {
    if (size > (s->buffer_capacity - s->buffer_size)) {
-        void *tmp;
-
        DPRINTF("increasing buffer capacity from %zu by %zu\n",
                s->buffer_capacity, size + 1024);

        s->buffer_capacity += size + 1024;

-        tmp = g_realloc(s->buffer, s->buffer_capacity);
-        if (tmp == NULL) {
-            fprintf(stderr, "qemu file buffer expansion failed\n");
-            exit(1);
-        }
-
-        s->buffer = tmp;
+        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
    }

    memcpy(s->buffer + s->buffer_size, buf, size);
    s->buffer_size += size;
 }

-static void buffered_flush(QEMUFileBuffered *s)
+static ssize_t buffered_flush(QEMUFileBuffered *s)
 {
    size_t offset = 0;
-    int error;
-
-    error = qemu_file_get_error(s->file);
-    if (error != 0) {
-        DPRINTF("flush when error, bailing: %s\n", strerror(-error));
-        return;
-    }
+    ssize_t ret = 0;

    DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);

-    while (offset < s->buffer_size) {
-        ssize_t ret;
-
-        ret = s->put_buffer(s->opaque, s->buffer + offset,
-                            s->buffer_size - offset);
+    while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
+        size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
+        ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
+                                    to_send);
        if (ret == -EAGAIN) {
            DPRINTF("backend not ready, freezing\n");
+            ret = 0;
            s->freeze_output = 1;
            break;
        }

        if (ret <= 0) {
            DPRINTF("error flushing data, %zd\n", ret);
-            qemu_file_set_error(s->file, ret);
            break;
        } else {
            DPRINTF("flushed %zd byte(s)\n", ret);
            offset += ret;
+            s->bytes_xfer += ret;
        }
    }

    DPRINTF("flushed %zu of %zu byte(s)\n", offset, s->buffer_size);
    memmove(s->buffer, s->buffer + offset, s->buffer_size - offset);
    s->buffer_size -= offset;
+
+    if (ret < 0) {
+        return ret;
+    }
+    return offset;
 }

 static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
 {
    QEMUFileBuffered *s = opaque;
-    int offset = 0, error;
-    ssize_t ret;
+    ssize_t error;

    DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);

@@ -126,65 +112,54 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
    DPRINTF("unfreezing output\n");
    s->freeze_output = 0;

-    buffered_flush(s);
-
-    while (!s->freeze_output && offset < size) {
-        if (s->bytes_xfer > s->xfer_limit) {
-            DPRINTF("transfer limit exceeded when putting\n");
-            break;
-        }
-
-        ret = s->put_buffer(s->opaque, buf + offset, size - offset);
-        if (ret == -EAGAIN) {
-            DPRINTF("backend not ready, freezing\n");
-            s->freeze_output = 1;
-            break;
-        }
-
-        if (ret <= 0) {
-            DPRINTF("error putting\n");
-            qemu_file_set_error(s->file, ret);
-            offset = -EINVAL;
-            break;
-        }
-
-        DPRINTF("put %zd byte(s)\n", ret);
-        offset += ret;
-        s->bytes_xfer += ret;
+    if (size > 0) {
+        DPRINTF("buffering %d bytes\n", size - offset);
+        buffered_append(s, buf, size);
    }

-    if (offset >= 0) {
-        DPRINTF("buffering %d bytes\n", size - offset);
-        buffered_append(s, buf + offset, size - offset);
-        offset = size;
+    error = buffered_flush(s);
+    if (error < 0) {
+        DPRINTF("buffered flush error. bailing: %s\n", strerror(-error));
+        return error;
    }

    if (pos == 0 && size == 0) {
        DPRINTF("file is ready\n");
-        if (s->bytes_xfer <= s->xfer_limit) {
+        if (!s->freeze_output && s->bytes_xfer < s->xfer_limit) {
            DPRINTF("notifying client\n");
-            s->put_ready(s->opaque);
+            migrate_fd_put_ready(s->migration_state);
        }
    }

-    return offset;
+    return size;
 }

 static int buffered_close(void *opaque)
 {
    QEMUFileBuffered *s = opaque;
-    int ret;
+    ssize_t ret = 0;
+    int ret2;

    DPRINTF("closing\n");

+    s->xfer_limit = INT_MAX;
    while (!qemu_file_get_error(s->file) && s->buffer_size) {
-        buffered_flush(s);
-        if (s->freeze_output)
-            s->wait_for_unfreeze(s->opaque);
+        ret = buffered_flush(s);
+        if (ret < 0) {
+            break;
+        }
+        if (s->freeze_output) {
+            ret = migrate_fd_wait_for_unfreeze(s->migration_state);
+            if (ret < 0) {
+                break;
+            }
+        }
    }

-    ret = s->close(s->opaque);
-
+    ret2 = migrate_fd_close(s->migration_state);
+    if (ret >= 0) {
+        ret = ret2;
+    }
    qemu_del_timer(s->timer);
    qemu_free_timer(s->timer);
    g_free(s->buffer);
@@ -199,6 +174,13 @@ static int buffered_close(void *opaque)
 *   1: Time to stop
 *   negative: There has been an error
 */
+static int buffered_get_fd(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+
+    return qemu_get_fd(s->file);
+}
+
 static int buffered_rate_limit(void *opaque)
 {
    QEMUFileBuffered *s = opaque;
@@ -256,34 +238,28 @@ static void buffered_rate_tick(void *opaque)

    s->bytes_xfer = 0;

-    buffered_flush(s);
-
-    /* Add some checks around this */
-    s->put_ready(s->opaque);
+    buffered_put_buffer(s, NULL, 0, 0);
 }

-QEMUFile *qemu_fopen_ops_buffered(void *opaque,
-                                  size_t bytes_per_sec,
-                                  BufferedPutFunc *put_buffer,
-                                  BufferedPutReadyFunc *put_ready,
-                                  BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
-                                  BufferedCloseFunc *close)
+static const QEMUFileOps buffered_file_ops = {
+    .get_fd =         buffered_get_fd,
+    .put_buffer =     buffered_put_buffer,
+    .close =          buffered_close,
+    .rate_limit =     buffered_rate_limit,
+    .get_rate_limit = buffered_get_rate_limit,
+    .set_rate_limit = buffered_set_rate_limit,
+};
+
+QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
 {
    QEMUFileBuffered *s;

    s = g_malloc0(sizeof(*s));

-    s->opaque = opaque;
-    s->xfer_limit = bytes_per_sec / 10;
-    s->put_buffer = put_buffer;
-    s->put_ready = put_ready;
-    s->wait_for_unfreeze = wait_for_unfreeze;
-    s->close = close;
+    s->migration_state = migration_state;
+    s->xfer_limit = migration_state->bandwidth_limit / 10;

-    s->file = qemu_fopen_ops(s, buffered_put_buffer, NULL,
-                             buffered_close, buffered_rate_limit,
-                             buffered_set_rate_limit,
-			     buffered_get_rate_limit);
+    s->file = qemu_fopen_ops(s, &buffered_file_ops);

    s->timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s);

--- a/buffered_file.h
+++ b/buffered_file.h
@@ -15,16 +15,8 @@
 #define QEMU_BUFFERED_FILE_H

 #include "hw/hw.h"
+#include "migration.h"

-typedef ssize_t (BufferedPutFunc)(void *opaque, const void *data, size_t size);
-typedef void (BufferedPutReadyFunc)(void *opaque);
-typedef void (BufferedWaitForUnfreezeFunc)(void *opaque);
-typedef int (BufferedCloseFunc)(void *opaque);
-
-QEMUFile *qemu_fopen_ops_buffered(void *opaque, size_t xfer_limit,
-                                  BufferedPutFunc *put_buffer,
-                                  BufferedPutReadyFunc *put_ready,
-                                  BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
-                                  BufferedCloseFunc *close);
+QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state);

 #endif
--- a/compiler.h
+++ b/compiler.h
@@ -44,14 +44,12 @@
   /* Use gnu_printf when supported (qemu uses standard format strings). */
 #  define GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
 #  define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+#  if defined(_WIN32)
+    /* Map __printf__ to __gnu_printf__ because we want standard format strings
+     * even when MinGW or GLib include files use __printf__. */
+#   define __printf__ __gnu_printf__
+#  endif
 # endif
-#if defined(_WIN32)
-#define GCC_WEAK __attribute__((weak))
-#define GCC_WEAK_DECL GCC_WEAK
-#else
-#define GCC_WEAK __attribute__((weak))
-#define GCC_WEAK_DECL
-#endif
 #else
 #define GCC_ATTR /**/
 #define GCC_FMT_ATTR(n, m)
--- a/366
+++ b/366
@@ -111,14 +111,12 @@ source_path=`dirname "$0"`
 cpu=""
 interp_prefix="/usr/gnemul/qemu-%M"
 static="no"
-sparc_cpu=""
 cross_prefix=""
 audio_drv_list=""
 audio_card_list="ac97 es1370 sb16 hda"
 audio_possible_cards="ac97 es1370 sb16 cs4231a adlib gus hda"
 block_drv_whitelist=""
 host_cc="gcc"
-helper_cflags=""
 libs_softmmu=""
 libs_tools=""
 audio_pt_int=""
@@ -127,7 +125,8 @@ cc_i386=i386-pc-linux-gnu-gcc
 libs_qga=""
 debug_info="yes"

-target_list=""
+# Don't accept a target_list environment variable.
+unset target_list

 # Default value for a variable defining feature "foo".
 #  * foo="no"  feature will only be used if --enable-foo arg is given
@@ -148,6 +147,7 @@ curses=""
 docs=""
 fdt=""
 nptl=""
+pixman=""
 sdl=""
 virtfs=""
 vnc="yes"
@@ -183,8 +183,10 @@ datadir="\${prefix}/share"
 qemu_docdir="\${prefix}/share/doc/qemu"
 bindir="\${prefix}/bin"
 libdir="\${prefix}/lib"
+libexecdir="\${prefix}/libexec"
 includedir="\${prefix}/include"
 sysconfdir="\${prefix}/etc"
+local_statedir="\${prefix}/var"
 confsuffix="/qemu"
 slirp="yes"
 fmod_lib=""
@@ -198,7 +200,7 @@ cocoa="no"
 softmmu="yes"
 linux_user="no"
 bsd_user="no"
-guest_base=""
+guest_base="yes"
 uname_release=""
 mixemu="no"
 aix="no"
@@ -216,9 +218,11 @@ usb_redir=""
 opengl=""
 zlib="yes"
 guest_agent="yes"
+want_tools="yes"
 libiscsi=""
 coroutine=""
 seccomp=""
+glusterfs=""

 # parse CC options first
 for opt do
@@ -240,21 +244,6 @@ for opt do
  ;;
  --disable-debug-info) debug_info="no"
  ;;
-  --sparc_cpu=*)
-    sparc_cpu="$optarg"
-    case $sparc_cpu in
-    v7|v8|v8plus|v8plusa)
-      cpu="sparc"
-    ;;
-    v9)
-      cpu="sparc64"
-    ;;
-    *)
-      echo "undefined SPARC architecture. Exiting";
-      exit 1
-    ;;
-    esac
-  ;;
  esac
 done
 # OS specific
@@ -342,8 +331,6 @@ elif check_define __i386__ ; then
 elif check_define __x86_64__ ; then
  cpu="x86_64"
 elif check_define __sparc__ ; then
-  # We can't check for 64 bit (when gcc is biarch) or V8PLUSA
-  # They must be specified using --sparc_cpu
  if check_define __arch64__ ; then
    cpu="sparc64"
  else
@@ -569,6 +556,7 @@ EOF
  qemu_docdir="\${prefix}"
  bindir="\${prefix}"
  sysconfdir="\${prefix}"
+  local_statedir="\${prefix}"
  confsuffix=""
  libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
 fi
@@ -633,6 +621,8 @@ for opt do
  ;;
  --libdir=*) libdir="$optarg"
  ;;
+  --libexecdir=*) libexecdir="$optarg"
+  ;;
  --includedir=*) includedir="$optarg"
  ;;
  --datadir=*) datadir="$optarg"
@@ -643,7 +633,9 @@ for opt do
  ;;
  --sysconfdir=*) sysconfdir="$optarg"
  ;;
-  --sbindir=*|--libexecdir=*|--sharedstatedir=*|--localstatedir=*|\
+  --localstatedir=*) local_statedir="$optarg"
+  ;;
+  --sbindir=*|--sharedstatedir=*|\
  --oldincludedir=*|--datarootdir=*|--infodir=*|--localedir=*|\
  --htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*)
    # These switches are silently ignored, for compatibility with
@@ -651,6 +643,10 @@ for opt do
    # configure to be used by RPM and similar macros that set
    # lots of directory switches by default.
  ;;
+  --with-system-pixman) pixman="system"
+  ;;
+  --without-system-pixman) pixman="internal"
+  ;;
  --disable-sdl) sdl="no"
  ;;
  --enable-sdl) sdl="yes"
@@ -789,8 +785,6 @@ for opt do
  ;;
  --enable-uname-release=*) uname_release="$optarg"
  ;;
-  --sparc_cpu=*)
-  ;;
  --enable-werror) werror="yes"
  ;;
  --disable-werror) werror="no"
@@ -865,92 +859,53 @@ for opt do
  ;;
  --disable-guest-agent) guest_agent="no"
  ;;
+  --enable-tools) want_tools="yes"
+  ;;
+  --disable-tools) want_tools="no"
+  ;;
  --enable-seccomp) seccomp="yes"
  ;;
  --disable-seccomp) seccomp="no"
  ;;
+  --disable-glusterfs) glusterfs="no"
+  ;;
+  --enable-glusterfs) glusterfs="yes"
+  ;;
  *) echo "ERROR: unknown option $opt"; show_help="yes"
  ;;
  esac
 done

-#
-# If cpu ~= sparc and  sparc_cpu hasn't been defined, plug in the right
-# QEMU_CFLAGS/LDFLAGS (assume sparc_v8plus for 32-bit and sparc_v9 for 64-bit)
-#
-host_guest_base="no"
 case "$cpu" in
-    sparc) case $sparc_cpu in
-           v7|v8)
-             QEMU_CFLAGS="-mcpu=${sparc_cpu} -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           v8plus|v8plusa)
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           *) # sparc_cpu not defined in the command line
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_v8plus__ $QEMU_CFLAGS"
-           esac
+    sparc)
           LDFLAGS="-m32 $LDFLAGS"
-           QEMU_CFLAGS="-m32 -ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
-           if test "$solaris" = "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
-             helper_cflags="-ffixed-i0"
-           fi
+           QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
           ;;
    sparc64)
-           QEMU_CFLAGS="-m64 -mcpu=ultrasparc -D__sparc_v9__ $QEMU_CFLAGS"
           LDFLAGS="-m64 $LDFLAGS"
-           QEMU_CFLAGS="-ffixed-g5 -ffixed-g6 -ffixed-g7 $QEMU_CFLAGS"
-           if test "$solaris" != "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS"
-           fi
+           QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
           ;;
    s390)
           QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS"
           LDFLAGS="-m31 $LDFLAGS"
-           host_guest_base="yes"
           ;;
    s390x)
           QEMU_CFLAGS="-m64 -march=z990 $QEMU_CFLAGS"
           LDFLAGS="-m64 $LDFLAGS"
-           host_guest_base="yes"
           ;;
    i386)
           QEMU_CFLAGS="-m32 $QEMU_CFLAGS"
           LDFLAGS="-m32 $LDFLAGS"
           cc_i386='$(CC) -m32'
-           helper_cflags="-fomit-frame-pointer"
-           host_guest_base="yes"
           ;;
    x86_64)
           QEMU_CFLAGS="-m64 $QEMU_CFLAGS"
           LDFLAGS="-m64 $LDFLAGS"
           cc_i386='$(CC) -m32'
-           host_guest_base="yes"
-           ;;
-    arm*)
-           host_guest_base="yes"
-           ;;
-    ppc*)
-           host_guest_base="yes"
-           ;;
-    mips*)
-           host_guest_base="yes"
-           ;;
-    ia64*)
-           host_guest_base="yes"
-           ;;
-    hppa*)
-           host_guest_base="yes"
-           ;;
-    unicore32*)
-           host_guest_base="yes"
           ;;
+    # No special flags required for other host CPUs
 esac

-[ -z "$guest_base" ] && guest_base="$host_guest_base"
-
-
 default_target_list=""

 # these targets are portable
@@ -1055,6 +1010,7 @@ echo "  --datadir=PATH           install firmware in PATH$confsuffix"
 echo "  --docdir=PATH            install documentation in PATH$confsuffix"
 echo "  --bindir=PATH            install binaries in PATH"
 echo "  --sysconfdir=PATH        install config in PATH$confsuffix"
+echo "  --localstatedir=PATH     install local state in PATH"
 echo "  --with-confsuffix=SUFFIX suffix for QEMU data inside datadir and sysconfdir [$confsuffix]"
 echo "  --enable-debug-tcg       enable TCG debugging"
 echo "  --disable-debug-tcg      disable TCG debugging (default)"
@@ -1161,6 +1117,8 @@ echo "  --disable-seccomp        disable seccomp support"
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
 echo "                           gthread, ucontext, sigaltstack, windows"
+echo "  --enable-glusterfs       enable GlusterFS backend"
+echo "  --disable-glusterfs      disable GlusterFS backend"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -1207,6 +1165,7 @@ gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-fstack-protector-all -Wendif-labels $gcc_flags"
+gcc_flags="-Wno-initializer-overrides $gcc_flags"
 # Note that we do not add -Werror to gcc_flags here, because that would
 # enable it for all configure tests. If a configure test failed due
 # to -Werror this would just silently disable some features,
@@ -1215,11 +1174,30 @@ cat > $TMPC << EOF
 int main(void) { return 0; }
 EOF
 for flag in $gcc_flags; do
-    if compile_prog "-Werror $flag" "" ; then
+    # Use the positive sense of the flag when testing for -Wno-wombat
+    # support (gcc will happily accept the -Wno- form of unknown
+    # warning options).
+    optflag="$(echo $flag | sed -e 's/^-Wno-/-W/')"
+    if compile_prog "-Werror $optflag" "" ; then
 	QEMU_CFLAGS="$QEMU_CFLAGS $flag"
    fi
 done

+# Workaround for http://gcc.gnu.org/PR55489.  Happens with -fPIE/-fPIC and
+# large functions that use global variables.  The bug is in all releases of
+# GCC, but it became particularly acute in 4.6.x and 4.7.x.  It is fixed in
+# 4.7.3 and 4.8.0.  We should be able to delete this at the end of 2013.
+cat > $TMPC << EOF
+#if __GNUC__ == 4 && (__GNUC_MINOR__ == 6 || (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ <= 2))
+int main(void) { return 0; }
+#else
+#error No bug in this compiler.
+#endif
+EOF
+if compile_prog "-Werror -fno-gcse" "" ; then
+  TRANSLATE_OPT_CFLAGS=-fno-gcse
+fi
+
 if test "$static" = "yes" ; then
  if test "$pie" = "yes" ; then
    echo "static and pie are mutually incompatible"
@@ -1314,15 +1292,11 @@ if ! "$python" -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_
  exit 1
 fi

-if test -z "$target_list" ; then
+if test -z "${target_list+xxx}" ; then
    target_list="$default_target_list"
 else
    target_list=`echo "$target_list" | sed -e 's/,/ /g'`
 fi
-if test -z "$target_list" ; then
-    echo "No targets enabled"
-    exit 1
-fi
 # see if system emulation was really requested
 case " $target_list " in
  *"-softmmu "*) softmmu=yes
@@ -1347,7 +1321,7 @@ if test -z "$cross_prefix" ; then
 # big/little endian test
 cat > $TMPC << EOF
 #include <inttypes.h>
-int main(int argc, char ** argv){
+int main(void) {
        volatile uint32_t i=0x01234567;
        return (*((uint8_t*)(&i))) == 0x67;
 }
@@ -1424,14 +1398,14 @@ fi
 # libseccomp check

 if test "$seccomp" != "no" ; then
-    if $pkg_config libseccomp --modversion >/dev/null 2>&1; then
+    if $pkg_config --atleast-version=1.0.0 libseccomp --modversion >/dev/null 2>&1; then
        LIBS=`$pkg_config --libs libseccomp`
 	seccomp="yes"
    else
-	seccomp="no"
 	if test "$seccomp" = "yes"; then
            feature_not_found "libseccomp"
 	fi
+	seccomp="no"
    fi
 fi
 ##########################################
@@ -2140,6 +2114,33 @@ else
    exit 1
 fi

+##########################################
+# pixman support probe
+
+if test "$pixman" = ""; then
+  if $pkg_config pixman-1 > /dev/null 2>&1; then
+    pixman="system"
+  else
+    pixman="internal"
+  fi
+fi
+if test "$pixman" = "system"; then
+  pixman_cflags=`$pkg_config --cflags pixman-1 2>/dev/null`
+  pixman_libs=`$pkg_config --libs pixman-1 2>/dev/null`
+else
+  if test ! -d ${source_path}/pixman/pixman; then
+    echo "ERROR: pixman not present. Your options:"
+    echo "  (1) Prefered: Install the pixman devel package (any recent"
+    echo "      distro should have packages as Xorg needs pixman too)."
+    echo "  (2) Fetch the pixman submodule, using:"
+    echo "      git submodule update --init pixman"
+    exit 1
+  fi
+  mkdir -p pixman/pixman
+  pixman_cflags="-I\$(SRC_PATH)/pixman/pixman -I\$(BUILD_DIR)/pixman/pixman"
+  pixman_libs="-L\$(BUILD_DIR)/pixman/pixman/.libs -lpixman-1"
+fi
+
 ##########################################
 # libcap probe

@@ -2340,6 +2341,29 @@ EOF
  fi
 fi

+##########################################
+# glusterfs probe
+if test "$glusterfs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+    (void) glfs_new("volume");
+    return 0;
+}
+EOF
+  glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+  if compile_prog "" "$glusterfs_libs" ; then
+    glusterfs=yes
+    libs_tools="$glusterfs_libs $libs_tools"
+    libs_softmmu="$glusterfs_libs $libs_softmmu"
+  else
+    if test "$glusterfs" = "yes" ; then
+      feature_not_found "GlusterFS backend support"
+    fi
+    glusterfs=no
+  fi
+fi
+
 #
 # Check for xxxat() functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
@@ -2426,8 +2450,7 @@ cat > $TMPC << EOF
 int main(void)
 {
    int pipefd[2];
-    pipe2(pipefd, O_CLOEXEC);
-    return 0;
+    return pipe2(pipefd, O_CLOEXEC);
 }
 EOF
 if compile_prog "" "" ; then
@@ -2667,18 +2690,45 @@ EOF
 fi


+##########################################
+# Do we need libm
+cat > $TMPC << EOF
+#include <math.h>
+int main(void) { return isnan(sin(0.0)); }
+EOF
+if compile_prog "" "" ; then
+  :
+elif compile_prog "" "-lm" ; then
+  LIBS="-lm $LIBS"
+  libs_qga="-lm $libs_qga"
+else
+  echo
+  echo "Error: libm check failed"
+  echo
+  exit 1
+fi
+
 ##########################################
 # Do we need librt
+# uClibc provides 2 versions of clock_gettime(), one with realtime
+# support and one without. This means that the clock_gettime() don't
+# need -lrt. We still need it for timer_create() so we check for this
+# function in addition.
 cat > $TMPC <<EOF
 #include <signal.h>
 #include <time.h>
-int main(void) { return clock_gettime(CLOCK_REALTIME, NULL); }
+int main(void) {
+  timer_create(CLOCK_REALTIME, NULL, NULL);
+  return clock_gettime(CLOCK_REALTIME, NULL);
+}
 EOF

 if compile_prog "" "" ; then
  :
-elif compile_prog "" "-lrt" ; then
+# we need pthread for static linking. use previous pthread test result
+elif compile_prog "" "-lrt $pthread_lib" ; then
  LIBS="-lrt $LIBS"
+  libs_qga="-lrt $libs_qga"
 fi

 if test "$darwin" != "yes" -a "$mingw32" != "yes" -a "$solaris" != yes -a \
@@ -2695,12 +2745,14 @@ int main(void) { spice_server_new(); return 0; }
 EOF
  spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null)
  spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null)
-  if $pkg_config --atleast-version=0.8.2 spice-server >/dev/null 2>&1 && \
-     $pkg_config --atleast-version=0.8.1 spice-protocol > /dev/null 2>&1 && \
+  if $pkg_config --atleast-version=0.12.0 spice-server >/dev/null 2>&1 && \
+     $pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1 && \
     compile_prog "$spice_cflags" "$spice_libs" ; then
    spice="yes"
    libs_softmmu="$libs_softmmu $spice_libs"
    QEMU_CFLAGS="$QEMU_CFLAGS $spice_cflags"
+    spice_protocol_version=$($pkg_config --modversion spice-protocol)
+    spice_server_version=$($pkg_config --modversion spice-server)
  else
    if test "$spice" = "yes" ; then
      feature_not_found "spice"
@@ -2750,12 +2802,12 @@ fi

 # check for usbredirparser for usb network redirection support
 if test "$usb_redir" != "no" ; then
-    if $pkg_config --atleast-version=0.3.4 libusbredirparser >/dev/null 2>&1 ; then
+    if $pkg_config --atleast-version=0.5.3 libusbredirparser-0.5 >/dev/null 2>&1 ; then
        usb_redir="yes"
-        usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null)
-        usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null)
+        usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5 2>/dev/null)
+        usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5 2>/dev/null)
        QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags"
-        LIBS="$LIBS $usb_redir_libs"
+        libs_softmmu="$libs_softmmu $usb_redir_libs"
    else
        if test "$usb_redir" = "yes"; then
            feature_not_found "usb-redir"
@@ -2811,6 +2863,24 @@ if compile_prog "" "" ; then
    posix_madvise=yes
 fi

+##########################################
+# check if we have usable SIGEV_THREAD_ID
+
+sigev_thread_id=no
+cat > $TMPC << EOF
+#include <signal.h>
+int main(void) {
+  struct sigevent ev;
+  ev.sigev_notify = SIGEV_THREAD_ID;
+  ev._sigev_un._tid = 0;
+  asm volatile("" : : "g"(&ev));
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    sigev_thread_id=yes
+fi
+
 ##########################################
 # check if trace backend exists

@@ -2868,7 +2938,7 @@ static int sfaa(int *ptr)
  return __sync_fetch_and_and(ptr, 0);
 }

-int main(int argc, char **argv)
+int main(void)
 {
  int val = 42;
  sfaa(&val);
@@ -2963,11 +3033,12 @@ if compile_prog "-Werror" "" ; then
 fi

 ########################################
-# check if we have valgrind/valgrind.h
+# check if we have valgrind/valgrind.h and valgrind/memcheck.h

 valgrind_h=no
 cat > $TMPC << EOF
 #include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
 int main(void) {
  return 0;
 }
@@ -3039,9 +3110,14 @@ fi
 qemu_confdir=$sysconfdir$confsuffix
 qemu_datadir=$datadir$confsuffix

-tools=
-if test "$softmmu" = yes ; then
+tools=""
+if test "$want_tools" = "yes" ; then
  tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
+  if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
+    tools="qemu-nbd\$(EXESUF) $tools"
+  fi
+fi
+if test "$softmmu" = yes ; then
  if test "$virtfs" != no ; then
    if test "$cap" = yes && test "$linux" = yes && test "$attr" = yes ; then
      virtfs=yes
@@ -3055,14 +3131,13 @@ if test "$softmmu" = yes ; then
    fi
  fi
  if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
-      tools="qemu-nbd\$(EXESUF) $tools"
    if [ "$guest_agent" = "yes" ]; then
      tools="qemu-ga\$(EXESUF) $tools"
    fi
  fi
-fi
-if test "$smartcard_nss" = "yes" ; then
-  tools="vscclient\$(EXESUF) $tools"
+  if test "$smartcard_nss" = "yes" ; then
+    tools="vscclient\$(EXESUF) $tools"
+  fi
 fi

 # Mac OS X ships with a broken assembler
@@ -3076,12 +3151,18 @@ if test "$cpu" = "ppc64" -a "$targetos" != "Darwin" ; then
  roms="$roms spapr-rtas"
 fi

+# add pixman flags after all config tests are done
+QEMU_CFLAGS="$QEMU_CFLAGS $pixman_cflags"
+libs_softmmu="$libs_softmmu $pixman_libs"
+
 echo "Install prefix    $prefix"
 echo "BIOS directory    `eval echo $qemu_datadir`"
 echo "binary directory  `eval echo $bindir`"
 echo "library directory `eval echo $libdir`"
+echo "libexec directory `eval echo $libexecdir`"
 echo "include directory `eval echo $includedir`"
 echo "config directory  `eval echo $sysconfdir`"
+echo "local state directory   `eval echo $local_statedir`"
 if test "$mingw32" = "no" ; then
 echo "Manual directory  `eval echo $mandir`"
 echo "ELF interp prefix $interp_prefix"
@@ -3112,6 +3193,7 @@ echo "-Werror enabled   $werror"
 if test "$darwin" = "yes" ; then
    echo "Cocoa support     $cocoa"
 fi
+echo "pixman            $pixman"
 echo "SDL support       $sdl"
 echo "curses support    $curses"
 echo "curl support      $curl"
@@ -3151,12 +3233,13 @@ echo "preadv support    $preadv"
 echo "fdatasync         $fdatasync"
 echo "madvise           $madvise"
 echo "posix_madvise     $posix_madvise"
+echo "sigev_thread_id   $sigev_thread_id"
 echo "uuid support      $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
-echo "spice support     $spice"
+echo "spice support     $spice ($spice_protocol_version/$spice_server_version)"
 echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
 echo "nss used          $smartcard_nss"
@@ -3166,6 +3249,7 @@ echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine_backend"
+echo "GlusterFS support $glusterfs"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3183,14 +3267,15 @@ echo all: >> $config_host_mak
 echo "prefix=$prefix" >> $config_host_mak
 echo "bindir=$bindir" >> $config_host_mak
 echo "libdir=$libdir" >> $config_host_mak
+echo "libexecdir=$libexecdir" >> $config_host_mak
 echo "includedir=$includedir" >> $config_host_mak
 echo "mandir=$mandir" >> $config_host_mak
 echo "sysconfdir=$sysconfdir" >> $config_host_mak
 echo "qemu_confdir=$qemu_confdir" >> $config_host_mak
 echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
 echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
-echo "libexecdir=\${prefix}/libexec" >> $config_host_mak
-echo "CONFIG_QEMU_HELPERDIR=\"$prefix/libexec\"" >> $config_host_mak
+echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
+echo "qemu_helperdir=$libexecdir" >> $config_host_mak

 echo "ARCH=$ARCH" >> $config_host_mak
 if test "$debug_tcg" = "yes" ; then
@@ -3433,6 +3518,9 @@ fi
 if test "$posix_madvise" = "yes" ; then
  echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
 fi
+if test "$sigev_thread_id" = "yes" ; then
+  echo "CONFIG_SIGEV_THREAD_ID=y" >> $config_host_mak
+fi

 if test "$spice" = "yes" ; then
  echo "CONFIG_SPICE=y" >> $config_host_mak
@@ -3504,6 +3592,10 @@ if test "$has_environ" = "yes" ; then
  echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi

+if test "$glusterfs" = "yes" ; then
+  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
@@ -3572,7 +3664,11 @@ if test "$sparse" = "yes" ; then
  echo "HOST_CC      := REAL_CC=\"\$(HOST_CC)\" cgcc"  >> $config_host_mak
  echo "QEMU_CFLAGS  += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
 fi
-echo "HELPER_CFLAGS=$helper_cflags" >> $config_host_mak
+if test "$cross_prefix" != ""; then
+  echo "AUTOCONF_HOST := --host=${cross_prefix%-}"     >> $config_host_mak
+else
+  echo "AUTOCONF_HOST := "                             >> $config_host_mak
+fi
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
 echo "ARLIBS_BEGIN=$arlibs_begin" >> $config_host_mak
 echo "ARLIBS_END=$arlibs_end" >> $config_host_mak
@@ -3581,6 +3677,7 @@ echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
 echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
 echo "POD2MAN=$POD2MAN" >> $config_host_mak
+echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak

 # generate list of library paths for linker script

@@ -3683,15 +3780,12 @@ TARGET_ABI_DIR=""

 case "$target_arch2" in
  i386)
-    target_phys_bits=64
  ;;
  x86_64)
    TARGET_BASE_ARCH=i386
-    target_phys_bits=64
    target_long_alignment=8
  ;;
  alpha)
-    target_phys_bits=64
    target_long_alignment=8
    target_nptl="yes"
  ;;
@@ -3700,22 +3794,18 @@ case "$target_arch2" in
    bflt="yes"
    target_nptl="yes"
    gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
-    target_phys_bits=64
    target_llong_alignment=4
    target_libs_softmmu="$fdt_libs"
  ;;
  cris)
    target_nptl="yes"
-    target_phys_bits=32
  ;;
  lm32)
-    target_phys_bits=32
    target_libs_softmmu="$opengl_libs"
  ;;
  m68k)
    bflt="yes"
    gdb_xml_files="cf-core.xml cf-fp.xml"
-    target_phys_bits=32
    target_int_alignment=2
    target_long_alignment=2
    target_llong_alignment=2
@@ -3724,36 +3814,30 @@ case "$target_arch2" in
    TARGET_ARCH=microblaze
    bflt="yes"
    target_nptl="yes"
-    target_phys_bits=32
    target_libs_softmmu="$fdt_libs"
  ;;
  mips|mipsel)
    TARGET_ARCH=mips
    echo "TARGET_ABI_MIPSO32=y" >> $config_target_mak
    target_nptl="yes"
-    target_phys_bits=64
  ;;
  mipsn32|mipsn32el)
    TARGET_ARCH=mipsn32
    TARGET_BASE_ARCH=mips
    echo "TARGET_ABI_MIPSN32=y" >> $config_target_mak
-    target_phys_bits=64
  ;;
  mips64|mips64el)
    TARGET_ARCH=mips64
    TARGET_BASE_ARCH=mips
    echo "TARGET_ABI_MIPSN64=y" >> $config_target_mak
-    target_phys_bits=64
    target_long_alignment=8
  ;;
  or32)
    TARGET_ARCH=openrisc
    TARGET_BASE_ARCH=openrisc
-    target_phys_bits=32
  ;;
  ppc)
    gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
    target_nptl="yes"
    target_libs_softmmu="$fdt_libs"
  ;;
@@ -3761,7 +3845,6 @@ case "$target_arch2" in
    TARGET_BASE_ARCH=ppc
    TARGET_ABI_DIR=ppc
    gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
    target_nptl="yes"
    target_libs_softmmu="$fdt_libs"
  ;;
@@ -3769,7 +3852,6 @@ case "$target_arch2" in
    TARGET_BASE_ARCH=ppc
    TARGET_ABI_DIR=ppc
    gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
    target_long_alignment=8
    target_libs_softmmu="$fdt_libs"
  ;;
@@ -3779,21 +3861,17 @@ case "$target_arch2" in
    TARGET_ABI_DIR=ppc
    echo "TARGET_ABI32=y" >> $config_target_mak
    gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
    target_libs_softmmu="$fdt_libs"
  ;;
  sh4|sh4eb)
    TARGET_ARCH=sh4
    bflt="yes"
    target_nptl="yes"
-    target_phys_bits=32
  ;;
  sparc)
-    target_phys_bits=64
  ;;
  sparc64)
    TARGET_BASE_ARCH=sparc
-    target_phys_bits=64
    target_long_alignment=8
  ;;
  sparc32plus)
@@ -3801,19 +3879,15 @@ case "$target_arch2" in
    TARGET_BASE_ARCH=sparc
    TARGET_ABI_DIR=sparc
    echo "TARGET_ABI32=y" >> $config_target_mak
-    target_phys_bits=64
  ;;
  s390x)
    target_nptl="yes"
-    target_phys_bits=64
    target_long_alignment=8
  ;;
  unicore32)
-    target_phys_bits=32
  ;;
  xtensa|xtensaeb)
    TARGET_ARCH=xtensa
-    target_phys_bits=32
  ;;
  *)
    echo "Unsupported target CPU"
@@ -3827,17 +3901,19 @@ fi

 symlink "$source_path/Makefile.target" "$target_dir/Makefile"

-
-case "$target_arch2" in
-  alpha | i386 | or32 | sparc* | x86_64 | xtensa* | ppc*)
-    echo "CONFIG_TCG_PASS_AREG0=y" >> $config_target_mak
-  ;;
-esac
-
 upper() {
    echo "$@"| LC_ALL=C tr '[a-z]' '[A-Z]'
 }

+case "$cpu" in
+  i386|x86_64|ppc)
+    # The TCG interpreter currently does not support ld/st optimization.
+    if test "$tcg_interpreter" = "no" ; then
+        echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_target_mak
+    fi
+  ;;
+esac
+
 echo "TARGET_SHORT_ALIGNMENT=$target_short_alignment" >> $config_target_mak
 echo "TARGET_INT_ALIGNMENT=$target_int_alignment" >> $config_target_mak
 echo "TARGET_LONG_ALIGNMENT=$target_long_alignment" >> $config_target_mak
@@ -3855,7 +3931,6 @@ echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak
 case "$target_arch2" in
  i386|x86_64)
    if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
-      target_phys_bits=64
      echo "CONFIG_XEN=y" >> $config_target_mak
      if test "$xen_pci_passthrough" = yes; then
        echo "CONFIG_XEN_PCI_PASSTHROUGH=y" >> "$config_target_mak"
@@ -3895,11 +3970,8 @@ if test "$target_bigendian" = "yes" ; then
  echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
 if test "$target_softmmu" = "yes" ; then
-  echo "TARGET_PHYS_ADDR_BITS=$target_phys_bits" >> $config_target_mak
  echo "CONFIG_SOFTMMU=y" >> $config_target_mak
  echo "LIBS+=$libs_softmmu $target_libs_softmmu" >> $config_target_mak
-  echo "HWDIR=../libhw$target_phys_bits" >> $config_target_mak
-  echo "subdir-$target: subdir-libhw$target_phys_bits" >> $config_host_mak
  if test "$smartcard_nss" = "yes" ; then
    echo "subdir-$target: subdir-libcacard" >> $config_host_mak
  fi
@@ -4085,10 +4157,6 @@ fi

 if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
  case "$ARCH" in
-  sparc)
-    # -static is used to avoid g1/g3 usage by the dynamic linker
-    ldflags="$linker_script -static $ldflags"
-    ;;
  alpha | s390x)
    # The default placement of the application is fine.
    ;;
@@ -4104,6 +4172,10 @@ echo "QEMU_INCLUDES+=$includes" >> $config_target_mak

 done # for target in $targets

+if [ "$pixman" = "internal" ]; then
+  echo "config-host.h: subdir-pixman" >> $config_host_mak
+fi
+
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas"
@@ -4145,12 +4217,6 @@ for rom in seabios vgabios ; do
    echo "LD=$ld" >> $config_mak
 done

-for hwlib in 32 64; do
-  d=libhw$hwlib
-  symlink "$source_path/Makefile.hw" "$d/Makefile"
-  echo "QEMU_CFLAGS+=-DTARGET_PHYS_ADDR_BITS=$hwlib" > $d/config.mak
-done
-
 d=libuser
 symlink "$source_path/Makefile.user" "$d/Makefile"

--- a/console.c
+++ b/console.c
@@ -24,6 +24,7 @@
 #include "qemu-common.h"
 #include "console.h"
 #include "qemu-timer.h"
+#include "qmp-commands.h"

 //#define DEBUG_CONSOLE
 #define DEFAULT_BACKSCROLL 512
@@ -113,20 +114,20 @@ typedef enum {
    TEXT_CONSOLE_FIXED_SIZE
 } console_type_t;

-/* ??? This is mis-named.
-   It is used for both text and graphical consoles.  */
-struct TextConsole {
+struct QemuConsole {
    int index;
    console_type_t console_type;
    DisplayState *ds;
+
    /* Graphic console state.  */
    vga_hw_update_ptr hw_update;
    vga_hw_invalidate_ptr hw_invalidate;
    vga_hw_screen_dump_ptr hw_screen_dump;
    vga_hw_text_update_ptr hw_text_update;
    void *hw;
-
    int g_width, g_height;
+
+    /* Text console state */
    int width;
    int height;
    int total_height;
@@ -160,8 +161,8 @@ struct TextConsole {
 };

 static DisplayState *display_state;
-static TextConsole *active_console;
-static TextConsole *consoles[MAX_CONSOLES];
+static QemuConsole *active_console;
+static QemuConsole *consoles[MAX_CONSOLES];
 static int nb_consoles = 0;

 void vga_hw_update(void)
@@ -176,9 +177,9 @@ void vga_hw_invalidate(void)
        active_console->hw_invalidate(active_console->hw);
 }

-void vga_hw_screen_dump(const char *filename)
+void qmp_screendump(const char *filename, Error **errp)
 {
-    TextConsole *previous_active_console;
+    QemuConsole *previous_active_console;
    bool cswitch;

    previous_active_console = active_console;
@@ -190,9 +191,9 @@ void vga_hw_screen_dump(const char *filename)
        console_select(0);
    }
    if (consoles[0] && consoles[0]->hw_screen_dump) {
-        consoles[0]->hw_screen_dump(consoles[0]->hw, filename, cswitch);
+        consoles[0]->hw_screen_dump(consoles[0]->hw, filename, cswitch, errp);
    } else {
-        error_report("screen dump not implemented");
+        error_setg(errp, "device doesn't support screendump\n");
    }

    if (cswitch) {
@@ -520,7 +521,7 @@ static void vga_putcharxy(DisplayState *ds, int x, int y, int ch,
    }
 }

-static void text_console_resize(TextConsole *s)
+static void text_console_resize(QemuConsole *s)
 {
    TextCell *cells, *c, *c1;
    int w1, x, y, last_width;
@@ -552,7 +553,7 @@ static void text_console_resize(TextConsole *s)
    s->cells = cells;
 }

-static inline void text_update_xy(TextConsole *s, int x, int y)
+static inline void text_update_xy(QemuConsole *s, int x, int y)
 {
    s->text_x[0] = MIN(s->text_x[0], x);
    s->text_x[1] = MAX(s->text_x[1], x);
@@ -560,7 +561,7 @@ static inline void text_update_xy(TextConsole *s, int x, int y)
    s->text_y[1] = MAX(s->text_y[1], y);
 }

-static void invalidate_xy(TextConsole *s, int x, int y)
+static void invalidate_xy(QemuConsole *s, int x, int y)
 {
    if (s->update_x0 > x * FONT_WIDTH)
        s->update_x0 = x * FONT_WIDTH;
@@ -572,7 +573,7 @@ static void invalidate_xy(TextConsole *s, int x, int y)
        s->update_y1 = (y + 1) * FONT_HEIGHT;
 }

-static void update_xy(TextConsole *s, int x, int y)
+static void update_xy(QemuConsole *s, int x, int y)
 {
    TextCell *c;
    int y1, y2;
@@ -596,7 +597,7 @@ static void update_xy(TextConsole *s, int x, int y)
    }
 }

-static void console_show_cursor(TextConsole *s, int show)
+static void console_show_cursor(QemuConsole *s, int show)
 {
    TextCell *c;
    int y, y1;
@@ -630,42 +631,45 @@ static void console_show_cursor(TextConsole *s, int show)
    }
 }

-static void console_refresh(TextConsole *s)
+static void console_refresh(QemuConsole *s)
 {
    TextCell *c;
    int x, y, y1;

    if (s != active_console)
        return;
-    if (!ds_get_bits_per_pixel(s->ds)) {
+
+    if (s->ds->have_text) {
        s->text_x[0] = 0;
        s->text_y[0] = 0;
        s->text_x[1] = s->width - 1;
        s->text_y[1] = s->height - 1;
        s->cursor_invalidate = 1;
-        return;
    }

-    vga_fill_rect(s->ds, 0, 0, ds_get_width(s->ds), ds_get_height(s->ds),
-                  color_table[0][COLOR_BLACK]);
-    y1 = s->y_displayed;
-    for(y = 0; y < s->height; y++) {
-        c = s->cells + y1 * s->width;
-        for(x = 0; x < s->width; x++) {
-            vga_putcharxy(s->ds, x, y, c->ch,
-                          &(c->t_attrib));
-            c++;
+    if (s->ds->have_gfx) {
+        vga_fill_rect(s->ds, 0, 0, ds_get_width(s->ds), ds_get_height(s->ds),
+                      color_table[0][COLOR_BLACK]);
+        y1 = s->y_displayed;
+        for (y = 0; y < s->height; y++) {
+            c = s->cells + y1 * s->width;
+            for (x = 0; x < s->width; x++) {
+                vga_putcharxy(s->ds, x, y, c->ch,
+                              &(c->t_attrib));
+                c++;
+            }
+            if (++y1 == s->total_height) {
+                y1 = 0;
+            }
        }
-        if (++y1 == s->total_height)
-            y1 = 0;
+        console_show_cursor(s, 1);
+        dpy_gfx_update(s->ds, 0, 0, ds_get_width(s->ds), ds_get_height(s->ds));
    }
-    console_show_cursor(s, 1);
-    dpy_update(s->ds, 0, 0, ds_get_width(s->ds), ds_get_height(s->ds));
 }

 static void console_scroll(int ydelta)
 {
-    TextConsole *s;
+    QemuConsole *s;
    int i, y1;

    s = active_console;
@@ -697,7 +701,7 @@ static void console_scroll(int ydelta)
    console_refresh(s);
 }

-static void console_put_lf(TextConsole *s)
+static void console_put_lf(QemuConsole *s)
 {
    TextCell *c;
    int x, y1;
@@ -748,7 +752,7 @@ static void console_put_lf(TextConsole *s)
 * NOTE: I know this code is not very efficient (checking every color for it
 * self) but it is more readable and better maintainable.
 */
-static void console_handle_escape(TextConsole *s)
+static void console_handle_escape(QemuConsole *s)
 {
    int i;

@@ -841,7 +845,7 @@ static void console_handle_escape(TextConsole *s)
    }
 }

-static void console_clear_xy(TextConsole *s, int x, int y)
+static void console_clear_xy(QemuConsole *s, int x, int y)
 {
    int y1 = (s->y_base + y) % s->total_height;
    TextCell *c = &s->cells[y1 * s->width + x];
@@ -851,7 +855,7 @@ static void console_clear_xy(TextConsole *s, int x, int y)
 }

 /* set cursor, checking bounds */
-static void set_cursor(TextConsole *s, int x, int y)
+static void set_cursor(QemuConsole *s, int x, int y)
 {
    if (x < 0) {
        x = 0;
@@ -870,7 +874,7 @@ static void set_cursor(TextConsole *s, int x, int y)
    s->y = y;
 }

-static void console_putchar(TextConsole *s, int ch)
+static void console_putchar(QemuConsole *s, int ch)
 {
    TextCell *c;
    int y1, i;
@@ -937,8 +941,11 @@ static void console_putchar(TextConsole *s, int ch)
    case TTY_STATE_CSI: /* handle escape sequence parameters */
        if (ch >= '0' && ch <= '9') {
            if (s->nb_esc_params < MAX_ESC_PARAMS) {
-                s->esc_params[s->nb_esc_params] =
-                    s->esc_params[s->nb_esc_params] * 10 + ch - '0';
+                int *param = &s->esc_params[s->nb_esc_params];
+                int digit = (ch - '0');
+
+                *param = (*param <= (INT_MAX - digit) / 10) ?
+                         *param * 10 + digit : INT_MAX;
            }
        } else {
            if (s->nb_esc_params < MAX_ESC_PARAMS)
@@ -1074,7 +1081,7 @@ static void console_putchar(TextConsole *s, int ch)

 void console_select(unsigned int index)
 {
-    TextConsole *s;
+    QemuConsole *s;

    if (index >= MAX_CONSOLES)
        return;
@@ -1090,24 +1097,24 @@ void console_select(unsigned int index)
            qemu_del_timer(active_console->cursor_timer);
        }
        active_console = s;
-        if (ds_get_bits_per_pixel(s->ds)) {
+        if (ds->have_gfx) {
            ds->surface = qemu_resize_displaysurface(ds, s->g_width, s->g_height);
-        } else {
-            s->ds->surface->width = s->width;
-            s->ds->surface->height = s->height;
+            dpy_gfx_resize(ds);
+        }
+        if (ds->have_text) {
+            dpy_text_resize(ds, s->width, s->height);
        }
        if (s->cursor_timer) {
            qemu_mod_timer(s->cursor_timer,
                   qemu_get_clock_ms(rt_clock) + CONSOLE_CURSOR_PERIOD / 2);
        }
-        dpy_resize(s->ds);
        vga_hw_invalidate();
    }
 }

 static int console_puts(CharDriverState *chr, const uint8_t *buf, int len)
 {
-    TextConsole *s = chr->opaque;
+    QemuConsole *s = chr->opaque;
    int i;

    s->update_x0 = s->width * FONT_WIDTH;
@@ -1119,17 +1126,17 @@ static int console_puts(CharDriverState *chr, const uint8_t *buf, int len)
        console_putchar(s, buf[i]);
    }
    console_show_cursor(s, 1);
-    if (ds_get_bits_per_pixel(s->ds) && s->update_x0 < s->update_x1) {
-        dpy_update(s->ds, s->update_x0, s->update_y0,
-                   s->update_x1 - s->update_x0,
-                   s->update_y1 - s->update_y0);
+    if (s->ds->have_gfx && s->update_x0 < s->update_x1) {
+        dpy_gfx_update(s->ds, s->update_x0, s->update_y0,
+                       s->update_x1 - s->update_x0,
+                       s->update_y1 - s->update_y0);
    }
    return len;
 }

 static void kbd_send_chars(void *opaque)
 {
-    TextConsole *s = opaque;
+    QemuConsole *s = opaque;
    int len;
    uint8_t buf[16];

@@ -1152,7 +1159,7 @@ static void kbd_send_chars(void *opaque)
 /* called when an ascii key is pressed */
 void kbd_put_keysym(int keysym)
 {
-    TextConsole *s;
+    QemuConsole *s;
    uint8_t buf[16], *q;
    int c;

@@ -1207,7 +1214,7 @@ void kbd_put_keysym(int keysym)

 static void text_console_invalidate(void *opaque)
 {
-    TextConsole *s = (TextConsole *) opaque;
+    QemuConsole *s = (QemuConsole *) opaque;
    if (!ds_get_bits_per_pixel(s->ds) && s->console_type == TEXT_CONSOLE) {
        s->g_width = ds_get_width(s->ds);
        s->g_height = ds_get_height(s->ds);
@@ -1218,7 +1225,7 @@ static void text_console_invalidate(void *opaque)

 static void text_console_update(void *opaque, console_ch_t *chardata)
 {
-    TextConsole *s = (TextConsole *) opaque;
+    QemuConsole *s = (QemuConsole *) opaque;
    int i, j, src;

    if (s->text_x[0] <= s->text_x[1]) {
@@ -1230,23 +1237,23 @@ static void text_console_update(void *opaque, console_ch_t *chardata)
                                (s->cells[src].t_attrib.fgcol << 12) |
                                (s->cells[src].t_attrib.bgcol << 8) |
                                (s->cells[src].t_attrib.bold << 21));
-        dpy_update(s->ds, s->text_x[0], s->text_y[0],
-                   s->text_x[1] - s->text_x[0], i - s->text_y[0]);
+        dpy_text_update(s->ds, s->text_x[0], s->text_y[0],
+                        s->text_x[1] - s->text_x[0], i - s->text_y[0]);
        s->text_x[0] = s->width;
        s->text_y[0] = s->height;
        s->text_x[1] = 0;
        s->text_y[1] = 0;
    }
    if (s->cursor_invalidate) {
-        dpy_cursor(s->ds, s->x, s->y);
+        dpy_text_cursor(s->ds, s->x, s->y);
        s->cursor_invalidate = 0;
    }
 }

-static TextConsole *get_graphic_console(DisplayState *ds)
+static QemuConsole *get_graphic_console(DisplayState *ds)
 {
    int i;
-    TextConsole *s;
+    QemuConsole *s;
    for (i = 0; i < nb_consoles; i++) {
        s = consoles[i];
        if (s->console_type == GRAPHIC_CONSOLE && s->ds == ds)
@@ -1255,14 +1262,14 @@ static TextConsole *get_graphic_console(DisplayState *ds)
    return NULL;
 }

-static TextConsole *new_console(DisplayState *ds, console_type_t console_type)
+static QemuConsole *new_console(DisplayState *ds, console_type_t console_type)
 {
-    TextConsole *s;
+    QemuConsole *s;
    int i;

    if (nb_consoles >= MAX_CONSOLES)
        return NULL;
-    s = g_malloc0(sizeof(TextConsole));
+    s = g_malloc0(sizeof(QemuConsole));
    if (!active_console || ((active_console->console_type != GRAPHIC_CONSOLE) &&
        (console_type == GRAPHIC_CONSOLE))) {
        active_console = s;
@@ -1287,85 +1294,86 @@ static TextConsole *new_console(DisplayState *ds, console_type_t console_type)
    return s;
 }

-static DisplaySurface* defaultallocator_create_displaysurface(int width, int height)
+static void qemu_alloc_display(DisplaySurface *surface, int width, int height,
+                               int linesize, PixelFormat pf, int newflags)
 {
-    DisplaySurface *surface = (DisplaySurface*) g_malloc0(sizeof(DisplaySurface));
-
-    int linesize = width * 4;
-    qemu_alloc_display(surface, width, height, linesize,
-                       qemu_default_pixelformat(32), 0);
-    return surface;
-}
-
-static DisplaySurface* defaultallocator_resize_displaysurface(DisplaySurface *surface,
-                                          int width, int height)
-{
-    int linesize = width * 4;
-    qemu_alloc_display(surface, width, height, linesize,
-                       qemu_default_pixelformat(32), 0);
-    return surface;
-}
-
-void qemu_alloc_display(DisplaySurface *surface, int width, int height,
-                        int linesize, PixelFormat pf, int newflags)
-{
-    void *data;
-    surface->width = width;
-    surface->height = height;
-    surface->linesize = linesize;
    surface->pf = pf;
-    if (surface->flags & QEMU_ALLOCATED_FLAG) {
-        data = g_realloc(surface->data,
-                            surface->linesize * surface->height);
-    } else {
-        data = g_malloc(surface->linesize * surface->height);
-    }
-    surface->data = (uint8_t *)data;
+
+    qemu_pixman_image_unref(surface->image);
+    surface->image = NULL;
+
+    surface->format = qemu_pixman_get_format(&pf);
+    assert(surface->format != 0);
+    surface->image = pixman_image_create_bits(surface->format,
+                                              width, height,
+                                              NULL, linesize);
+    assert(surface->image != NULL);
+
    surface->flags = newflags | QEMU_ALLOCATED_FLAG;
 #ifdef HOST_WORDS_BIGENDIAN
    surface->flags |= QEMU_BIG_ENDIAN_FLAG;
 #endif
 }

-DisplaySurface* qemu_create_displaysurface_from(int width, int height, int bpp,
-                                              int linesize, uint8_t *data)
+DisplaySurface *qemu_create_displaysurface(DisplayState *ds,
+                                           int width, int height)
 {
-    DisplaySurface *surface = (DisplaySurface*) g_malloc0(sizeof(DisplaySurface));
+    DisplaySurface *surface = g_new0(DisplaySurface, 1);
+
+    int linesize = width * 4;
+    qemu_alloc_display(surface, width, height, linesize,
+                       qemu_default_pixelformat(32), 0);
+    return surface;
+}
+
+DisplaySurface *qemu_resize_displaysurface(DisplayState *ds,
+                                           int width, int height)
+{
+    int linesize = width * 4;
+
+    trace_displaysurface_resize(ds, ds->surface, width, height);
+    qemu_alloc_display(ds->surface, width, height, linesize,
+                       qemu_default_pixelformat(32), 0);
+    return ds->surface;
+}
+
+DisplaySurface *qemu_create_displaysurface_from(int width, int height, int bpp,
+                                                int linesize, uint8_t *data)
+{
+    DisplaySurface *surface = g_new0(DisplaySurface, 1);

-    surface->width = width;
-    surface->height = height;
-    surface->linesize = linesize;
    surface->pf = qemu_default_pixelformat(bpp);
+
+    surface->format = qemu_pixman_get_format(&surface->pf);
+    assert(surface->format != 0);
+    surface->image = pixman_image_create_bits(surface->format,
+                                              width, height,
+                                              (void *)data, linesize);
+    assert(surface->image != NULL);
+
 #ifdef HOST_WORDS_BIGENDIAN
    surface->flags = QEMU_BIG_ENDIAN_FLAG;
 #endif
-    surface->data = data;

    return surface;
 }

-static void defaultallocator_free_displaysurface(DisplaySurface *surface)
+void qemu_free_displaysurface(DisplayState *ds)
 {
-    if (surface == NULL)
+    trace_displaysurface_free(ds, ds->surface);
+    if (ds->surface == NULL) {
        return;
-    if (surface->flags & QEMU_ALLOCATED_FLAG)
-        g_free(surface->data);
-    g_free(surface);
+    }
+    qemu_pixman_image_unref(ds->surface->image);
+    g_free(ds->surface);
 }

-static struct DisplayAllocator default_allocator = {
-    defaultallocator_create_displaysurface,
-    defaultallocator_resize_displaysurface,
-    defaultallocator_free_displaysurface
-};
-
 static void dumb_display_init(void)
 {
    DisplayState *ds = g_malloc0(sizeof(DisplayState));
    int width = 640;
    int height = 480;

-    ds->allocator = &default_allocator;
    if (is_fixedsize_console()) {
        width = active_console->g_width;
        height = active_console->g_height;
@@ -1395,29 +1403,16 @@ DisplayState *get_displaystate(void)
    return display_state;
 }

-DisplayAllocator *register_displayallocator(DisplayState *ds, DisplayAllocator *da)
-{
-    if(ds->allocator ==  &default_allocator) {
-        DisplaySurface *surf;
-        surf = da->create_displaysurface(ds_get_width(ds), ds_get_height(ds));
-        defaultallocator_free_displaysurface(ds->surface);
-        ds->surface = surf;
-        ds->allocator = da;
-    }
-    return ds->allocator;
-}
-
 DisplayState *graphic_console_init(vga_hw_update_ptr update,
                                   vga_hw_invalidate_ptr invalidate,
                                   vga_hw_screen_dump_ptr screen_dump,
                                   vga_hw_text_update_ptr text_update,
                                   void *opaque)
 {
-    TextConsole *s;
+    QemuConsole *s;
    DisplayState *ds;

    ds = (DisplayState *) g_malloc0(sizeof(DisplayState));
-    ds->allocator = &default_allocator; 
    ds->surface = qemu_create_displaysurface(ds, 640, 480);

    s = new_console(ds, GRAPHIC_CONSOLE);
@@ -1459,14 +1454,14 @@ void console_color_init(DisplayState *ds)

 static void text_console_set_echo(CharDriverState *chr, bool echo)
 {
-    TextConsole *s = chr->opaque;
+    QemuConsole *s = chr->opaque;

    s->echo = echo;
 }

 static void text_console_update_cursor(void *opaque)
 {
-    TextConsole *s = opaque;
+    QemuConsole *s = opaque;

    s->cursor_visible_phase = !s->cursor_visible_phase;
    vga_hw_invalidate();
@@ -1476,7 +1471,7 @@ static void text_console_update_cursor(void *opaque)

 static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
 {
-    TextConsole *s;
+    QemuConsole *s;
    static int color_inited;

    s = chr->opaque;
@@ -1539,7 +1534,7 @@ static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
 CharDriverState *text_console_init(QemuOpts *opts)
 {
    CharDriverState *chr;
-    TextConsole *s;
+    QemuConsole *s;
    unsigned width;
    unsigned height;

@@ -1585,14 +1580,14 @@ void text_consoles_set_display(DisplayState *ds)

 void qemu_console_resize(DisplayState *ds, int width, int height)
 {
-    TextConsole *s = get_graphic_console(ds);
+    QemuConsole *s = get_graphic_console(ds);
    if (!s) return;

    s->g_width = width;
    s->g_height = height;
    if (is_graphic_console()) {
        ds->surface = qemu_resize_displaysurface(ds, width, height);
-        dpy_resize(ds);
+        dpy_gfx_resize(ds);
    }
 }

@@ -1600,7 +1595,7 @@ void qemu_console_copy(DisplayState *ds, int src_x, int src_y,
                       int dst_x, int dst_y, int w, int h)
 {
    if (is_graphic_console()) {
-        dpy_copy(ds, src_x, src_y, dst_x, dst_y, w, h);
+        dpy_gfx_copy(ds, src_x, src_y, dst_x, dst_y, w, h);
    }
 }

@@ -1611,7 +1606,7 @@ PixelFormat qemu_different_endianness_pixelformat(int bpp)
    memset(&pf, 0x00, sizeof(PixelFormat));

    pf.bits_per_pixel = bpp;
-    pf.bytes_per_pixel = bpp / 8;
+    pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8);
    pf.depth = bpp == 32 ? 24 : bpp;

    switch (bpp) {
@@ -1660,13 +1655,12 @@ PixelFormat qemu_default_pixelformat(int bpp)
    memset(&pf, 0x00, sizeof(PixelFormat));

    pf.bits_per_pixel = bpp;
-    pf.bytes_per_pixel = bpp / 8;
+    pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8);
    pf.depth = bpp == 32 ? 24 : bpp;

    switch (bpp) {
        case 15:
            pf.bits_per_pixel = 16;
-            pf.bytes_per_pixel = 2;
            pf.rmask = 0x00007c00;
            pf.gmask = 0x000003E0;
            pf.bmask = 0x0000001F;
@@ -1712,18 +1706,15 @@ PixelFormat qemu_default_pixelformat(int bpp)
            pf.rmask = 0x00FF0000;
            pf.gmask = 0x0000FF00;
            pf.bmask = 0x000000FF;
-            pf.amax = 255;
            pf.rmax = 255;
            pf.gmax = 255;
            pf.bmax = 255;
-            pf.ashift = 24;
            pf.rshift = 16;
            pf.gshift = 8;
            pf.bshift = 0;
            pf.rbits = 8;
            pf.gbits = 8;
            pf.bbits = 8;
-            pf.abits = 8;
            break;
        default:
            break;
--- a/console.h
+++ b/console.h
@@ -2,10 +2,13 @@
 #define CONSOLE_H

 #include "qemu-char.h"
+#include "qemu-pixman.h"
 #include "qdict.h"
 #include "notify.h"
 #include "monitor.h"
 #include "trace.h"
+#include "qapi-types.h"
+#include "error.h"

 /* keyboard/mouse support */

@@ -105,7 +108,6 @@ void kbd_put_keysym(int keysym);

 #define QEMU_BIG_ENDIAN_FLAG    0x01
 #define QEMU_ALLOCATED_FLAG     0x02
-#define QEMU_REALPIXELS_FLAG    0x04

 struct PixelFormat {
    uint8_t bits_per_pixel;
@@ -118,11 +120,9 @@ struct PixelFormat {
 };

 struct DisplaySurface {
+    pixman_format_code_t format;
+    pixman_image_t *image;
    uint8_t flags;
-    int width;
-    int height;
-    int linesize;        /* bytes per line */
-    uint8_t *data;

    struct PixelFormat pf;
 };
@@ -152,35 +152,32 @@ struct DisplayChangeListener {
    int idle;
    uint64_t gui_timer_interval;

-    void (*dpy_update)(struct DisplayState *s, int x, int y, int w, int h);
-    void (*dpy_resize)(struct DisplayState *s);
-    void (*dpy_setdata)(struct DisplayState *s);
    void (*dpy_refresh)(struct DisplayState *s);
-    void (*dpy_copy)(struct DisplayState *s, int src_x, int src_y,
-                     int dst_x, int dst_y, int w, int h);
-    void (*dpy_fill)(struct DisplayState *s, int x, int y,
-                     int w, int h, uint32_t c);
+
+    void (*dpy_gfx_update)(struct DisplayState *s, int x, int y, int w, int h);
+    void (*dpy_gfx_resize)(struct DisplayState *s);
+    void (*dpy_gfx_setdata)(struct DisplayState *s);
+    void (*dpy_gfx_copy)(struct DisplayState *s, int src_x, int src_y,
+                         int dst_x, int dst_y, int w, int h);
+
    void (*dpy_text_cursor)(struct DisplayState *s, int x, int y);
+    void (*dpy_text_resize)(struct DisplayState *s, int w, int h);
+    void (*dpy_text_update)(struct DisplayState *s, int x, int y, int w, int h);

-    struct DisplayChangeListener *next;
-};
+    void (*dpy_mouse_set)(struct DisplayState *s, int x, int y, int on);
+    void (*dpy_cursor_define)(struct DisplayState *s, QEMUCursor *cursor);

-struct DisplayAllocator {
-    DisplaySurface* (*create_displaysurface)(int width, int height);
-    DisplaySurface* (*resize_displaysurface)(DisplaySurface *surface, int width, int height);
-    void (*free_displaysurface)(DisplaySurface *surface);
+    QLIST_ENTRY(DisplayChangeListener) next;
 };

 struct DisplayState {
    struct DisplaySurface *surface;
    void *opaque;
    struct QEMUTimer *gui_timer;
+    bool have_gfx;
+    bool have_text;

-    struct DisplayAllocator* allocator;
-    struct DisplayChangeListener* listeners;
-
-    void (*mouse_set)(int x, int y, int on);
-    void (*cursor_define)(QEMUCursor *cursor);
+    QLIST_HEAD(, DisplayChangeListener) listeners;

    struct DisplayState *next;
 };
@@ -189,29 +186,14 @@ void register_displaystate(DisplayState *ds);
 DisplayState *get_displaystate(void);
 DisplaySurface* qemu_create_displaysurface_from(int width, int height, int bpp,
                                                int linesize, uint8_t *data);
-void qemu_alloc_display(DisplaySurface *surface, int width, int height,
-                        int linesize, PixelFormat pf, int newflags);
 PixelFormat qemu_different_endianness_pixelformat(int bpp);
 PixelFormat qemu_default_pixelformat(int bpp);

-DisplayAllocator *register_displayallocator(DisplayState *ds, DisplayAllocator *da);
-
-static inline DisplaySurface* qemu_create_displaysurface(DisplayState *ds, int width, int height)
-{
-    return ds->allocator->create_displaysurface(width, height);    
-}
-
-static inline DisplaySurface* qemu_resize_displaysurface(DisplayState *ds, int width, int height)
-{
-    trace_displaysurface_resize(ds, ds->surface, width, height);
-    return ds->allocator->resize_displaysurface(ds->surface, width, height);
-}
-
-static inline void qemu_free_displaysurface(DisplayState *ds)
-{
-    trace_displaysurface_free(ds, ds->surface);
-    ds->allocator->free_displaysurface(ds->surface);
-}
+DisplaySurface *qemu_create_displaysurface(DisplayState *ds,
+                                           int width, int height);
+DisplaySurface *qemu_resize_displaysurface(DisplayState *ds,
+                                           int width, int height);
+void qemu_free_displaysurface(DisplayState *ds);

 static inline int is_surface_bgr(DisplaySurface *surface)
 {
@@ -223,109 +205,201 @@ static inline int is_surface_bgr(DisplaySurface *surface)

 static inline int is_buffer_shared(DisplaySurface *surface)
 {
-    return (!(surface->flags & QEMU_ALLOCATED_FLAG) &&
-            !(surface->flags & QEMU_REALPIXELS_FLAG));
+    return !(surface->flags & QEMU_ALLOCATED_FLAG);
 }

+void gui_setup_refresh(DisplayState *ds);
+
 static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl)
 {
-    dcl->next = ds->listeners;
-    ds->listeners = dcl;
-}
-
-static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
-{
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        dcl->dpy_update(s, x, y, w, h);
-        dcl = dcl->next;
+    QLIST_INSERT_HEAD(&ds->listeners, dcl, next);
+    gui_setup_refresh(ds);
+    if (dcl->dpy_gfx_resize) {
+        dcl->dpy_gfx_resize(ds);
    }
 }

-static inline void dpy_resize(DisplayState *s)
+static inline void unregister_displaychangelistener(DisplayState *ds,
+                                                    DisplayChangeListener *dcl)
 {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        dcl->dpy_resize(s);
-        dcl = dcl->next;
+    QLIST_REMOVE(dcl, next);
+    gui_setup_refresh(ds);
+}
+
+static inline void dpy_gfx_update(DisplayState *s, int x, int y, int w, int h)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_gfx_update) {
+            dcl->dpy_gfx_update(s, x, y, w, h);
+        }
    }
 }

-static inline void dpy_setdata(DisplayState *s)
+static inline void dpy_gfx_resize(DisplayState *s)
 {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_setdata) dcl->dpy_setdata(s);
-        dcl = dcl->next;
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_gfx_resize) {
+            dcl->dpy_gfx_resize(s);
+        }
+    }
+}
+
+static inline void dpy_gfx_setdata(DisplayState *s)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_gfx_setdata) {
+            dcl->dpy_gfx_setdata(s);
+        }
    }
 }

 static inline void dpy_refresh(DisplayState *s)
 {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_refresh) dcl->dpy_refresh(s);
-        dcl = dcl->next;
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_refresh) {
+            dcl->dpy_refresh(s);
+        }
    }
 }

-static inline void dpy_copy(struct DisplayState *s, int src_x, int src_y,
-                             int dst_x, int dst_y, int w, int h) {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_copy)
-            dcl->dpy_copy(s, src_x, src_y, dst_x, dst_y, w, h);
-        else /* TODO */
-            dcl->dpy_update(s, dst_x, dst_y, w, h);
-        dcl = dcl->next;
+static inline void dpy_gfx_copy(struct DisplayState *s, int src_x, int src_y,
+                             int dst_x, int dst_y, int w, int h)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_gfx_copy) {
+            dcl->dpy_gfx_copy(s, src_x, src_y, dst_x, dst_y, w, h);
+        } else { /* TODO */
+            dcl->dpy_gfx_update(s, dst_x, dst_y, w, h);
+        }
    }
 }

-static inline void dpy_fill(struct DisplayState *s, int x, int y,
-                             int w, int h, uint32_t c) {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_fill) dcl->dpy_fill(s, x, y, w, h, c);
-        dcl = dcl->next;
+static inline void dpy_text_cursor(struct DisplayState *s, int x, int y)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_text_cursor) {
+            dcl->dpy_text_cursor(s, x, y);
+        }
    }
 }

-static inline void dpy_cursor(struct DisplayState *s, int x, int y) {
-    struct DisplayChangeListener *dcl = s->listeners;
-    while (dcl != NULL) {
-        if (dcl->dpy_text_cursor) dcl->dpy_text_cursor(s, x, y);
-        dcl = dcl->next;
+static inline void dpy_text_update(DisplayState *s, int x, int y, int w, int h)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_text_update) {
+            dcl->dpy_text_update(s, x, y, w, h);
+        }
    }
 }

+static inline void dpy_text_resize(DisplayState *s, int w, int h)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_text_resize) {
+            dcl->dpy_text_resize(s, w, h);
+        }
+    }
+}
+
+static inline void dpy_mouse_set(struct DisplayState *s, int x, int y, int on)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_mouse_set) {
+            dcl->dpy_mouse_set(s, x, y, on);
+        }
+    }
+}
+
+static inline void dpy_cursor_define(struct DisplayState *s, QEMUCursor *cursor)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_cursor_define) {
+            dcl->dpy_cursor_define(s, cursor);
+        }
+    }
+}
+
+static inline bool dpy_cursor_define_supported(struct DisplayState *s)
+{
+    struct DisplayChangeListener *dcl;
+    QLIST_FOREACH(dcl, &s->listeners, next) {
+        if (dcl->dpy_cursor_define) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static inline int ds_get_linesize(DisplayState *ds)
 {
-    return ds->surface->linesize;
+    return pixman_image_get_stride(ds->surface->image);
 }

 static inline uint8_t* ds_get_data(DisplayState *ds)
 {
-    return ds->surface->data;
+    return (void *)pixman_image_get_data(ds->surface->image);
 }

 static inline int ds_get_width(DisplayState *ds)
 {
-    return ds->surface->width;
+    return pixman_image_get_width(ds->surface->image);
 }

 static inline int ds_get_height(DisplayState *ds)
 {
-    return ds->surface->height;
+    return pixman_image_get_height(ds->surface->image);
 }

 static inline int ds_get_bits_per_pixel(DisplayState *ds)
 {
-    return ds->surface->pf.bits_per_pixel;
+    int bits = PIXMAN_FORMAT_BPP(ds->surface->format);
+    return bits;
 }

 static inline int ds_get_bytes_per_pixel(DisplayState *ds)
 {
-    return ds->surface->pf.bytes_per_pixel;
+    int bits = PIXMAN_FORMAT_BPP(ds->surface->format);
+    return (bits + 7) / 8;
+}
+
+static inline pixman_format_code_t ds_get_format(DisplayState *ds)
+{
+    return ds->surface->format;
+}
+
+static inline pixman_image_t *ds_get_image(DisplayState *ds)
+{
+    return ds->surface->image;
+}
+
+static inline int ds_get_depth(DisplayState *ds)
+{
+    return ds->surface->pf.depth;
+}
+
+static inline int ds_get_rmask(DisplayState *ds)
+{
+    return ds->surface->pf.rmask;
+}
+
+static inline int ds_get_gmask(DisplayState *ds)
+{
+    return ds->surface->pf.gmask;
+}
+
+static inline int ds_get_bmask(DisplayState *ds)
+{
+    return ds->surface->pf.bmask;
 }

 #ifdef CONFIG_CURSES
@@ -343,7 +417,8 @@ static inline void console_write_ch(console_ch_t *dest, uint32_t ch)

 typedef void (*vga_hw_update_ptr)(void *);
 typedef void (*vga_hw_invalidate_ptr)(void *);
-typedef void (*vga_hw_screen_dump_ptr)(void *, const char *, bool cswitch);
+typedef void (*vga_hw_screen_dump_ptr)(void *, const char *, bool cswitch,
+                                       Error **errp);
 typedef void (*vga_hw_text_update_ptr)(void *, console_ch_t *);

 DisplayState *graphic_console_init(vga_hw_update_ptr update,
@@ -354,7 +429,6 @@ DisplayState *graphic_console_init(vga_hw_update_ptr update,

 void vga_hw_update(void);
 void vga_hw_invalidate(void);
-void vga_hw_screen_dump(const char *filename);
 void vga_hw_text_update(console_ch_t *chardata);

 int is_graphic_console(void);
@@ -375,10 +449,8 @@ void cocoa_display_init(DisplayState *ds, int full_screen);

 /* vnc.c */
 void vnc_display_init(DisplayState *ds);
-void vnc_display_close(DisplayState *ds);
-int vnc_display_open(DisplayState *ds, const char *display);
+void vnc_display_open(DisplayState *ds, const char *display, Error **errp);
 void vnc_display_add_client(DisplayState *ds, int csock, int skipauth);
-int vnc_display_disable_login(DisplayState *ds);
 char *vnc_display_local_addr(DisplayState *ds);
 #ifdef CONFIG_VNC
 int vnc_display_password(DisplayState *ds, const char *password);
@@ -397,4 +469,8 @@ static inline int vnc_display_pw_expire(DisplayState *ds, time_t expires)
 /* curses.c */
 void curses_display_init(DisplayState *ds, int full_screen);

+/* input.c */
+int index_from_key(const char *key);
+int index_from_keycode(int code);
+
 #endif
--- a/coroutine-sigaltstack.c
+++ b/coroutine-sigaltstack.c
@@ -171,8 +171,8 @@ static Coroutine *coroutine_new(void)
    CoroutineThreadState *coTS;
    struct sigaction sa;
    struct sigaction osa;
-    struct sigaltstack ss;
-    struct sigaltstack oss;
+    stack_t ss;
+    stack_t oss;
    sigset_t sigs;
    sigset_t osigs;
    jmp_buf old_env;
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -260,14 +260,6 @@ extern unsigned long reserved_va;
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)

-#ifndef CONFIG_TCG_PASS_AREG0
-#define ldub_code(p) ldub_raw(p)
-#define ldsb_code(p) ldsb_raw(p)
-#define lduw_code(p) lduw_raw(p)
-#define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
-#define ldq_code(p) ldq_raw(p)
-#else
 #define cpu_ldub_code(env1, p) ldub_raw(p)
 #define cpu_ldsb_code(env1, p) ldsb_raw(p)
 #define cpu_lduw_code(env1, p) lduw_raw(p)
@@ -296,7 +288,6 @@ extern unsigned long reserved_va;
 #define cpu_stw_kernel(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_kernel(env, addr, data) stl_raw(addr, data)
 #define cpu_stq_kernel(env, addr, data) stq_raw(addr, data)
-#endif

 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
@@ -313,7 +304,6 @@ extern unsigned long reserved_va;
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)

-#ifdef CONFIG_TCG_PASS_AREG0
 #define cpu_ldub_data(env, addr) ldub_raw(addr)
 #define cpu_lduw_data(env, addr) lduw_raw(addr)
 #define cpu_ldl_data(env, addr) ldl_raw(addr)
@@ -321,7 +311,6 @@ extern unsigned long reserved_va;
 #define cpu_stb_data(env, addr, data) stb_raw(addr, data)
 #define cpu_stw_data(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_data(env, addr, data) stl_raw(addr, data)
-#endif
 #endif /* defined(CONFIG_USER_ONLY) */

 /* page related stuff */
@@ -367,6 +356,9 @@ CPUArchState *cpu_copy(CPUArchState *env);
 CPUArchState *qemu_get_cpu(int cpu);

 #define CPU_DUMP_CODE 0x00010000
+#define CPU_DUMP_FPU 0x00020000 /* dump FPU register state, not just integer */
+/* dump info about TCG QEMU's condition code optimization state */
+#define CPU_DUMP_CCOP 0x00040000

 void cpu_dump_state(CPUArchState *env, FILE *f, fprintf_function cpu_fprintf,
                    int flags);
@@ -446,8 +438,6 @@ void cpu_reset_interrupt(CPUArchState *env, int mask);

 void cpu_exit(CPUArchState *s);

-bool qemu_cpu_has_work(CPUArchState *env);
-
 /* Breakpoint/watchpoint flags */
 #define BP_MEM_READ           0x01
 #define BP_MEM_WRITE          0x02
@@ -474,15 +464,13 @@ void cpu_watchpoint_remove_all(CPUArchState *env, int mask);
 #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */

 void cpu_single_step(CPUArchState *env, int enabled);
-int cpu_is_stopped(CPUArchState *env);
-void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data);

 #if !defined(CONFIG_USER_ONLY)

 /* Return the physical page corresponding to a virtual one. Use it
   only for debugging because no protection checks are done. Return -1
   if no page found. */
-target_phys_addr_t cpu_get_phys_page_debug(CPUArchState *env, target_ulong addr);
+hwaddr cpu_get_phys_page_debug(CPUArchState *env, target_ulong addr);

 /* memory API */

@@ -508,7 +496,6 @@ typedef struct RAMBlock {
 typedef struct RAMList {
    uint8_t *phys_dirty;
    QLIST_HEAD(, RAMBlock) blocks;
-    uint64_t dirty_pages;
 } RAMList;
 extern RAMList ram_list;

@@ -526,6 +513,7 @@ extern int mem_prealloc;
 #define TLB_MMIO        (1 << 5)

 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
+ram_addr_t last_ram_offset(void);
 #endif /* !CONFIG_USER_ONLY */

 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -3,7 +3,7 @@

 /* CPU interfaces that are target independent.  */

-#include "targphys.h"
+#include "hwaddr.h"

 #ifndef NEED_CPU_H
 #include "poison.h"
@@ -21,7 +21,7 @@ enum device_endian {
 };

 /* address in the RAM (different from a physical address) */
-#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64
+#if defined(CONFIG_XEN_BACKEND)
 typedef uint64_t ram_addr_t;
 #  define RAM_ADDR_MAX UINT64_MAX
 #  define RAM_ADDR_FMT "%" PRIx64
@@ -33,43 +33,38 @@ typedef uintptr_t ram_addr_t;

 /* memory API */

-typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
-typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
+typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);

 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should only be used for ram local to a device.  */
 void *qemu_get_ram_ptr(ram_addr_t addr);
-void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size);
-/* Same but slower, to use for migration, where the order of
- * RAMBlocks must not change. */
-void *qemu_safe_ram_ptr(ram_addr_t addr);
 void qemu_put_ram_ptr(void *addr);
 /* This should not be used by devices.  */
 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);

-void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write);
-static inline void cpu_physical_memory_read(target_phys_addr_t addr,
+static inline void cpu_physical_memory_read(hwaddr addr,
                                            void *buf, int len)
 {
    cpu_physical_memory_rw(addr, buf, len, 0);
 }
-static inline void cpu_physical_memory_write(target_phys_addr_t addr,
+static inline void cpu_physical_memory_write(hwaddr addr,
                                             const void *buf, int len)
 {
    cpu_physical_memory_rw(addr, (void *)buf, len, 1);
 }
-void *cpu_physical_memory_map(target_phys_addr_t addr,
-                              target_phys_addr_t *plen,
+void *cpu_physical_memory_map(hwaddr addr,
+                              hwaddr *plen,
                              int is_write);
-void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
-                               int is_write, target_phys_addr_t access_len);
+void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+                               int is_write, hwaddr access_len);
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
-void cpu_unregister_map_client(void *cookie);

-bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr);
+bool cpu_physical_memory_is_io(hwaddr phys_addr);

 /* Coalesced MMIO regions are areas where write operations can be reordered.
 * This usually implies that write operations are side-effect free.  This allows
@@ -78,33 +73,33 @@ bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr);
 */
 void qemu_flush_coalesced_mmio_buffer(void);

-uint32_t ldub_phys(target_phys_addr_t addr);
-uint32_t lduw_le_phys(target_phys_addr_t addr);
-uint32_t lduw_be_phys(target_phys_addr_t addr);
-uint32_t ldl_le_phys(target_phys_addr_t addr);
-uint32_t ldl_be_phys(target_phys_addr_t addr);
-uint64_t ldq_le_phys(target_phys_addr_t addr);
-uint64_t ldq_be_phys(target_phys_addr_t addr);
-void stb_phys(target_phys_addr_t addr, uint32_t val);
-void stw_le_phys(target_phys_addr_t addr, uint32_t val);
-void stw_be_phys(target_phys_addr_t addr, uint32_t val);
-void stl_le_phys(target_phys_addr_t addr, uint32_t val);
-void stl_be_phys(target_phys_addr_t addr, uint32_t val);
-void stq_le_phys(target_phys_addr_t addr, uint64_t val);
-void stq_be_phys(target_phys_addr_t addr, uint64_t val);
+uint32_t ldub_phys(hwaddr addr);
+uint32_t lduw_le_phys(hwaddr addr);
+uint32_t lduw_be_phys(hwaddr addr);
+uint32_t ldl_le_phys(hwaddr addr);
+uint32_t ldl_be_phys(hwaddr addr);
+uint64_t ldq_le_phys(hwaddr addr);
+uint64_t ldq_be_phys(hwaddr addr);
+void stb_phys(hwaddr addr, uint32_t val);
+void stw_le_phys(hwaddr addr, uint32_t val);
+void stw_be_phys(hwaddr addr, uint32_t val);
+void stl_le_phys(hwaddr addr, uint32_t val);
+void stl_be_phys(hwaddr addr, uint32_t val);
+void stq_le_phys(hwaddr addr, uint64_t val);
+void stq_be_phys(hwaddr addr, uint64_t val);

 #ifdef NEED_CPU_H
-uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
-uint64_t ldq_phys(target_phys_addr_t addr);
-void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
-void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
-void stw_phys(target_phys_addr_t addr, uint32_t val);
-void stl_phys(target_phys_addr_t addr, uint32_t val);
-void stq_phys(target_phys_addr_t addr, uint64_t val);
+uint32_t lduw_phys(hwaddr addr);
+uint32_t ldl_phys(hwaddr addr);
+uint64_t ldq_phys(hwaddr addr);
+void stl_phys_notdirty(hwaddr addr, uint32_t val);
+void stq_phys_notdirty(hwaddr addr, uint64_t val);
+void stw_phys(hwaddr addr, uint32_t val);
+void stl_phys(hwaddr addr, uint32_t val);
+void stq_phys(hwaddr addr, uint64_t val);
 #endif

-void cpu_physical_memory_write_rom(target_phys_addr_t addr,
+void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len);

 extern struct MemoryRegion io_mem_ram;
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -29,7 +29,7 @@
 #include <signal.h>
 #include "osdep.h"
 #include "qemu-queue.h"
-#include "targphys.h"
+#include "hwaddr.h"

 #ifndef TARGET_LONG_BITS
 #error TARGET_LONG_BITS must be defined before including this header
@@ -111,7 +111,7 @@ extern int CPUTLBEntry_wrong_size[sizeof(CPUTLBEntry) == (1 << CPU_TLB_ENTRY_BIT
 #define CPU_COMMON_TLB \
    /* The meaning of the MMU modes is defined in the target code. */   \
    CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
-    target_phys_addr_t iotlb[NB_MMU_MODES][CPU_TLB_SIZE];               \
+    hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];               \
    target_ulong tlb_flush_addr;                                        \
    target_ulong tlb_flush_mask;

@@ -201,15 +201,9 @@ typedef struct CPUWatchpoint {
    int nr_cores;  /* number of cores within this CPU package */        \
    int nr_threads;/* number of threads within this CPU */              \
    int running; /* Nonzero if cpu is currently running(usermode).  */  \
-    int thread_id;                                                      \
    /* user data */                                                     \
    void *opaque;                                                       \
                                                                        \
-    uint32_t created;                                                   \
-    uint32_t stop;   /* Stop request */                                 \
-    uint32_t stopped; /* Artificially stopped */                        \
-    struct QemuCond *halt_cond;                                         \
-    struct qemu_work_item *queued_work_first, *queued_work_last;        \
    const char *cpu_model_str;                                          \
    struct KVMState *kvm_state;                                         \
    struct kvm_run *kvm_run;                                            \
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -27,9 +27,9 @@ int tb_invalidated_flag;

 //#define CONFIG_DEBUG_EXEC

-bool qemu_cpu_has_work(CPUArchState *env)
+bool qemu_cpu_has_work(CPUState *cpu)
 {
-    return cpu_has_work(env);
+    return cpu_has_work(cpu);
 }

 void cpu_loop_exit(CPUArchState *env)
@@ -181,16 +181,14 @@ volatile sig_atomic_t exit_request;

 int cpu_exec(CPUArchState *env)
 {
-#ifdef TARGET_PPC
    CPUState *cpu = ENV_GET_CPU(env);
-#endif
    int ret, interrupt_request;
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tcg_target_ulong next_tb;

    if (env->halted) {
-        if (!cpu_has_work(env)) {
+        if (!cpu_has_work(cpu)) {
            return EXCP_HALTED;
        }

@@ -552,7 +550,7 @@ int cpu_exec(CPUArchState *env)
 #if defined(TARGET_I386)
                    env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
                        | (DF & DF_MASK);
-                    log_cpu_state(env, X86_DUMP_CCOP);
+                    log_cpu_state(env, CPU_DUMP_CCOP);
                    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_M68K)
                    cpu_m68k_flush_flags(env, env->cc_op);
--- a/cpus.c
+++ b/cpus.c
@@ -64,13 +64,15 @@ static CPUArchState *next_cpu;

 static bool cpu_thread_is_idle(CPUArchState *env)
 {
-    if (env->stop || env->queued_work_first) {
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
-    if (env->stopped || !runstate_is_running()) {
+    if (cpu->stopped || !runstate_is_running()) {
        return true;
    }
-    if (!env->halted || qemu_cpu_has_work(env) ||
+    if (!env->halted || qemu_cpu_has_work(cpu) ||
        kvm_async_interrupts_enabled()) {
        return false;
    }
@@ -395,11 +397,7 @@ void hw_error(const char *fmt, ...)
    fprintf(stderr, "\n");
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
-#ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
-#else
-        cpu_dump_state(env, stderr, fprintf, 0);
-#endif
+        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
@@ -432,9 +430,9 @@ void cpu_synchronize_all_post_init(void)
    }
 }

-int cpu_is_stopped(CPUArchState *env)
+bool cpu_is_stopped(CPUState *cpu)
 {
-    return !runstate_is_running() || env->stopped;
+    return !runstate_is_running() || cpu->stopped;
 }

 static void do_vm_stop(RunState state)
@@ -450,22 +448,24 @@ static void do_vm_stop(RunState state)
    }
 }

-static int cpu_can_run(CPUArchState *env)
+static bool cpu_can_run(CPUState *cpu)
 {
-    if (env->stop) {
-        return 0;
+    if (cpu->stop) {
+        return false;
    }
-    if (env->stopped || !runstate_is_running()) {
-        return 0;
+    if (cpu->stopped || !runstate_is_running()) {
+        return false;
    }
-    return 1;
+    return true;
 }

 static void cpu_handle_guest_debug(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
-    env->stopped = 1;
+    cpu->stopped = true;
 }

 static void cpu_signal(int sig)
@@ -613,7 +613,7 @@ static void qemu_tcg_init_cpu_signals(void)
 }
 #endif /* _WIN32 */

-QemuMutex qemu_global_mutex;
+static QemuMutex qemu_global_mutex;
 static QemuCond qemu_io_proceeded_cond;
 static bool iothread_requesting_mutex;

@@ -640,27 +640,27 @@ void qemu_init_cpu_loop(void)
    qemu_thread_get_self(&io_thread);
 }

-void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
+void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
 {
    struct qemu_work_item wi;

-    if (qemu_cpu_is_self(env)) {
+    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
-    if (!env->queued_work_first) {
-        env->queued_work_first = &wi;
+    if (cpu->queued_work_first == NULL) {
+        cpu->queued_work_first = &wi;
    } else {
-        env->queued_work_last->next = &wi;
+        cpu->queued_work_last->next = &wi;
    }
-    env->queued_work_last = &wi;
+    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

-    qemu_cpu_kick(env);
+    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUArchState *self_env = cpu_single_env;

@@ -669,33 +669,31 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
    }
 }

-static void flush_queued_work(CPUArchState *env)
+static void flush_queued_work(CPUState *cpu)
 {
    struct qemu_work_item *wi;

-    if (!env->queued_work_first) {
+    if (cpu->queued_work_first == NULL) {
        return;
    }

-    while ((wi = env->queued_work_first)) {
-        env->queued_work_first = wi->next;
+    while ((wi = cpu->queued_work_first)) {
+        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
-    env->queued_work_last = NULL;
+    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
 }

-static void qemu_wait_io_event_common(CPUArchState *env)
+static void qemu_wait_io_event_common(CPUState *cpu)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
-
-    if (env->stop) {
-        env->stop = 0;
-        env->stopped = 1;
+    if (cpu->stop) {
+        cpu->stop = false;
+        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
-    flush_queued_work(env);
+    flush_queued_work(cpu);
    cpu->thread_kicked = false;
 }

@@ -715,18 +713,20 @@ static void qemu_tcg_wait_io_event(void)
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        qemu_wait_io_event_common(env);
+        qemu_wait_io_event_common(ENV_GET_CPU(env));
    }
 }

 static void qemu_kvm_wait_io_event(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
    while (cpu_thread_is_idle(env)) {
-        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
+        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
-    qemu_wait_io_event_common(env);
+    qemu_wait_io_event_common(cpu);
 }

 static void *qemu_kvm_cpu_thread_fn(void *arg)
@@ -737,7 +737,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
-    env->thread_id = qemu_get_thread_id();
+    cpu->thread_id = qemu_get_thread_id();
    cpu_single_env = env;

    r = kvm_init_vcpu(env);
@@ -749,11 +749,11 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
    qemu_kvm_init_cpu_signals(env);

    /* signal CPU creation */
-    env->created = 1;
+    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
-        if (cpu_can_run(env)) {
+        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
@@ -778,13 +778,13 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
-    env->thread_id = qemu_get_thread_id();
+    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
-    env->created = 1;
+    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    cpu_single_env = env;
@@ -801,7 +801,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
        }
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
-        qemu_wait_io_event_common(env);
+        qemu_wait_io_event_common(cpu);
    }

    return NULL;
@@ -812,8 +812,8 @@ static void tcg_exec_all(void);

 static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
-    CPUArchState *env = arg;
-    CPUState *cpu = ENV_GET_CPU(env);
+    CPUState *cpu = arg;
+    CPUArchState *env;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);
@@ -821,18 +821,19 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    /* signal CPU creation */
    qemu_mutex_lock(&qemu_global_mutex);
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->thread_id = qemu_get_thread_id();
-        env->created = 1;
+        cpu = ENV_GET_CPU(env);
+        cpu->thread_id = qemu_get_thread_id();
+        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
-    while (first_cpu->stopped) {
+    while (ENV_GET_CPU(first_cpu)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
-            qemu_wait_io_event_common(env);
+            qemu_wait_io_event_common(ENV_GET_CPU(env));
        }
    }

@@ -847,9 +848,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    return NULL;
 }

-static void qemu_cpu_kick_thread(CPUArchState *env)
+static void qemu_cpu_kick_thread(CPUState *cpu)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
 #ifndef _WIN32
    int err;

@@ -859,7 +859,7 @@ static void qemu_cpu_kick_thread(CPUArchState *env)
        exit(1);
    }
 #else /* _WIN32 */
-    if (!qemu_cpu_is_self(env)) {
+    if (!qemu_cpu_is_self(cpu)) {
        SuspendThread(cpu->hThread);
        cpu_signal(0);
        ResumeThread(cpu->hThread);
@@ -867,14 +867,11 @@ static void qemu_cpu_kick_thread(CPUArchState *env)
 #endif
 }

-void qemu_cpu_kick(void *_env)
+void qemu_cpu_kick(CPUState *cpu)
 {
-    CPUArchState *env = _env;
-    CPUState *cpu = ENV_GET_CPU(env);
-
-    qemu_cond_broadcast(env->halt_cond);
+    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
-        qemu_cpu_kick_thread(env);
+        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
 }
@@ -886,7 +883,7 @@ void qemu_cpu_kick_self(void)
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);

    if (!cpu_single_cpu->thread_kicked) {
-        qemu_cpu_kick_thread(cpu_single_env);
+        qemu_cpu_kick_thread(cpu_single_cpu);
        cpu_single_cpu->thread_kicked = true;
    }
 #else
@@ -894,14 +891,16 @@ void qemu_cpu_kick_self(void)
 #endif
 }

-int qemu_cpu_is_self(void *_env)
+bool qemu_cpu_is_self(CPUState *cpu)
 {
-    CPUArchState *env = _env;
-    CPUState *cpu = ENV_GET_CPU(env);
-
    return qemu_thread_is_self(cpu->thread);
 }

+static bool qemu_in_vcpu_thread(void)
+{
+    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
+}
+
 void qemu_mutex_lock_iothread(void)
 {
    if (!tcg_enabled()) {
@@ -909,7 +908,7 @@ void qemu_mutex_lock_iothread(void)
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_thread(first_cpu);
+            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
@@ -927,7 +926,8 @@ static int all_vcpus_paused(void)
    CPUArchState *penv = first_cpu;

    while (penv) {
-        if (!penv->stopped) {
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        if (!pcpu->stopped) {
            return 0;
        }
        penv = penv->next_cpu;
@@ -942,17 +942,19 @@ void pause_all_vcpus(void)

    qemu_clock_enable(vm_clock, false);
    while (penv) {
-        penv->stop = 1;
-        qemu_cpu_kick(penv);
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        pcpu->stop = true;
+        qemu_cpu_kick(pcpu);
        penv = penv->next_cpu;
    }

-    if (!qemu_thread_is_self(&io_thread)) {
+    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            while (penv) {
-                penv->stop = 0;
-                penv->stopped = 1;
+                CPUState *pcpu = ENV_GET_CPU(penv);
+                pcpu->stop = 0;
+                pcpu->stopped = true;
                penv = penv->next_cpu;
            }
            return;
@@ -963,7 +965,7 @@ void pause_all_vcpus(void)
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        penv = first_cpu;
        while (penv) {
-            qemu_cpu_kick(penv);
+            qemu_cpu_kick(ENV_GET_CPU(penv));
            penv = penv->next_cpu;
        }
    }
@@ -975,36 +977,34 @@ void resume_all_vcpus(void)

    qemu_clock_enable(vm_clock, true);
    while (penv) {
-        penv->stop = 0;
-        penv->stopped = 0;
-        qemu_cpu_kick(penv);
+        CPUState *pcpu = ENV_GET_CPU(penv);
+        pcpu->stop = false;
+        pcpu->stopped = false;
+        qemu_cpu_kick(pcpu);
        penv = penv->next_cpu;
    }
 }

-static void qemu_tcg_init_vcpu(void *_env)
+static void qemu_tcg_init_vcpu(CPUState *cpu)
 {
-    CPUArchState *env = _env;
-    CPUState *cpu = ENV_GET_CPU(env);
-
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
-        env->halt_cond = g_malloc0(sizeof(QemuCond));
-        qemu_cond_init(env->halt_cond);
-        tcg_halt_cond = env->halt_cond;
-        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env,
+        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+        qemu_cond_init(cpu->halt_cond);
+        tcg_halt_cond = cpu->halt_cond;
+        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                           QEMU_THREAD_JOINABLE);
 #ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
 #endif
-        while (env->created == 0) {
+        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
-        env->halt_cond = tcg_halt_cond;
+        cpu->halt_cond = tcg_halt_cond;
    }
 }

@@ -1013,11 +1013,11 @@ static void qemu_kvm_start_vcpu(CPUArchState *env)
    CPUState *cpu = ENV_GET_CPU(env);

    cpu->thread = g_malloc0(sizeof(QemuThread));
-    env->halt_cond = g_malloc0(sizeof(QemuCond));
-    qemu_cond_init(env->halt_cond);
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
-    while (env->created == 0) {
+    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
 }
@@ -1027,11 +1027,11 @@ static void qemu_dummy_start_vcpu(CPUArchState *env)
    CPUState *cpu = ENV_GET_CPU(env);

    cpu->thread = g_malloc0(sizeof(QemuThread));
-    env->halt_cond = g_malloc0(sizeof(QemuCond));
-    qemu_cond_init(env->halt_cond);
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
-    while (env->created == 0) {
+    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
 }
@@ -1039,14 +1039,15 @@ static void qemu_dummy_start_vcpu(CPUArchState *env)
 void qemu_init_vcpu(void *_env)
 {
    CPUArchState *env = _env;
+    CPUState *cpu = ENV_GET_CPU(env);

    env->nr_cores = smp_cores;
    env->nr_threads = smp_threads;
-    env->stopped = 1;
+    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(env);
    } else if (tcg_enabled()) {
-        qemu_tcg_init_vcpu(env);
+        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(env);
    }
@@ -1055,8 +1056,9 @@ void qemu_init_vcpu(void *_env)
 void cpu_stop_current(void)
 {
    if (cpu_single_env) {
-        cpu_single_env->stop = 0;
-        cpu_single_env->stopped = 1;
+        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
+        cpu_single_cpu->stop = false;
+        cpu_single_cpu->stopped = true;
        cpu_exit(cpu_single_env);
        qemu_cond_signal(&qemu_pause_cond);
    }
@@ -1064,7 +1066,7 @@ void cpu_stop_current(void)

 void vm_stop(RunState state)
 {
-    if (!qemu_thread_is_self(&io_thread)) {
+    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
@@ -1137,17 +1139,18 @@ static void tcg_exec_all(void)
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
        CPUArchState *env = next_cpu;
+        CPUState *cpu = ENV_GET_CPU(env);

        qemu_clock_enable(vm_clock,
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

-        if (cpu_can_run(env)) {
+        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
                break;
            }
-        } else if (env->stop || env->stopped) {
+        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
@@ -1192,10 +1195,8 @@ void set_cpu_log_filename(const char *optarg)
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 {
    /* XXX: implement xxx_cpu_list for targets that still miss it */
-#if defined(cpu_list_id)
-    cpu_list_id(f, cpu_fprintf, optarg);
-#elif defined(cpu_list)
-    cpu_list(f, cpu_fprintf); /* deprecated */
+#if defined(cpu_list)
+    cpu_list(f, cpu_fprintf);
 #endif
 }

@@ -1204,7 +1205,8 @@ CpuInfoList *qmp_query_cpus(Error **errp)
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUArchState *env;

-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *info;

        cpu_synchronize_state(env);
@@ -1214,7 +1216,7 @@ CpuInfoList *qmp_query_cpus(Error **errp)
        info->value->CPU = env->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = env->halted;
-        info->value->thread_id = env->thread_id;
+        info->value->thread_id = cpu->thread_id;
 #if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
--- a/cputlb.c
+++ b/cputlb.c
@@ -21,11 +21,11 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "memory.h"
+#include "exec-memory.h"

 #include "cputlb.h"

-#define WANT_EXEC_OBSOLETE
-#include "exec-obsolete.h"
+#include "memory-internal.h"

 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
@@ -237,7 +237,7 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
   supplied size is only used by tlb_flush_page.  */
 void tlb_set_page(CPUArchState *env, target_ulong vaddr,
-                  target_phys_addr_t paddr, int prot,
+                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
 {
    MemoryRegionSection *section;
@@ -246,13 +246,13 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te;
-    target_phys_addr_t iotlb;
+    hwaddr iotlb;

    assert(size >= TARGET_PAGE_SIZE);
    if (size != TARGET_PAGE_SIZE) {
        tlb_add_large_page(env, vaddr, size);
    }
-    section = phys_page_find(paddr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
 #if defined(DEBUG_TLB)
    printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
           " prot=%x idx=%d pd=0x%08lx\n",
@@ -325,11 +325,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
-#ifdef CONFIG_TCG_PASS_AREG0
        cpu_ldub_code(env1, addr);
-#else
-        ldub_code(addr);
-#endif
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(pd);
@@ -348,7 +344,6 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
 #define MMUSUFFIX _cmmu
 #undef GETPC
 #define GETPC() ((uintptr_t)0)
-#define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS

 #define SHIFT 0
--- a/cputlb.h
+++ b/cputlb.h
@@ -26,17 +26,18 @@ void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                             target_ulong vaddr);
 void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
                           uintptr_t length);
-MemoryRegionSection *phys_page_find(target_phys_addr_t index);
+MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
+                                    hwaddr index);
 void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
 extern int tlb_flush_count;

 /* exec.c */
 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr);
-target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
+hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                                   MemoryRegionSection *section,
                                                   target_ulong vaddr,
-                                                   target_phys_addr_t paddr,
+                                                   hwaddr paddr,
                                                   int prot,
                                                   target_ulong *address);
 bool memory_region_is_unassigned(MemoryRegion *mr);
--- a/cutils.c
+++ b/cutils.c
@@ -115,7 +115,7 @@ time_t mktimegm(struct tm *tm)
        m += 12;
        y--;
    }
-    t = 86400 * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
+    t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
                 y / 400 - 719469);
    t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
    return t;
@@ -142,109 +142,6 @@ int qemu_fdatasync(int fd)
 #endif
 }

-/* io vectors */
-
-void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
-{
-    qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
-    qiov->niov = 0;
-    qiov->nalloc = alloc_hint;
-    qiov->size = 0;
-}
-
-void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
-{
-    int i;
-
-    qiov->iov = iov;
-    qiov->niov = niov;
-    qiov->nalloc = -1;
-    qiov->size = 0;
-    for (i = 0; i < niov; i++)
-        qiov->size += iov[i].iov_len;
-}
-
-void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
-{
-    assert(qiov->nalloc != -1);
-
-    if (qiov->niov == qiov->nalloc) {
-        qiov->nalloc = 2 * qiov->nalloc + 1;
-        qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
-    }
-    qiov->iov[qiov->niov].iov_base = base;
-    qiov->iov[qiov->niov].iov_len = len;
-    qiov->size += len;
-    ++qiov->niov;
-}
-
-/*
- * Concatenates (partial) iovecs from src to the end of dst.
- * It starts copying after skipping `soffset' bytes at the
- * beginning of src and adds individual vectors from src to
- * dst copies up to `sbytes' bytes total, or up to the end
- * of src if it comes first.  This way, it is okay to specify
- * very large value for `sbytes' to indicate "up to the end
- * of src".
- * Only vector pointers are processed, not the actual data buffers.
- */
-void qemu_iovec_concat(QEMUIOVector *dst,
-                       QEMUIOVector *src, size_t soffset, size_t sbytes)
-{
-    int i;
-    size_t done;
-    struct iovec *siov = src->iov;
-    assert(dst->nalloc != -1);
-    assert(src->size >= soffset);
-    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
-        if (soffset < siov[i].iov_len) {
-            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
-            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
-            done += len;
-            soffset = 0;
-        } else {
-            soffset -= siov[i].iov_len;
-        }
-    }
-    /* return done; */
-}
-
-void qemu_iovec_destroy(QEMUIOVector *qiov)
-{
-    assert(qiov->nalloc != -1);
-
-    qemu_iovec_reset(qiov);
-    g_free(qiov->iov);
-    qiov->nalloc = 0;
-    qiov->iov = NULL;
-}
-
-void qemu_iovec_reset(QEMUIOVector *qiov)
-{
-    assert(qiov->nalloc != -1);
-
-    qiov->niov = 0;
-    qiov->size = 0;
-}
-
-size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
-                         void *buf, size_t bytes)
-{
-    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
-}
-
-size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
-                           const void *buf, size_t bytes)
-{
-    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
-}
-
-size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
-                         int fillc, size_t bytes)
-{
-    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
-}
-
 /*
 * Checks if a buffer is all zeroes
 *
@@ -383,11 +280,6 @@ int qemu_parse_fd(const char *param)
    return fd;
 }

-int qemu_parse_fdset(const char *param)
-{
-    return qemu_parse_fd(param);
-}
-
 /* round down to the nearest power of 2*/
 int64_t pow2floor(int64_t value)
 {
--- a/def-helper.h
+++ b/def-helper.h
@@ -128,6 +128,8 @@
 #define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) \
    DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5)

+/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
+
 #endif /* DEF_HELPER_H */

 #ifndef GEN_HELPER
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -22,6 +22,7 @@ CONFIG_ADS7846=y
 CONFIG_MAX111X=y
 CONFIG_SSI=y
 CONFIG_SSI_SD=y
+CONFIG_SSI_M25P80=y
 CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_DS1338=y
--- a/default-configs/microblaze-softmmu.mak
+++ b/default-configs/microblaze-softmmu.mak
@@ -5,3 +5,5 @@ CONFIG_PFLASH_CFI01=y
 CONFIG_SERIAL=y
 CONFIG_XILINX=y
 CONFIG_XILINX_AXI=y
+CONFIG_SSI=y
+CONFIG_SSI_M25P80=y
--- a/default-configs/microblazeel-softmmu.mak
+++ b/default-configs/microblazeel-softmmu.mak
@@ -5,3 +5,5 @@ CONFIG_PFLASH_CFI01=y
 CONFIG_SERIAL=y
 CONFIG_XILINX=y
 CONFIG_XILINX_AXI=y
+CONFIG_SSI=y
+CONFIG_SSI_M25P80=y
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -19,3 +19,5 @@ CONFIG_IDE_PCI=y
 CONFIG_AHCI=y
 CONFIG_ESP=y
 CONFIG_ESP_PCI=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_PCI=y
--- a/default-configs/sparc64-softmmu.mak
+++ b/default-configs/sparc64-softmmu.mak
@@ -6,7 +6,6 @@ CONFIG_M48T59=y
 CONFIG_PTIMER=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
-CONFIG_VGA_CIRRUS=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_PCKBD=y
--- a/device_tree.c
+++ b/device_tree.c
@@ -304,3 +304,18 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
    g_free(dupname);
    return retval;
 }
+
+void qemu_devtree_dumpdtb(void *fdt, int size)
+{
+    QemuOpts *machine_opts;
+
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
+        if (dumpdtb) {
+            /* Dump the dtb to a file and quit */
+            exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
+        }
+    }
+
+}
--- a/device_tree.h
+++ b/device_tree.h
@@ -49,4 +49,6 @@ int qemu_devtree_add_subnode(void *fdt, const char *name);
                             sizeof(qdt_tmp));                                \
    } while (0)

+void qemu_devtree_dumpdtb(void *fdt, int size);
+
 #endif /* __DEVICE_TREE_H__ */
--- a/disas.c
+++ b/disas.c
@@ -7,6 +7,11 @@
 #include "cpu.h"
 #include "disas.h"

+typedef struct CPUDebug {
+    struct disassemble_info info;
+    CPUArchState *env;
+} CPUDebug;
+
 /* Filled in by elfload.c.  Simplistic, but will do for now. */
 struct syminfo *syminfos = NULL;

@@ -32,7 +37,9 @@ target_read_memory (bfd_vma memaddr,
                    int length,
                    struct disassemble_info *info)
 {
-    cpu_memory_rw_debug(cpu_single_env, memaddr, myaddr, length, 0);
+    CPUDebug *s = container_of(info, CPUDebug, info);
+
+    cpu_memory_rw_debug(s->env, memaddr, myaddr, length, 0);
    return 0;
 }

@@ -158,32 +165,35 @@ print_insn_thumb1(bfd_vma pc, disassemble_info *info)
    ppc  - nonzero means little endian
    other targets - unused
 */
-void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
+void target_disas(FILE *out, CPUArchState *env, target_ulong code,
+                  target_ulong size, int flags)
 {
    target_ulong pc;
    int count;
-    struct disassemble_info disasm_info;
+    CPUDebug s;
    int (*print_insn)(bfd_vma pc, disassemble_info *info);

-    INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
+    INIT_DISASSEMBLE_INFO(s.info, out, fprintf);

-    disasm_info.read_memory_func = target_read_memory;
-    disasm_info.buffer_vma = code;
-    disasm_info.buffer_length = size;
-    disasm_info.print_address_func = generic_print_target_address;
+    s.env = env;
+    s.info.read_memory_func = target_read_memory;
+    s.info.buffer_vma = code;
+    s.info.buffer_length = size;
+    s.info.print_address_func = generic_print_target_address;

 #ifdef TARGET_WORDS_BIGENDIAN
-    disasm_info.endian = BFD_ENDIAN_BIG;
+    s.info.endian = BFD_ENDIAN_BIG;
 #else
-    disasm_info.endian = BFD_ENDIAN_LITTLE;
+    s.info.endian = BFD_ENDIAN_LITTLE;
 #endif
 #if defined(TARGET_I386)
-    if (flags == 2)
-        disasm_info.mach = bfd_mach_x86_64;
-    else if (flags == 1)
-        disasm_info.mach = bfd_mach_i386_i8086;
-    else
-        disasm_info.mach = bfd_mach_i386_i386;
+    if (flags == 2) {
+        s.info.mach = bfd_mach_x86_64;
+    } else if (flags == 1) {
+        s.info.mach = bfd_mach_i386_i8086;
+    } else {
+        s.info.mach = bfd_mach_i386_i386;
+    }
    print_insn = print_insn_i386;
 #elif defined(TARGET_ARM)
    if (flags & 1) {
@@ -193,27 +203,28 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
    }
    if (flags & 2) {
 #ifdef TARGET_WORDS_BIGENDIAN
-        disasm_info.endian = BFD_ENDIAN_LITTLE;
+        s.info.endian = BFD_ENDIAN_LITTLE;
 #else
-        disasm_info.endian = BFD_ENDIAN_BIG;
+        s.info.endian = BFD_ENDIAN_BIG;
 #endif
    }
 #elif defined(TARGET_SPARC)
    print_insn = print_insn_sparc;
 #ifdef TARGET_SPARC64
-    disasm_info.mach = bfd_mach_sparc_v9b;
+    s.info.mach = bfd_mach_sparc_v9b;
 #endif
 #elif defined(TARGET_PPC)
-    if (flags >> 16)
-        disasm_info.endian = BFD_ENDIAN_LITTLE;
+    if (flags >> 16) {
+        s.info.endian = BFD_ENDIAN_LITTLE;
+    }
    if (flags & 0xFFFF) {
        /* If we have a precise definitions of the instructions set, use it */
-        disasm_info.mach = flags & 0xFFFF;
+        s.info.mach = flags & 0xFFFF;
    } else {
 #ifdef TARGET_PPC64
-        disasm_info.mach = bfd_mach_ppc64;
+        s.info.mach = bfd_mach_ppc64;
 #else
-        disasm_info.mach = bfd_mach_ppc;
+        s.info.mach = bfd_mach_ppc;
 #endif
    }
    print_insn = print_insn_ppc;
@@ -226,27 +237,27 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
    print_insn = print_insn_little_mips;
 #endif
 #elif defined(TARGET_SH4)
-    disasm_info.mach = bfd_mach_sh4;
+    s.info.mach = bfd_mach_sh4;
    print_insn = print_insn_sh;
 #elif defined(TARGET_ALPHA)
-    disasm_info.mach = bfd_mach_alpha_ev6;
+    s.info.mach = bfd_mach_alpha_ev6;
    print_insn = print_insn_alpha;
 #elif defined(TARGET_CRIS)
    if (flags != 32) {
-        disasm_info.mach = bfd_mach_cris_v0_v10;
+        s.info.mach = bfd_mach_cris_v0_v10;
        print_insn = print_insn_crisv10;
    } else {
-        disasm_info.mach = bfd_mach_cris_v32;
+        s.info.mach = bfd_mach_cris_v32;
        print_insn = print_insn_crisv32;
    }
 #elif defined(TARGET_S390X)
-    disasm_info.mach = bfd_mach_s390_64;
+    s.info.mach = bfd_mach_s390_64;
    print_insn = print_insn_s390;
 #elif defined(TARGET_MICROBLAZE)
-    disasm_info.mach = bfd_arch_microblaze;
+    s.info.mach = bfd_arch_microblaze;
    print_insn = print_insn_microblaze;
 #elif defined(TARGET_LM32)
-    disasm_info.mach = bfd_mach_lm32;
+    s.info.mach = bfd_mach_lm32;
    print_insn = print_insn_lm32;
 #else
    fprintf(out, "0x" TARGET_FMT_lx
@@ -256,14 +267,14 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)

    for (pc = code; size > 0; pc += count, size -= count) {
 	fprintf(out, "0x" TARGET_FMT_lx ":  ", pc);
-	count = print_insn(pc, &disasm_info);
+	count = print_insn(pc, &s.info);
 #if 0
        {
            int i;
            uint8_t b;
            fprintf(out, " {");
            for(i = 0; i < count; i++) {
-                target_read_memory(pc + i, &b, 1, &disasm_info);
+                target_read_memory(pc + i, &b, 1, &s.info);
                fprintf(out, " %02x", b);
            }
            fprintf(out, " }");
@@ -287,28 +298,28 @@ void disas(FILE *out, void *code, unsigned long size)
 {
    uintptr_t pc;
    int count;
-    struct disassemble_info disasm_info;
+    CPUDebug s;
    int (*print_insn)(bfd_vma pc, disassemble_info *info);

-    INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
-    disasm_info.print_address_func = generic_print_host_address;
+    INIT_DISASSEMBLE_INFO(s.info, out, fprintf);
+    s.info.print_address_func = generic_print_host_address;

-    disasm_info.buffer = code;
-    disasm_info.buffer_vma = (uintptr_t)code;
-    disasm_info.buffer_length = size;
+    s.info.buffer = code;
+    s.info.buffer_vma = (uintptr_t)code;
+    s.info.buffer_length = size;

 #ifdef HOST_WORDS_BIGENDIAN
-    disasm_info.endian = BFD_ENDIAN_BIG;
+    s.info.endian = BFD_ENDIAN_BIG;
 #else
-    disasm_info.endian = BFD_ENDIAN_LITTLE;
+    s.info.endian = BFD_ENDIAN_LITTLE;
 #endif
 #if defined(CONFIG_TCG_INTERPRETER)
    print_insn = print_insn_tci;
 #elif defined(__i386__)
-    disasm_info.mach = bfd_mach_i386_i386;
+    s.info.mach = bfd_mach_i386_i386;
    print_insn = print_insn_i386;
 #elif defined(__x86_64__)
-    disasm_info.mach = bfd_mach_x86_64;
+    s.info.mach = bfd_mach_x86_64;
    print_insn = print_insn_i386;
 #elif defined(_ARCH_PPC)
    print_insn = print_insn_ppc;
@@ -316,9 +327,7 @@ void disas(FILE *out, void *code, unsigned long size)
    print_insn = print_insn_alpha;
 #elif defined(__sparc__)
    print_insn = print_insn_sparc;
-#if defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
-    disasm_info.mach = bfd_mach_sparc_v9b;
-#endif
+    s.info.mach = bfd_mach_sparc_v9b;
 #elif defined(__arm__)
    print_insn = print_insn_arm;
 #elif defined(__MIPSEB__)
@@ -340,7 +349,7 @@ void disas(FILE *out, void *code, unsigned long size)
 #endif
    for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) {
        fprintf(out, "0x%08" PRIxPTR ":  ", pc);
-	count = print_insn(pc, &disasm_info);
+        count = print_insn(pc, &s.info);
 	fprintf(out, "\n");
 	if (count < 0)
 	    break;
@@ -368,16 +377,17 @@ const char *lookup_symbol(target_ulong orig_addr)
 #include "monitor.h"

 static int monitor_disas_is_physical;
-static CPUArchState *monitor_disas_env;

 static int
 monitor_read_memory (bfd_vma memaddr, bfd_byte *myaddr, int length,
                     struct disassemble_info *info)
 {
+    CPUDebug *s = container_of(info, CPUDebug, info);
+
    if (monitor_disas_is_physical) {
        cpu_physical_memory_read(memaddr, myaddr, length);
    } else {
-        cpu_memory_rw_debug(monitor_disas_env, memaddr,myaddr, length, 0);
+        cpu_memory_rw_debug(s->env, memaddr,myaddr, length, 0);
    }
    return 0;
 }
@@ -396,30 +406,31 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
                   target_ulong pc, int nb_insn, int is_physical, int flags)
 {
    int count, i;
-    struct disassemble_info disasm_info;
+    CPUDebug s;
    int (*print_insn)(bfd_vma pc, disassemble_info *info);

-    INIT_DISASSEMBLE_INFO(disasm_info, (FILE *)mon, monitor_fprintf);
+    INIT_DISASSEMBLE_INFO(s.info, (FILE *)mon, monitor_fprintf);

-    monitor_disas_env = env;
+    s.env = env;
    monitor_disas_is_physical = is_physical;
-    disasm_info.read_memory_func = monitor_read_memory;
-    disasm_info.print_address_func = generic_print_target_address;
+    s.info.read_memory_func = monitor_read_memory;
+    s.info.print_address_func = generic_print_target_address;

-    disasm_info.buffer_vma = pc;
+    s.info.buffer_vma = pc;

 #ifdef TARGET_WORDS_BIGENDIAN
-    disasm_info.endian = BFD_ENDIAN_BIG;
+    s.info.endian = BFD_ENDIAN_BIG;
 #else
-    disasm_info.endian = BFD_ENDIAN_LITTLE;
+    s.info.endian = BFD_ENDIAN_LITTLE;
 #endif
 #if defined(TARGET_I386)
-    if (flags == 2)
-        disasm_info.mach = bfd_mach_x86_64;
-    else if (flags == 1)
-        disasm_info.mach = bfd_mach_i386_i8086;
-    else
-        disasm_info.mach = bfd_mach_i386_i386;
+    if (flags == 2) {
+        s.info.mach = bfd_mach_x86_64;
+    } else if (flags == 1) {
+        s.info.mach = bfd_mach_i386_i8086;
+    } else {
+        s.info.mach = bfd_mach_i386_i386;
+    }
    print_insn = print_insn_i386;
 #elif defined(TARGET_ARM)
    print_insn = print_insn_arm;
@@ -428,13 +439,13 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
 #elif defined(TARGET_SPARC)
    print_insn = print_insn_sparc;
 #ifdef TARGET_SPARC64
-    disasm_info.mach = bfd_mach_sparc_v9b;
+    s.info.mach = bfd_mach_sparc_v9b;
 #endif
 #elif defined(TARGET_PPC)
 #ifdef TARGET_PPC64
-    disasm_info.mach = bfd_mach_ppc64;
+    s.info.mach = bfd_mach_ppc64;
 #else
-    disasm_info.mach = bfd_mach_ppc;
+    s.info.mach = bfd_mach_ppc;
 #endif
    print_insn = print_insn_ppc;
 #elif defined(TARGET_M68K)
@@ -446,13 +457,13 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
    print_insn = print_insn_little_mips;
 #endif
 #elif defined(TARGET_SH4)
-    disasm_info.mach = bfd_mach_sh4;
+    s.info.mach = bfd_mach_sh4;
    print_insn = print_insn_sh;
 #elif defined(TARGET_S390X)
-    disasm_info.mach = bfd_mach_s390_64;
+    s.info.mach = bfd_mach_s390_64;
    print_insn = print_insn_s390;
 #elif defined(TARGET_LM32)
-    disasm_info.mach = bfd_mach_lm32;
+    s.info.mach = bfd_mach_lm32;
    print_insn = print_insn_lm32;
 #else
    monitor_printf(mon, "0x" TARGET_FMT_lx
@@ -462,7 +473,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,

    for(i = 0; i < nb_insn; i++) {
 	monitor_printf(mon, "0x" TARGET_FMT_lx ":  ", pc);
-	count = print_insn(pc, &disasm_info);
+        count = print_insn(pc, &s.info);
 	monitor_printf(mon, "\n");
 	if (count < 0)
 	    break;
--- a/disas.h
+++ b/disas.h
@@ -6,7 +6,8 @@
 #ifdef NEED_CPU_H
 /* Disassemble this for me please... (debugging). */
 void disas(FILE *out, void *code, unsigned long size);
-void target_disas(FILE *out, target_ulong code, target_ulong size, int flags);
+void target_disas(FILE *out, CPUArchState *env, target_ulong code,
+                  target_ulong size, int flags);

 void monitor_disas(Monitor *mon, CPUArchState *env,
                   target_ulong pc, int nb_insn, int is_physical, int flags);
@@ -22,7 +23,7 @@ struct elf64_sym;
 #if defined(CONFIG_USER_ONLY)
 typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
 #else
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_phys_addr_t orig_addr);
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
 #endif

 struct syminfo {
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,7 +14,8 @@

 /* #define DEBUG_IOMMU */

-static void do_dma_memory_set(dma_addr_t addr, uint8_t c, dma_addr_t len)
+static void do_dma_memory_set(AddressSpace *as,
+                              dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
 #define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
@@ -23,7 +24,7 @@ static void do_dma_memory_set(dma_addr_t addr, uint8_t c, dma_addr_t len)
    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-        cpu_physical_memory_rw(addr, fillbuf, l, true);
+        address_space_rw(as, addr, fillbuf, l, true);
        len -= l;
        addr += l;
    }
@@ -36,7 +37,7 @@ int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
    if (dma_has_iommu(dma)) {
        return iommu_dma_memory_set(dma, addr, c, len);
    }
-    do_dma_memory_set(addr, c, len);
+    do_dma_memory_set(dma->as, addr, c, len);

    return 0;
 }
@@ -194,7 +195,7 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb)
    dma_complete(dbs, 0);
 }

-static AIOPool dma_aio_pool = {
+static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel             = dma_aio_cancel,
 };
@@ -204,7 +205,7 @@ BlockDriverAIOCB *dma_bdrv_io(
    DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
    void *opaque, DMADirection dir)
 {
-    DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, bs, cb, opaque);

    trace_dma_bdrv_io(dbs, bs, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));

@@ -280,7 +281,7 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
                            DMADirection dir)
 {
-    target_phys_addr_t paddr, plen;
+    hwaddr paddr, plen;

 #ifdef DEBUG_IOMMU
    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
@@ -307,7 +308,7 @@ bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
 int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
                        void *buf, dma_addr_t len, DMADirection dir)
 {
-    target_phys_addr_t paddr, plen;
+    hwaddr paddr, plen;
    int err;

 #ifdef DEBUG_IOMMU
@@ -332,8 +333,7 @@ int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
            plen = len;
        }

-        cpu_physical_memory_rw(paddr, buf, plen,
-                               dir == DMA_DIRECTION_FROM_DEVICE);
+        address_space_rw(dma->as, paddr, buf, plen, dir == DMA_DIRECTION_FROM_DEVICE);

        len -= plen;
        addr += plen;
@@ -346,7 +346,7 @@ int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
 int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
                         dma_addr_t len)
 {
-    target_phys_addr_t paddr, plen;
+    hwaddr paddr, plen;
    int err;

 #ifdef DEBUG_IOMMU
@@ -366,7 +366,7 @@ int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
            plen = len;
        }

-        do_dma_memory_set(paddr, c, plen);
+        do_dma_memory_set(dma->as, paddr, c, plen);

        len -= plen;
        addr += plen;
@@ -375,13 +375,14 @@ int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
    return 0;
 }

-void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
+void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
                      DMAMapFunc map, DMAUnmapFunc unmap)
 {
 #ifdef DEBUG_IOMMU
    fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
            dma, translate, map, unmap);
 #endif
+    dma->as = as;
    dma->translate = translate;
    dma->map = map;
    dma->unmap = unmap;
@@ -391,7 +392,7 @@ void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
                           DMADirection dir)
 {
    int err;
-    target_phys_addr_t paddr, plen;
+    hwaddr paddr, plen;
    void *buf;

    if (dma->map) {
@@ -407,14 +408,13 @@ void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
    /*
     * If this is true, the virtual region is contiguous,
     * but the translated physical region isn't. We just
-     * clamp *len, much like cpu_physical_memory_map() does.
+     * clamp *len, much like address_space_map() does.
     */
    if (plen < *len) {
        *len = plen;
    }

-    buf = cpu_physical_memory_map(paddr, &plen,
-                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    buf = address_space_map(dma->as, paddr, &plen, dir == DMA_DIRECTION_FROM_DEVICE);
    *len = plen;

    return buf;
@@ -428,8 +428,7 @@ void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
        return;
    }

-    cpu_physical_memory_unmap(buffer, len,
-                              dir == DMA_DIRECTION_FROM_DEVICE,
-                              access_len);
+    address_space_unmap(dma->as, buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
+                        access_len);

 }
--- a/dma.h
+++ b/dma.h
@@ -11,6 +11,7 @@
 #define DMA_H

 #include <stdio.h>
+#include "memory.h"
 #include "hw/hw.h"
 #include "block.h"
 #include "kvm.h"
@@ -31,7 +32,7 @@ struct QEMUSGList {
    DMAContext *dma;
 };

-#if defined(TARGET_PHYS_ADDR_BITS)
+#ifndef CONFIG_USER_ONLY

 /*
 * When an IOMMU is present, bus addresses become distinct from
@@ -47,8 +48,8 @@ typedef uint64_t dma_addr_t;

 typedef int DMATranslateFunc(DMAContext *dma,
                             dma_addr_t addr,
-                             target_phys_addr_t *paddr,
-                             target_phys_addr_t *len,
+                             hwaddr *paddr,
+                             hwaddr *len,
                             DMADirection dir);
 typedef void* DMAMapFunc(DMAContext *dma,
                         dma_addr_t addr,
@@ -61,11 +62,17 @@ typedef void DMAUnmapFunc(DMAContext *dma,
                          dma_addr_t access_len);

 struct DMAContext {
+    AddressSpace *as;
    DMATranslateFunc *translate;
    DMAMapFunc *map;
    DMAUnmapFunc *unmap;
 };

+/* A global DMA context corresponding to the address_space_memory
+ * AddressSpace, for sysbus devices which do DMA.
+ */
+extern DMAContext dma_context_memory;
+
 static inline void dma_barrier(DMAContext *dma, DMADirection dir)
 {
    /*
@@ -93,7 +100,7 @@ static inline void dma_barrier(DMAContext *dma, DMADirection dir)

 static inline bool dma_has_iommu(DMAContext *dma)
 {
-    return !!dma;
+    return dma && dma->translate;
 }

 /* Checks that the given range of addresses is valid for DMA.  This is
@@ -120,8 +127,7 @@ static inline int dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
 {
    if (!dma_has_iommu(dma)) {
        /* Fast-path for no IOMMU */
-        cpu_physical_memory_rw(addr, buf, len,
-                               dir == DMA_DIRECTION_FROM_DEVICE);
+        address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
        return 0;
    } else {
        return iommu_dma_memory_rw(dma, addr, buf, len, dir);
@@ -176,11 +182,10 @@ static inline void *dma_memory_map(DMAContext *dma,
                                   DMADirection dir)
 {
    if (!dma_has_iommu(dma)) {
-        target_phys_addr_t xlen = *len;
+        hwaddr xlen = *len;
        void *p;

-        p = cpu_physical_memory_map(addr, &xlen,
-                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
        *len = xlen;
        return p;
    } else {
@@ -196,9 +201,8 @@ static inline void dma_memory_unmap(DMAContext *dma,
                                    DMADirection dir, dma_addr_t access_len)
 {
    if (!dma_has_iommu(dma)) {
-        cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                  dir == DMA_DIRECTION_FROM_DEVICE,
-                                  access_len);
+        address_space_unmap(dma->as, buffer, (hwaddr)len,
+                            dir == DMA_DIRECTION_FROM_DEVICE, access_len);
    } else {
        iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
    }
@@ -242,7 +246,7 @@ DEFINE_LDST_DMA(q, q, 64, be);

 #undef DEFINE_LDST_DMA

-void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
+void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
                      DMAMapFunc map, DMAUnmapFunc unmap);

 struct ScatterGatherEntry {
--- a/docs/qemupciserial.inf
+++ b/docs/qemupciserial.inf
@@ -0,0 +1,109 @@
+; qemupciserial.inf for QEMU, based on MSPORTS.INF
+
+; The driver itself is shipped with Windows (serial.sys).  This is
+; just a inf file to tell windows which pci id the serial pci card
+; emulated by qemu has, and to apply a name tag to it which windows
+; will show in the device manager.
+
+; Installing the driver: Go to device manager.  You should find a "pci
+; serial card" tagged with a yellow question mark.  Open properties.
+; Pick "update driver".  Then "select driver manually".  Pick "Ports
+; (Com+Lpt)" from the list.  Click "Have a disk".  Select this file.
+; Procedure may vary a bit depending on the windows version.
+
+; FIXME: This file covers the single port version only.
+
+[Version]
+Signature="$CHICAGO$"
+Class=Ports
+ClassGuid={4D36E978-E325-11CE-BFC1-08002BE10318}
+Provider=%QEMU%
+DriverVer=09/24/2012,1.3.0
+
+[SourceDisksNames]
+3426=windows cd
+
+[SourceDisksFiles]
+serial.sys 		= 3426
+serenum.sys 		= 3426
+
+[DestinationDirs]
+DefaultDestDir  = 11        ;LDID_SYS
+ComPort.NT.Copy = 12        ;DIRID_DRIVERS
+SerialEnumerator.NT.Copy=12 ;DIRID_DRIVERS
+
+; Drivers
+;----------------------------------------------------------
+[Manufacturer]
+%QEMU%=QEMU,NTx86
+
+[QEMU.NTx86]
+%QEMU-PCI_SERIAL.DeviceDesc% = ComPort, "PCI\VEN_1b36&DEV_0002&CC_0700"
+
+; COM sections
+;----------------------------------------------------------
+[ComPort.AddReg]
+HKR,,PortSubClass,1,01
+
+[ComPort.NT]
+AddReg=ComPort.AddReg, ComPort.NT.AddReg
+LogConfig=caa
+SyssetupPnPFlags = 1
+
+[ComPort.NT.HW]
+AddReg=ComPort.NT.HW.AddReg
+
+[ComPort.NT.AddReg]
+HKR,,EnumPropPages32,,"MsPorts.dll,SerialPortPropPageProvider"
+
+[ComPort.NT.HW.AddReg]
+HKR,,"UpperFilters",0x00010000,"serenum"
+
+;-------------- Service installation
+; Port Driver (function driver for this device)
+[ComPort.NT.Services]
+AddService = Serial, 0x00000002, Serial_Service_Inst, Serial_EventLog_Inst
+AddService = Serenum,,Serenum_Service_Inst
+
+; -------------- Serial Port Driver install sections
+[Serial_Service_Inst]
+DisplayName    = %Serial.SVCDESC%
+ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
+StartType      = 1               ; SERVICE_SYSTEM_START (this driver may do detection)
+ErrorControl   = 0               ; SERVICE_ERROR_IGNORE
+ServiceBinary  = %12%\serial.sys
+LoadOrderGroup = Extended base
+
+; -------------- Serenum Driver install section
+[Serenum_Service_Inst]
+DisplayName    = %Serenum.SVCDESC%
+ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
+StartType      = 3               ; SERVICE_DEMAND_START
+ErrorControl   = 1               ; SERVICE_ERROR_NORMAL
+ServiceBinary  = %12%\serenum.sys
+LoadOrderGroup = PNP Filter
+
+[Serial_EventLog_Inst]
+AddReg = Serial_EventLog_AddReg
+
+[Serial_EventLog_AddReg]
+HKR,,EventMessageFile,0x00020000,"%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\serial.sys"
+HKR,,TypesSupported,0x00010001,7
+
+; The following sections are COM port resource configs.
+; Section name format means:
+; Char 1 = c (COM port)
+; Char 2 = I/O config: 1 (3f8), 2 (2f8), 3 (3e8), 4 (2e8), a (any)
+; Char 3 = IRQ config: #, a (any)
+
+[caa]                   ; Any base, any IRQ
+ConfigPriority=HARDRECONFIG
+IOConfig=8@100-ffff%fff8(3ff::)
+IRQConfig=S:3,4,5,7,9,10,11,12,14,15
+
+[Strings]
+QEMU="QEMU"
+QEMU-PCI_SERIAL.DeviceDesc="QEMU Serial PCI Card"
+
+Serial.SVCDESC   = "Serial port driver"
+Serenum.SVCDESC = "Serenum Filter Driver"
--- a/docs/specs/pci-serial.txt
+++ b/docs/specs/pci-serial.txt
@@ -0,0 +1,34 @@
+
+QEMU pci serial devices
+=======================
+
+There is one single-port variant and two muliport-variants.  Linux
+guests out-of-the box with all cards.  There is a Windows inf file
+(docs/qemupciserial.inf) to setup the single-port card in Windows
+guests.
+
+
+single-port card
+----------------
+
+Name:   pci-serial
+PCI ID: 1b36:0002
+
+PCI Region 0:
+   IO bar, 8 bytes long, with the 16550 uart mapped to it.
+   Interrupt is wired to pin A.
+
+
+multiport cards
+---------------
+
+Name:   pci-serial-2x
+PCI ID: 1b36:0003
+
+Name:   pci-serial-4x
+PCI ID: 1b36:0004
+
+PCI Region 0:
+   IO bar, with two/four 16550 uart mapped after each other.
+   The first is at offset 0, second at offset 8, ...
+   Interrupt is wired to pin A.
--- a/docs/specs/ppc-spapr-hcalls.txt
+++ b/docs/specs/ppc-spapr-hcalls.txt
@@ -31,7 +31,7 @@ Arguments:

 Returns:

-  H_SUCCESS   : Successully called the RTAS function (RTAS result
+  H_SUCCESS   : Successfully called the RTAS function (RTAS result
                will have been stored in the parameter block)
  H_PARAMETER : Unknown token

--- a/docs/specs/standard-vga.txt
+++ b/docs/specs/standard-vga.txt
@@ -0,0 +1,65 @@
+
+QEMU Standard VGA
+=================
+
+Exists in two variants, for isa and pci.
+
+command line switches:
+    -vga std            [ picks isa for -M isapc, otherwise pci ]
+    -device VGA         [ pci variant ]
+    -device isa-vga     [ isa variant ]
+
+
+PCI spec
+--------
+
+Applies to the pci variant only for obvious reasons.
+
+PCI ID: 1234:1111
+
+PCI Region 0:
+   Framebuffer memory, 16 MB in size (by default).
+   Size is tunable via vga_mem_mb property.
+
+PCI Region 1:
+   Reserved (so we have the option to make the framebuffer bar 64bit).
+
+PCI Region 2:
+   MMIO bar, 4096 bytes in size (qemu 1.3+)
+
+PCI ROM Region:
+   Holds the vgabios (qemu 0.14+).
+
+
+IO ports used
+-------------
+
+03c0 - 03df : standard vga ports
+01ce        : bochs vbe interface index port
+01cf        : bochs vbe interface data port (x86 only)
+01d0        : bochs vbe interface data port
+
+
+Memory regions used
+-------------------
+
+0xe0000000 : Framebuffer memory, isa variant only.
+
+The pci variant used to mirror the framebuffer bar here, qemu 0.14+
+stops doing that (except when in -M pc-$old compat mode).
+
+
+MMIO area spec
+--------------
+
+Likewise applies to the pci variant only for obvious reasons.
+
+0000 - 03ff : reserved, for possible virtio extension.
+0400 - 041f : vga ioports (0x3c0 -> 0x3df), remapped 1:1.
+              word access is supported, bytes are written
+              in little endia order (aka index port first),
+              so indexed registers can be updated with a
+              single mmio write (and thus only one vmexit).
+0500 - 0515 : bochs dispi interface registers, mapped flat
+              without index/data ports.  Use (index << 1)
+              as offset for (16bit) register access.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -139,6 +139,10 @@ having a common prefix in a batch. For example, virtio-blk trace events could
 be enabled using:
  trace-event virtio_blk_* on

+If a line in the "-trace events=<file>" file begins with a '-', the trace event
+will be disabled instead of enabled.  This is useful when a wildcard was used
+to enable an entire family of events but one noisy event needs to be disabled.
+
 == Trace backends ==

 The "tracetool" script automates tedious trace event code generation and also
@@ -185,15 +189,6 @@ records the char* pointer value instead of the string that is pointed to.

 ==== Monitor commands ====

-* info trace
-  Display the contents of trace buffer.  This command dumps the trace buffer
-  with simple formatting.  For full pretty-printing, use the simpletrace.py
-  script on a binary trace file.
-
-  The trace buffer is written into until full.  The full trace buffer is
-  flushed and emptied.  This means the 'info trace' will display few or no
-  entries if the buffer has just been flushed.
-
 * trace-file on|off|flush|set <path>
  Enable/disable/flush the trace file or set the trace file name.

--- a/docs/usb2.txt
+++ b/docs/usb2.txt
@@ -58,11 +58,11 @@ try ...
 xhci controller support
 -----------------------

-There also is xhci host controller support available.  It got alot
+There is also xhci host controller support available.  It got a lot
 less testing than ehci and there are a bunch of known limitations, so
 ehci may work better for you.  On the other hand the xhci hardware
 design is much more virtualization-friendly, thus xhci emulation uses
-less ressources (especially cpu).  If you wanna give xhci a try
+less resources (especially cpu).  If you want to give xhci a try
 use this to add the host controller ...

    qemu -device nec-usb-xhci,id=xhci
--- a/dump.c
+++ b/dump.c
@@ -15,7 +15,7 @@
 #include "elf.h"
 #include "cpu.h"
 #include "cpu-all.h"
-#include "targphys.h"
+#include "hwaddr.h"
 #include "monitor.h"
 #include "kvm.h"
 #include "dump.h"
@@ -66,7 +66,7 @@ typedef struct DumpState {
    bool have_section;
    bool resume;
    size_t note_size;
-    target_phys_addr_t memory_offset;
+    hwaddr memory_offset;
    int fd;

    RAMBlock *block;
@@ -100,18 +100,11 @@ static void dump_error(DumpState *s, const char *reason)
 static int fd_write_vmcore(void *buf, size_t size, void *opaque)
 {
    DumpState *s = opaque;
-    int fd = s->fd;
-    size_t writen_size;
+    size_t written_size;

-    /* The fd may be passed from user, and it can be non-blocked */
-    while (size) {
-        writen_size = qemu_write_full(fd, buf, size);
-        if (writen_size != size && errno != EAGAIN) {
-            return -1;
-        }
-
-        buf += writen_size;
-        size -= writen_size;
+    written_size = qemu_write_full(s->fd, buf, size);
+    if (written_size != size) {
+        return -1;
    }

    return 0;
@@ -194,7 +187,7 @@ static int write_elf32_header(DumpState *s)
 }

 static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, target_phys_addr_t offset)
+                            int phdr_index, hwaddr offset)
 {
    Elf64_Phdr phdr;
    int ret;
@@ -223,7 +216,7 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
 }

 static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
-                            int phdr_index, target_phys_addr_t offset)
+                            int phdr_index, hwaddr offset)
 {
    Elf32_Phdr phdr;
    int ret;
@@ -255,7 +248,7 @@ static int write_elf64_note(DumpState *s)
 {
    Elf64_Phdr phdr;
    int endian = s->dump_info.d_endian;
-    target_phys_addr_t begin = s->memory_offset - s->note_size;
+    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
@@ -303,7 +296,7 @@ static int write_elf64_notes(DumpState *s)

 static int write_elf32_note(DumpState *s)
 {
-    target_phys_addr_t begin = s->memory_offset - s->note_size;
+    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int endian = s->dump_info.d_endian;
    int ret;
@@ -421,11 +414,11 @@ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
 }

 /* get the memory's offset in the vmcore */
-static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
+static hwaddr get_offset(hwaddr phys_addr,
                                     DumpState *s)
 {
    RAMBlock *block;
-    target_phys_addr_t offset = s->memory_offset;
+    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    if (s->has_filter) {
@@ -470,7 +463,7 @@ static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,

 static int write_elf_loads(DumpState *s)
 {
-    target_phys_addr_t offset;
+    hwaddr offset;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    int ret;
@@ -836,9 +829,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,

 #if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
-        fd = monitor_get_fd(cur_mon, p);
+        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
-            error_set(errp, QERR_FD_NOT_FOUND, p);
            return;
        }
    }
--- a/dyngen-exec.h
+++ b/dyngen-exec.h
@@ -1,70 +0,0 @@
-/*
- *  dyngen defines for micro operation code
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#if !defined(__DYNGEN_EXEC_H__)
-#define __DYNGEN_EXEC_H__
-
-#if defined(CONFIG_TCG_INTERPRETER)
-/* The TCG interpreter does not need a special register AREG0,
- * but it is possible to use one by defining AREG0.
- * On i386, register edi seems to work. */
-/* Run without special register AREG0 or use a value defined elsewhere. */
-#elif defined(__i386__)
-#define AREG0 "ebp"
-#elif defined(__x86_64__)
-#define AREG0 "r14"
-#elif defined(_ARCH_PPC)
-#define AREG0 "r27"
-#elif defined(__arm__)
-#define AREG0 "r6"
-#elif defined(__hppa__)
-#define AREG0 "r17"
-#elif defined(__mips__)
-#define AREG0 "s0"
-#elif defined(__sparc__)
-#ifdef CONFIG_SOLARIS
-#define AREG0 "g2"
-#else
-#ifdef __sparc_v9__
-#define AREG0 "g5"
-#else
-#define AREG0 "g6"
-#endif
-#endif
-#elif defined(__s390__)
-#define AREG0 "r10"
-#elif defined(__alpha__)
-/* Note $15 is the frame pointer, so anything in op-i386.c that would
-   require a frame pointer, like alloca, would probably loose.  */
-#define AREG0 "$15"
-#elif defined(__mc68000)
-#define AREG0 "%a5"
-#elif defined(__ia64__)
-#define AREG0 "r7"
-#else
-#error unsupported CPU
-#endif
-
-#if defined(AREG0)
-register CPUArchState *env asm(AREG0);
-#else
-/* TODO: Try env = cpu_single_env. */
-extern CPUArchState *env;
-#endif
-
-#endif /* !defined(__DYNGEN_EXEC_H__) */
--- a/error.c
+++ b/error.c
@@ -43,6 +43,34 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
    *errp = err;
 }

+void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
+                     const char *fmt, ...)
+{
+    Error *err;
+    char *msg1;
+    va_list ap;
+
+    if (errp == NULL) {
+        return;
+    }
+    assert(*errp == NULL);
+
+    err = g_malloc0(sizeof(*err));
+
+    va_start(ap, fmt);
+    msg1 = g_strdup_vprintf(fmt, ap);
+    if (os_errno != 0) {
+        err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
+        g_free(msg1);
+    } else {
+        err->msg = msg1;
+    }
+    va_end(ap);
+    err->err_class = err_class;
+
+    *errp = err;
+}
+
 Error *error_copy(const Error *err)
 {
    Error *err_new;
--- a/error.h
+++ b/error.h
@@ -29,6 +29,21 @@ typedef struct Error Error;
 */
 void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(3, 4);

+/**
+ * Set an indirect pointer to an error given a ErrorClass value and a
+ * printf-style human message, followed by a strerror() string if
+ * @os_error is not zero.
+ */
+void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/**
+ * Same as error_set(), but sets a generic error
+ */
+#define error_setg(err, fmt, ...) \
+    error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#define error_setg_errno(err, os_error, fmt, ...) \
+    error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+
 /**
 * Returns true if an indirect pointer to an error is pointing to a valid
 * error object.
--- a/event_notifier-posix.c
+++ b/event_notifier-posix.c
@@ -0,0 +1,120 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "event_notifier.h"
+#include "qemu-char.h"
+
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+    e->rfd = fd;
+    e->wfd = fd;
+}
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    int fds[2];
+    int ret;
+
+#ifdef CONFIG_EVENTFD
+    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+#else
+    ret = -1;
+    errno = ENOSYS;
+#endif
+    if (ret >= 0) {
+        e->rfd = e->wfd = ret;
+    } else {
+        if (errno != ENOSYS) {
+            return -errno;
+        }
+        if (qemu_pipe(fds) < 0) {
+            return -errno;
+        }
+        ret = fcntl_setfl(fds[0], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        ret = fcntl_setfl(fds[1], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        e->rfd = fds[0];
+        e->wfd = fds[1];
+    }
+    if (active) {
+        event_notifier_set(e);
+    }
+    return 0;
+
+fail:
+    close(fds[0]);
+    close(fds[1]);
+    return ret;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    if (e->rfd != e->wfd) {
+        close(e->rfd);
+    }
+    close(e->wfd);
+}
+
+int event_notifier_get_fd(EventNotifier *e)
+{
+    return e->rfd;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    return qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e);
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    static const uint64_t value = 1;
+    ssize_t ret;
+
+    do {
+        ret = write(e->wfd, &value, sizeof(value));
+    } while (ret < 0 && errno == EINTR);
+
+    /* EAGAIN is fine, a read must be pending.  */
+    if (ret < 0 && errno != EAGAIN) {
+        return -errno;
+    }
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int value;
+    ssize_t len;
+    char buffer[512];
+
+    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
+    value = 0;
+    do {
+        len = read(e->rfd, buffer, sizeof(buffer));
+        value |= (len > 0);
+    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
+
+    return value;
+}
--- a/event_notifier-win32.c
+++ b/event_notifier-win32.c
@@ -0,0 +1,59 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "event_notifier.h"
+#include "main-loop.h"
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
+    assert(e->event);
+    return 0;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    CloseHandle(e->event);
+}
+
+HANDLE event_notifier_get_handle(EventNotifier *e)
+{
+    return e->event;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    if (handler) {
+        return qemu_add_wait_object(e->event, (IOHandler *)handler, e);
+    } else {
+        qemu_del_wait_object(e->event, (IOHandler *)handler, e);
+        return 0;
+    }
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    SetEvent(e->event);
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int ret = WaitForSingleObject(e->event, 0);
+    if (ret == WAIT_OBJECT_0) {
+        ResetEvent(e->event);
+        return true;
+    }
+    return false;
+}
--- a/event_notifier.c
+++ b/event_notifier.c
@@ -1,67 +0,0 @@
-/*
- * event notifier support
- *
- * Copyright Red Hat, Inc. 2010
- *
- * Authors:
- *  Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu-common.h"
-#include "event_notifier.h"
-#include "qemu-char.h"
-
-#ifdef CONFIG_EVENTFD
-#include <sys/eventfd.h>
-#endif
-
-void event_notifier_init_fd(EventNotifier *e, int fd)
-{
-    e->fd = fd;
-}
-
-int event_notifier_init(EventNotifier *e, int active)
-{
-#ifdef CONFIG_EVENTFD
-    int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
-    if (fd < 0)
-        return -errno;
-    e->fd = fd;
-    return 0;
-#else
-    return -ENOSYS;
-#endif
-}
-
-void event_notifier_cleanup(EventNotifier *e)
-{
-    close(e->fd);
-}
-
-int event_notifier_get_fd(EventNotifier *e)
-{
-    return e->fd;
-}
-
-int event_notifier_set_handler(EventNotifier *e,
-                               EventNotifierHandler *handler)
-{
-    return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
-}
-
-int event_notifier_set(EventNotifier *e)
-{
-    uint64_t value = 1;
-    int r = write(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
-}
-
-int event_notifier_test_and_clear(EventNotifier *e)
-{
-    uint64_t value;
-    int r = read(e->fd, &value, sizeof(value));
-    return r == sizeof(value);
-}
--- a/event_notifier.h
+++ b/event_notifier.h
@@ -15,18 +15,32 @@

 #include "qemu-common.h"

+#ifdef _WIN32
+#include <windows.h>
+#endif
+
 struct EventNotifier {
-    int fd;
+#ifdef _WIN32
+    HANDLE event;
+#else
+    int rfd;
+    int wfd;
+#endif
 };

 typedef void EventNotifierHandler(EventNotifier *);

-void event_notifier_init_fd(EventNotifier *, int fd);
 int event_notifier_init(EventNotifier *, int active);
 void event_notifier_cleanup(EventNotifier *);
-int event_notifier_get_fd(EventNotifier *);
 int event_notifier_set(EventNotifier *);
 int event_notifier_test_and_clear(EventNotifier *);
 int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);

+#ifdef CONFIG_POSIX
+void event_notifier_init_fd(EventNotifier *, int fd);
+int event_notifier_get_fd(EventNotifier *);
+#else
+HANDLE event_notifier_get_handle(EventNotifier *);
+#endif
+
 #endif
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .2.0
 .3.1