virtio-input: ignore events until the guest driver is ready

Cc: qemu-stable@nongnu.org Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Merge remote-tracking branch 'remotes/sstabellini/tags/2015-10-19-tag' into staging
2015-10-20 08:53:40 +02:00 · 2015-10-19 12:13:27 +01:00 · 2015-10-19 10:16:01 +00:00 · 2015-10-19 10:16:01 +00:00 · 2015-10-19 10:52:39 +01:00 · 2015-10-19 10:06:56 +01:00
779 changed files with 34551 additions and 19858 deletions
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -0,0 +1,2 @@
+((c-mode . ((c-file-style . "stroustrup")
+	    (indent-tabs-mode . nil))))
--- a/.gitignore
+++ b/.gitignore
@@ -19,7 +19,6 @@
 /trace/generated-ust.c
 /ui/shader/texture-blit-frag.h
 /ui/shader/texture-blit-vert.h
-/libcacard/trace/generated-tracers.c
 *-timestamp
 /*-softmmu
 /*-darwin-user
@@ -34,6 +33,7 @@
 /qapi-visit.[ch]
 /qapi-event.[ch]
 /qmp-commands.h
+/qmp-introspect.[ch]
 /qmp-marshal.c
 /qemu-doc.html
 /qemu-tech.html
@@ -49,6 +49,7 @@
 /qemu-ga
 /qemu-bridge-helper
 /qemu-monitor.texi
+/qemu-monitor-info.texi
 /qmp-commands.txt
 /vscclient
 /fsdev/virtfs-proxy-helper
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,15 +54,7 @@ matrix:
  include:
    # Make check target (we only do this once)
    - env:
-        - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,
-                  i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,
-                  microblazeel-softmmu,mips-softmmu,mips64-softmmu,
-                  mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,
-                  ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,
-                  sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,
-                  unicore32-softmmu,unicore32-linux-user,
-                  lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,
-                  xtensaeb-softmmu
+        - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,microblazeel-softmmu,mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,unicore32-softmmu,unicore32-linux-user,lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
          TEST_CMD="make check"
      compiler: gcc
    # Debug related options
--- a/165
+++ b/165
@@ -77,6 +77,7 @@ S: Maintained
 F: target-alpha/
 F: hw/alpha/
 F: tests/tcg/alpha/
+F: disas/alpha.c

 ARM
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -84,6 +85,9 @@ S: Maintained
 F: target-arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
+F: disas/arm.c
+F: disas/arm-a64.cc
+F: disas/libvixl/

 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
@@ -91,6 +95,7 @@ S: Maintained
 F: target-cris/
 F: hw/cris/
 F: tests/tcg/cris/
+F: disas/cris.c

 LM32
 M: Michael Walle <michael@walle.cc>
@@ -114,6 +119,7 @@ M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
 F: target-microblaze/
 F: hw/microblaze/
+F: disas/microblaze.c

 MIPS
 M: Aurelien Jarno <aurelien@aurel32.net>
@@ -122,11 +128,13 @@ S: Maintained
 F: target-mips/
 F: hw/mips/
 F: tests/tcg/mips/
+F: disas/mips.c

 Moxie
 M: Anthony Green <green@moxielogic.com>
 S: Maintained
 F: target-moxie/
+F: disas/moxie.c

 OpenRISC
 M: Jia Liu <proljc@gmail.com>
@@ -141,6 +149,7 @@ L: qemu-ppc@nongnu.org
 S: Maintained
 F: target-ppc/
 F: hw/ppc/
+F: disas/ppc.c

 S390
 M: Richard Henderson <rth@twiddle.net>
@@ -148,12 +157,14 @@ M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target-s390x/
 F: hw/s390x/
+F: disas/s390.c

 SH4
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
 F: target-sh4/
 F: hw/sh4/
+F: disas/sh4.c

 SPARC
 M: Blue Swirl <blauwirbel@gmail.com>
@@ -162,6 +173,7 @@ S: Maintained
 F: target-sparc/
 F: hw/sparc/
 F: hw/sparc64/
+F: disas/sparc.c

 UniCore32
 M: Guan Xuetao <gxt@mprc.pku.edu.cn>
@@ -176,6 +188,7 @@ M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
 F: target-i386/
 F: hw/i386/
+F: disas/i386.c

 Xtensa
 M: Max Filippov <jcmvbkbc@gmail.com>
@@ -242,6 +255,12 @@ L: xen-devel@lists.xensource.com
 S: Supported
 F: xen-*
 F: */xen*
+F: hw/char/xen_console.c
+F: hw/display/xenfb.c
+F: hw/net/xen_nic.c
+F: hw/xen/
+F: hw/xenpv/
+F: include/hw/xen/

 Hosts:
 ------
@@ -262,16 +281,47 @@ L: qemu-devel@nongnu.org
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: *win32*
+F: qemu.nsi

 ARM Machines
 ------------
 Allwinner-a10
-M: Li Guang <lig.fnst@cn.fujitsu.com>
+M: Beniamino Galvani <b.galvani@gmail.com>
 S: Maintained
-F: hw/*/allwinner-a10*
-F: include/hw/*/allwinner-a10*
+F: hw/*/allwinner*
+F: include/hw/*/allwinner*
 F: hw/arm/cubieboard.c

+ARM PrimeCell
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Maintained
+F: hw/char/pl011.c
+F: hw/display/pl110*
+F: hw/dma/pl080.c
+F: hw/dma/pl330.c
+F: hw/gpio/pl061.c
+F: hw/input/pl050.c
+F: hw/intc/pl190.c
+F: hw/sd/pl181.c
+F: hw/timer/pl031.c
+F: include/hw/arm/primecell.h
+
+ARM cores
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Maintained
+F: hw/intc/arm*
+F: hw/intc/gic_internal.h
+F: hw/misc/a9scu.c
+F: hw/misc/arm11scu.c
+F: hw/timer/a9gtimer*
+F: hw/timer/arm_*
+F: include/hw/arm/arm.h
+F: include/hw/intc/arm*
+F: include/hw/misc/a9scu.h
+F: include/hw/misc/arm11scu.h
+F: include/hw/timer/a9gtimer.h
+F: include/hw/timer/arm_mptimer.h
+
 Exynos
 M: Evgeny Voevodin <e.voevodin@samsung.com>
 M: Maksim Kozlov <m.kozlov@samsung.com>
@@ -308,11 +358,6 @@ M: Peter Maydell <peter.maydell@linaro.org>
 S: Maintained
 F: hw/arm/integratorcp.c

-Mainstone
-L: qemu-devel@nongnu.org
-S: Orphan
-F: hw/arm/mainstone.c
-
 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
 S: Maintained
@@ -332,11 +377,17 @@ Real View
 M: Peter Maydell <peter.maydell@linaro.org>
 S: Maintained
 F: hw/arm/realview*
+F: hw/intc/realview_gic.c
+F: include/hw/intc/realview_gic.h

-Spitz
+PXA2XX
 M: Andrzej Zaborowski <balrogg@gmail.com>
 S: Maintained
+F: hw/arm/mainstone.c
 F: hw/arm/spitz.c
+F: hw/arm/tosa.c
+F: hw/arm/z2.c
+F: hw/*/pxa2xx*

 Stellaris
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -564,6 +615,7 @@ M: Cornelia Huck <cornelia.huck@de.ibm.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Supported
+F: hw/char/sclp*.[hc]
 F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
 F: hw/s390x/sclp*.[hc]
@@ -601,6 +653,25 @@ F: hw/acpi/piix4.c
 F: hw/acpi/ich9.c
 F: include/hw/acpi/ich9.h
 F: include/hw/acpi/piix.h
+F: hw/misc/sga.c
+
+PC Chipset
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Support
+F: hw/char/debugcon.c
+F: hw/char/parallel.c
+F: hw/char/serial*
+F: hw/dma/i8257*
+F: hw/i2c/pm_smbus.c
+F: hw/intc/apic*
+F: hw/intc/ioapic*
+F: hw/intc/i8259*
+F: hw/misc/debugexit.c
+F: hw/misc/pc-testdev.c
+F: hw/timer/hpet*
+F: hw/timer/i8254*
+F: hw/timer/mc146818rtc*


 Xtensa Machines
@@ -634,6 +705,7 @@ F: hw/block/cdrom.c
 F: hw/block/hd-geometry.c
 F: tests/ide-test.c
 F: tests/ahci-test.c
+F: tests/libqos/ahci*
 T: git git://github.com/jnsnow/qemu.git ide

 Floppy
@@ -642,6 +714,7 @@ L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/fdc.c
 F: include/hw/block/fdc.h
+F: tests/fdc-test.c
 T: git git://github.com/jnsnow/qemu.git ide

 OMAP
@@ -649,11 +722,19 @@ M: Peter Maydell <peter.maydell@linaro.org>
 S: Maintained
 F: hw/*/omap*

+IPack
+M: Alberto Garcia <berto@igalia.com>
+S: Odd Fixes
+F: hw/char/ipoctal232.c
+F: hw/ipack/
+
 PCI
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: include/hw/pci/*
+F: hw/misc/pci-testdev.c
 F: hw/pci/*
+F: hw/pci-bridge/*

 ACPI/SMBIOS
 M: Michael S. Tsirkin <mst@redhat.com>
@@ -682,6 +763,19 @@ M: Scott Wood <scottwood@freescale.com>
 L: qemu-ppc@nongnu.org
 S: Supported
 F: hw/ppc/e500*
+F: hw/pci-host/ppce500.c
+F: hw/net/fsl_etsec/
+
+Character devices
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Odd Fixes
+F: hw/char/
+
+Network devices
+M: Jason Wang <jasowang@redhat.com>
+S: Odd Fixes
+F: hw/net/
+T: git git://github.com/jasowang/qemu.git net

 SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
@@ -867,7 +961,7 @@ F: block/qapi.c
 F: qapi/block*.json
 T: git git://repo.or.cz/qemu/armbru.git block-next

-Character Devices
+Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: qemu-char.c
@@ -903,6 +997,14 @@ M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: device_tree.[ch]

+Error reporting
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qapi/error.h
+F: include/qemu/error-report.h
+F: util/error.c
+F: util/qemu-error.c
+
 GDB stub
 L: qemu-devel@nongnu.org
 S: Odd Fixes
@@ -954,12 +1056,11 @@ F: hmp.c
 F: hmp-commands.hx
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

-Network device layer
-M: Stefan Hajnoczi <stefanha@redhat.com>
+Network device backends
 M: Jason Wang <jasowang@redhat.com>
 S: Maintained
 F: net/
-T: git git://github.com/stefanha/qemu.git net
+T: git git://github.com/jasowang/qemu.git net

 Netmap network backend
 M: Luigi Rizzo <rizzo@iet.unipi.it>
@@ -992,7 +1093,12 @@ M: Michael Roth <mdroth@linux.vnet.ibm.com>
 S: Supported
 F: qapi/
 X: qapi/*.json
+F: include/qapi/
+X: include/qapi/qmp/
+F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
+F: tests/test-*-visitor.c
+F: tests/test-qmp-*.c
 F: scripts/qapi*
 F: docs/qapi*
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1009,6 +1115,14 @@ QObject
 M: Luiz Capitulino <lcapitulino@redhat.com>
 S: Maintained
 F: qobject/
+F: include/qapi/qmp/
+X: include/qapi/qmp/dispatch.h
+F: tests/check-qdict.c
+F: tests/check-qfloat.c
+F: tests/check-qint.c
+F: tests/check-qjson.c
+F: tests/check-qlist.c
+F: tests/check-qstring.c
 T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

 QEMU Guest Agent
@@ -1033,7 +1147,7 @@ S: Supported
 F: qmp.c
 F: monitor.c
 F: qmp-commands.hx
-F: docs/qmp/
+F: docs/*qmp-*
 F: scripts/qmp/
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

@@ -1109,53 +1223,58 @@ M: Claudio Fontana <claudio.fontana@huawei.com>
 M: Claudio Fontana <claudio.fontana@gmail.com>
 S: Maintained
 F: tcg/aarch64/
+F: disas/arm-a64.cc
+F: disas/libvixl/

 ARM target
 M: Andrzej Zaborowski <balrogg@gmail.com>
 S: Maintained
 F: tcg/arm/
+F: disas/arm.c

 i386 target
 L: qemu-devel@nongnu.org
 S: Maintained
 F: tcg/i386/
+F: disas/i386.c

 IA64 target
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: tcg/ia64/
+F: disas/ia64.c

 MIPS target
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: tcg/mips/
+F: disas/mips.c

 PPC
 M: Vassili Karpov (malc) <av1474@comtv.ru>
 S: Maintained
 F: tcg/ppc/
-
-PPC64 target
-M: Vassili Karpov (malc) <av1474@comtv.ru>
-S: Maintained
-F: tcg/ppc64/
+F: disas/ppc.c

 S390 target
 M: Alexander Graf <agraf@suse.de>
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
 F: tcg/s390/
+F: disas/s390.c

 SPARC target
 M: Blue Swirl <blauwirbel@gmail.com>
 S: Maintained
 F: tcg/sparc/
+F: disas/sparc.c

 TCI target
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: tcg/tci/
 F: tci.c
+F: disas/tci.c

 Stable branches
 ---------------
@@ -1365,3 +1484,11 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: tests/image-fuzzer/
+
+
+Documentation
+-------------
+Build system architecture
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: docs/build-system.txt
--- a/35
+++ b/35
@@ -52,6 +52,8 @@ endif
 GENERATED_HEADERS = config-host.h qemu-options.def
 GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
 GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
+GENERATED_HEADERS += qmp-introspect.h
+GENERATED_SOURCES += qmp-introspect.c

 GENERATED_HEADERS += trace/generated-events.h
 GENERATED_SOURCES += trace/generated-events.c
@@ -152,15 +154,15 @@ dummy := $(call unnest-vars,, \
                qga-vss-dll-obj-y \
                block-obj-y \
                block-obj-m \
+                crypto-obj-y \
+                crypto-aes-obj-y \
+                qom-obj-y \
                common-obj-y \
                common-obj-m)

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile
 endif
-ifeq ($(CONFIG_SMARTCARD_NSS),y)
-include $(SRC_PATH)/libcacard/Makefile
-endif

 all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules

@@ -173,6 +175,7 @@ SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))

 $(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
+$(SOFTMMU_SUBDIR_RULES): $(crypto-obj-y)
 $(SOFTMMU_SUBDIR_RULES): config-all-devices.mak

 subdir-%:
@@ -197,7 +200,7 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y)
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 romsubdir-%:
@@ -227,9 +230,9 @@ util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)'

 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a

 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o

@@ -264,7 +267,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)

 qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
               $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
-               $(SRC_PATH)/qapi/event.json
+               $(SRC_PATH)/qapi/event.json $(SRC_PATH)/qapi/introspect.json

 qapi-types.c qapi-types.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -286,6 +289,11 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
 		$(gen-out-type) -o "." -m $<, \
 		"  GEN   $@")
+qmp-introspect.h qmp-introspect.c :\
+$(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-introspect.py \
+		$(gen-out-type) -o "." $<, \
+		"  GEN   $@")

 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
@@ -344,7 +352,7 @@ qemu-%.tar.bz2:
 	$(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)"

 distclean: clean
-	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
+	rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi qemu-monitor-info.texi
 	rm -f config-all-devices.mak config-all-disas.mak config.status
 	rm -f po/*.mo tests/qemu-iotests/common.env
 	rm -f roms/seabios/config.mak roms/vgabios/config.mak
@@ -511,13 +519,16 @@ qemu-options.texi: $(SRC_PATH)/qemu-options.hx
 qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")

+qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx
+	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")
+
 qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@,"  GEN   $@")

 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"  GEN   $@")

-qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi
+qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
 	$(call quiet-command, \
 	  perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
 	  $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
@@ -560,7 +571,8 @@ pdf: qemu-doc.pdf qemu-tech.pdf

 qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
 	qemu-img.texi qemu-nbd.texi qemu-options.texi \
-	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi
+	qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
+	qemu-monitor-info.texi

 ifdef CONFIG_WIN32

@@ -610,6 +622,7 @@ endif # SIGNCODE
                $(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
                -DSRCDIR="$(SRC_PATH)" \
                -DOUTFILE="$(INSTALLER)" \
+                -DDISPLAYVERSION="$(VERSION)" \
                $(SRC_PATH)/qemu.nsi
 	rm -r ${INSTDIR}
 ifdef SIGNCODE
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,8 +1,8 @@
 #######################################################################
 # Common libraries for tools and emulators
 stub-obj-y = stubs/
-util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
-util-obj-y += crypto/
+util-obj-y = util/ qobject/ qapi/
+util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img
@@ -21,18 +21,16 @@ block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o

 block-obj-m = block/

+#######################################################################
+# crypto-obj-y is code used by both qemu system emulation and qemu-img

-######################################################################
-# smartcard
+crypto-obj-y = crypto/
+crypto-aes-obj-y = crypto/

-libcacard-y += libcacard/cac.o libcacard/event.o
-libcacard-y += libcacard/vcard.o libcacard/vreader.o
-libcacard-y += libcacard/vcard_emul_nss.o
-libcacard-y += libcacard/vcard_emul_type.o
-libcacard-y += libcacard/card_7816.o
-libcacard-y += libcacard/vcardt.o
-libcacard/vcard_emul_nss.o-cflags := $(NSS_CFLAGS)
-libcacard/vcard_emul_nss.o-libs := $(NSS_LIBS)
+#######################################################################
+# qom-obj-y is code used by both qemu system emulation and qemu-img
+
+qom-obj-y = qom/

 ######################################################################
 # Target independent part of system emulation. The long term path is to
@@ -75,14 +73,13 @@ common-obj-y += backends/

 common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o

-common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)
-
 common-obj-$(CONFIG_FDT) += device_tree.o

 ######################################################################
 # qapi

 common-obj-y += qmp-marshal.o
+common-obj-y += qmp-introspect.o
 common-obj-y += qmp.o hmp.o
 endif

--- a/Makefile.target
+++ b/Makefile.target
@@ -85,8 +85,11 @@ all: $(PROGS) stap
 #########################################################
 # cpu emulator library
 obj-y = exec.o translate-all.o cpu-exec.o
+obj-y += translate-common.o
+obj-y += cpu-exec-common.o
 obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tci.o
+obj-y += tcg/tcg-common.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target-$(TARGET_BASE_ARCH)/
@@ -151,7 +154,7 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h
+GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h

 endif # CONFIG_SOFTMMU

@@ -170,12 +173,18 @@ target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
+               crypto-obj-y \
+               crypto-aes-obj-y \
+               qom-obj-y \
               common-obj-y \
               common-obj-m)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
+all-obj-y += $(qom-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
+all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

@@ -193,6 +202,9 @@ gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
 hmp-commands.h: $(SRC_PATH)/hmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")

+hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx
+	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")
+
 qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"  GEN   $(TARGET_DIR)$@")

--- a/108
+++ b/108
@@ -1,3 +1,107 @@
-Read the documentation in qemu-doc.html or on http://wiki.qemu-project.org
+         QEMU README
+         ===========

- QEMU team
+QEMU is a generic and open source machine & userspace emulator and
+virtualizer.
+
+QEMU is capable of emulating a complete machine in software without any
+need for hardware virtualization support. By using dynamic translation,
+it achieves very good performance. QEMU can also integrate with the Xen
+and KVM hypervisors to provide emulated hardware while allowing the
+hypervisor to manage the CPU. With hypervisor support, QEMU can achieve
+near native performance for CPUs. When QEMU emulates CPUs directly it is
+capable of running operating systems made for one machine (e.g. an ARMv7
+board) on a different machine (e.g. an x86_64 PC board).
+
+QEMU is also capable of providing userspace API virtualization for Linux
+and BSD kernel interfaces. This allows binaries compiled against one
+architecture ABI (e.g. the Linux PPC64 ABI) to be run on a host using a
+different architecture ABI (e.g. the Linux x86_64 ABI). This does not
+involve any hardware emulation, simply CPU and syscall emulation.
+
+QEMU aims to fit into a variety of use cases. It can be invoked directly
+by users wishing to have full control over its behaviour and settings.
+It also aims to facilitate integration into higher level management
+layers, by providing a stable command line interface and monitor API.
+It is commonly invoked indirectly via the libvirt library when using
+open source applications such as oVirt, OpenStack and virt-manager.
+
+QEMU as a whole is released under the GNU General Public License,
+version 2. For full licensing details, consult the LICENSE file.
+
+
+Building
+========
+
+QEMU is multi-platform software intended to be buildable on all modern
+Linux platforms, OS-X, Win32 (via the Mingw64 toolchain) and a variety
+of other UNIX targets. The simple steps to build QEMU are:
+
+  mkdir build
+  cd build
+  ../configure
+  make
+
+Complete details of the process for building and configuring QEMU for
+all supported host platforms can be found in the qemu-tech.html file.
+Additional information can also be found online via the QEMU website:
+
+  http://qemu-project.org/Hosts/Linux
+  http://qemu-project.org/Hosts/W32
+
+
+Submitting patches
+==================
+
+The QEMU source code is maintained under the GIT version control system.
+
+   git clone git://git.qemu-project.org/qemu.git
+
+When submitting patches, the preferred approach is to use 'git
+format-patch' and/or 'git send-email' to format & send the mail to the
+qemu-devel@nongnu.org mailing list. All patches submitted must contain
+a 'Signed-off-by' line from the author. Patches should follow the
+guidelines set out in the HACKING and CODING_STYLE files.
+
+Additional information on submitting patches can be found online via
+the QEMU website
+
+  http://qemu-project.org/Contribute/SubmitAPatch
+  http://qemu-project.org/Contribute/TrivialPatches
+
+
+Bug reporting
+=============
+
+The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs
+found when running code built from QEMU git or upstream released sources
+should be reported via:
+
+  https://bugs.launchpad.net/qemu/
+
+If using QEMU via an operating system vendor pre-built binary package, it
+is preferable to report bugs to the vendor's own bug tracker first. If
+the bug is also known to affect latest upstream code, it can also be
+reported via launchpad.
+
+For additional information on bug reporting consult:
+
+  http://qemu-project.org/Contribute/ReportABug
+
+
+Contact
+=======
+
+The QEMU community can be contacted in a number of ways, with the two
+main methods being email and IRC
+
+ - qemu-devel@nongnu.org
+   http://lists.nongnu.org/mailman/listinfo/qemu-devel
+ - #qemu on irc.oftc.net
+
+Information on additional methods of contacting the community can be
+found online via the QEMU website:
+
+  http://qemu-project.org/Contribute/StartHere
+
+-- End
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -561,7 +561,10 @@ static void baum_close(struct CharDriverState *chr)
    g_free(baum);
 }

-CharDriverState *chr_baum_init(void)
+static CharDriverState *chr_baum_init(const char *id,
+                                      ChardevBackend *backend,
+                                      ChardevReturn *ret,
+                                      Error **errp)
 {
    BaumDriverState *baum;
    CharDriverState *chr;
@@ -586,14 +589,16 @@ CharDriverState *chr_baum_init(void)

    baum->brlapi_fd = brlapi__openConnection(handle, NULL, NULL);
    if (baum->brlapi_fd == -1) {
-        brlapi_perror("baum_init: brlapi_openConnection");
+        error_setg(errp, "brlapi__openConnection: %s",
+                   brlapi_strerror(brlapi_error_location()));
        goto fail_handle;
    }

    baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);

    if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
-        brlapi_perror("baum_init: brlapi_getDisplaySize");
+        error_setg(errp, "brlapi__getDisplaySize: %s",
+                   brlapi_strerror(brlapi_error_location()));
        goto fail;
    }

@@ -609,7 +614,8 @@ CharDriverState *chr_baum_init(void)
        tty = BRLAPI_TTY_DEFAULT;

    if (brlapi__enterTtyMode(handle, tty, NULL) == -1) {
-        brlapi_perror("baum_init: brlapi_enterTtyMode");
+        error_setg(errp, "brlapi__enterTtyMode: %s",
+                   brlapi_strerror(brlapi_error_location()));
        goto fail;
    }

@@ -629,7 +635,8 @@ fail_handle:

 static void register_types(void)
 {
-    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL);
+    register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
+                         chr_baum_init);
 }

 type_init(register_types);
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -63,7 +63,10 @@ static void msmouse_chr_close (struct CharDriverState *chr)
    g_free (chr);
 }

-CharDriverState *qemu_chr_open_msmouse(void)
+static CharDriverState *qemu_chr_open_msmouse(const char *id,
+                                              ChardevBackend *backend,
+                                              ChardevReturn *ret,
+                                              Error **errp)
 {
    CharDriverState *chr;

@@ -79,7 +82,8 @@ CharDriverState *qemu_chr_open_msmouse(void)

 static void register_types(void)
 {
-    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL);
+    register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
+                         qemu_chr_open_msmouse);
 }

 type_init(register_types);
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -108,13 +108,16 @@ static void testdev_close(struct CharDriverState *chr)
    g_free(testdev);
 }

-CharDriverState *chr_testdev_init(void)
+static CharDriverState *chr_testdev_init(const char *id,
+                                         ChardevBackend *backend,
+                                         ChardevReturn *ret,
+                                         Error **errp)
 {
    TestdevCharState *testdev;
    CharDriverState *chr;

-    testdev = g_malloc0(sizeof(TestdevCharState));
-    testdev->chr = chr = g_malloc0(sizeof(CharDriverState));
+    testdev = g_new0(TestdevCharState, 1);
+    testdev->chr = chr = g_new0(CharDriverState, 1);

    chr->opaque = testdev;
    chr->chr_write = testdev_write;
@@ -125,7 +128,8 @@ CharDriverState *chr_testdev_init(void)

 static void register_types(void)
 {
-    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL);
+    register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
+                         chr_testdev_init);
 }

 type_init(register_types);
--- a/block.c
+++ b/block.c
@@ -721,7 +721,7 @@ const BdrvChildRole child_format = {
 };

 /*
- * Returns the flags that bs->backing_hd should get, based on the given flags
+ * Returns the flags that bs->backing should get, based on the given flags
 * for the parent BDS
 */
 static int bdrv_backing_flags(int flags)
@@ -763,12 +763,15 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
 {
-    if (!node_name) {
-        return;
-    }
+    char *gen_node_name = NULL;

-    /* Check for empty string or invalid characters */
-    if (!id_wellformed(node_name)) {
+    if (!node_name) {
+        node_name = gen_node_name = id_generate(ID_BLOCK);
+    } else if (!id_wellformed(node_name)) {
+        /*
+         * Check for empty string or invalid characters, but not if it is
+         * generated (generated names use characters not available to the user)
+         */
        error_setg(errp, "Invalid node name");
        return;
    }
@@ -777,18 +780,20 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
-        return;
+        goto out;
    }

    /* takes care of avoiding duplicates node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
-        return;
+        goto out;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
+out:
+    g_free(gen_node_name);
 }

 static QemuOptsList bdrv_runtime_opts = {
@@ -809,7 +814,7 @@ static QemuOptsList bdrv_runtime_opts = {
 *
 * Removes all processed options from *options.
 */
-static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
 {
    int ret, open_flags;
@@ -823,7 +828,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
-        filename = file->filename;
+        filename = file->bs->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }
@@ -1090,6 +1095,7 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
    };

    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);

    return child;
 }
@@ -1097,50 +1103,62 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 static void bdrv_detach_child(BdrvChild *child)
 {
    QLIST_REMOVE(child, next);
+    QLIST_REMOVE(child, next_parent);
    g_free(child);
 }

 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
 {
-    BlockDriverState *child_bs = child->bs;
+    BlockDriverState *child_bs;
+
+    if (child == NULL) {
+        return;
+    }

    if (child->bs->inherits_from == parent) {
        child->bs->inherits_from = NULL;
    }

+    child_bs = child->bs;
    bdrv_detach_child(child);
    bdrv_unref(child_bs);
 }

+/*
+ * Sets the backing file link of a BDS. A new reference is created; callers
+ * which don't need their own reference any more must call bdrv_unref().
+ */
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 {
+    if (backing_hd) {
+        bdrv_ref(backing_hd);
+    }

-    if (bs->backing_hd) {
+    if (bs->backing) {
        assert(bs->backing_blocker);
-        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
-        bdrv_detach_child(bs->backing_child);
+        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
+        bdrv_unref_child(bs, bs->backing);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

-    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
-        bs->backing_child = NULL;
+        bs->backing = NULL;
        goto out;
    }
-    bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
+    bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

-    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
+    bdrv_op_block_all(backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
-    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
 out:
    bdrv_refresh_limits(bs, NULL);
@@ -1161,7 +1179,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

-    if (bs->backing_hd != NULL) {
+    if (bs->backing != NULL) {
        QDECREF(options);
        goto free_exit;
    }
@@ -1201,7 +1219,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

-    assert(bs->backing_hd == NULL);
+    assert(bs->backing == NULL);
    ret = bdrv_open_inherit(&backing_hd,
                            *backing_filename ? backing_filename : NULL,
                            NULL, options, 0, bs, &child_backing, &local_err);
@@ -1215,7 +1233,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
        goto free_exit;
    }

+    /* Hook up the backing file link; drop our reference, bs owns the
+     * backing_hd reference now */
    bdrv_set_backing_hd(bs, backing_hd);
+    bdrv_unref(backing_hd);

 free_exit:
    g_free(backing_filename);
@@ -1279,40 +1300,6 @@ done:
    return c;
 }

-/*
- * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
- * a BdrvChild object.
- *
- * If allow_none is true, no image will be opened if filename is false and no
- * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
- *
- * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
- */
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
-                    QDict *options, const char *bdref_key,
-                    BlockDriverState* parent, const BdrvChildRole *child_role,
-                    bool allow_none, Error **errp)
-{
-    Error *local_err = NULL;
-    BdrvChild *c;
-
-    assert(pbs);
-    assert(*pbs == NULL);
-
-    c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
-                        allow_none, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return -EINVAL;
-    }
-
-    if (c != NULL) {
-        *pbs = c->bs;
-    }
-
-    return 0;
-}
-
 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 {
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
@@ -1401,7 +1388,8 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const BdrvChildRole *child_role, Error **errp)
 {
    int ret;
-    BlockDriverState *file = NULL, *bs;
+    BdrvChild *file = NULL;
+    BlockDriverState *bs;
    BlockDriver *drv = NULL;
    const char *drvname;
    Error *local_err = NULL;
@@ -1485,11 +1473,12 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
            flags = bdrv_backing_flags(flags);
        }

-        assert(file == NULL);
        bs->open_flags = flags;
-        ret = bdrv_open_image(&file, filename, options, "file",
-                              bs, &child_file, true, &local_err);
-        if (ret < 0) {
+
+        file = bdrv_open_child(filename, options, "file", bs,
+                               &child_file, true, &local_err);
+        if (local_err) {
+            ret = -EINVAL;
            goto fail;
        }
    }
@@ -1497,7 +1486,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
-        ret = find_image_format(file, filename, &drv, &local_err);
+        ret = find_image_format(file->bs, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
@@ -1520,7 +1509,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
    }

    if (file && (bs->file != file)) {
-        bdrv_unref(file);
+        bdrv_unref_child(bs, file);
        file = NULL;
    }

@@ -1537,15 +1526,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,

    bdrv_refresh_filename(bs);

-    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
-     * temporary snapshot afterwards. */
-    if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
-        if (local_err) {
-            goto close_and_fail;
-        }
-    }
-
    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
@@ -1577,11 +1557,21 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,

    QDECREF(options);
    *pbs = bs;
+
+    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
+     * temporary snapshot afterwards. */
+    if (snapshot_flags) {
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        if (local_err) {
+            goto close_and_fail;
+        }
+    }
+
    return 0;

 fail:
    if (file != NULL) {
-        bdrv_unref(file);
+        bdrv_unref_child(bs, file);
    }
    QDECREF(bs->options);
    QDECREF(options);
@@ -1907,6 +1897,12 @@ void bdrv_close(BlockDriverState *bs)
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
+
+    /* Disable I/O limits and drain all pending throttled requests */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_disable(bs);
+    }
+
    bdrv_drain(bs); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */
@@ -1916,11 +1912,13 @@ void bdrv_close(BlockDriverState *bs)
        BdrvChild *child, *next;

        bs->drv->bdrv_close(bs);
+        bs->drv = NULL;

-        if (bs->backing_hd) {
-            BlockDriverState *backing_hd = bs->backing_hd;
-            bdrv_set_backing_hd(bs, NULL);
-            bdrv_unref(backing_hd);
+        bdrv_set_backing_hd(bs, NULL);
+
+        if (bs->file != NULL) {
+            bdrv_unref_child(bs, bs->file);
+            bs->file = NULL;
        }

        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
@@ -1934,7 +1932,6 @@ void bdrv_close(BlockDriverState *bs)

        g_free(bs->opaque);
        bs->opaque = NULL;
-        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
@@ -1947,22 +1944,12 @@ void bdrv_close(BlockDriverState *bs)
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;
-
-        if (bs->file != NULL) {
-            bdrv_unref(bs->file);
-            bs->file = NULL;
-        }
    }

    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

-    /*throttling disk I/O limits*/
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_disable(bs);
-    }
-
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
@@ -2004,13 +1991,7 @@ void bdrv_make_anon(BlockDriverState *bs)
    bs->node_name[0] = '\0';
 }

-static void bdrv_rebind(BlockDriverState *bs)
-{
-    if (bs->drv && bs->drv->bdrv_rebind) {
-        bs->drv->bdrv_rebind(bs);
-    }
-}
-
+/* Fields that need to stay with the top-level BDS */
 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
 {
@@ -2022,20 +2003,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

-    /* i/o throttled req */
-    bs_dest->throttle_state     = bs_src->throttle_state,
-    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
-    bs_dest->pending_reqs[0]    = bs_src->pending_reqs[0];
-    bs_dest->pending_reqs[1]    = bs_src->pending_reqs[1];
-    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
-    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
-    memcpy(&bs_dest->round_robin,
-           &bs_src->round_robin,
-           sizeof(bs_dest->round_robin));
-    memcpy(&bs_dest->throttle_timers,
-           &bs_src->throttle_timers,
-           sizeof(ThrottleTimers));
-
    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;
@@ -2046,125 +2013,45 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,

    /* dirty bitmap */
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
-
-    /* reference count */
-    bs_dest->refcnt             = bs_src->refcnt;
-
-    /* job */
-    bs_dest->job                = bs_src->job;
-
-    /* keep the same entry in bdrv_states */
-    bs_dest->device_list = bs_src->device_list;
-    bs_dest->blk = bs_src->blk;
-
-    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
-           sizeof(bs_dest->op_blockers));
 }

-/*
- * Swap bs contents for two image chains while they are live,
- * while keeping required fields on the BlockDriverState that is
- * actually attached to a device.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_old. Both bs_new and bs_old are modified.
- *
- * bs_new must not be attached to a BlockBackend.
- *
- * This function does not create any image files.
- */
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
+static void change_parent_backing_link(BlockDriverState *from,
+                                       BlockDriverState *to)
+{
+    BdrvChild *c, *next;
+
+    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+        assert(c->role != &child_backing);
+        c->bs = to;
+        QLIST_REMOVE(c, next_parent);
+        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
+        bdrv_ref(to);
+        bdrv_unref(from);
+    }
+    if (from->blk) {
+        blk_set_bs(from->blk, to);
+        if (!to->device_list.tqe_prev) {
+            QTAILQ_INSERT_BEFORE(from, to, device_list);
+        }
+        QTAILQ_REMOVE(&bdrv_states, from, device_list);
+    }
+}
+
+static void swap_feature_fields(BlockDriverState *bs_top,
+                                BlockDriverState *bs_new)
 {
    BlockDriverState tmp;
-    BdrvChild *child;

-    bdrv_drain(bs_new);
-    bdrv_drain(bs_old);
-
-    /* The code needs to swap the node_name but simply swapping node_list won't
-     * work so first remove the nodes from the graph list, do the swap then
-     * insert them back if needed.
-     */
-    if (bs_new->node_name[0] != '\0') {
-        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
-    }
-    if (bs_old->node_name[0] != '\0') {
-        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
-    }
-
-    /* If the BlockDriverState is part of a throttling group acquire
-     * its lock since we're going to mess with the protected fields.
-     * Otherwise there's no need to worry since no one else can touch
-     * them. */
-    if (bs_old->throttle_state) {
-        throttle_group_lock(bs_old);
-    }
-
-    /* bs_new must be unattached and shouldn't have anything fancy enabled */
-    assert(!bs_new->blk);
-    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
-    assert(bs_new->job == NULL);
-    assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->throttle_state == NULL);
-    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
-
-    tmp = *bs_new;
-    *bs_new = *bs_old;
-    *bs_old = tmp;
-
-    /* there are some fields that should not be swapped, move them back */
-    bdrv_move_feature_fields(&tmp, bs_old);
-    bdrv_move_feature_fields(bs_old, bs_new);
+    bdrv_move_feature_fields(&tmp, bs_top);
+    bdrv_move_feature_fields(bs_top, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

-    /* bs_new must remain unattached */
-    assert(!bs_new->blk);
-
-    /* Check a few fields that should remain attached to the device */
-    assert(bs_new->job == NULL);
-    assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->throttle_state == NULL);
-    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
-
-    /* Release the ThrottleGroup lock */
-    if (bs_old->throttle_state) {
-        throttle_group_unlock(bs_old);
+    assert(!bs_new->throttle_state);
+    if (bs_top->throttle_state) {
+        assert(bs_top->io_limits_enabled);
+        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
+        bdrv_io_limits_disable(bs_top);
    }
-
-    /* insert the nodes back into the graph node list if needed */
-    if (bs_new->node_name[0] != '\0') {
-        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
-    }
-    if (bs_old->node_name[0] != '\0') {
-        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
-    }
-
-    /*
-     * Update lh_first.le_prev for non-empty lists.
-     *
-     * The head of the op blocker list doesn't change because it is moved back
-     * in bdrv_move_feature_fields().
-     */
-    assert(QLIST_EMPTY(&bs_old->tracked_requests));
-    assert(QLIST_EMPTY(&bs_new->tracked_requests));
-
-    QLIST_FIX_HEAD_PTR(&bs_new->children, next);
-    QLIST_FIX_HEAD_PTR(&bs_old->children, next);
-
-    /* Update references in bs->opaque and children */
-    QLIST_FOREACH(child, &bs_old->children, next) {
-        if (child->bs->inherits_from == bs_new) {
-            child->bs->inherits_from = bs_old;
-        }
-    }
-    QLIST_FOREACH(child, &bs_new->children, next) {
-        if (child->bs->inherits_from == bs_old) {
-            child->bs->inherits_from = bs_new;
-        }
-    }
-
-    bdrv_rebind(bs_new);
-    bdrv_rebind(bs_old);
 }

 /*
@@ -2177,14 +2064,59 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
+ *
+ * bdrv_append() takes ownership of a bs_new reference and unrefs it because
+ * that's what the callers commonly need. bs_new will be referenced by the old
+ * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
+ * reference of its own, it must call bdrv_ref().
 */
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
 {
-    bdrv_swap(bs_new, bs_top);
+    assert(!bdrv_requests_pending(bs_top));
+    assert(!bdrv_requests_pending(bs_new));

-    /* The contents of 'tmp' will become bs_top, as we are
-     * swapping bs_new and bs_top contents. */
-    bdrv_set_backing_hd(bs_top, bs_new);
+    bdrv_ref(bs_top);
+    change_parent_backing_link(bs_top, bs_new);
+
+    /* Some fields always stay on top of the backing file chain */
+    swap_feature_fields(bs_top, bs_new);
+
+    bdrv_set_backing_hd(bs_new, bs_top);
+    bdrv_unref(bs_top);
+
+    /* bs_new is now referenced by its new parents, we don't need the
+     * additional reference any more. */
+    bdrv_unref(bs_new);
+}
+
+void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
+{
+    assert(!bdrv_requests_pending(old));
+    assert(!bdrv_requests_pending(new));
+
+    bdrv_ref(old);
+
+    if (old->blk) {
+        /* As long as these fields aren't in BlockBackend, but in the top-level
+         * BlockDriverState, it's not possible for a BDS to have two BBs.
+         *
+         * We really want to copy the fields from old to new, but we go for a
+         * swap instead so that pointers aren't duplicated and cause trouble.
+         * (Also, bdrv_swap() used to do the same.) */
+        assert(!new->blk);
+        swap_feature_fields(old, new);
+    }
+    change_parent_backing_link(old, new);
+
+    /* Change backing files if a previously independent node is added to the
+     * chain. For active commit, we replace top by its own (indirect) backing
+     * file and don't do anything here so we don't build a loop. */
+    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
+        bdrv_set_backing_hd(new, backing_bs(old));
+        bdrv_set_backing_hd(old, NULL);
+    }
+
+    bdrv_unref(old);
 }

 static void bdrv_delete(BlockDriverState *bs)
@@ -2236,20 +2168,20 @@ int bdrv_commit(BlockDriverState *bs)
    if (!drv)
        return -ENOMEDIUM;

-    if (!bs->backing_hd) {
+    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
-        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
+        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

-    ro = bs->backing_hd->read_only;
-    open_flags =  bs->backing_hd->open_flags;
+    ro = bs->backing->bs->read_only;
+    open_flags =  bs->backing->bs->open_flags;

    if (ro) {
-        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }
@@ -2260,7 +2192,7 @@ int bdrv_commit(BlockDriverState *bs)
        goto ro_cleanup;
    }

-    backing_length = bdrv_getlength(bs->backing_hd);
+    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
@@ -2270,7 +2202,7 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = bdrv_truncate(bs->backing_hd, length);
+        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
@@ -2279,7 +2211,7 @@ int bdrv_commit(BlockDriverState *bs)
    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
-     * bs->backing_hd as well, so no need to compare the alignment manually. */
+     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
@@ -2297,7 +2229,7 @@ int bdrv_commit(BlockDriverState *bs)
                goto ro_cleanup;
            }

-            ret = bdrv_write(bs->backing_hd, sector, buf, n);
+            ret = bdrv_write(bs->backing->bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
@@ -2316,8 +2248,8 @@ int bdrv_commit(BlockDriverState *bs)
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
-    if (bs->backing_hd) {
-        bdrv_flush(bs->backing_hd);
+    if (bs->backing) {
+        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
@@ -2326,7 +2258,7 @@ ro_cleanup:

    if (ro) {
        /* ignoring error return here */
-        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
+        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
@@ -2340,7 +2272,7 @@ int bdrv_commit_all(void)
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
-        if (bs->drv && bs->backing_hd) {
+        if (bs->drv && bs->backing) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                aio_context_release(aio_context);
@@ -2397,8 +2329,8 @@ int bdrv_change_backing_file(BlockDriverState *bs,
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
 {
-    while (active && bs != active->backing_hd) {
-        active = active->backing_hd;
+    while (active && bs != backing_bs(active)) {
+        active = backing_bs(active);
    }

    return active;
@@ -2410,12 +2342,6 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
    return bdrv_find_overlay(bs, NULL);
 }

-typedef struct BlkIntermediateStates {
-    BlockDriverState *bs;
-    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
 /*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
@@ -2448,15 +2374,9 @@ typedef struct BlkIntermediateStates {
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
 {
-    BlockDriverState *intermediate;
-    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
-    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

-    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-    QSIMPLEQ_INIT(&states_to_delete);
-
    if (!top->drv || !base->drv) {
        goto exit;
    }
@@ -2468,55 +2388,29 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
        goto exit;
    }

-    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+    /* special case of new_top_bs->backing->bs already pointing to base - nothing
     * to do, no intermediate images */
-    if (new_top_bs->backing_hd == base) {
+    if (backing_bs(new_top_bs) == base) {
        ret = 0;
        goto exit;
    }

-    intermediate = top;
-
-    /* now we will go down through the list, and add each BDS we find
-     * into our deletion queue, until we hit the 'base'
-     */
-    while (intermediate) {
-        intermediate_state = g_new0(BlkIntermediateStates, 1);
-        intermediate_state->bs = intermediate;
-        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
-        if (intermediate->backing_hd == base) {
-            base_bs = intermediate->backing_hd;
-            break;
-        }
-        intermediate = intermediate->backing_hd;
-    }
-    if (base_bs == NULL) {
-        /* something went wrong, we did not end at the base. safely
-         * unravel everything, and exit with error */
+    /* Make sure that base is in the backing chain of top */
+    if (!bdrv_chain_contains(top, base)) {
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
-    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
+    backing_file_str = backing_file_str ? backing_file_str : base->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
-                                   base_bs->drv ? base_bs->drv->format_name : "");
+                                   base->drv ? base->drv->format_name : "");
    if (ret) {
        goto exit;
    }
-    bdrv_set_backing_hd(new_top_bs, base_bs);
+    bdrv_set_backing_hd(new_top_bs, base);

-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        /* so that bdrv_close() does not recursively close the chain */
-        bdrv_set_backing_hd(intermediate_state->bs, NULL);
-        bdrv_unref(intermediate_state->bs);
-    }
    ret = 0;
-
 exit:
-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        g_free(intermediate_state);
-    }
    return ret;
 }

@@ -2559,7 +2453,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
-        return bdrv_get_allocated_file_size(bs->file);
+        return bdrv_get_allocated_file_size(bs->file->bs);
    }
    return -ENOTSUP;
 }
@@ -2708,25 +2602,27 @@ void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)

 int bdrv_is_encrypted(BlockDriverState *bs)
 {
-    if (bs->backing_hd && bs->backing_hd->encrypted)
+    if (bs->backing && bs->backing->bs->encrypted) {
        return 1;
+    }
    return bs->encrypted;
 }

 int bdrv_key_required(BlockDriverState *bs)
 {
-    BlockDriverState *backing_hd = bs->backing_hd;
+    BdrvChild *backing = bs->backing;

-    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
+    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
        return 1;
+    }
    return (bs->encrypted && !bs->valid_key);
 }

 int bdrv_set_key(BlockDriverState *bs, const char *key)
 {
    int ret;
-    if (bs->backing_hd && bs->backing_hd->encrypted) {
-        ret = bdrv_set_key(bs->backing_hd, key);
+    if (bs->backing && bs->backing->bs->encrypted) {
+        ret = bdrv_set_key(bs->backing->bs, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
@@ -2893,7 +2789,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
 {
    while (top && top != base) {
-        top = top->backing_hd;
+        top = backing_bs(top);
    }

    return top != NULL;
@@ -2951,7 +2847,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
-    if (bs->backing_hd) {
+    if (bs->backing) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
@@ -2966,7 +2862,7 @@ bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
 {
    BlockDriverInfo bdi;

-    if (bs->backing_hd) {
+    if (bs->backing) {
        return false;
    }

@@ -2981,7 +2877,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
    BlockDriverInfo bdi;

-    if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
+    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
        return false;
    }

@@ -2994,7 +2890,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)

 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
-    if (bs->backing_hd && bs->backing_hd->encrypted)
+    if (bs->backing && bs->backing->bs->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
@@ -3041,7 +2937,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
 {
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
@@ -3054,7 +2950,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 {
    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
@@ -3067,7 +2963,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 {
    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
@@ -3080,7 +2976,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
@@ -3119,13 +3015,13 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,

    is_protocol = path_has_protocol(backing_file);

-    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
-                retval = curr_bs->backing_hd;
+                retval = curr_bs->backing->bs;
                break;
            }
        } else {
@@ -3149,7 +3045,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
-                retval = curr_bs->backing_hd;
+                retval = curr_bs->backing->bs;
                break;
            }
        }
@@ -3167,11 +3063,11 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
        return 0;
    }

-    if (!bs->backing_hd) {
+    if (!bs->backing) {
        return 0;
    }

-    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
+    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
 }

 void bdrv_init(void)
@@ -3202,7 +3098,7 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
-        bdrv_invalidate_cache(bs->file, &local_err);
+        bdrv_invalidate_cache(bs->file->bs, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
@@ -3932,10 +3828,10 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
-        bdrv_detach_aio_context(bs->file);
+        bdrv_detach_aio_context(bs->file->bs);
    }
-    if (bs->backing_hd) {
-        bdrv_detach_aio_context(bs->backing_hd);
+    if (bs->backing) {
+        bdrv_detach_aio_context(bs->backing->bs);
    }

    bs->aio_context = NULL;
@@ -3952,11 +3848,11 @@ void bdrv_attach_aio_context(BlockDriverState *bs,

    bs->aio_context = new_context;

-    if (bs->backing_hd) {
-        bdrv_attach_aio_context(bs->backing_hd, new_context);
+    if (bs->backing) {
+        bdrv_attach_aio_context(bs->backing->bs, new_context);
    }
    if (bs->file) {
-        bdrv_attach_aio_context(bs->file, new_context);
+        bdrv_attach_aio_context(bs->file->bs, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
@@ -4168,7 +4064,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
-        bdrv_refresh_filename(bs->file);
+        bdrv_refresh_filename(bs->file->bs);
    }

    if (drv->bdrv_refresh_filename) {
@@ -4196,19 +4092,20 @@ void bdrv_refresh_filename(BlockDriverState *bs)

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
-        if (bs->file->exact_filename[0] && !has_open_options) {
-            strcpy(bs->exact_filename, bs->file->exact_filename);
+        if (bs->file->bs->exact_filename[0] && !has_open_options) {
+            strcpy(bs->exact_filename, bs->file->bs->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
-        if (bs->file->full_open_options) {
+        if (bs->file->bs->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
-            QINCREF(bs->file->full_open_options);
-            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
+            QINCREF(bs->file->bs->full_open_options);
+            qdict_put_obj(opts, "file",
+                          QOBJECT(bs->file->bs->full_open_options));

            bs->full_open_options = opts;
        } else {
--- a/block/backup.c
+++ b/block/backup.c
@@ -89,7 +89,8 @@ static void cow_request_end(CowRequest *req)

 static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
-                                      bool *error_is_read)
+                                      bool *error_is_read,
+                                      bool is_write_notifier)
 {
    BackupBlockJob *job = (BackupBlockJob *)bs->job;
    CowRequest cow_request;
@@ -129,8 +130,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
        iov.iov_len = n * BDRV_SECTOR_SIZE;
        qemu_iovec_init_external(&bounce_qiov, &iov, 1);

-        ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
-                            &bounce_qiov);
+        if (is_write_notifier) {
+            ret = bdrv_co_no_copy_on_readv(bs,
+                                           start * BACKUP_SECTORS_PER_CLUSTER,
+                                           n, &bounce_qiov);
+        } else {
+            ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+                                &bounce_qiov);
+        }
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, start, ret);
            if (error_is_read) {
@@ -190,7 +197,7 @@ static int coroutine_fn backup_before_write_notify(
    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

-    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
+    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -303,7 +310,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                    return ret;
                }
                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
-                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
+                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
@@ -408,7 +416,7 @@ static void coroutine_fn backup_run(void *opaque)
            }
            /* FULL sync mode we copy the whole drive. */
            ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
-                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -426,11 +426,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    /* Set initial state */
    s->state = 1;

-    /* Open the backing file */
-    assert(bs->file == NULL);
-    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
-                          bs, &child_file, false, &local_err);
-    if (ret < 0) {
+    /* Open the image file */
+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
        error_propagate(errp, local_err);
        goto out;
    }
@@ -449,7 +449,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
    goto out;

 fail_unref:
-    bdrv_unref(bs->file);
+    bdrv_unref_child(bs, bs->file);
 out:
    qemu_opts_del(opts);
    return ret;
@@ -510,7 +510,8 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+                          cb, opaque);
 }

 static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -532,7 +533,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+                           cb, opaque);
 }

 static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
@@ -551,7 +553,7 @@ static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
        return inject_error(bs, cb, opaque, rule);
    }

-    return bdrv_aio_flush(bs->file, cb, opaque);
+    return bdrv_aio_flush(bs->file->bs, cb, opaque);
 }


@@ -716,12 +718,12 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)

 static int64_t blkdebug_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file);
+    return bdrv_getlength(bs->file->bs);
 }

 static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs)
@@ -741,24 +743,24 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
        }
    }

-    if (force_json && !bs->file->full_open_options) {
+    if (force_json && !bs->file->bs->full_open_options) {
        /* The config file cannot be recreated, so creating a plain filename
         * is impossible */
        return;
    }

-    if (!force_json && bs->file->exact_filename[0]) {
+    if (!force_json && bs->file->bs->exact_filename[0]) {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "blkdebug:%s:%s",
                 qdict_get_try_str(bs->options, "config") ?: "",
-                 bs->file->exact_filename);
+                 bs->file->bs->exact_filename);
    }

    opts = qdict_new();
    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));

-    QINCREF(bs->file->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
+    QINCREF(bs->file->bs->full_open_options);
+    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));

    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image") &&
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -14,7 +14,7 @@
 #include "qapi/qmp/qstring.h"

 typedef struct {
-    BlockDriverState *test_file;
+    BdrvChild *test_file;
 } BDRVBlkverifyState;

 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
@@ -123,26 +123,29 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Open the raw file */
-    assert(bs->file == NULL);
-    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
-                          "raw", bs, &child_file, false, &local_err);
-    if (ret < 0) {
+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
        error_propagate(errp, local_err);
        goto fail;
    }

    /* Open the test file */
-    assert(s->test_file == NULL);
-    ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
-                          "test", bs, &child_format, false, &local_err);
-    if (ret < 0) {
+    s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
+                                   "test", bs, &child_format, false,
+                                   &local_err);
+    if (local_err) {
+        ret = -EINVAL;
        error_propagate(errp, local_err);
-        s->test_file = NULL;
        goto fail;
    }

    ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
    qemu_opts_del(opts);
    return ret;
 }
@@ -151,7 +154,7 @@ static void blkverify_close(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    bdrv_unref(s->test_file);
+    bdrv_unref_child(bs, s->test_file);
    s->test_file = NULL;
 }

@@ -159,7 +162,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    return bdrv_getlength(s->test_file);
+    return bdrv_getlength(s->test_file->bs);
 }

 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
@@ -238,13 +241,13 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
                                            nb_sectors, cb, opaque);

    acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file, qiov->size);
+    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);

-    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+    bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+    bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
                   blkverify_aio_cb, acb);
    return &acb->common;
 }
@@ -257,9 +260,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
                                            nb_sectors, cb, opaque);

-    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
    return &acb->common;
 }
@@ -271,7 +274,7 @@ static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
    BDRVBlkverifyState *s = bs->opaque;

    /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file, cb, opaque);
+    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
 }

 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -279,13 +282,13 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
 {
    BDRVBlkverifyState *s = bs->opaque;

-    bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);

    if (perm) {
        return true;
    }

-    return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
 }

 /* Propagate AioContext changes to ->test_file */
@@ -293,7 +296,7 @@ static void blkverify_detach_aio_context(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    bdrv_detach_aio_context(s->test_file);
+    bdrv_detach_aio_context(s->test_file->bs);
 }

 static void blkverify_attach_aio_context(BlockDriverState *bs,
@@ -301,32 +304,38 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
 {
    BDRVBlkverifyState *s = bs->opaque;

-    bdrv_attach_aio_context(s->test_file, new_context);
+    bdrv_attach_aio_context(s->test_file->bs, new_context);
 }

 static void blkverify_refresh_filename(BlockDriverState *bs)
 {
    BDRVBlkverifyState *s = bs->opaque;

-    /* bs->file has already been refreshed */
-    bdrv_refresh_filename(s->test_file);
+    /* bs->file->bs has already been refreshed */
+    bdrv_refresh_filename(s->test_file->bs);

-    if (bs->file->full_open_options && s->test_file->full_open_options) {
+    if (bs->file->bs->full_open_options
+        && s->test_file->bs->full_open_options)
+    {
        QDict *opts = qdict_new();
        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));

-        QINCREF(bs->file->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
-        QINCREF(s->test_file->full_open_options);
-        qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
+        QINCREF(bs->file->bs->full_open_options);
+        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+        QINCREF(s->test_file->bs->full_open_options);
+        qdict_put_obj(opts, "test",
+                      QOBJECT(s->test_file->bs->full_open_options));

        bs->full_open_options = opts;
    }

-    if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
+    if (bs->file->bs->exact_filename[0]
+        && s->test_file->bs->exact_filename[0])
+    {
        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                 "blkverify:%s:%s",
-                 bs->file->exact_filename, s->test_file->exact_filename);
+                 bs->file->bs->exact_filename,
+                 s->test_file->bs->exact_filename);
    }
 }

--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -238,6 +238,23 @@ BlockDriverState *blk_bs(BlockBackend *blk)
    return blk->bs;
 }

+/*
+ * Changes the BlockDriverState attached to @blk
+ */
+void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    bdrv_ref(bs);
+
+    if (blk->bs) {
+        blk->bs->blk = NULL;
+        bdrv_unref(blk->bs);
+    }
+    assert(bs->blk == NULL);
+
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
 /*
 * Return @blk's DriveInfo if any, else null.
 */
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -103,7 +103,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,

    bs->read_only = 1; // no write support yet

-    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+    ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
    if (ret < 0) {
        return ret;
    }
@@ -137,7 +137,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
+    ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
                     s->catalog_size * 4);
    if (ret < 0) {
        goto fail;
@@ -206,7 +206,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
        (s->extent_blocks + s->bitmap_blocks));

    /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+    ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
                     &bitmap_entry, 1);
    if (ret < 0) {
        return ret;
@@ -229,7 +229,7 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,
        if (block_offset < 0) {
            return block_offset;
        } else if (block_offset > 0) {
-            ret = bdrv_pread(bs->file, block_offset, buf, 512);
+            ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
            if (ret < 0) {
                return ret;
            }
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,7 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    bs->read_only = 1;

    /* read header */
-    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+    ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
    if (ret < 0) {
        return ret;
    }
@@ -92,7 +92,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -EINVAL;
    }

-    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+    ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
    if (ret < 0) {
        return ret;
    }
@@ -123,7 +123,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
+    ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
    if (ret < 0) {
        goto fail;
    }
@@ -203,8 +203,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
        int ret;
        uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];

-        ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
-                         bytes);
+        ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+                         s->compressed_block, bytes);
        if (ret != bytes) {
            return -1;
        }
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -85,7 +85,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
    uint64_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file, offset, &buffer, 8);
+    ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
    if (ret < 0) {
        return ret;
    }
@@ -99,7 +99,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
    uint32_t buffer;
    int ret;

-    ret = bdrv_pread(bs->file, offset, &buffer, 4);
+    ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
    if (ret < 0) {
        return ret;
    }
@@ -354,7 +354,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
        offset += 4;

        buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file, offset, buffer, count);
+        ret = bdrv_pread(bs->file->bs, offset, buffer, count);
        if (ret < 0) {
            goto fail;
        }
@@ -391,7 +391,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,

    buffer = g_malloc(info_length + 1);
    buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+    ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
    if (ret != info_length) {
        ret = -EINVAL;
        goto fail;
@@ -446,7 +446,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    ds.max_sectors_per_chunk = 1;

    /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file, errp);
+    offset = dmg_find_koly_offset(bs->file->bs, errp);
    if (offset < 0) {
        ret = offset;
        goto fail;
@@ -514,9 +514,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* initialize zlib engine */
-    s->compressed_chunk = qemu_try_blockalign(bs->file,
+    s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
                                              ds.max_compressed_size + 1);
-    s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+    s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
                                                512 * ds.max_sectors_per_chunk);
    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
        ret = -ENOMEM;
@@ -592,7 +592,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
        case 0x80000005: { /* zlib compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                             s->compressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -616,7 +616,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
        case 0x80000006: /* bzip2 compressed */
            /* we need to buffer, because only the chunk as whole can be
             * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                             s->compressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
@@ -641,7 +641,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
            break;
 #endif /* CONFIG_BZIP2 */
        case 1: /* copy */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                             s->uncompressed_chunk, s->lengths[chunk]);
            if (ret != s->lengths[chunk]) {
                return -1;
--- a/block/io.c
+++ b/block/io.c
@@ -156,38 +156,38 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)

    /* Take some limits from the children as a default */
    if (bs->file) {
-        bdrv_refresh_limits(bs->file, &local_err);
+        bdrv_refresh_limits(bs->file->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
-        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
-        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
-        bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment;
-        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+        bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
+        bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
+        bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
+        bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
    } else {
        bs->bl.min_mem_alignment = 512;
        bs->bl.opt_mem_alignment = getpagesize();
    }

-    if (bs->backing_hd) {
-        bdrv_refresh_limits(bs->backing_hd, &local_err);
+    if (bs->backing) {
+        bdrv_refresh_limits(bs->backing->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
-                bs->backing_hd->bl.opt_transfer_length);
+                bs->backing->bs->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
-                         bs->backing_hd->bl.max_transfer_length);
+                         bs->backing->bs->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
-                bs->backing_hd->bl.opt_mem_alignment);
+                bs->backing->bs->bl.opt_mem_alignment);
        bs->bl.min_mem_alignment =
            MAX(bs->bl.min_mem_alignment,
-                bs->backing_hd->bl.min_mem_alignment);
+                bs->backing->bs->bl.min_mem_alignment);
    }

    /* Then let the driver override it */
@@ -213,7 +213,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
 }

 /* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
+bool bdrv_requests_pending(BlockDriverState *bs)
 {
    if (!QLIST_EMPTY(&bs->tracked_requests)) {
        return true;
@@ -224,10 +224,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
        return true;
    }
-    if (bs->file && bdrv_requests_pending(bs->file)) {
+    if (bs->file && bdrv_requests_pending(bs->file->bs)) {
        return true;
    }
-    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
+    if (bs->backing && bdrv_requests_pending(bs->backing->bs)) {
        return true;
    }
    return false;
@@ -932,7 +932,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
        return ret;
    }

-    if (bs->copy_on_read) {
+    /* Don't do copy-on-read if we read data before write operation */
+    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_COPY_ON_READ)) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

@@ -1001,6 +1002,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
 }

+int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_no_copy_on_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+                            BDRV_REQ_NO_COPY_ON_READ);
+}
+
 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
@@ -1127,13 +1137,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
-    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);

    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
@@ -1182,13 +1192,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        /* RMW the unaligned part before head. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
@@ -1220,13 +1230,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        /* RMW the unaligned part after tail. */
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        memset(buf, 0, bytes);
        ret = bdrv_aligned_pwritev(bs, req, offset, align,
@@ -1297,13 +1307,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
@@ -1331,13 +1341,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
@@ -1486,7 +1496,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

@@ -1495,8 +1505,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    } else {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
-        } else if (bs->backing_hd) {
-            BlockDriverState *bs2 = bs->backing_hd;
+        } else if (bs->backing) {
+            BlockDriverState *bs2 = bs->backing->bs;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
@@ -1509,7 +1519,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

-        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+        ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
@@ -1541,7 +1551,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
    int64_t ret = 0;

    assert(bs != base);
-    for (p = bs; p != base; p = p->backing_hd) {
+    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
        if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
            break;
@@ -1604,7 +1614,7 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
                              int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
-    return bdrv_get_block_status_above(bs, bs->backing_hd,
+    return bdrv_get_block_status_above(bs, backing_bs(bs),
                                       sector_num, nb_sectors, pnum);
 }

@@ -1662,7 +1672,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
            n = pnum_inter;
        }

-        intermediate = intermediate->backing_hd;
+        intermediate = backing_bs(intermediate);
    }

    *pnum = n;
@@ -1713,7 +1723,7 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
    } else if (drv->bdrv_save_vmstate) {
        return drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (bs->file) {
-        return bdrv_writev_vmstate(bs->file, qiov, pos);
+        return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
    }

    return -ENOTSUP;
@@ -1728,7 +1738,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
-        return bdrv_load_vmstate(bs->file, buf, pos, size);
+        return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
    return -ENOTSUP;
 }

@@ -2208,7 +2218,7 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
 {
    BlockAIOCB *acb;

-    acb = g_slice_alloc(aiocb_info->aiocb_size);
+    acb = g_malloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->bs = bs;
    acb->cb = cb;
@@ -2228,7 +2238,7 @@ void qemu_aio_unref(void *p)
    BlockAIOCB *acb = p;
    assert(acb->refcnt > 0);
    if (--acb->refcnt == 0) {
-        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+        g_free(acb);
    }
 }

@@ -2356,7 +2366,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     * in the case of cache=unsafe, so there are no useless flushes.
     */
 flush_parent:
-    return bdrv_co_flush(bs->file);
+    return bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 }

 int bdrv_flush(BlockDriverState *bs)
@@ -2584,7 +2594,7 @@ void bdrv_io_plug(BlockDriverState *bs)
    if (drv && drv->bdrv_io_plug) {
        drv->bdrv_io_plug(bs);
    } else if (bs->file) {
-        bdrv_io_plug(bs->file);
+        bdrv_io_plug(bs->file->bs);
    }
 }

@@ -2594,7 +2604,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
    if (drv && drv->bdrv_io_unplug) {
        drv->bdrv_io_unplug(bs);
    } else if (bs->file) {
-        bdrv_io_unplug(bs->file);
+        bdrv_io_unplug(bs->file->bs);
    }
 }

@@ -2604,7 +2614,7 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
    if (drv && drv->bdrv_flush_io_queue) {
        drv->bdrv_flush_io_queue(bs);
    } else if (bs->file) {
-        bdrv_flush_io_queue(bs->file);
+        bdrv_flush_io_queue(bs->file->bs);
    }
    bdrv_start_throttled_reqs(bs);
 }
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -113,7 +113,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    }

    qemu_iovec_destroy(&op->qiov);
-    g_slice_free(MirrorOp, op);
+    g_free(op);

    if (s->waiting_for_io) {
        qemu_coroutine_enter(s->common.co, NULL);
@@ -264,7 +264,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback.  */
-    op = g_slice_new(MirrorOp);
+    op = g_new(MirrorOp, 1);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;
@@ -353,6 +353,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    MirrorExitData *data = opaque;
    AioContext *replace_aio_context = NULL;
+    BlockDriverState *src = s->common.bs;
+
+    /* Make sure that the source BDS doesn't go away before we called
+     * block_job_completed(). */
+    bdrv_ref(src);

    if (s->to_replace) {
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -367,14 +372,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
        }
-        bdrv_swap(s->target, to_replace);
-        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-            /* drop the bs loop chain formed by the swap: break the loop then
-             * trigger the unref from the top one */
-            BlockDriverState *p = s->base->backing_hd;
-            bdrv_set_backing_hd(s->base, NULL);
-            bdrv_unref(p);
-        }
+        bdrv_replace_in_backing_chain(to_replace, s->target);
    }
    if (s->to_replace) {
        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -388,6 +386,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
    bdrv_unref(s->target);
    block_job_completed(&s->common, data->ret);
    g_free(data);
+    bdrv_unref(src);
 }

 static void coroutine_fn mirror_run(void *opaque)
@@ -431,7 +430,7 @@ static void coroutine_fn mirror_run(void *opaque)
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
-    if (backing_filename[0] && !s->target->backing_hd) {
+    if (backing_filename[0] && !s->target->backing) {
        ret = bdrv_get_info(s->target, &bdi);
        if (ret < 0) {
            goto immediate_exit;
@@ -455,6 +454,8 @@ static void coroutine_fn mirror_run(void *opaque)
    if (!s->is_none_mode) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base = s->base;
+        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
+
        for (sector_num = 0; sector_num < end; ) {
            /* Just to make sure we are not exceeding int limit. */
            int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
@@ -477,7 +478,7 @@ static void coroutine_fn mirror_run(void *opaque)
            }

            assert(n > 0);
-            if (ret == 1) {
+            if (ret == 1 || mark_all_dirty) {
                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
            }
            sector_num += n;
@@ -635,8 +636,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
        return;
    }
    if (!s->synced) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
-                   bdrv_get_device_name(job->bs));
+        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
        return;
    }

@@ -764,7 +764,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
        return;
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
-    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
    mirror_start_job(bs, target, replaces,
                     speed, granularity, buf_size,
                     on_source_error, on_target_error, unmap, cb, opaque, errp,
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -43,7 +43,6 @@

 typedef struct BDRVNBDState {
    NbdClientSession client;
-    QemuOpts *socket_opts;
 } BDRVNBDState;

 static int nbd_parse_uri(const char *filename, QDict *options)
@@ -190,10 +189,10 @@ out:
    g_free(file);
 }

-static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
-                       Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
+                                 Error **errp)
 {
-    Error *local_err = NULL;
+    SocketAddress *saddr;

    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
        if (qdict_haskey(options, "path")) {
@@ -201,28 +200,37 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
        } else {
            error_setg(errp, "one of path and host must be specified.");
        }
-        return;
+        return NULL;
    }

-    s->client.is_unix = qdict_haskey(options, "path");
-    s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
-                                      &error_abort);
+    saddr = g_new0(SocketAddress, 1);

-    qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
+    if (qdict_haskey(options, "path")) {
+        saddr->kind = SOCKET_ADDRESS_KIND_UNIX;
+        saddr->q_unix = g_new0(UnixSocketAddress, 1);
+        saddr->q_unix->path = g_strdup(qdict_get_str(options, "path"));
+        qdict_del(options, "path");
+    } else {
+        saddr->kind = SOCKET_ADDRESS_KIND_INET;
+        saddr->inet = g_new0(InetSocketAddress, 1);
+        saddr->inet->host = g_strdup(qdict_get_str(options, "host"));
+        if (!qdict_get_try_str(options, "port")) {
+            saddr->inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+        } else {
+            saddr->inet->port = g_strdup(qdict_get_str(options, "port"));
+        }
+        qdict_del(options, "host");
+        qdict_del(options, "port");
    }

-    if (!qemu_opt_get(s->socket_opts, "port")) {
-        qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT,
-                            &error_abort);
-    }
+    s->client.is_unix = saddr->kind == SOCKET_ADDRESS_KIND_UNIX;

    *export = g_strdup(qdict_get_try_str(options, "export"));
    if (*export) {
        qdict_del(options, "export");
    }
+
+    return saddr;
 }

 NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
@@ -231,26 +239,24 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
    return &s->client;
 }

-static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
+static int nbd_establish_connection(BlockDriverState *bs,
+                                    SocketAddress *saddr,
+                                    Error **errp)
 {
    BDRVNBDState *s = bs->opaque;
    int sock;

-    if (s->client.is_unix) {
-        sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
-    } else {
-        sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
-        if (sock >= 0) {
-            socket_set_nodelay(sock);
-        }
-    }
+    sock = socket_connect(saddr, errp, NULL, NULL);

-    /* Failed to establish connection */
    if (sock < 0) {
        logout("Failed to establish connection to NBD server\n");
        return -EIO;
    }

+    if (!s->client.is_unix) {
+        socket_set_nodelay(sock);
+    }
+
    return sock;
 }

@@ -260,19 +266,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVNBDState *s = bs->opaque;
    char *export = NULL;
    int result, sock;
-    Error *local_err = NULL;
+    SocketAddress *saddr;

    /* Pop the config into our state object. Exit if invalid. */
-    nbd_config(s, options, &export, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    saddr = nbd_config(s, options, &export, errp);
+    if (!saddr) {
        return -EINVAL;
    }

    /* establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
-    sock = nbd_establish_connection(bs, errp);
+    sock = nbd_establish_connection(bs, saddr, errp);
+    qapi_free_SocketAddress(saddr);
    if (sock < 0) {
        g_free(export);
        return sock;
@@ -315,9 +321,6 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,

 static void nbd_close(BlockDriverState *bs)
 {
-    BDRVNBDState *s = bs->opaque;
-
-    qemu_opts_del(s->socket_opts);
    nbd_client_close(bs);
 }

--- a/block/nfs.c
+++ b/block/nfs.c
@@ -43,6 +43,7 @@ typedef struct NFSClient {
    int events;
    bool has_zero_init;
    AioContext *aio_context;
+    blkcnt_t st_blocks;
 } NFSClient;

 typedef struct NFSRPC {
@@ -374,6 +375,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
    }

    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+    client->st_blocks = st.st_blocks;
    client->has_zero_init = S_ISREG(st.st_mode);
    goto out;
 fail:
@@ -464,6 +466,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    NFSRPC task = {0};
    struct stat st;

+    if (bdrv_is_read_only(bs) &&
+        !(bs->open_flags & BDRV_O_NOCACHE)) {
+        return client->st_blocks * 512;
+    }
+
    task.st = &st;
    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
@@ -475,7 +482,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
        aio_poll(client->aio_context, true);
    }

-    return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
+    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

 static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
@@ -484,6 +491,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
    return nfs_ftruncate(client->context, client->fh, offset);
 }

+/* Note that this will not re-establish a connection with the NFS server
+ * - it is effectively a NOP.  */
+static int nfs_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    NFSClient *client = state->bs->opaque;
+    struct stat st;
+    int ret = 0;
+
+    if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
+        error_setg(errp, "Cannot open a read-only mount as read-write");
+        return -EACCES;
+    }
+
+    /* Update cache for read-only reopens */
+    if (!(state->flags & BDRV_O_RDWR)) {
+        ret = nfs_fstat(client->context, client->fh, &st);
+        if (ret < 0) {
+            error_setg(errp, "Failed to fstat file: %s",
+                       nfs_get_error(client->context));
+            return ret;
+        }
+        client->st_blocks = st.st_blocks;
+    }
+
+    return 0;
+}
+
 static BlockDriver bdrv_nfs = {
    .format_name                    = "nfs",
    .protocol_name                  = "nfs",
@@ -499,6 +534,7 @@ static BlockDriver bdrv_nfs = {
    .bdrv_file_open                 = nfs_file_open,
    .bdrv_close                     = nfs_file_close,
    .bdrv_create                    = nfs_file_create,
+    .bdrv_reopen_prepare            = nfs_reopen_prepare,

    .bdrv_co_readv                  = nfs_co_readv,
    .bdrv_co_writev                 = nfs_co_writev,
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -202,13 +202,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,

    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
    space = to_allocate * s->tracks;
-    if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
+    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
        int ret;
        space += s->prealloc_size;
        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
+            ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
        } else {
-            ret = bdrv_truncate(bs->file,
+            ret = bdrv_truncate(bs->file->bs,
                                (s->data_end + space) << BDRV_SECTOR_BITS);
        }
        if (ret < 0) {
@@ -244,7 +244,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
        if (off + to_write > s->header_size) {
            to_write = s->header_size - off;
        }
-        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
+        ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+                          to_write);
        if (ret < 0) {
            qemu_co_mutex_unlock(&s->lock);
            return ret;
@@ -303,7 +304,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);

-        ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+        ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
        if (ret < 0) {
            break;
        }
@@ -343,7 +344,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);

-            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+            ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
            if (ret < 0) {
                break;
            }
@@ -369,7 +370,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
    bool flush_bat = false;
    int cluster_size = s->tracks << BDRV_SECTOR_BITS;

-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
    if (size < 0) {
        res->check_errors++;
        return size;
@@ -424,7 +425,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
    }

    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+        ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
        if (ret < 0) {
            res->check_errors++;
            return ret;
@@ -440,7 +441,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file, res->image_end_offset);
+            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
            if (ret < 0) {
                res->check_errors++;
                return ret;
@@ -546,12 +547,13 @@ static int parallels_probe(const uint8_t *buf, int buf_size,
 static int parallels_update_header(BlockDriverState *bs)
 {
    BDRVParallelsState *s = bs->opaque;
-    unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
+    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
+                        sizeof(ParallelsHeader));

    if (size > s->header_size) {
        size = s->header_size;
    }
-    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+    return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
 }

 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -564,7 +566,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    char *buf;

-    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
+    ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
    }
@@ -603,8 +605,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    }

    size = bat_entry_off(s->bat_size);
-    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
-    s->header = qemu_try_blockalign(bs->file, s->header_size);
+    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
+    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
    if (s->header == NULL) {
        ret = -ENOMEM;
        goto fail;
@@ -619,7 +621,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
        s->header_size = size;
    }

-    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
    if (ret < 0) {
        goto fail;
    }
@@ -663,8 +665,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    if (local_err != NULL) {
        goto fail_options;
    }
-    if (!bdrv_has_zero_init(bs->file) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
+    if (!bdrv_has_zero_init(bs->file->bs) ||
+            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -707,7 +709,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
    }

    g_free(s->bat_dirty_bmap);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -110,8 +110,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
            qapi_free_BlockDeviceInfo(info);
            return NULL;
        }
-        if (bs0->drv && bs0->backing_hd) {
-            bs0 = bs0->backing_hd;
+        if (bs0->drv && bs0->backing) {
+            bs0 = bs0->backing->bs;
            (*p_image_info)->has_backing_image = true;
            p_image_info = &((*p_image_info)->backing_image);
        } else {
@@ -359,12 +359,12 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file, query_backing);
+        s->parent = bdrv_query_stats(bs->file->bs, query_backing);
    }

-    if (query_backing && bs->backing_hd) {
+    if (query_backing && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
+        s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
    }

    return s;
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -100,7 +100,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    QCowHeader header;

-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
@@ -193,7 +193,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
               s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -205,7 +205,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,

    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
    s->l2_cache =
-        qemu_try_blockalign(bs->file,
+        qemu_try_blockalign(bs->file->bs,
                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    if (s->l2_cache == NULL) {
        error_setg(errp, "Could not allocate L2 table cache");
@@ -224,7 +224,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -EINVAL;
            goto fail;
        }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
+        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
                   bs->backing_file, len);
        if (ret < 0) {
            goto fail;
@@ -369,13 +369,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        if (!allocate)
            return 0;
        /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file);
+        l2_offset = bdrv_getlength(bs->file->bs);
        /* round to cluster size */
        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file,
+        if (bdrv_pwrite_sync(bs->file->bs,
                s->l1_table_offset + l1_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
@@ -405,11 +405,12 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
                s->l2_size * sizeof(uint64_t)) < 0)
            return 0;
    } else {
-        if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+                       s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t))
            return 0;
    }
@@ -430,20 +431,21 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
               overwritten */
            if (decompress_cluster(bs, cluster_offset) < 0)
                return 0;
-            cluster_offset = bdrv_getlength(bs->file);
+            cluster_offset = bdrv_getlength(bs->file->bs);
            cluster_offset = (cluster_offset + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
-            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+                            s->cluster_size) !=
                s->cluster_size)
                return -1;
        } else {
-            cluster_offset = bdrv_getlength(bs->file);
+            cluster_offset = bdrv_getlength(bs->file->bs);
            if (allocate == 1) {
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
@@ -463,7 +465,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                                errno = EIO;
                                return -1;
                            }
-                            if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+                            if (bdrv_pwrite(bs->file->bs,
+                                            cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512)
                                return -1;
                        }
@@ -477,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
                &tmp, sizeof(tmp)) < 0)
            return 0;
    }
@@ -546,7 +549,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
    if (s->cluster_cache_offset != coffset) {
        csize = cluster_offset >> (63 - s->cluster_bits);
        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
        if (ret != csize)
            return -1;
        if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -594,13 +597,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
        }

        if (!cluster_offset) {
-            if (bs->backing_hd) {
+            if (bs->backing) {
                /* read from the base image */
                hd_iov.iov_base = (void *)buf;
                hd_iov.iov_len = n * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->backing_hd, sector_num,
+                ret = bdrv_co_readv(bs->backing->bs, sector_num,
                                    n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
@@ -625,7 +628,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
            hd_iov.iov_len = n * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file,
+            ret = bdrv_co_readv(bs->file->bs,
                                (cluster_offset >> 9) + index_in_cluster,
                                n, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
@@ -727,7 +730,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
-        ret = bdrv_co_writev(bs->file,
+        ret = bdrv_co_writev(bs->file->bs,
                             (cluster_offset >> 9) + index_in_cluster,
                             n, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
@@ -879,10 +882,10 @@ static int qcow_make_empty(BlockDriverState *bs)
    int ret;

    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

@@ -962,7 +965,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
        }

        cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
        }
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -127,7 +127,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file,
+    c->table_array = qemu_try_blockalign(bs->file->bs,
                                         (size_t) num_tables * s->cluster_size);

    if (!c->entries || !c->table_array) {
@@ -185,7 +185,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
@@ -216,7 +216,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

-    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
    if (ret < 0) {
        return ret;
@@ -244,7 +244,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
    }

    if (result == 0) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
        if (ret < 0) {
            result = ret;
        }
@@ -356,7 +356,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

-        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+        ret = bdrv_pread(bs->file->bs, offset,
+                         qcow2_cache_get_table_addr(bs, c, i),
                         s->cluster_size);
        if (ret < 0) {
            return ret;
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,7 +72,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 #endif

    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_try_blockalign(bs->file,
+    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                       align_offset(new_l1_size2, 512));
    if (new_l1_table == NULL) {
        return -ENOMEM;
@@ -105,7 +105,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+                           new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
    for(i = 0; i < s->l1_size; i++)
@@ -115,7 +116,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    cpu_to_be32w((uint32_t*)data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+                           data, sizeof(data));
    if (ret < 0) {
        goto fail;
    }
@@ -182,8 +184,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
-        buf, sizeof(buf));
+    ret = bdrv_pwrite_sync(bs->file->bs,
+                           s->l1_table_offset + 8 * l1_start_index,
+                           buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }
@@ -440,7 +443,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
+                         &qiov);
    if (ret < 0) {
        goto out;
    }
@@ -817,7 +821,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
-     * Also flush bs->file to get the right order for L2 and refcount update.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
@@ -1412,7 +1415,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
        sector_offset = coffset & 511;
        csize = nb_csectors * 512 - sector_offset;
        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+        ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+                        nb_csectors);
        if (ret < 0) {
            return ret;
        }
@@ -1469,7 +1473,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         */
        switch (qcow2_get_cluster_type(old_l2_entry)) {
            case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing_hd) {
+                if (full_discard || !bs->backing) {
                    continue;
                }
                break;
@@ -1645,7 +1649,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
    if (!is_active_l1) {
        /* inactive L2 tables require a buffer to be stored in when loading
         * them from disk */
-        l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
        if (l2_table == NULL) {
            return -ENOMEM;
        }
@@ -1679,8 +1683,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    (void **)&l2_table);
        } else {
            /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                    (void *)l2_table, s->cluster_sectors);
+            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                            (void *)l2_table, s->cluster_sectors);
        }
        if (ret < 0) {
            goto fail;
@@ -1703,7 +1707,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
            }

            if (!preallocated) {
-                if (!bs->backing_hd) {
+                if (!bs->backing) {
                    /* not backed; therefore we can simply deallocate the
                     * cluster */
                    l2_table[j] = 0;
@@ -1754,7 +1758,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                goto fail;
            }

-            ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+            ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                                    s->cluster_sectors, 0);
            if (ret < 0) {
                if (!preallocated) {
@@ -1787,8 +1791,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                    goto fail;
                }

-                ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                        (void *)l2_table, s->cluster_sectors);
+                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                                 (void *)l2_table, s->cluster_sectors);
                if (ret < 0) {
                    goto fail;
                }
@@ -1861,8 +1865,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,

        l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);

-        ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
-                BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+        ret = bdrv_read(bs->file->bs,
+                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+                        (void *)l1_table, l1_sectors);
        if (ret < 0) {
            goto fail;
        }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -101,7 +101,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
            goto fail;
        }
        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file, s->refcount_table_offset,
+        ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret < 0) {
            goto fail;
@@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    if (refcount_table_index < s->refcount_table_size) {
        uint64_t data64 = cpu_to_be64(new_block);
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file,
+        ret = bdrv_pwrite_sync(bs->file->bs,
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
            &data64, sizeof(data64));
        if (ret < 0) {
@@ -535,7 +535,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+    ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
        blocks_clusters * s->cluster_size);
    g_free(new_blocks);
    new_blocks = NULL;
@@ -549,7 +549,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+    ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
        table_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail_table;
@@ -564,8 +564,9 @@ static int alloc_refcount_block(BlockDriverState *bs,
    cpu_to_be64w((uint64_t*)data, table_offset);
    cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
-        data, sizeof(data));
+    ret = bdrv_pwrite_sync(bs->file->bs,
+                           offsetof(QCowHeader, refcount_table_offset),
+                           data, sizeof(data));
    if (ret < 0) {
        goto fail_table;
    }
@@ -613,7 +614,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)

        /* Discard is optional, ignore the return value */
        if (ret >= 0) {
-            bdrv_discard(bs->file,
+            bdrv_discard(bs->file->bs,
                         d->offset >> BDRV_SECTOR_BITS,
                         d->bytes >> BDRV_SECTOR_BITS);
        }
@@ -1068,7 +1069,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
        }
        l1_allocated = true;

-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            goto fail;
        }
@@ -1221,7 +1222,8 @@ fail:
            cpu_to_be64s(&l1_table[i]);
        }

-        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+                               l1_table, l1_size2);

        for (i = 0; i < l1_size; i++) {
            be64_to_cpus(&l1_table[i]);
@@ -1376,7 +1378,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = g_malloc(l2_size);

-    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
+    ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
        res->check_errors++;
@@ -1508,7 +1510,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
            res->check_errors++;
            goto fail;
        }
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
            res->check_errors++;
@@ -1606,7 +1608,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
            }
        }

-        ret = bdrv_pread(bs->file, l2_offset, l2_table,
+        ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
                         s->l2_size * sizeof(uint64_t));
        if (ret < 0) {
            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1658,7 +1660,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                goto fail;
            }

-            ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
+            ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+                              s->cluster_size);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
                        strerror(-ret));
@@ -1713,11 +1716,11 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
                if (ret < 0) {
                    goto resize_fail;
                }
-                size = bdrv_getlength(bs->file);
+                size = bdrv_getlength(bs->file->bs);
                if (size < 0) {
                    ret = size;
                    goto resize_fail;
@@ -2091,7 +2094,7 @@ write_refblocks:
        on_disk_refblock = (void *)((char *) *refcount_table +
                                    refblock_index * s->cluster_size);

-        ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
+        ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
                         on_disk_refblock, s->cluster_sectors);
        if (ret < 0) {
            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2140,7 +2143,7 @@ write_refblocks:
    }

    assert(reftable_size < INT_MAX / sizeof(uint64_t));
-    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+    ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
                      reftable_size * sizeof(uint64_t));
    if (ret < 0) {
        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2152,8 +2155,8 @@ write_refblocks:
                 reftable_offset);
    cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
-                                              refcount_table_offset),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
+                                                  refcount_table_offset),
                           &reftable_offset_and_clusters,
                           sizeof(reftable_offset_and_clusters));
    if (ret < 0) {
@@ -2191,7 +2194,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
    bool rebuild = false;
    int ret;

-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
    if (size < 0) {
        res->check_errors++;
        return size;
@@ -2400,7 +2403,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                return -ENOMEM;
            }

-            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+            ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
            if (ret < 0) {
                g_free(l1);
                return ret;
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -64,7 +64,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
    for(i = 0; i < s->nb_snapshots; i++) {
        /* Read statically sized part of the snapshot header */
        offset = align_offset(offset, 8);
-        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
@@ -83,7 +83,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
        name_size = be16_to_cpu(h.name_size);

        /* Read extra data */
-        ret = bdrv_pread(bs->file, offset, &extra,
+        ret = bdrv_pread(bs->file->bs, offset, &extra,
                         MIN(sizeof(extra), extra_data_size));
        if (ret < 0) {
            goto fail;
@@ -102,7 +102,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot ID */
        sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
@@ -111,7 +111,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)

        /* Read snapshot name */
        sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+        ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
@@ -214,25 +214,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
        h.name_size = cpu_to_be16(name_size);
        offset = align_offset(offset, 8);

-        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

-        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+        ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

-        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

-        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+        ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
@@ -254,7 +254,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
@@ -396,7 +396,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
        goto fail;
    }

-    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+    ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
                      s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
@@ -509,7 +509,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto fail;
    }

-    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+                     sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
        goto fail;
    }
@@ -526,7 +527,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
        goto fail;
    }

-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
                           cur_l1_bytes);
    if (ret < 0) {
        goto fail;
@@ -706,13 +707,14 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
        return -EFBIG;
    }
    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = qemu_try_blockalign(bs->file,
+    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                       align_offset(new_l1_bytes, 512));
    if (new_l1_table == NULL) {
        return -ENOMEM;
    }

-    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+                     new_l1_table, new_l1_bytes);
    if (ret < 0) {
        error_setg(errp, "Failed to read l1 table for snapshot");
        qemu_vfree(new_l1_table);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -104,7 +104,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
        printf("attempting to read extended header in offset %lu\n", offset);
 #endif

-        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
+        ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                             "pread fail from offset %" PRIu64, offset);
@@ -132,7 +132,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                           sizeof(bs->backing_format));
                return 2;
            }
-            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
+            ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                 "Could not read format name");
@@ -148,7 +148,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
            if (p_feature_table != NULL) {
                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+                ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                     "Could not read table");
@@ -169,7 +169,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                uext->len = ext.len;
                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);

-                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+                ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                     "Could not read data");
@@ -260,12 +260,12 @@ int qcow2_mark_dirty(BlockDriverState *bs)
    }

    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+    ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
                      &val, sizeof(val));
    if (ret < 0) {
        return ret;
    }
-    ret = bdrv_flush(bs->file);
+    ret = bdrv_flush(bs->file->bs);
    if (ret < 0) {
        return ret;
    }
@@ -828,7 +828,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    uint64_t ext_end;
    uint64_t l1_vm_state_index;

-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
@@ -903,7 +903,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+        ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -1056,14 +1056,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,


    if (s->l1_size > 0) {
-        s->l1_table = qemu_try_blockalign(bs->file,
+        s->l1_table = qemu_try_blockalign(bs->file->bs,
            align_offset(s->l1_size * sizeof(uint64_t), 512));
        if (s->l1_table == NULL) {
            error_setg(errp, "Could not allocate L1 table");
            ret = -ENOMEM;
            goto fail;
        }
-        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+        ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1082,7 +1082,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,

    s->cluster_cache = g_malloc(s->cluster_size);
    /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+    s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
                                                    * s->cluster_size + 512);
    if (s->cluster_data == NULL) {
        error_setg(errp, "Could not allocate temporary cluster buffer");
@@ -1119,7 +1119,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -EINVAL;
            goto fail;
        }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
+        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1369,9 +1369,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:

-            if (bs->backing_hd) {
+            if (bs->backing) {
                /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+                n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
                    sector_num, cur_nr_sectors);
                if (n1 > 0) {
                    QEMUIOVector local_qiov;
@@ -1382,7 +1382,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,

                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                    qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing_hd, sector_num,
+                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
                                        n1, &local_qiov);
                    qemu_co_mutex_lock(&s->lock);

@@ -1429,8 +1429,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 */
                if (!cluster_data) {
                    cluster_data =
-                        qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
-                                                      * s->cluster_size);
+                        qemu_try_blockalign(bs->file->bs,
+                                            QCOW_MAX_CRYPT_CLUSTERS
+                                            * s->cluster_size);
                    if (cluster_data == NULL) {
                        ret = -ENOMEM;
                        goto fail;
@@ -1446,7 +1447,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file,
+            ret = bdrv_co_readv(bs->file->bs,
                                (cluster_offset >> 9) + index_in_cluster,
                                cur_nr_sectors, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
@@ -1543,7 +1544,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
            Error *err = NULL;
            assert(s->cipher);
            if (!cluster_data) {
-                cluster_data = qemu_try_blockalign(bs->file,
+                cluster_data = qemu_try_blockalign(bs->file->bs,
                                                   QCOW_MAX_CRYPT_CLUSTERS
                                                   * s->cluster_size);
                if (cluster_data == NULL) {
@@ -1580,7 +1581,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        trace_qcow2_writev_data(qemu_coroutine_self(),
                                (cluster_offset >> 9) + index_in_cluster);
-        ret = bdrv_co_writev(bs->file,
+        ret = bdrv_co_writev(bs->file->bs,
                             (cluster_offset >> 9) + index_in_cluster,
                             cur_nr_sectors, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
@@ -1703,7 +1704,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)

    qcow2_close(bs);

-    bdrv_invalidate_cache(bs->file, &local_err);
+    bdrv_invalidate_cache(bs->file->bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
@@ -1911,7 +1912,7 @@ int qcow2_update_header(BlockDriverState *bs)
    }

    /* Write the new header */
-    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+    ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
    if (ret < 0) {
        goto fail;
    }
@@ -1991,7 +1992,8 @@ static int preallocate(BlockDriverState *bs)
    if (host_offset != 0) {
        uint8_t buf[BDRV_SECTOR_SIZE];
        memset(buf, 0, BDRV_SECTOR_SIZE);
-        ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+        ret = bdrv_write(bs->file->bs,
+                         (host_offset >> BDRV_SECTOR_BITS) + num - 1,
                         buf, 1);
        if (ret < 0) {
            return ret;
@@ -2403,7 +2405,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)

    /* write updated header.size */
    offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        return ret;
@@ -2427,8 +2429,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
    if (nb_sectors == 0) {
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
-        cluster_offset = bdrv_getlength(bs->file);
-        return bdrv_truncate(bs->file, cluster_offset);
+        cluster_offset = bdrv_getlength(bs->file->bs);
+        return bdrv_truncate(bs->file->bs, cluster_offset);
    }

    if (nb_sectors != s->cluster_sectors) {
@@ -2495,7 +2497,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
        }

        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
        }
@@ -2544,7 +2546,7 @@ static int make_completely_empty(BlockDriverState *bs)
    /* After this call, neither the in-memory nor the on-disk refcount
     * information accurately describe the actual references */

-    ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+    ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
                            l1_clusters * s->cluster_sectors, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -2558,7 +2560,7 @@ static int make_completely_empty(BlockDriverState *bs)
     * overwrite parts of the existing refcount and L1 table, which is not
     * an issue because the dirty flag is set, complete data loss is in fact
     * desired and partial data loss is consequently fine as well */
-    ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+    ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
                            (2 + l1_clusters) * s->cluster_size /
                            BDRV_SECTOR_SIZE, 0);
    /* This call (even if it failed overall) may have overwritten on-disk
@@ -2578,7 +2580,7 @@ static int make_completely_empty(BlockDriverState *bs)
    cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
    cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
    cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -2609,7 +2611,7 @@ static int make_completely_empty(BlockDriverState *bs)

    /* Enter the first refblock into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
@@ -2634,7 +2636,7 @@ static int make_completely_empty(BlockDriverState *bs)
        goto fail;
    }

-    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+    ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
    if (ret < 0) {
        goto fail;
    }
@@ -2769,7 +2771,7 @@ static void dump_refcounts(BlockDriverState *bs)
    int64_t nb_clusters, k, k1, size;
    int refcount;

-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
    nb_clusters = size_to_clusters(s, size);
    for(k = 0; k < nb_clusters;) {
        k1 = k;
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -295,8 +295,6 @@ typedef struct BDRVQcow2State {
    char *image_backing_format;
 } BDRVQcow2State;

-struct QCowAIOCB;
-
 typedef struct Qcow2COWRegion {
    /**
     * Offset of the COW region in bytes from the start of the first cluster
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -63,7 +63,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,

    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+    bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
                   qiov->size / BDRV_SECTOR_SIZE,
                   qed_read_table_cb, read_table_cb);
 }
@@ -152,7 +152,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
                    &write_table_cb->qiov,
                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_write_table_cb, write_table_cb);
--- a/block/qed.c
+++ b/block/qed.c
@@ -82,7 +82,7 @@ int qed_write_header_sync(BDRVQEDState *s)
    int ret;

    qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+    ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
    if (ret != sizeof(le)) {
        return ret;
    }
@@ -119,7 +119,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
    /* Update header */
    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);

-    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+    bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
                    write_header_cb->nsectors, qed_write_header_cb,
                    write_header_cb);
 }
@@ -152,7 +152,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
    write_header_cb->iov.iov_len = len;
    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);

-    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+    bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
                   qed_write_header_read_cb, write_header_cb);
 }

@@ -354,12 +354,6 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s)
    timer_del(s->need_check_timer);
 }

-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-    s->bs = bs;
-}
-
 static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
 {
    BDRVQEDState *s = bs->opaque;
@@ -392,7 +386,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    s->bs = bs;
    QSIMPLEQ_INIT(&s->allocating_write_reqs);

-    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+    ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
    if (ret < 0) {
        return ret;
    }
@@ -416,7 +410,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Round down file size to the last cluster */
-    file_size = bdrv_getlength(bs->file);
+    file_size = bdrv_getlength(bs->file->bs);
    if (file_size < 0) {
        return file_size;
    }
@@ -452,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
            return -EINVAL;
        }

-        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+        ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
                              s->header.backing_filename_size, bs->backing_file,
                              sizeof(bs->backing_file));
        if (ret < 0) {
@@ -471,7 +465,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
     * feature is no longer valid.
     */
    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
-        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+        !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) {
        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;

        ret = qed_write_header_sync(s);
@@ -480,7 +474,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
        }

        /* From here on only known autoclear feature bits are valid */
-        bdrv_flush(bs->file);
+        bdrv_flush(bs->file->bs);
    }

    s->l1_table = qed_alloc_table(s);
@@ -498,7 +492,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         * potentially inconsistent images to be opened read-only.  This can
         * aid data recovery from an otherwise inconsistent image.
         */
-        if (!bdrv_is_read_only(bs->file) &&
+        if (!bdrv_is_read_only(bs->file->bs) &&
            !(flags & BDRV_O_INCOMING)) {
            BdrvCheckResult result = {0};

@@ -541,7 +535,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
    bdrv_qed_detach_aio_context(bs);

    /* Ensure writes reach stable storage */
-    bdrv_flush(bs->file);
+    bdrv_flush(bs->file->bs);

    /* Clean shutdown, no check required on next open */
    if (s->header.features & QED_F_NEED_CHECK) {
@@ -772,8 +766,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    /* If there is a backing file, get its length.  Treat the absence of a
     * backing file like a zero length backing file.
     */
-    if (s->bs->backing_hd) {
-        int64_t l = bdrv_getlength(s->bs->backing_hd);
+    if (s->bs->backing) {
+        int64_t l = bdrv_getlength(s->bs->backing->bs);
        if (l < 0) {
            cb(opaque, l);
            return;
@@ -802,7 +796,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
    qemu_iovec_concat(*backing_qiov, qiov, 0, size);

    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }

@@ -839,7 +833,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
                    &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                    qed_copy_from_backing_file_cb, copy_cb);
 }
@@ -1055,7 +1049,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);

-    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+    if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
        qed_aio_complete(acb, -EIO);
    }
 }
@@ -1081,7 +1075,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
        next_fn = qed_aio_next_io;
    } else {
-        if (s->bs->backing_hd) {
+        if (s->bs->backing) {
            next_fn = qed_aio_write_flush_before_l2_update;
        } else {
            next_fn = qed_aio_write_l2_update_cb;
@@ -1089,7 +1083,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    }

    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                    next_fn, acb);
 }
@@ -1139,7 +1133,7 @@ static void qed_aio_write_prefill(void *opaque, int ret)
 static bool qed_should_set_need_check(BDRVQEDState *s)
 {
    /* The flush before L2 update path ensures consistency */
-    if (s->bs->backing_hd) {
+    if (s->bs->backing) {
        return false;
    }

@@ -1321,7 +1315,7 @@ static void qed_aio_read_data(void *opaque, int ret,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                   qed_aio_next_io, acb);
    return;
@@ -1443,7 +1437,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
    struct iovec iov;

    /* Refuse if there are untouched backing file sectors */
-    if (bs->backing_hd) {
+    if (bs->backing) {
        if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
            return -ENOTSUP;
        }
@@ -1580,7 +1574,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
    }

    /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+    ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
    g_free(buffer);
    if (ret == 0) {
        memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1596,7 +1590,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)

    bdrv_qed_close(bs);

-    bdrv_invalidate_cache(bs->file, &local_err);
+    bdrv_invalidate_cache(bs->file->bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
@@ -1664,7 +1658,6 @@ static BlockDriver bdrv_qed = {
    .supports_backing         = true,

    .bdrv_probe               = bdrv_qed_probe,
-    .bdrv_rebind              = bdrv_qed_rebind,
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -64,7 +64,7 @@ typedef struct QuorumVotes {

 /* the following structure holds the state of one quorum instance */
 typedef struct BDRVQuorumState {
-    BlockDriverState **bs; /* children BlockDriverStates */
+    BdrvChild **children;  /* children BlockDriverStates */
    int num_children;      /* children count */
    int threshold;         /* if less than threshold children reads gave the
                            * same result a quorum error occurs.
@@ -336,7 +336,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+            quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
        }
    }
 }
@@ -369,8 +369,9 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
            continue;
        }
        QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
-                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+            bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+                            acb);
        }
    }

@@ -639,13 +640,13 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
    int i;

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+        acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
        qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
    }

    for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
+        bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
    }

@@ -656,12 +657,12 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
 {
    BDRVQuorumState *s = acb->common.bs->opaque;

-    acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
-                                                     acb->qiov->size);
+    acb->qcrs[acb->child_iter].buf =
+        qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
                     acb->qcrs[acb->child_iter].buf);
-    bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
+    bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);

@@ -702,8 +703,8 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
    int i;

    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
-                                             nb_sectors, &quorum_aio_cb,
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+                                             qiov, nb_sectors, &quorum_aio_cb,
                                             &acb->qcrs[i]);
    }

@@ -717,12 +718,12 @@ static int64_t quorum_getlength(BlockDriverState *bs)
    int i;

    /* check that all file have the same length */
-    result = bdrv_getlength(s->bs[0]);
+    result = bdrv_getlength(s->children[0]->bs);
    if (result < 0) {
        return result;
    }
    for (i = 1; i < s->num_children; i++) {
-        int64_t value = bdrv_getlength(s->bs[i]);
+        int64_t value = bdrv_getlength(s->children[i]->bs);
        if (value < 0) {
            return value;
        }
@@ -741,7 +742,7 @@ static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bdrv_invalidate_cache(s->bs[i], &local_err);
+        bdrv_invalidate_cache(s->children[i]->bs, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
@@ -762,7 +763,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
    error_votes.compare = quorum_64bits_compare;

    for (i = 0; i < s->num_children; i++) {
-        result = bdrv_co_flush(s->bs[i]);
+        result = bdrv_co_flush(s->children[i]->bs);
        result_value.l = result;
        quorum_count_vote(&error_votes, &result_value, i);
    }
@@ -782,7 +783,7 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+        bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
                                                     candidate);
        if (perm) {
            return true;
@@ -922,8 +923,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
        }
    }

-    /* allocate the children BlockDriverState array */
-    s->bs = g_new0(BlockDriverState *, s->num_children);
+    /* allocate the children array */
+    s->children = g_new0(BdrvChild *, s->num_children);
    opened = g_new0(bool, s->num_children);

    for (i = 0; i < s->num_children; i++) {
@@ -931,9 +932,10 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
        ret = snprintf(indexstr, 32, "children.%d", i);
        assert(ret < 32);

-        ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs,
-                              &child_format, false, &local_err);
-        if (ret < 0) {
+        s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
+                                         &child_format, false, &local_err);
+        if (local_err) {
+            ret = -EINVAL;
            goto close_exit;
        }

@@ -949,9 +951,9 @@ close_exit:
        if (!opened[i]) {
            continue;
        }
-        bdrv_unref(s->bs[i]);
+        bdrv_unref_child(bs, s->children[i]);
    }
-    g_free(s->bs);
+    g_free(s->children);
    g_free(opened);
 exit:
    qemu_opts_del(opts);
@@ -968,10 +970,10 @@ static void quorum_close(BlockDriverState *bs)
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bdrv_unref(s->bs[i]);
+        bdrv_unref_child(bs, s->children[i]);
    }

-    g_free(s->bs);
+    g_free(s->children);
 }

 static void quorum_detach_aio_context(BlockDriverState *bs)
@@ -980,7 +982,7 @@ static void quorum_detach_aio_context(BlockDriverState *bs)
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bdrv_detach_aio_context(s->bs[i]);
+        bdrv_detach_aio_context(s->children[i]->bs);
    }
 }

@@ -991,7 +993,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bdrv_attach_aio_context(s->bs[i], new_context);
+        bdrv_attach_aio_context(s->children[i]->bs, new_context);
    }
 }

@@ -1003,16 +1005,17 @@ static void quorum_refresh_filename(BlockDriverState *bs)
    int i;

    for (i = 0; i < s->num_children; i++) {
-        bdrv_refresh_filename(s->bs[i]);
-        if (!s->bs[i]->full_open_options) {
+        bdrv_refresh_filename(s->children[i]->bs);
+        if (!s->children[i]->bs->full_open_options) {
            return;
        }
    }

    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
-        QINCREF(s->bs[i]->full_open_options);
-        qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
+        QINCREF(s->children[i]->bs->full_open_options);
+        qlist_append_obj(children,
+                         QOBJECT(s->children[i]->bs->full_open_options));
    }

    opts = qdict_new();
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -519,7 +519,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
                     "future QEMU versions.\n",
                     bs->filename);
    }
-#endif
+#else
+    if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+        error_printf("WARNING: aio=native was specified for '%s', but "
+                     "is not supported in this build. Falling back to "
+                     "aio=threads.\n"
+                     "         This will become an error condition in "
+                     "future QEMU versions.\n",
+                     bs->filename);
+    }
+#endif /* !defined(CONFIG_LINUX_AIO) */

    s->has_discard = true;
    s->has_write_zeroes = true;
@@ -1259,7 +1268,7 @@ static int aio_worker(void *arg)
        break;
    }

-    g_slice_free(RawPosixAIOData, aiocb);
+    g_free(aiocb);
    return ret;
 }

@@ -1267,7 +1276,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        int type)
 {
-    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
    ThreadPool *pool;

    acb->bs = bs;
@@ -1292,7 +1301,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
-    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
    ThreadPool *pool;

    acb->bs = bs;
@@ -1648,7 +1657,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        goto out;
    }

-    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+    fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
                   0644);
    if (fd < 0) {
        result = -errno;
@@ -2237,7 +2246,7 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
    if (fd_open(bs) < 0)
        return NULL;

-    acb = g_slice_new(RawPosixAIOData);
+    acb = g_new(RawPosixAIOData, 1);
    acb->bs = bs;
    acb->aio_type = QEMU_AIO_IOCTL;
    acb->aio_fildes = s->fd;
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -119,9 +119,9 @@ static int aio_worker(void *arg)
    case QEMU_AIO_WRITE:
        count = handle_aiocb_rw(aiocb);
        if (count == aiocb->aio_nbytes) {
-            count = 0;
+            ret = 0;
        } else {
-            count = -EINVAL;
+            ret = -EINVAL;
        }
        break;
    case QEMU_AIO_FLUSH:
@@ -135,7 +135,7 @@ static int aio_worker(void *arg)
        break;
    }

-    g_slice_free(RawWin32AIOData, aiocb);
+    g_free(aiocb);
    return ret;
 }

@@ -143,7 +143,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
-    RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+    RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
    ThreadPool *pool;

    acb->bs = bs;
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -52,7 +52,7 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
 {
    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
+    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
 }

 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -75,7 +75,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
            return 0;
        }

-        buf = qemu_try_blockalign(bs->file, 512);
+        buf = qemu_try_blockalign(bs->file->bs, 512);
        if (!buf) {
            ret = -ENOMEM;
            goto fail;
@@ -102,7 +102,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+    ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);

 fail:
    if (qiov == &local_qiov) {
@@ -125,58 +125,58 @@ static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
                                            int64_t sector_num, int nb_sectors,
                                            BdrvRequestFlags flags)
 {
-    return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
+    return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
 }

 static int coroutine_fn raw_co_discard(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors)
 {
-    return bdrv_co_discard(bs->file, sector_num, nb_sectors);
+    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file);
+    return bdrv_getlength(bs->file->bs);
 }

 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-    return bdrv_get_info(bs->file, bdi);
+    return bdrv_get_info(bs->file->bs, bdi);
 }

 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl = bs->file->bl;
+    bs->bl = bs->file->bs->bl;
 }

 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file->bs, offset);
 }

 static int raw_is_inserted(BlockDriverState *bs)
 {
-    return bdrv_is_inserted(bs->file);
+    return bdrv_is_inserted(bs->file->bs);
 }

 static int raw_media_changed(BlockDriverState *bs)
 {
-    return bdrv_media_changed(bs->file);
+    return bdrv_media_changed(bs->file->bs);
 }

 static void raw_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file, eject_flag);
+    bdrv_eject(bs->file->bs, eject_flag);
 }

 static void raw_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file, locked);
+    bdrv_lock_medium(bs->file->bs, locked);
 }

 static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
-    return bdrv_ioctl(bs->file, req, buf);
+    return bdrv_ioctl(bs->file->bs, req, buf);
 }

 static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
@@ -184,12 +184,12 @@ static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
                                 BlockCompletionFunc *cb,
                                 void *opaque)
 {
-    return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
+    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
 }

 static int raw_has_zero_init(BlockDriverState *bs)
 {
-    return bdrv_has_zero_init(bs->file);
+    return bdrv_has_zero_init(bs->file->bs);
 }

 static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
@@ -207,7 +207,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
 {
-    bs->sg = bs->file->sg;
+    bs->sg = bs->file->bs->sg;

    if (bs->probed && !bdrv_is_read_only(bs)) {
        fprintf(stderr,
@@ -217,7 +217,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                "raw images, write operations on block 0 will be restricted.\n"
                "         Specify the 'raw' format explicitly to remove the "
                "restrictions.\n",
-                bs->file->filename);
+                bs->file->bs->filename);
    }

    return 0;
@@ -237,12 +237,12 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)

 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
-    return bdrv_probe_blocksizes(bs->file, bsz);
+    return bdrv_probe_blocksizes(bs->file->bs, bsz);
 }

 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
-    return bdrv_probe_geometry(bs->file, geo);
+    return bdrv_probe_geometry(bs->file->bs, geo);
 }

 BlockDriver bdrv_raw = {
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -28,7 +28,6 @@
 #define SD_OP_READ_OBJ       0x02
 #define SD_OP_WRITE_OBJ      0x03
 /* 0x04 is used internally by Sheepdog */
-#define SD_OP_DISCARD_OBJ    0x05

 #define SD_OP_NEW_VDI        0x11
 #define SD_OP_LOCK_VDI       0x12
@@ -318,7 +317,7 @@ enum AIOCBState {
    AIOCB_DISCARD_OBJ,
 };

-#define AIOCBOverwrapping(x, y)                                 \
+#define AIOCBOverlapping(x, y)                                 \
    (!(x->max_affect_data_idx < y->min_affect_data_idx          \
       || y->max_affect_data_idx < x->min_affect_data_idx))

@@ -342,6 +341,15 @@ struct SheepdogAIOCB {
    uint32_t min_affect_data_idx;
    uint32_t max_affect_data_idx;

+    /*
+     * The difference between affect_data_idx and dirty_data_idx:
+     * affect_data_idx represents range of index of all request types.
+     * dirty_data_idx represents range of index updated by COW requests.
+     * dirty_data_idx is used for updating an inode object.
+     */
+    uint32_t min_dirty_data_idx;
+    uint32_t max_dirty_data_idx;
+
    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
 };

@@ -351,9 +359,6 @@ typedef struct BDRVSheepdogState {

    SheepdogInode inode;

-    uint32_t min_dirty_data_idx;
-    uint32_t max_dirty_data_idx;
-
    char name[SD_MAX_VDI_LEN];
    bool is_snapshot;
    uint32_t cache_flags;
@@ -373,10 +378,15 @@ typedef struct BDRVSheepdogState {
    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;

-    CoQueue overwrapping_queue;
+    CoQueue overlapping_queue;
    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 } BDRVSheepdogState;

+typedef struct BDRVSheepdogReopenState {
+    int fd;
+    int cache_flags;
+} BDRVSheepdogReopenState;
+
 static const char * sd_strerror(int err)
 {
    int i;
@@ -556,6 +566,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
    acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
                              acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;

+    acb->min_dirty_data_idx = UINT32_MAX;
+    acb->max_dirty_data_idx = 0;
+
    return acb;
 }

@@ -819,8 +832,8 @@ static void coroutine_fn aio_read_response(void *opaque)
             */
            if (rsp.result == SD_RES_SUCCESS) {
                s->inode.data_vdi_id[idx] = s->inode.vdi_id;
-                s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
-                s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+                acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+                acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
            }
        }
        break;
@@ -847,10 +860,6 @@ static void coroutine_fn aio_read_response(void *opaque)
            rsp.result = SD_RES_SUCCESS;
            s->discard_supported = false;
            break;
-        case SD_RES_SUCCESS:
-            idx = data_oid_to_idx(aio_req->oid);
-            s->inode.data_vdi_id[idx] = 0;
-            break;
        default:
            break;
        }
@@ -1165,7 +1174,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
        hdr.flags = SD_FLAG_CMD_WRITE | flags;
        break;
    case AIOCB_DISCARD_OBJ:
-        hdr.opcode = SD_OP_DISCARD_OBJ;
+        hdr.opcode = SD_OP_WRITE_OBJ;
+        hdr.flags = SD_FLAG_CMD_WRITE | flags;
+        s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
+        offset = offsetof(SheepdogInode,
+                          data_vdi_id[data_oid_to_idx(oid)]);
+        oid = vid_to_vdi_oid(s->inode.vdi_id);
+        wlen = datalen = sizeof(uint32_t);
        break;
    }

@@ -1466,13 +1481,11 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    memcpy(&s->inode, buf, sizeof(s->inode));
-    s->min_dirty_data_idx = UINT32_MAX;
-    s->max_dirty_data_idx = 0;

    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
    pstrcpy(s->name, sizeof(s->name), vdi);
    qemu_co_mutex_init(&s->lock);
-    qemu_co_queue_init(&s->overwrapping_queue);
+    qemu_co_queue_init(&s->overlapping_queue);
    qemu_opts_del(opts);
    g_free(buf);
    return 0;
@@ -1486,6 +1499,68 @@ out:
    return ret;
 }

+static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+                             Error **errp)
+{
+    BDRVSheepdogState *s = state->bs->opaque;
+    BDRVSheepdogReopenState *re_s;
+    int ret = 0;
+
+    re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
+
+    re_s->cache_flags = SD_FLAG_CMD_CACHE;
+    if (state->flags & BDRV_O_NOCACHE) {
+        re_s->cache_flags = SD_FLAG_CMD_DIRECT;
+    }
+
+    re_s->fd = get_sheep_fd(s, errp);
+    if (re_s->fd < 0) {
+        ret = re_s->fd;
+        return ret;
+    }
+
+    return ret;
+}
+
+static void sd_reopen_commit(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (s->fd) {
+        aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+        closesocket(s->fd);
+    }
+
+    s->fd = re_s->fd;
+    s->cache_flags = re_s->cache_flags;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
+static void sd_reopen_abort(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (re_s == NULL) {
+        return;
+    }
+
+    if (re_s->fd) {
+        aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL);
+        closesocket(re_s->fd);
+    }
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+
+    return;
+}
+
 static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
                        Error **errp)
 {
@@ -1922,16 +1997,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
    AIOReq *aio_req;
    uint32_t offset, data_len, mn, mx;

-    mn = s->min_dirty_data_idx;
-    mx = s->max_dirty_data_idx;
+    mn = acb->min_dirty_data_idx;
+    mx = acb->max_dirty_data_idx;
    if (mn <= mx) {
        /* we need to update the vdi object. */
        offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
            mn * sizeof(s->inode.data_vdi_id[0]);
        data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);

-        s->min_dirty_data_idx = UINT32_MAX;
-        s->max_dirty_data_idx = 0;
+        acb->min_dirty_data_idx = UINT32_MAX;
+        acb->max_dirty_data_idx = 0;

        iov.iov_base = &s->inode;
        iov.iov_len = sizeof(s->inode);
@@ -2140,7 +2215,9 @@ static int coroutine_fn sd_co_rw_vector(void *p)
        }

        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
-                                old_oid, done);
+                                old_oid,
+                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
+                                0 : done);
        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);

        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
@@ -2157,12 +2234,12 @@ out:
    return 1;
 }

-static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
 {
    SheepdogAIOCB *cb;

    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
-        if (AIOCBOverwrapping(aiocb, cb)) {
+        if (AIOCBOverlapping(aiocb, cb)) {
            return true;
        }
    }
@@ -2191,15 +2268,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    acb->aiocb_type = AIOCB_WRITE_UDATA;

 retry:
-    if (check_overwrapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overwrapping_queue);
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overwrapping_queue);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2207,7 +2284,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overwrapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);

    return acb->ret;
 }
@@ -2224,15 +2301,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
    acb->aio_done_func = sd_finish_aiocb;

 retry:
-    if (check_overwrapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overwrapping_queue);
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overwrapping_queue);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2240,7 +2317,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overwrapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
    return acb->ret;
 }

@@ -2576,28 +2653,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors)
 {
    SheepdogAIOCB *acb;
-    QEMUIOVector dummy;
    BDRVSheepdogState *s = bs->opaque;
    int ret;
+    QEMUIOVector discard_iov;
+    struct iovec iov;
+    uint32_t zero = 0;

    if (!s->discard_supported) {
            return 0;
    }

-    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    memset(&discard_iov, 0, sizeof(discard_iov));
+    memset(&iov, 0, sizeof(iov));
+    iov.iov_base = &zero;
+    iov.iov_len = sizeof(zero);
+    discard_iov.iov = &iov;
+    discard_iov.niov = 1;
+    acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
    acb->aiocb_type = AIOCB_DISCARD_OBJ;
    acb->aio_done_func = sd_finish_aiocb;

 retry:
-    if (check_overwrapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overwrapping_queue);
+    if (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
        goto retry;
    }

    ret = sd_co_rw_vector(acb);
    if (ret <= 0) {
        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overwrapping_queue);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
        qemu_aio_unref(acb);
        return ret;
    }
@@ -2605,7 +2690,7 @@ retry:
    qemu_coroutine_yield();

    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overwrapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);

    return acb->ret;
 }
@@ -2702,6 +2787,9 @@ static BlockDriver bdrv_sheepdog = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2735,6 +2823,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2768,6 +2859,9 @@ static BlockDriver bdrv_sheepdog_unix = {
    .instance_size  = sizeof(BDRVSheepdogState),
    .bdrv_needs_filename = true,
    .bdrv_file_open = sd_open,
+    .bdrv_reopen_prepare    = sd_reopen_prepare,
+    .bdrv_reopen_commit     = sd_reopen_commit,
+    .bdrv_reopen_abort      = sd_reopen_abort,
    .bdrv_close     = sd_close,
    .bdrv_create    = sd_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -149,7 +149,7 @@ int bdrv_can_snapshot(BlockDriverState *bs)

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
-            return bdrv_can_snapshot(bs->file);
+            return bdrv_can_snapshot(bs->file->bs);
        }
        return 0;
    }
@@ -168,7 +168,7 @@ int bdrv_snapshot_create(BlockDriverState *bs,
        return drv->bdrv_snapshot_create(bs, sn_info);
    }
    if (bs->file) {
-        return bdrv_snapshot_create(bs->file, sn_info);
+        return bdrv_snapshot_create(bs->file->bs, sn_info);
    }
    return -ENOTSUP;
 }
@@ -188,10 +188,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs,

    if (bs->file) {
        drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
        if (open_ret < 0) {
-            bdrv_unref(bs->file);
+            bdrv_unref(bs->file->bs);
            bs->drv = NULL;
            return open_ret;
        }
@@ -245,7 +245,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
        return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
    }
    if (bs->file) {
-        return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
+        return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
    }
    error_setg(errp, "Block format '%s' used by device '%s' "
               "does not support internal snapshot deletion",
@@ -283,7 +283,7 @@ int bdrv_snapshot_list(BlockDriverState *bs,
        return drv->bdrv_snapshot_list(bs, psn_info);
    }
    if (bs->file) {
-        return bdrv_snapshot_list(bs->file, psn_info);
+        return bdrv_snapshot_list(bs->file->bs, psn_info);
    }
    return -ENOTSUP;
 }
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -193,7 +193,7 @@ sftp_error_report(BDRVSSHState *s, const char *fs, ...)
 static int parse_uri(const char *filename, QDict *options, Error **errp)
 {
    URI *uri = NULL;
-    QueryParams *qp = NULL;
+    QueryParams *qp;
    int i;

    uri = uri_parse(filename);
@@ -249,9 +249,6 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
    return 0;

 err:
-    if (qp) {
-      query_params_free(qp);
-    }
    if (uri) {
      uri_free(uri);
    }
--- a/block/stream.c
+++ b/block/stream.c
@@ -52,34 +52,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
 }

-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
-                                const char *base_id)
-{
-    BlockDriverState *intermediate;
-    intermediate = top->backing_hd;
-
-    /* Must assign before bdrv_delete() to prevent traversing dangling pointer
-     * while we delete backing image instances.
-     */
-    bdrv_set_backing_hd(top, base);
-
-    while (intermediate) {
-        BlockDriverState *unused;
-
-        /* reached base */
-        if (intermediate == base) {
-            break;
-        }
-
-        unused = intermediate;
-        intermediate = intermediate->backing_hd;
-        bdrv_set_backing_hd(unused, NULL);
-        bdrv_unref(unused);
-    }
-
-    bdrv_refresh_limits(top, NULL);
-}
-
 typedef struct {
    int ret;
    bool reached_end;
@@ -101,7 +73,7 @@ static void stream_complete(BlockJob *job, void *opaque)
            }
        }
        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        close_unused_images(job->bs, base, base_id);
+        bdrv_set_backing_hd(job->bs, base);
    }

    g_free(s->backing_file_str);
@@ -121,7 +93,7 @@ static void coroutine_fn stream_run(void *opaque)
    int n = 0;
    void *buf;

-    if (!bs->backing_hd) {
+    if (!bs->backing) {
        block_job_completed(&s->common, 0);
        return;
    }
@@ -166,7 +138,7 @@ wait:
        } else if (ret >= 0) {
            /* Copy if allocated in the intermediate images.  Limit to the
             * known-unallocated area [sector_num, sector_num+n).  */
-            ret = bdrv_is_allocated_above(bs->backing_hd, base,
+            ret = bdrv_is_allocated_above(backing_bs(bs), base,
                                          sector_num, n, &n);

            /* Finish early if end of backing file has been reached */
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -399,7 +399,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,

    logout("\n");

-    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+    ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
    if (ret < 0) {
        goto fail;
    }
@@ -490,13 +490,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,

    bmap_size = header.blocks_in_image * sizeof(uint32_t);
    bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
-    s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+    s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
    if (s->bmap == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

-    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+    ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+                    bmap_size);
    if (ret < 0) {
        goto fail_free_bmap;
    }
@@ -585,7 +586,7 @@ static int vdi_co_read(BlockDriverState *bs,
            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                              (uint64_t)bmap_entry * s->block_sectors +
                              sector_in_block;
-            ret = bdrv_read(bs->file, offset, buf, n_sectors);
+            ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
        }
        logout("%u sectors read\n", n_sectors);

@@ -653,7 +654,7 @@ static int vdi_co_write(BlockDriverState *bs,
             * acquire the lock and thus the padded cluster is written before
             * the other coroutines can write to the affected area. */
            qemu_co_mutex_lock(&s->write_lock);
-            ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+            ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
            qemu_co_mutex_unlock(&s->write_lock);
        } else {
            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
@@ -669,7 +670,7 @@ static int vdi_co_write(BlockDriverState *bs,
             * that that write operation has returned (there may be other writes
             * in flight, but they do not concern this very operation). */
            qemu_co_mutex_unlock(&s->write_lock);
-            ret = bdrv_write(bs->file, offset, buf, n_sectors);
+            ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
        }

        nb_sectors -= n_sectors;
@@ -694,7 +695,7 @@ static int vdi_co_write(BlockDriverState *bs,
        assert(VDI_IS_ALLOCATED(bmap_first));
        *header = s->header;
        vdi_header_to_le(header);
-        ret = bdrv_write(bs->file, 0, block, 1);
+        ret = bdrv_write(bs->file->bs, 0, block, 1);
        g_free(block);
        block = NULL;

@@ -712,7 +713,7 @@ static int vdi_co_write(BlockDriverState *bs,
        base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
        logout("will write %u block map sectors starting from entry %u\n",
               n_sectors, bmap_first);
-        ret = bdrv_write(bs->file, offset, base, n_sectors);
+        ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
    }

    return ret;
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -81,7 +81,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,

    offset = log->offset + read;

-    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+    ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
    if (ret < 0) {
        goto exit;
    }
@@ -141,7 +141,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
        }
        offset = log->offset + read;

-        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
        }
@@ -191,7 +191,8 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
            /* full */
            break;
        }
-        ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+                          VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
        }
@@ -353,7 +354,7 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
    }

    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    desc_entries = qemu_try_blockalign(bs->file,
+    desc_entries = qemu_try_blockalign(bs->file->bs,
                                       desc_sectors * VHDX_LOG_SECTOR_SIZE);
    if (desc_entries == NULL) {
        ret = -ENOMEM;
@@ -462,7 +463,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,

    /* count is only > 1 if we are writing zeroes */
    for (i = 0; i < count; i++) {
-        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+        ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
                               VHDX_LOG_SECTOR_SIZE);
        if (ret < 0) {
            goto exit;
@@ -509,7 +510,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
        /* if the log shows a FlushedFileOffset larger than our current file
         * size, then that means the file has been truncated / corrupted, and
         * we must refused to open it / use it */
-        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
            ret = -EINVAL;
            goto exit;
        }
@@ -539,12 +540,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                goto exit;
            }
        }
-        if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+        if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
            new_file_size = desc_entries->hdr.last_file_offset;
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size);
+                bdrv_truncate(bs->file->bs, new_file_size);
            }
        }
        qemu_vfree(desc_entries);
@@ -908,8 +909,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
                .sequence_number     = s->log.sequence,
                .descriptor_count    = sectors,
                .reserved            = 0,
-                .flushed_file_offset = bdrv_getlength(bs->file),
-                .last_file_offset    = bdrv_getlength(bs->file),
+                .flushed_file_offset = bdrv_getlength(bs->file->bs),
+                .last_file_offset    = bdrv_getlength(bs->file->bs),
              };

    new_hdr.log_guid = header->log_guid;
@@ -940,7 +941,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,

        if (i == 0 && leading_length) {
            /* partial sector at the front of the buffer */
-            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+            ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
                             VHDX_LOG_SECTOR_SIZE);
            if (ret < 0) {
                goto exit;
@@ -950,7 +951,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
            sector_write = merged_sector;
        } else if (i == sectors - 1 && trailing_length) {
            /* partial sector at the end of the buffer */
-            ret = bdrv_pread(bs->file,
+            ret = bdrv_pread(bs->file->bs,
                            file_offset,
                            merged_sector + trailing_length,
                            VHDX_LOG_SECTOR_SIZE - trailing_length);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -375,7 +375,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
        inactive_header->log_guid = *log_guid;
    }

-    ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
+    ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
    if (ret < 0) {
        goto exit;
    }
@@ -427,7 +427,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
    /* We have to read the whole VHDX_HEADER_SIZE instead of
     * sizeof(VHDXHeader), because the checksum is over the whole
     * region */
-    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+                     VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }
@@ -443,7 +444,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
        }
    }

-    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+                     VHDX_HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }
@@ -516,7 +518,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
     * whole block */
    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);

-    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
+    ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
                     VHDX_HEADER_BLOCK_SIZE);
    if (ret < 0) {
        goto fail;
@@ -629,7 +631,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)

    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);

-    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
+    ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
                     VHDX_METADATA_TABLE_MAX_SIZE);
    if (ret < 0) {
        goto exit;
@@ -732,7 +734,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
        goto exit;
    }

-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                     s->metadata_entries.file_parameters_entry.offset
                                         + s->metadata_rt.file_offset,
                     &s->params,
@@ -767,7 +769,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    /* determine virtual disk size, logical sector size,
     * and phys sector size */

-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                     s->metadata_entries.virtual_disk_size_entry.offset
                                           + s->metadata_rt.file_offset,
                     &s->virtual_disk_size,
@@ -775,7 +777,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    if (ret < 0) {
        goto exit;
    }
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                     s->metadata_entries.logical_sector_size_entry.offset
                                             + s->metadata_rt.file_offset,
                     &s->logical_sector_size,
@@ -783,7 +785,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
    if (ret < 0) {
        goto exit;
    }
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                     s->metadata_entries.phys_sector_size_entry.offset
                                          + s->metadata_rt.file_offset,
                     &s->physical_sector_size,
@@ -906,7 +908,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    QLIST_INIT(&s->regions);

    /* validate the file signature */
-    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+    ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }
@@ -959,13 +961,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* s->bat is freed in vhdx_close() */
-    s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+    s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
    if (s->bat == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

-    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
+    ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
    if (ret < 0) {
        goto fail;
    }
@@ -1118,7 +1120,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
                break;
            case PAYLOAD_BLOCK_FULLY_PRESENT:
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->file,
+                ret = bdrv_co_readv(bs->file->bs,
                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
                                    sinfo.sectors_avail, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
@@ -1156,12 +1158,12 @@ exit:
 static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
                                    uint64_t *new_offset)
 {
-    *new_offset = bdrv_getlength(bs->file);
+    *new_offset = bdrv_getlength(bs->file->bs);

    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size);
+    return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
 }

 /*
@@ -1260,7 +1262,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                /* Queue another write of zero buffers if the underlying file
                 * does not zero-fill on file extension */

-                if (bdrv_has_zero_init(bs->file) == 0) {
+                if (bdrv_has_zero_init(bs->file->bs) == 0) {
                    use_zero_buffers = true;

                    /* zero fill the front, if any */
@@ -1327,7 +1329,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                }
                /* block exists, so we can just overwrite it */
                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_writev(bs->file,
+                ret = bdrv_co_writev(bs->file->bs,
                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
                                    sectors_to_write, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -87,7 +87,7 @@ typedef struct {
 #define L2_CACHE_SIZE 16

 typedef struct VmdkExtent {
-    BlockDriverState *file;
+    BdrvChild *file;
    bool flat;
    bool compressed;
    bool has_marker;
@@ -222,7 +222,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
        g_free(e->l1_backup_table);
        g_free(e->type);
        if (e->file != bs->file) {
-            bdrv_unref(e->file);
+            bdrv_unref_child(bs, e->file);
        }
    }
    g_free(s->extents);
@@ -248,7 +248,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    BDRVVmdkState *s = bs->opaque;
    int ret;

-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return 0;
    }
@@ -278,7 +278,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
    BDRVVmdkState *s = bs->opaque;
    int ret;

-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
@@ -297,7 +297,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
        pstrcat(desc, sizeof(desc), tmp_desc);
    }

-    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
@@ -308,10 +308,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 static int vmdk_is_cid_valid(BlockDriverState *bs)
 {
    BDRVVmdkState *s = bs->opaque;
-    BlockDriverState *p_bs = bs->backing_hd;
    uint32_t cur_pcid;

-    if (!s->cid_checked && p_bs) {
+    if (!s->cid_checked && bs->backing) {
+        BlockDriverState *p_bs = bs->backing->bs;
+
        cur_pcid = vmdk_read_cid(p_bs, 0);
        if (s->parent_cid != cur_pcid) {
            /* CID not valid */
@@ -340,7 +341,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
    int ret;

    desc[DESC_SIZE] = '\0';
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
        return ret;
    }
@@ -367,7 +368,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
 /* Create and append extent to the extent array. Return the added VmdkExtent
 * address. return NULL if allocation failed. */
 static int vmdk_add_extent(BlockDriverState *bs,
-                           BlockDriverState *file, bool flat, int64_t sectors,
+                           BdrvChild *file, bool flat, int64_t sectors,
                           int64_t l1_offset, int64_t l1_backup_offset,
                           uint32_t l1_size,
                           int l2_size, uint64_t cluster_sectors,
@@ -392,7 +393,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
        return -EFBIG;
    }

-    nb_sectors = bdrv_nb_sectors(file);
+    nb_sectors = bdrv_nb_sectors(file->bs);
    if (nb_sectors < 0) {
        return nb_sectors;
    }
@@ -439,14 +440,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
        return -ENOMEM;
    }

-    ret = bdrv_pread(extent->file,
+    ret = bdrv_pread(extent->file->bs,
                     extent->l1_table_offset,
                     extent->l1_table,
                     l1_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read l1 table from extent '%s'",
-                         extent->file->filename);
+                         extent->file->bs->filename);
        goto fail_l1;
    }
    for (i = 0; i < extent->l1_size; i++) {
@@ -459,14 +460,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
            ret = -ENOMEM;
            goto fail_l1;
        }
-        ret = bdrv_pread(extent->file,
+        ret = bdrv_pread(extent->file->bs,
                         extent->l1_backup_table_offset,
                         extent->l1_backup_table,
                         l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Could not read l1 backup table from extent '%s'",
-                             extent->file->filename);
+                             extent->file->bs->filename);
            goto fail_l1b;
        }
        for (i = 0; i < extent->l1_size; i++) {
@@ -485,7 +486,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
 }

 static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
-                                 BlockDriverState *file,
+                                 BdrvChild *file,
                                 int flags, Error **errp)
 {
    int ret;
@@ -493,11 +494,11 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    VMDK3Header header;
    VmdkExtent *extent;

-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
-                         file->filename);
+                         file->bs->filename);
        return ret;
    }
    ret = vmdk_add_extent(bs, file, false,
@@ -559,7 +560,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
 }

 static int vmdk_open_vmdk4(BlockDriverState *bs,
-                           BlockDriverState *file,
+                           BdrvChild *file,
                           int flags, QDict *options, Error **errp)
 {
    int ret;
@@ -570,17 +571,17 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    BDRVVmdkState *s = bs->opaque;
    int64_t l1_backup_offset = 0;

-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Could not read header from file '%s'",
-                         file->filename);
+                         file->bs->filename);
        return -EINVAL;
    }
    if (header.capacity == 0) {
        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
        if (desc_offset) {
-            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+            char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
            if (!buf) {
                return -EINVAL;
            }
@@ -620,8 +621,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
            } QEMU_PACKED eos_marker;
        } QEMU_PACKED footer;

-        ret = bdrv_pread(file,
-            bs->file->total_sectors * 512 - 1536,
+        ret = bdrv_pread(file->bs,
+            bs->file->bs->total_sectors * 512 - 1536,
            &footer, sizeof(footer));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read footer");
@@ -675,7 +676,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
    }
-    if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+    if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
        error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
                   (int64_t)(le64_to_cpu(header.grain_offset)
                             * BDRV_SECTOR_SIZE));
@@ -739,8 +740,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 }

 /* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs,
-                            BlockDriverState *file, int flags,
+static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
                            char *buf, QDict *options, Error **errp)
 {
    uint32_t magic;
@@ -773,10 +773,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
    int64_t sectors = 0;
    int64_t flat_offset;
    char *extent_path;
-    BlockDriverState *extent_file;
+    BdrvChild *extent_file;
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent;
    char extent_opt_prefix[32];
+    Error *local_err = NULL;

    while (*p) {
        /* parse extent line in one of below formats:
@@ -819,22 +820,22 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            !desc_file_path[0])
        {
            error_setg(errp, "Cannot use relative extent paths with VMDK "
-                       "descriptor file '%s'", bs->file->filename);
+                       "descriptor file '%s'", bs->file->bs->filename);
            return -EINVAL;
        }

        extent_path = g_malloc0(PATH_MAX);
        path_combine(extent_path, PATH_MAX, desc_file_path, fname);
-        extent_file = NULL;

        ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
        assert(ret < 32);

-        ret = bdrv_open_image(&extent_file, extent_path, options,
-                              extent_opt_prefix, bs, &child_file, false, errp);
+        extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
+                                      bs, &child_file, false, &local_err);
        g_free(extent_path);
-        if (ret) {
-            return ret;
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
        }

        /* save to extents array */
@@ -844,13 +845,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            ret = vmdk_add_extent(bs, extent_file, true, sectors,
                            0, 0, 0, 0, 0, &extent, errp);
            if (ret < 0) {
-                bdrv_unref(extent_file);
+                bdrv_unref_child(bs, extent_file);
                return ret;
            }
            extent->flat_start_offset = flat_offset << 9;
        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            char *buf = vmdk_read_desc(extent_file, 0, errp);
+            char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
            if (!buf) {
                ret = -EINVAL;
            } else {
@@ -859,13 +860,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
            }
            g_free(buf);
            if (ret) {
-                bdrv_unref(extent_file);
+                bdrv_unref_child(bs, extent_file);
                return ret;
            }
            extent = &s->extents[s->num_extents - 1];
        } else {
            error_setg(errp, "Unsupported extent type '%s'", type);
-            bdrv_unref(extent_file);
+            bdrv_unref_child(bs, extent_file);
            return -ENOTSUP;
        }
        extent->type = g_strdup(type);
@@ -905,7 +906,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
    }
    s->create_type = g_strdup(ct);
    s->desc_offset = 0;
-    ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp);
+    ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
+                             errp);
 exit:
    return ret;
 }
@@ -918,7 +920,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;

-    buf = vmdk_read_desc(bs->file, 0, errp);
+    buf = vmdk_read_desc(bs->file->bs, 0, errp);
    if (!buf) {
        return -EINVAL;
    }
@@ -927,7 +929,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    switch (magic) {
        case VMDK3_MAGIC:
        case VMDK4_MAGIC:
-            ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp);
+            ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
+                                   errp);
            s->desc_offset = 0x200;
            break;
        default:
@@ -1004,7 +1007,7 @@ static int get_whole_cluster(BlockDriverState *bs,
    cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
    whole_grain = qemu_blockalign(bs, cluster_bytes);

-    if (!bs->backing_hd) {
+    if (!bs->backing) {
        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
@@ -1013,22 +1016,22 @@ static int get_whole_cluster(BlockDriverState *bs,
    assert(skip_end_sector <= extent->cluster_sectors);
    /* we will be here if it's first write on non-exist grain(cluster).
     * try to read from parent image, if exist */
-    if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+    if (bs->backing && !vmdk_is_cid_valid(bs)) {
        ret = VMDK_ERROR;
        goto exit;
    }

    /* Read backing data before skip range */
    if (skip_start_sector > 0) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num,
+        if (bs->backing) {
+            ret = bdrv_read(bs->backing->bs, sector_num,
                            whole_grain, skip_start_sector);
            if (ret < 0) {
                ret = VMDK_ERROR;
                goto exit;
            }
        }
-        ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+        ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
                         skip_start_sector);
        if (ret < 0) {
            ret = VMDK_ERROR;
@@ -1037,8 +1040,8 @@ static int get_whole_cluster(BlockDriverState *bs,
    }
    /* Read backing data after skip range */
    if (skip_end_sector < extent->cluster_sectors) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+        if (bs->backing) {
+            ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
                            extent->cluster_sectors - skip_end_sector);
            if (ret < 0) {
@@ -1046,7 +1049,7 @@ static int get_whole_cluster(BlockDriverState *bs,
                goto exit;
            }
        }
-        ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+        ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
                         extent->cluster_sectors - skip_end_sector);
        if (ret < 0) {
@@ -1066,7 +1069,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
    offset = cpu_to_le32(offset);
    /* update L2 table */
    if (bdrv_pwrite_sync(
-                extent->file,
+                extent->file->bs,
                ((int64_t)m_data->l2_offset * 512)
                    + (m_data->l2_index * sizeof(offset)),
                &offset, sizeof(offset)) < 0) {
@@ -1076,7 +1079,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
    if (extent->l1_backup_table_offset != 0) {
        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
        if (bdrv_pwrite_sync(
-                    extent->file,
+                    extent->file->bs,
                    ((int64_t)m_data->l2_offset * 512)
                        + (m_data->l2_index * sizeof(offset)),
                    &offset, sizeof(offset)) < 0) {
@@ -1166,7 +1169,7 @@ static int get_cluster_offset(BlockDriverState *bs,
    }
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
    if (bdrv_pread(
-                extent->file,
+                extent->file->bs,
                (int64_t)l2_offset * 512,
                l2_table,
                extent->l2_size * sizeof(uint32_t)
@@ -1320,7 +1323,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
        write_len = buf_len + sizeof(VmdkGrainMarker);
    }
    write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len);
+    ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);

    write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);

@@ -1355,7 +1358,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,


    if (!extent->compressed) {
-        ret = bdrv_pread(extent->file,
+        ret = bdrv_pread(extent->file->bs,
                          cluster_offset + offset_in_cluster,
                          buf, nb_sectors * 512);
        if (ret == nb_sectors * 512) {
@@ -1369,7 +1372,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
    buf_bytes = cluster_bytes * 2;
    cluster_buf = g_malloc(buf_bytes);
    uncomp_buf = g_malloc(cluster_bytes);
-    ret = bdrv_pread(extent->file,
+    ret = bdrv_pread(extent->file->bs,
                cluster_offset,
                cluster_buf, buf_bytes);
    if (ret < 0) {
@@ -1431,11 +1434,11 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
        }
        if (ret != VMDK_OK) {
            /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing_hd && ret != VMDK_ZEROED) {
+            if (bs->backing && ret != VMDK_ZEROED) {
                if (!vmdk_is_cid_valid(bs)) {
                    return -EINVAL;
                }
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
                if (ret < 0) {
                    return ret;
                }
@@ -2035,7 +2038,7 @@ static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
    int ret = 0;

    for (i = 0; i < s->num_extents; i++) {
-        err = bdrv_co_flush(s->extents[i].file);
+        err = bdrv_co_flush(s->extents[i].file->bs);
        if (err < 0) {
            ret = err;
        }
@@ -2050,7 +2053,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
    int64_t r;
    BDRVVmdkState *s = bs->opaque;

-    ret = bdrv_get_allocated_file_size(bs->file);
+    ret = bdrv_get_allocated_file_size(bs->file->bs);
    if (ret < 0) {
        return ret;
    }
@@ -2058,7 +2061,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
        if (s->extents[i].file == bs->file) {
            continue;
        }
-        r = bdrv_get_allocated_file_size(s->extents[i].file);
+        r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
        if (r < 0) {
            return r;
        }
@@ -2076,7 +2079,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
     * return 0. */
    for (i = 0; i < s->num_extents; i++) {
        if (s->extents[i].flat) {
-            if (!bdrv_has_zero_init(s->extents[i].file)) {
+            if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
                return 0;
            }
        }
@@ -2089,7 +2092,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
    ImageInfo *info = g_new0(ImageInfo, 1);

    *info = (ImageInfo){
-        .filename         = g_strdup(extent->file->filename),
+        .filename         = g_strdup(extent->file->bs->filename),
        .format           = g_strdup(extent->type),
        .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
        .compressed       = extent->compressed,
@@ -2135,7 +2138,9 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
                    PRId64 "\n", sector_num);
            break;
        }
-        if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+        if (ret == VMDK_OK &&
+            cluster_offset >= bdrv_getlength(extent->file->bs))
+        {
            fprintf(stderr,
                    "ERROR: cluster offset for sector %"
                    PRId64 " points after EOF\n", sector_num);
@@ -2211,7 +2216,7 @@ static void vmdk_detach_aio_context(BlockDriverState *bs)
    int i;

    for (i = 0; i < s->num_extents; i++) {
-        bdrv_detach_aio_context(s->extents[i].file);
+        bdrv_detach_aio_context(s->extents[i].file->bs);
    }
 }

@@ -2222,7 +2227,7 @@ static void vmdk_attach_aio_context(BlockDriverState *bs,
    int i;

    for (i = 0; i < s->num_extents; i++) {
-        bdrv_attach_aio_context(s->extents[i].file, new_context);
+        bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
    }
 }

--- a/block/vpc.c
+++ b/block/vpc.c
@@ -172,14 +172,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    int disk_type = VHD_DYNAMIC;
    int ret;

-    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
    if (ret < 0) {
        goto fail;
    }

    footer = (VHDFooter *) s->footer_buf;
    if (strncmp(footer->creator, "conectix", 8)) {
-        int64_t offset = bdrv_getlength(bs->file);
+        int64_t offset = bdrv_getlength(bs->file->bs);
        if (offset < 0) {
            ret = offset;
            goto fail;
@@ -189,7 +189,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
        }

        /* If a fixed disk, the footer is found only at the end of the file */
-        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
+        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
                         HEADER_SIZE);
        if (ret < 0) {
            goto fail;
@@ -232,7 +232,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (disk_type == VHD_DYNAMIC) {
-        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
+        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
                         HEADER_SIZE);
        if (ret < 0) {
            goto fail;
@@ -280,7 +280,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,

        pagetable_size = (uint64_t) s->max_table_entries * 4;

-        s->pagetable = qemu_try_blockalign(bs->file, pagetable_size);
+        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
        if (s->pagetable == NULL) {
            ret = -ENOMEM;
            goto fail;
@@ -288,7 +288,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,

        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);

-        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, pagetable_size);
+        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+                         pagetable_size);
        if (ret < 0) {
            goto fail;
        }
@@ -308,7 +309,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
            }
        }

-        if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
            error_setg(errp, "block-vpc: free_data_block_offset points after "
                             "the end of file. The image has been truncated.");
            ret = -EINVAL;
@@ -383,7 +384,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,

        s->last_bitmap_offset = bitmap_offset;
        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
    }

    return block_offset;
@@ -401,7 +402,7 @@ static int rewrite_footer(BlockDriverState* bs)
    BDRVVPCState *s = bs->opaque;
    int64_t offset = s->free_data_block_offset;

-    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
    if (ret < 0)
        return ret;

@@ -436,7 +437,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)

    // Initialize the block's bitmap
    memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
        s->bitmap_size);
    if (ret < 0) {
        return ret;
@@ -451,7 +452,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
    // Write BAT entry to disk
    bat_offset = s->bat_offset + (4 * index);
    bat_value = cpu_to_be32(s->pagetable[index]);
-    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
+    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
    if (ret < 0)
        goto fail;

@@ -485,7 +486,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
    VHDFooter *footer = (VHDFooter *) s->footer_buf;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+        return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
    }
    while (nb_sectors > 0) {
        offset = get_sector_offset(bs, sector_num, 0);
@@ -499,7 +500,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
        if (offset == -1) {
            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
+            ret = bdrv_pread(bs->file->bs, offset, buf,
                sectors * BDRV_SECTOR_SIZE);
            if (ret != sectors * BDRV_SECTOR_SIZE) {
                return -1;
@@ -534,7 +535,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+        return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
    }
    while (nb_sectors > 0) {
        offset = get_sector_offset(bs, sector_num, 1);
@@ -551,7 +552,8 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
                return -1;
        }

-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
+        ret = bdrv_pwrite(bs->file->bs, offset, buf,
+                          sectors * BDRV_SECTOR_SIZE);
        if (ret != sectors * BDRV_SECTOR_SIZE) {
            return -1;
        }
@@ -878,7 +880,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_has_zero_init(bs->file);
+        return bdrv_has_zero_init(bs->file->bs);
    } else {
        return 1;
    }
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -985,12 +985,6 @@ static BDRVVVFATState *vvv = NULL;
 static int enable_write_target(BDRVVVFATState *s, Error **errp);
 static int is_consistent(BDRVVVFATState *s);

-static void vvfat_rebind(BlockDriverState *bs)
-{
-    BDRVVVFATState *s = bs->opaque;
-    s->bs = bs;
-}
-
 static QemuOptsList runtime_opts = {
    .name = "vvfat",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -2923,6 +2917,7 @@ static BlockDriver vvfat_write_target = {
 static int enable_write_target(BDRVVVFATState *s, Error **errp)
 {
    BlockDriver *bdrv_qcow = NULL;
+    BlockDriverState *backing;
    QemuOpts *opts = NULL;
    int ret;
    int size = sector2cluster(s, s->sector_count);
@@ -2971,10 +2966,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
    unlink(s->qcow_filename);
 #endif

-    bdrv_set_backing_hd(s->bs, bdrv_new());
-    s->bs->backing_hd->drv = &vvfat_write_target;
-    s->bs->backing_hd->opaque = g_new(void *, 1);
-    *(void**)s->bs->backing_hd->opaque = s;
+    backing = bdrv_new();
+    bdrv_set_backing_hd(s->bs, backing);
+    bdrv_unref(backing);
+
+    s->bs->backing->bs->drv = &vvfat_write_target;
+    s->bs->backing->bs->opaque = g_new(void *, 1);
+    *(void**)s->bs->backing->bs->opaque = s;

    return 0;

@@ -3008,7 +3006,6 @@ static BlockDriver bdrv_vvfat = {
    .bdrv_parse_filename    = vvfat_parse_filename,
    .bdrv_file_open         = vvfat_open,
    .bdrv_close             = vvfat_close,
-    .bdrv_rebind            = vvfat_rebind,

    .bdrv_read              = vvfat_co_read,
    .bdrv_write             = vvfat_co_write,
--- a/blockdev.c
+++ b/blockdev.c
@@ -411,7 +411,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
        bdrv_flags |= BDRV_O_NO_FLUSH;
    }

-#ifdef CONFIG_LINUX_AIO
    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
        if (!strcmp(buf, "native")) {
            bdrv_flags |= BDRV_O_NATIVE_AIO;
@@ -422,7 +421,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
           goto early_err;
        }
    }
-#endif

    if ((buf = qemu_opt_get(opts, "format")) != NULL) {
        if (is_help_option(buf)) {
@@ -1546,7 +1544,7 @@ static void external_snapshot_commit(BlkTransactionState *common)
    /* We don't need (or want) to use the transactional
     * bdrv_reopen_multiple() across all the entries at once, because we
     * don't want to abort all of them if one of them fails the reopen */
-    bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR,
+    bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                NULL);

    aio_context_release(state->aio_context);
@@ -2508,7 +2506,7 @@ void qmp_drive_backup(const char *device, const char *target,
    /* See if we have a backing HD we can use to create our new image
     * on top of. */
    if (sync == MIRROR_SYNC_MODE_TOP) {
-        source = bs->backing_hd;
+        source = backing_bs(bs);
        if (!source) {
            sync = MIRROR_SYNC_MODE_FULL;
        }
@@ -2716,7 +2714,7 @@ void qmp_drive_mirror(const char *device, const char *target,
    }

    flags = bs->open_flags | BDRV_O_RDWR;
-    source = bs->backing_hd;
+    source = backing_bs(bs);
    if (!source && sync == MIRROR_SYNC_MODE_TOP) {
        sync = MIRROR_SYNC_MODE_FULL;
    }
--- a/blockjob.c
+++ b/blockjob.c
@@ -54,6 +54,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver        = driver;
+    job->id            = g_strdup(bdrv_get_device_name(bs));
    job->bs            = bs;
    job->cb            = cb;
    job->opaque        = opaque;
@@ -81,6 +82,7 @@ void block_job_release(BlockDriverState *bs)
    bs->job = NULL;
    bdrv_op_unblock_all(bs, job->blocker);
    error_free(job->blocker);
+    g_free(job->id);
    g_free(job);
 }

@@ -113,8 +115,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
    if (job->pause_count || job->cancelled || !job->driver->complete) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
-                   bdrv_get_device_name(job->bs));
+        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
        return;
    }

@@ -269,7 +270,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
 {
    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
-    info->device    = g_strdup(bdrv_get_device_name(job->bs));
+    info->device    = g_strdup(job->id);
    info->len       = job->len;
    info->busy      = job->busy;
    info->paused    = job->pause_count > 0;
@@ -291,7 +292,7 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
 void block_job_event_cancelled(BlockJob *job)
 {
    qapi_event_send_block_job_cancelled(job->driver->job_type,
-                                        bdrv_get_device_name(job->bs),
+                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
@@ -301,7 +302,7 @@ void block_job_event_cancelled(BlockJob *job)
 void block_job_event_completed(BlockJob *job, const char *msg)
 {
    qapi_event_send_block_job_completed(job->driver->job_type,
-                                        bdrv_get_device_name(job->bs),
+                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
@@ -315,7 +316,7 @@ void block_job_event_ready(BlockJob *job)
    job->ready = true;

    qapi_event_send_block_job_ready(job->driver->job_type,
-                                    bdrv_get_device_name(job->bs),
+                                    job->id,
                                    job->len,
                                    job->offset,
                                    job->speed, &error_abort);
@@ -344,7 +345,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
    default:
        abort();
    }
-    qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
+    qapi_event_send_block_job_error(job->id,
                                    is_read ? IO_OPERATION_TYPE_READ :
                                    IO_OPERATION_TYPE_WRITE,
                                    action, &error_abort);
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -108,7 +108,7 @@ void cpu_list_unlock(void)

 uint64_t cpu_get_tsc(CPUX86State *env)
 {
-    return cpu_get_real_ticks();
+    return cpu_get_host_ticks();
 }

 static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
--- a/195
+++ b/195
@@ -242,7 +242,6 @@ vnc="yes"
 sparse="no"
 uuid=""
 vde=""
-vnc_tls=""
 vnc_sasl=""
 vnc_jpeg=""
 vnc_png=""
@@ -303,7 +302,7 @@ trace_backends="nop"
 trace_file="trace"
 spice=""
 rbd=""
-smartcard_nss=""
+smartcard=""
 libusb=""
 usb_redir=""
 opengl=""
@@ -329,9 +328,11 @@ glusterfs_zerofill="no"
 archipelago="no"
 gtk=""
 gtkabi=""
+gtk_gl="no"
 gnutls=""
 gnutls_hash=""
 vte=""
+virglrenderer=""
 tpm="yes"
 libssh2=""
 vhdx=""
@@ -416,6 +417,9 @@ if test "$debug_info" = "yes"; then
    LDFLAGS="-g $LDFLAGS"
 fi

+test_cflags=""
+test_libs=""
+
 # make source path absolute
 source_path=`cd "$source_path"; pwd`

@@ -880,10 +884,6 @@ for opt do
  ;;
  --disable-strip) strip_opt="no"
  ;;
-  --disable-vnc-tls) vnc_tls="no"
-  ;;
-  --enable-vnc-tls) vnc_tls="yes"
-  ;;
  --disable-vnc-sasl) vnc_sasl="no"
  ;;
  --enable-vnc-sasl) vnc_sasl="yes"
@@ -1041,9 +1041,9 @@ for opt do
  ;;
  --enable-xfsctl) xfs="yes"
  ;;
-  --disable-smartcard-nss) smartcard_nss="no"
+  --disable-smartcard) smartcard="no"
  ;;
-  --enable-smartcard-nss) smartcard_nss="yes"
+  --enable-smartcard) smartcard="yes"
  ;;
  --disable-libusb) libusb="no"
  ;;
@@ -1124,6 +1124,10 @@ for opt do
  ;;
  --enable-vte) vte="yes"
  ;;
+  --disable-virglrenderer) virglrenderer="no"
+  ;;
+  --enable-virglrenderer) virglrenderer="yes"
+  ;;
  --disable-tpm) tpm="no"
  ;;
  --enable-tpm) tpm="yes"
@@ -1162,18 +1166,14 @@ fi

 # Note that if the Python conditional here evaluates True we will exit
 # with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_info >= (3,))'; then
-  error_exit "Cannot use '$python', Python 2.4 or later is required." \
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
+  error_exit "Cannot use '$python', Python 2.6 or later is required." \
      "Note that Python 3 or later is not yet supported." \
      "Use --python=/path/to/python to specify a supported Python."
 fi

-# The -B switch was added in Python 2.6.
-# If it is supplied, compiled files are not written.
-# Use it for Python versions which support it.
-if $python -B -c 'import sys; sys.exit(0)' 2>/dev/null; then
-  python="$python -B"
-fi
+# Suppress writing compiled files
+python="$python -B"

 case "$cpu" in
    ppc)
@@ -1356,7 +1356,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  rbd             rados block device (rbd)
  libiscsi        iscsi support
  libnfs          nfs support
-  smartcard-nss   smartcard nss support
+  smartcard       smartcard support (libcacard)
  libusb          libusb (for usb passthrough)
  usb-redir       usb network redirection support
  lzo             support of lzo compression library
@@ -1738,6 +1738,37 @@ else
  l2tpv3=no
 fi

+##########################################
+# MinGW / Mingw-w64 localtime_r/gmtime_r check
+
+if test "$mingw32" = "yes"; then
+    # Some versions of MinGW / Mingw-w64 lack localtime_r
+    # and gmtime_r entirely.
+    #
+    # Some versions of Mingw-w64 define a macro for
+    # localtime_r/gmtime_r.
+    #
+    # Some versions of Mingw-w64 will define functions
+    # for localtime_r/gmtime_r, but only if you have
+    # _POSIX_THREAD_SAFE_FUNCTIONS defined. For fun
+    # though, unistd.h and pthread.h both define
+    # that for you.
+    #
+    # So this #undef localtime_r and #include <unistd.h>
+    # are not in fact redundant.
+cat > $TMPC << EOF
+#include <unistd.h>
+#include <time.h>
+#undef localtime_r
+int main(void) { localtime_r(NULL, NULL); return 0; }
+EOF
+    if compile_prog "" "" ; then
+        localtime_r="yes"
+    else
+        localtime_r="no"
+    fi
+fi
+
 ##########################################
 # pkg-config probe

@@ -2249,6 +2280,19 @@ if test "$gnutls_nettle" != "no"; then
    fi
 fi

+##########################################
+# libtasn1 - only for the TLS creds/session test suite
+
+tasn1=yes
+if $pkg_config --exists "libtasn1"; then
+    tasn1_cflags=`$pkg_config --cflags libtasn1`
+    tasn1_libs=`$pkg_config --libs libtasn1`
+    test_cflags="$test_cflags $tasn1_cflags"
+    test_libs="$test_libs $tasn1_libs"
+else
+    tasn1=no
+fi
+

 ##########################################
 # VTE probe
@@ -2393,28 +2437,6 @@ EOF
  fi
 fi

-##########################################
-# VNC TLS/WS detection
-if test "$vnc" = "yes" -a "$vnc_tls" != "no" ; then
-  cat > $TMPC <<EOF
-#include <gnutls/gnutls.h>
-int main(void) { gnutls_session_t s; gnutls_init(&s, GNUTLS_SERVER); return 0; }
-EOF
-  vnc_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
-  vnc_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
-  if compile_prog "$vnc_tls_cflags" "$vnc_tls_libs" ; then
-    if test "$vnc_tls" != "no" ; then
-      vnc_tls=yes
-    fi
-    libs_softmmu="$vnc_tls_libs $libs_softmmu"
-    QEMU_CFLAGS="$QEMU_CFLAGS $vnc_tls_cflags"
-  else
-    if test "$vnc_tls" = "yes" ; then
-      feature_not_found "vnc-tls" "Install gnutls devel"
-    fi
-    vnc_tls=no
-  fi
-fi

 ##########################################
 # VNC SASL detection
@@ -3211,6 +3233,9 @@ if test "$opengl" != "no" ; then
    opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags"
    opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs"
    opengl=yes
+    if test "$gtk" = "yes" && $pkg_config --exists "$gtkpackage >= 3.16"; then
+        gtk_gl="yes"
+    fi
  else
    if test "$opengl" = "yes" ; then
      feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs"
@@ -3821,34 +3846,20 @@ EOF
  fi
 fi

-# check for libcacard for smartcard support
+# check for smartcard support
 smartcard_cflags=""
-# TODO - what's the minimal nss version we support?
-if test "$smartcard_nss" != "no"; then
-  cat > $TMPC << EOF
-#include <pk11pub.h>
-int main(void) { PK11_FreeSlot(0); return 0; }
-EOF
-    # FIXME: do not include $glib_* in here
-    nss_libs="$($pkg_config --libs nss 2>/dev/null) $glib_libs"
-    nss_cflags="$($pkg_config --cflags nss 2>/dev/null) $glib_cflags"
-    test_cflags="$nss_cflags"
-    # The header files in nss < 3.13.3 have a bug which causes them to
-    # emit a warning. If we're going to compile QEMU with -Werror, then
-    # test that the headers don't have this bug. Otherwise we would pass
-    # the configure test but fail to compile QEMU later.
-    if test "$werror" = "yes"; then
-        test_cflags="-Werror $test_cflags"
-    fi
-    if test -n "$libtool" &&
-       $pkg_config --atleast-version=3.12.8 nss && \
-      compile_prog "$test_cflags" "$nss_libs"; then
-        smartcard_nss="yes"
+if test "$smartcard" != "no"; then
+    if $pkg_config libcacard; then
+        libcacard_cflags=$($pkg_config --cflags libcacard)
+        libcacard_libs=$($pkg_config --libs libcacard)
+        QEMU_CFLAGS="$QEMU_CFLAGS $libcacard_cflags"
+        libs_softmmu="$libs_softmmu $libcacard_libs"
+        smartcard="yes"
    else
-        if test "$smartcard_nss" = "yes"; then
-            feature_not_found "nss" "Install nss devel >= 3.12.8"
+        if test "$smartcard" = "yes"; then
+            feature_not_found "smartcard" "Install libcacard devel"
        fi
-        smartcard_nss="no"
+        smartcard="no"
    fi
 fi

@@ -3960,6 +3971,27 @@ EOF
  fi
 fi

+##########################################
+# virgl renderer probe
+
+if test "$virglrenderer" != "no" ; then
+  cat > $TMPC << EOF
+#include <virglrenderer.h>
+int main(void) { virgl_renderer_poll(); return 0; }
+EOF
+  virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null)
+  virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null)
+  if $pkg_config virglrenderer >/dev/null 2>&1 && \
+     compile_prog "$virgl_cflags" "$virgl_libs" ; then
+    virglrenderer="yes"
+  else
+    if test "$virglrenderer" = "yes" ; then
+      feature_not_found "virglrenderer"
+    fi
+    virglrenderer="no"
+  fi
+fi
+
 ##########################################
 # check if we have fdatasync

@@ -4570,12 +4602,15 @@ fi
 echo "pixman            $pixman"
 echo "SDL support       $sdl"
 echo "GTK support       $gtk"
+echo "GTK GL support    $gtk_gl"
 echo "GNUTLS support    $gnutls"
 echo "GNUTLS hash       $gnutls_hash"
 echo "GNUTLS gcrypt     $gnutls_gcrypt"
 echo "GNUTLS nettle     $gnutls_nettle ${gnutls_nettle+($nettle_version)}"
+echo "libtasn1          $tasn1"
 echo "VTE support       $vte"
 echo "curses support    $curses"
+echo "virgl support     $virglrenderer"
 echo "curl support      $curl"
 echo "mingw32 support   $mingw32"
 echo "Audio drivers     $audio_drv_list"
@@ -4584,7 +4619,6 @@ echo "Block whitelist (ro) $block_drv_ro_whitelist"
 echo "VirtFS support    $virtfs"
 echo "VNC support       $vnc"
 if test "$vnc" = "yes" ; then
-    echo "VNC TLS support   $vnc_tls"
    echo "VNC SASL support  $vnc_sasl"
    echo "VNC JPEG support  $vnc_jpeg"
    echo "VNC PNG support   $vnc_png"
@@ -4629,7 +4663,7 @@ echo "spice support     $spice"
 fi
 echo "rbd support       $rbd"
 echo "xfsctl support    $xfs"
-echo "nss used          $smartcard_nss"
+echo "smartcard support $smartcard"
 echo "libusb            $libusb"
 echo "usb net redir     $usb_redir"
 echo "OpenGL support    $opengl"
@@ -4793,9 +4827,6 @@ echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak
 if test "$vnc" = "yes" ; then
  echo "CONFIG_VNC=y" >> $config_host_mak
 fi
-if test "$vnc_tls" = "yes" ; then
-  echo "CONFIG_VNC_TLS=y" >> $config_host_mak
-fi
 if test "$vnc_sasl" = "yes" ; then
  echo "CONFIG_VNC_SASL=y" >> $config_host_mak
 fi
@@ -4931,6 +4962,9 @@ if test "$gtk" = "yes" ; then
  echo "CONFIG_GTK=y" >> $config_host_mak
  echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
  echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak
+  if test "$gtk_gl" = "yes" ; then
+    echo "CONFIG_GTK_GL=y" >> $config_host_mak
+  fi
 fi
 if test "$gnutls" = "yes" ; then
  echo "CONFIG_GNUTLS=y" >> $config_host_mak
@@ -4945,10 +4979,18 @@ if test "$gnutls_nettle" = "yes" ; then
  echo "CONFIG_GNUTLS_NETTLE=y" >> $config_host_mak
  echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
 fi
+if test "$tasn1" = "yes" ; then
+  echo "CONFIG_TASN1=y" >> $config_host_mak
+fi
 if test "$vte" = "yes" ; then
  echo "CONFIG_VTE=y" >> $config_host_mak
  echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
 fi
+if test "$virglrenderer" = "yes" ; then
+  echo "CONFIG_VIRGL=y" >> $config_host_mak
+  echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak
+  echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak
+fi
 if test "$xen" = "yes" ; then
  echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak
  echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak
@@ -5006,10 +5048,8 @@ if test "$spice" = "yes" ; then
  echo "CONFIG_SPICE=y" >> $config_host_mak
 fi

-if test "$smartcard_nss" = "yes" ; then
-  echo "CONFIG_SMARTCARD_NSS=y" >> $config_host_mak
-  echo "NSS_LIBS=$nss_libs" >> $config_host_mak
-  echo "NSS_CFLAGS=$nss_cflags" >> $config_host_mak
+if test "$smartcard" = "yes" ; then
+  echo "CONFIG_SMARTCARD=y" >> $config_host_mak
 fi

 if test "$libusb" = "yes" ; then
@@ -5061,6 +5101,9 @@ fi
 if test "$zero_malloc" = "yes" ; then
  echo "CONFIG_ZERO_MALLOC=y" >> $config_host_mak
 fi
+if test "$localtime_r" = "yes" ; then
+  echo "CONFIG_LOCALTIME_R=y" >> $config_host_mak
+fi
 if test "$qom_cast_debug" = "yes" ; then
  echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak
 fi
@@ -5268,6 +5311,8 @@ echo "EXESUF=$EXESUF" >> $config_host_mak
 echo "DSOSUF=$DSOSUF" >> $config_host_mak
 echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
 echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
+echo "TEST_LIBS=$test_libs" >> $config_host_mak
+echo "TEST_CFLAGS=$test_cflags" >> $config_host_mak
 echo "POD2MAN=$POD2MAN" >> $config_host_mak
 echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak
 if test "$gcov" = "yes" ; then
@@ -5448,7 +5493,9 @@ case "$target_name" in
    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml"
+    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
+  ;;
+  tilegx)
  ;;
  tricore)
  ;;
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -0,0 +1,82 @@
+/*
+ *  emulator main execution loop
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+#include "cpu.h"
+#include "sysemu/cpus.h"
+#include "exec/memory-internal.h"
+
+bool exit_request;
+CPUState *tcg_current_cpu;
+
+/* exit the current TB from a signal handler. The host registers are
+   restored in a state compatible with the CPU emulator
+ */
+#if defined(CONFIG_SOFTMMU)
+void cpu_resume_from_signal(CPUState *cpu, void *puc)
+{
+    /* XXX: restore cpu registers saved in host registers */
+
+    cpu->exception_index = -1;
+    siglongjmp(cpu->jmp_env, 1);
+}
+
+void cpu_reloading_memory_map(void)
+{
+    if (qemu_in_vcpu_thread()) {
+        /* The guest can in theory prolong the RCU critical section as long
+         * as it feels like. The major problem with this is that because it
+         * can do multiple reconfigurations of the memory map within the
+         * critical section, we could potentially accumulate an unbounded
+         * collection of memory data structures awaiting reclamation.
+         *
+         * Because the only thing we're currently protecting with RCU is the
+         * memory data structures, it's sufficient to break the critical section
+         * in this callback, which we know will get called every time the
+         * memory map is rearranged.
+         *
+         * (If we add anything else in the system that uses RCU to protect
+         * its data structures, we will need to implement some other mechanism
+         * to force TCG CPUs to exit the critical section, at which point this
+         * part of this callback might become unnecessary.)
+         *
+         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
+         * only protects cpu->as->dispatch. Since we know our caller is about
+         * to reload it, it's safe to split the critical section.
+         */
+        rcu_read_unlock();
+        rcu_read_lock();
+    }
+}
+#endif
+
+void cpu_loop_exit(CPUState *cpu)
+{
+    cpu->current_tb = NULL;
+    siglongjmp(cpu->jmp_env, 1);
+}
+
+void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
+{
+    if (pc) {
+        cpu_restore_state(cpu, pc);
+    }
+    cpu->current_tb = NULL;
+    siglongjmp(cpu->jmp_env, 1);
+}
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -25,9 +25,11 @@
 #include "sysemu/qtest.h"
 #include "qemu/timer.h"
 #include "exec/address-spaces.h"
-#include "exec/memory-internal.h"
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
+#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
+#include "hw/i386/apic.h"
+#endif

 /* -icount align implementation. */

@@ -128,61 +130,6 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 }
 #endif /* CONFIG USER ONLY */

-void cpu_loop_exit(CPUState *cpu)
-{
-    cpu->current_tb = NULL;
-    siglongjmp(cpu->jmp_env, 1);
-}
-
-void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
-{
-    if (pc) {
-        cpu_restore_state(cpu, pc);
-    }
-    cpu->current_tb = NULL;
-    siglongjmp(cpu->jmp_env, 1);
-}
-
-/* exit the current TB from a signal handler. The host registers are
-   restored in a state compatible with the CPU emulator
- */
-#if defined(CONFIG_SOFTMMU)
-void cpu_resume_from_signal(CPUState *cpu, void *puc)
-{
-    /* XXX: restore cpu registers saved in host registers */
-
-    cpu->exception_index = -1;
-    siglongjmp(cpu->jmp_env, 1);
-}
-
-void cpu_reload_memory_map(CPUState *cpu)
-{
-    AddressSpaceDispatch *d;
-
-    if (qemu_in_vcpu_thread()) {
-        /* Do not let the guest prolong the critical section as much as it
-         * as it desires.
-         *
-         * Currently, this is prevented by the I/O thread's periodinc kicking
-         * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread)
-         * but this will go away once TCG's execution moves out of the global
-         * mutex.
-         *
-         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
-         * only protects cpu->as->dispatch.  Since we reload it below, we can
-         * split the critical section.
-         */
-        rcu_read_unlock();
-        rcu_read_lock();
-    }
-
-    /* The CPU and TLB are protected by the iothread lock.  */
-    d = atomic_rcu_read(&cpu->as->dispatch);
-    cpu->memory_dispatch = d;
-    tlb_flush(cpu, 1);
-}
-#endif
-
 /* Execute a TB, and fix up the CPU state afterwards if necessary */
 static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
 {
@@ -385,9 +332,6 @@ static void cpu_handle_debug_exception(CPUState *cpu)

 /* main execution loop */

-bool exit_request;
-CPUState *tcg_current_cpu;
-
 int cpu_exec(CPUState *cpu)
 {
    CPUClass *cc = CPU_GET_CLASS(cpu);
@@ -402,6 +346,12 @@ int cpu_exec(CPUState *cpu)
    SyncClocks sc;

    if (cpu->halted) {
+#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
+        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
+            apic_poll_irq(x86_cpu->apic_state);
+            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+        }
+#endif
        if (!cpu_has_work(cpu)) {
            return EXCP_HALTED;
        }
--- a/cpus.c
+++ b/cpus.c
@@ -69,6 +69,14 @@ static CPUState *next_cpu;
 int64_t max_delay;
 int64_t max_advance;

+/* vcpu throttling controls */
+static QEMUTimer *throttle_timer;
+static unsigned int throttle_percentage;
+
+#define CPU_THROTTLE_PCT_MIN 1
+#define CPU_THROTTLE_PCT_MAX 99
+#define CPU_THROTTLE_TIMESLICE_NS 10000000
+
 bool cpu_is_stopped(CPUState *cpu)
 {
    return cpu->stopped || !runstate_is_running();
@@ -191,7 +199,7 @@ int64_t cpu_get_ticks(void)

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
-        ticks += cpu_get_real_ticks();
+        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
@@ -239,7 +247,7 @@ void cpu_enable_ticks(void)
    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
+        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
@@ -255,7 +263,7 @@ void cpu_disable_ticks(void)
    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
-        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
+        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
@@ -505,10 +513,80 @@ static const VMStateDescription vmstate_timers = {
    }
 };

+static void cpu_throttle_thread(void *opaque)
+{
+    CPUState *cpu = opaque;
+    double pct;
+    double throttle_ratio;
+    long sleeptime_ns;
+
+    if (!cpu_throttle_get_percentage()) {
+        return;
+    }
+
+    pct = (double)cpu_throttle_get_percentage()/100;
+    throttle_ratio = pct / (1 - pct);
+    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
+
+    qemu_mutex_unlock_iothread();
+    atomic_set(&cpu->throttle_thread_scheduled, 0);
+    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
+    qemu_mutex_lock_iothread();
+}
+
+static void cpu_throttle_timer_tick(void *opaque)
+{
+    CPUState *cpu;
+    double pct;
+
+    /* Stop the timer if needed */
+    if (!cpu_throttle_get_percentage()) {
+        return;
+    }
+    CPU_FOREACH(cpu) {
+        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
+            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
+        }
+    }
+
+    pct = (double)cpu_throttle_get_percentage()/100;
+    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
+                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
+}
+
+void cpu_throttle_set(int new_throttle_pct)
+{
+    /* Ensure throttle percentage is within valid range */
+    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
+    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
+
+    atomic_set(&throttle_percentage, new_throttle_pct);
+
+    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
+                                       CPU_THROTTLE_TIMESLICE_NS);
+}
+
+void cpu_throttle_stop(void)
+{
+    atomic_set(&throttle_percentage, 0);
+}
+
+bool cpu_throttle_active(void)
+{
+    return (cpu_throttle_get_percentage() != 0);
+}
+
+int cpu_throttle_get_percentage(void)
+{
+    return atomic_read(&throttle_percentage);
+}
+
 void cpu_ticks_init(void)
 {
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
+    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+                                           cpu_throttle_timer_tick, NULL);
 }

 void configure_icount(QemuOpts *opts, Error **errp)
--- a/cputlb.c
+++ b/cputlb.c
@@ -262,27 +262,24 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
    return ram_addr;
 }

-void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
+void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
 {
-    CPUState *cpu;
    CPUArchState *env;

-    CPU_FOREACH(cpu) {
-        int mmu_idx;
+    int mmu_idx;

-        env = cpu->env_ptr;
-        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-            unsigned int i;
+    env = cpu->env_ptr;
+    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+        unsigned int i;

-            for (i = 0; i < CPU_TLB_SIZE; i++) {
-                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
-                                      start1, length);
-            }
+        for (i = 0; i < CPU_TLB_SIZE; i++) {
+            tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
+                                  start1, length);
+        }

-            for (i = 0; i < CPU_VTLB_SIZE; i++) {
-                tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
-                                      start1, length);
-            }
+        for (i = 0; i < CPU_VTLB_SIZE; i++) {
+            tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
+                                  start1, length);
        }
    }
 }
@@ -296,8 +293,9 @@ static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)

 /* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
-void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
+void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 {
+    CPUArchState *env = cpu->env_ptr;
    int i;
    int mmu_idx;

--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -1,5 +1,12 @@
-util-obj-y += init.o
-util-obj-y += hash.o
-util-obj-y += aes.o
-util-obj-y += desrfb.o
-util-obj-y += cipher.o
+crypto-obj-y = init.o
+crypto-obj-y += hash.o
+crypto-obj-y += aes.o
+crypto-obj-y += desrfb.o
+crypto-obj-y += cipher.o
+crypto-obj-y += tlscreds.o
+crypto-obj-y += tlscredsanon.o
+crypto-obj-y += tlscredsx509.o
+crypto-obj-y += tlssession.o
+
+# Let the userspace emulators avoid linking gnutls/etc
+crypto-aes-obj-y = aes.o
--- a/crypto/tlscreds.c
+++ b/crypto/tlscreds.c
@@ -0,0 +1,251 @@
+/*
+ * QEMU crypto TLS credential support
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "crypto/tlscredspriv.h"
+#include "trace.h"
+
+#define DH_BITS 2048
+
+#ifdef CONFIG_GNUTLS
+int
+qcrypto_tls_creds_get_dh_params_file(QCryptoTLSCreds *creds,
+                                     const char *filename,
+                                     gnutls_dh_params_t *dh_params,
+                                     Error **errp)
+{
+    int ret;
+
+    trace_qcrypto_tls_creds_load_dh(creds, filename ? filename : "<generated>");
+
+    if (filename == NULL) {
+        ret = gnutls_dh_params_init(dh_params);
+        if (ret < 0) {
+            error_setg(errp, "Unable to initialize DH parameters: %s",
+                       gnutls_strerror(ret));
+            return -1;
+        }
+        ret = gnutls_dh_params_generate2(*dh_params, DH_BITS);
+        if (ret < 0) {
+            gnutls_dh_params_deinit(*dh_params);
+            *dh_params = NULL;
+            error_setg(errp, "Unable to generate DH parameters: %s",
+                       gnutls_strerror(ret));
+            return -1;
+        }
+    } else {
+        GError *gerr = NULL;
+        gchar *contents;
+        gsize len;
+        gnutls_datum_t data;
+        if (!g_file_get_contents(filename,
+                                 &contents,
+                                 &len,
+                                 &gerr)) {
+
+            error_setg(errp, "%s", gerr->message);
+            g_error_free(gerr);
+            return -1;
+        }
+        data.data = (unsigned char *)contents;
+        data.size = len;
+        ret = gnutls_dh_params_init(dh_params);
+        if (ret < 0) {
+            g_free(contents);
+            error_setg(errp, "Unable to initialize DH parameters: %s",
+                       gnutls_strerror(ret));
+            return -1;
+        }
+        ret = gnutls_dh_params_import_pkcs3(*dh_params,
+                                            &data,
+                                            GNUTLS_X509_FMT_PEM);
+        g_free(contents);
+        if (ret < 0) {
+            gnutls_dh_params_deinit(*dh_params);
+            *dh_params = NULL;
+            error_setg(errp, "Unable to load DH parameters from %s: %s",
+                       filename, gnutls_strerror(ret));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+
+int
+qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds,
+                           const char *filename,
+                           bool required,
+                           char **cred,
+                           Error **errp)
+{
+    struct stat sb;
+    int ret = -1;
+
+    if (!creds->dir) {
+        if (required) {
+            error_setg(errp, "Missing 'dir' property value");
+            return -1;
+        } else {
+            return 0;
+        }
+    }
+
+    *cred = g_strdup_printf("%s/%s", creds->dir, filename);
+
+    if (stat(*cred, &sb) < 0) {
+        if (errno == ENOENT && !required) {
+            ret = 0;
+        } else {
+            error_setg_errno(errp, errno,
+                             "Unable to access credentials %s",
+                             *cred);
+        }
+        g_free(*cred);
+        *cred = NULL;
+        goto cleanup;
+    }
+
+    trace_qcrypto_tls_creds_get_path(creds, filename,
+                                     *cred ? *cred : "<none>");
+    ret = 0;
+ cleanup:
+    return ret;
+}
+
+
+#endif /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_prop_set_verify(Object *obj,
+                                  bool value,
+                                  Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    creds->verifyPeer = value;
+}
+
+
+static bool
+qcrypto_tls_creds_prop_get_verify(Object *obj,
+                                  Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    return creds->verifyPeer;
+}
+
+
+static void
+qcrypto_tls_creds_prop_set_dir(Object *obj,
+                               const char *value,
+                               Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    creds->dir = g_strdup(value);
+}
+
+
+static char *
+qcrypto_tls_creds_prop_get_dir(Object *obj,
+                               Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    return g_strdup(creds->dir);
+}
+
+
+static void
+qcrypto_tls_creds_prop_set_endpoint(Object *obj,
+                                    int value,
+                                    Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    creds->endpoint = value;
+}
+
+
+static int
+qcrypto_tls_creds_prop_get_endpoint(Object *obj,
+                                    Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    return creds->endpoint;
+}
+
+
+static void
+qcrypto_tls_creds_init(Object *obj)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    creds->verifyPeer = true;
+
+    object_property_add_bool(obj, "verify-peer",
+                             qcrypto_tls_creds_prop_get_verify,
+                             qcrypto_tls_creds_prop_set_verify,
+                             NULL);
+    object_property_add_str(obj, "dir",
+                            qcrypto_tls_creds_prop_get_dir,
+                            qcrypto_tls_creds_prop_set_dir,
+                            NULL);
+    object_property_add_enum(obj, "endpoint",
+                             "QCryptoTLSCredsEndpoint",
+                             QCryptoTLSCredsEndpoint_lookup,
+                             qcrypto_tls_creds_prop_get_endpoint,
+                             qcrypto_tls_creds_prop_set_endpoint,
+                             NULL);
+}
+
+
+static void
+qcrypto_tls_creds_finalize(Object *obj)
+{
+    QCryptoTLSCreds *creds = QCRYPTO_TLS_CREDS(obj);
+
+    g_free(creds->dir);
+}
+
+
+static const TypeInfo qcrypto_tls_creds_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_QCRYPTO_TLS_CREDS,
+    .instance_size = sizeof(QCryptoTLSCreds),
+    .instance_init = qcrypto_tls_creds_init,
+    .instance_finalize = qcrypto_tls_creds_finalize,
+    .class_size = sizeof(QCryptoTLSCredsClass),
+    .abstract = true,
+};
+
+
+static void
+qcrypto_tls_creds_register_types(void)
+{
+    type_register_static(&qcrypto_tls_creds_info);
+}
+
+
+type_init(qcrypto_tls_creds_register_types);
--- a/crypto/tlscredsanon.c
+++ b/crypto/tlscredsanon.c
@@ -0,0 +1,223 @@
+/*
+ * QEMU crypto TLS anonymous credential support
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "crypto/tlscredsanon.h"
+#include "crypto/tlscredspriv.h"
+#include "qom/object_interfaces.h"
+#include "trace.h"
+
+
+#ifdef CONFIG_GNUTLS
+
+
+static int
+qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds,
+                            Error **errp)
+{
+    char *dhparams = NULL;
+    int ret;
+    int rv = -1;
+
+    trace_qcrypto_tls_creds_anon_load(creds,
+            creds->parent_obj.dir ? creds->parent_obj.dir : "<nodir>");
+
+    if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+        if (qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_DH_PARAMS,
+                                       false, &dhparams, errp) < 0) {
+            goto cleanup;
+        }
+
+        ret = gnutls_anon_allocate_server_credentials(&creds->data.server);
+        if (ret < 0) {
+            error_setg(errp, "Cannot allocate credentials: %s",
+                       gnutls_strerror(ret));
+            goto cleanup;
+        }
+
+        if (qcrypto_tls_creds_get_dh_params_file(&creds->parent_obj, dhparams,
+                                                 &creds->parent_obj.dh_params,
+                                                 errp) < 0) {
+            goto cleanup;
+        }
+
+        gnutls_anon_set_server_dh_params(creds->data.server,
+                                         creds->parent_obj.dh_params);
+    } else {
+        ret = gnutls_anon_allocate_client_credentials(&creds->data.client);
+        if (ret < 0) {
+            error_setg(errp, "Cannot allocate credentials: %s",
+                       gnutls_strerror(ret));
+            goto cleanup;
+        }
+    }
+
+    rv = 0;
+ cleanup:
+    g_free(dhparams);
+    return rv;
+}
+
+
+static void
+qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds)
+{
+    if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+        if (creds->data.client) {
+            gnutls_anon_free_client_credentials(creds->data.client);
+            creds->data.client = NULL;
+        }
+    } else {
+        if (creds->data.server) {
+            gnutls_anon_free_server_credentials(creds->data.server);
+            creds->data.server = NULL;
+        }
+    }
+    if (creds->parent_obj.dh_params) {
+        gnutls_dh_params_deinit(creds->parent_obj.dh_params);
+        creds->parent_obj.dh_params = NULL;
+    }
+}
+
+#else /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED,
+                            Error **errp)
+{
+    error_setg(errp, "TLS credentials support requires GNUTLS");
+}
+
+
+static void
+qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED)
+{
+    /* nada */
+}
+
+
+#endif /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_anon_prop_set_loaded(Object *obj,
+                                       bool value,
+                                       Error **errp)
+{
+    QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj);
+
+    if (value) {
+        qcrypto_tls_creds_anon_load(creds, errp);
+    } else {
+        qcrypto_tls_creds_anon_unload(creds);
+    }
+}
+
+
+#ifdef CONFIG_GNUTLS
+
+
+static bool
+qcrypto_tls_creds_anon_prop_get_loaded(Object *obj,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj);
+
+    if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+        return creds->data.server != NULL;
+    } else {
+        return creds->data.client != NULL;
+    }
+}
+
+
+#else /* ! CONFIG_GNUTLS */
+
+
+static bool
+qcrypto_tls_creds_anon_prop_get_loaded(Object *obj G_GNUC_UNUSED,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    return false;
+}
+
+
+#endif /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp)
+{
+    object_property_set_bool(OBJECT(uc), true, "loaded", errp);
+}
+
+
+static void
+qcrypto_tls_creds_anon_init(Object *obj)
+{
+    object_property_add_bool(obj, "loaded",
+                             qcrypto_tls_creds_anon_prop_get_loaded,
+                             qcrypto_tls_creds_anon_prop_set_loaded,
+                             NULL);
+}
+
+
+static void
+qcrypto_tls_creds_anon_finalize(Object *obj)
+{
+    QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj);
+
+    qcrypto_tls_creds_anon_unload(creds);
+}
+
+
+static void
+qcrypto_tls_creds_anon_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = qcrypto_tls_creds_anon_complete;
+}
+
+
+static const TypeInfo qcrypto_tls_creds_anon_info = {
+    .parent = TYPE_QCRYPTO_TLS_CREDS,
+    .name = TYPE_QCRYPTO_TLS_CREDS_ANON,
+    .instance_size = sizeof(QCryptoTLSCredsAnon),
+    .instance_init = qcrypto_tls_creds_anon_init,
+    .instance_finalize = qcrypto_tls_creds_anon_finalize,
+    .class_size = sizeof(QCryptoTLSCredsAnonClass),
+    .class_init = qcrypto_tls_creds_anon_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+
+static void
+qcrypto_tls_creds_anon_register_types(void)
+{
+    type_register_static(&qcrypto_tls_creds_anon_info);
+}
+
+
+type_init(qcrypto_tls_creds_anon_register_types);
--- a/crypto/tlscredspriv.h
+++ b/crypto/tlscredspriv.h
@@ -0,0 +1,42 @@
+/*
+ * QEMU crypto TLS credential support private helpers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_TLSCRED_PRIV_H__
+#define QCRYPTO_TLSCRED_PRIV_H__
+
+#include "crypto/tlscreds.h"
+
+#ifdef CONFIG_GNUTLS
+
+int qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds,
+                               const char *filename,
+                               bool required,
+                               char **cred,
+                               Error **errp);
+
+int qcrypto_tls_creds_get_dh_params_file(QCryptoTLSCreds *creds,
+                                         const char *filename,
+                                         gnutls_dh_params_t *dh_params,
+                                         Error **errp);
+
+#endif
+
+#endif /* QCRYPTO_TLSCRED_PRIV_H__ */
+
--- a/crypto/tlscredsx509.c
+++ b/crypto/tlscredsx509.c
@@ -0,0 +1,809 @@
+/*
+ * QEMU crypto TLS x509 credential support
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "crypto/tlscredsx509.h"
+#include "crypto/tlscredspriv.h"
+#include "qom/object_interfaces.h"
+#include "trace.h"
+
+
+#ifdef CONFIG_GNUTLS
+
+#include <gnutls/x509.h>
+
+
+static int
+qcrypto_tls_creds_check_cert_times(gnutls_x509_crt_t cert,
+                                   const char *certFile,
+                                   bool isServer,
+                                   bool isCA,
+                                   Error **errp)
+{
+    time_t now = time(NULL);
+
+    if (now == ((time_t)-1)) {
+        error_setg_errno(errp, errno, "cannot get current time");
+        return -1;
+    }
+
+    if (gnutls_x509_crt_get_expiration_time(cert) < now) {
+        error_setg(errp,
+                   (isCA ?
+                    "The CA certificate %s has expired" :
+                    (isServer ?
+                     "The server certificate %s has expired" :
+                     "The client certificate %s has expired")),
+                   certFile);
+        return -1;
+    }
+
+    if (gnutls_x509_crt_get_activation_time(cert) > now) {
+        error_setg(errp,
+                   (isCA ?
+                    "The CA certificate %s is not yet active" :
+                    (isServer ?
+                     "The server certificate %s is not yet active" :
+                     "The client certificate %s is not yet active")),
+                   certFile);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+#if LIBGNUTLS_VERSION_NUMBER >= 2
+/*
+ * The gnutls_x509_crt_get_basic_constraints function isn't
+ * available in GNUTLS 1.0.x branches. This isn't critical
+ * though, since gnutls_certificate_verify_peers2 will do
+ * pretty much the same check at runtime, so we can just
+ * disable this code
+ */
+static int
+qcrypto_tls_creds_check_cert_basic_constraints(QCryptoTLSCredsX509 *creds,
+                                               gnutls_x509_crt_t cert,
+                                               const char *certFile,
+                                               bool isServer,
+                                               bool isCA,
+                                               Error **errp)
+{
+    int status;
+
+    status = gnutls_x509_crt_get_basic_constraints(cert, NULL, NULL, NULL);
+    trace_qcrypto_tls_creds_x509_check_basic_constraints(
+        creds, certFile, status);
+
+    if (status > 0) { /* It is a CA cert */
+        if (!isCA) {
+            error_setg(errp, isServer ?
+                       "The certificate %s basic constraints show a CA, "
+                       "but we need one for a server" :
+                       "The certificate %s basic constraints show a CA, "
+                       "but we need one for a client",
+                       certFile);
+            return -1;
+        }
+    } else if (status == 0) { /* It is not a CA cert */
+        if (isCA) {
+            error_setg(errp,
+                       "The certificate %s basic constraints do not "
+                       "show a CA",
+                       certFile);
+            return -1;
+        }
+    } else if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) {
+        /* Missing basicConstraints */
+        if (isCA) {
+            error_setg(errp,
+                       "The certificate %s is missing basic constraints "
+                       "for a CA",
+                       certFile);
+            return -1;
+        }
+    } else { /* General error */
+        error_setg(errp,
+                   "Unable to query certificate %s basic constraints: %s",
+                   certFile, gnutls_strerror(status));
+        return -1;
+    }
+
+    return 0;
+}
+#endif
+
+
+static int
+qcrypto_tls_creds_check_cert_key_usage(QCryptoTLSCredsX509 *creds,
+                                       gnutls_x509_crt_t cert,
+                                       const char *certFile,
+                                       bool isCA,
+                                       Error **errp)
+{
+    int status;
+    unsigned int usage = 0;
+    unsigned int critical = 0;
+
+    status = gnutls_x509_crt_get_key_usage(cert, &usage, &critical);
+    trace_qcrypto_tls_creds_x509_check_key_usage(
+        creds, certFile, status, usage, critical);
+
+    if (status < 0) {
+        if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) {
+            usage = isCA ? GNUTLS_KEY_KEY_CERT_SIGN :
+                GNUTLS_KEY_DIGITAL_SIGNATURE|GNUTLS_KEY_KEY_ENCIPHERMENT;
+        } else {
+            error_setg(errp,
+                       "Unable to query certificate %s key usage: %s",
+                       certFile, gnutls_strerror(status));
+            return -1;
+        }
+    }
+
+    if (isCA) {
+        if (!(usage & GNUTLS_KEY_KEY_CERT_SIGN)) {
+            if (critical) {
+                error_setg(errp,
+                           "Certificate %s usage does not permit "
+                           "certificate signing", certFile);
+                return -1;
+            }
+        }
+    } else {
+        if (!(usage & GNUTLS_KEY_DIGITAL_SIGNATURE)) {
+            if (critical) {
+                error_setg(errp,
+                           "Certificate %s usage does not permit digital "
+                           "signature", certFile);
+                return -1;
+            }
+        }
+        if (!(usage & GNUTLS_KEY_KEY_ENCIPHERMENT)) {
+            if (critical) {
+                error_setg(errp,
+                           "Certificate %s usage does not permit key "
+                           "encipherment", certFile);
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+static int
+qcrypto_tls_creds_check_cert_key_purpose(QCryptoTLSCredsX509 *creds,
+                                         gnutls_x509_crt_t cert,
+                                         const char *certFile,
+                                         bool isServer,
+                                         Error **errp)
+{
+    int status;
+    size_t i;
+    unsigned int purposeCritical;
+    unsigned int critical;
+    char *buffer = NULL;
+    size_t size;
+    bool allowClient = false, allowServer = false;
+
+    critical = 0;
+    for (i = 0; ; i++) {
+        size = 0;
+        status = gnutls_x509_crt_get_key_purpose_oid(cert, i, buffer,
+                                                     &size, NULL);
+
+        if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) {
+
+            /* If there is no data at all, then we must allow
+               client/server to pass */
+            if (i == 0) {
+                allowServer = allowClient = true;
+            }
+            break;
+        }
+        if (status != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+            error_setg(errp,
+                       "Unable to query certificate %s key purpose: %s",
+                       certFile, gnutls_strerror(status));
+            return -1;
+        }
+
+        buffer = g_new0(char, size);
+
+        status = gnutls_x509_crt_get_key_purpose_oid(cert, i, buffer,
+                                                     &size, &purposeCritical);
+
+        if (status < 0) {
+            trace_qcrypto_tls_creds_x509_check_key_purpose(
+                creds, certFile, status, "<none>", purposeCritical);
+            g_free(buffer);
+            error_setg(errp,
+                       "Unable to query certificate %s key purpose: %s",
+                       certFile, gnutls_strerror(status));
+            return -1;
+        }
+        trace_qcrypto_tls_creds_x509_check_key_purpose(
+            creds, certFile, status, buffer, purposeCritical);
+        if (purposeCritical) {
+            critical = true;
+        }
+
+        if (g_str_equal(buffer, GNUTLS_KP_TLS_WWW_SERVER)) {
+            allowServer = true;
+        } else if (g_str_equal(buffer, GNUTLS_KP_TLS_WWW_CLIENT)) {
+            allowClient = true;
+        } else if (g_str_equal(buffer, GNUTLS_KP_ANY)) {
+            allowServer = allowClient = true;
+        }
+
+        g_free(buffer);
+    }
+
+    if (isServer) {
+        if (!allowServer) {
+            if (critical) {
+                error_setg(errp,
+                           "Certificate %s purpose does not allow "
+                           "use with a TLS server", certFile);
+                return -1;
+            }
+        }
+    } else {
+        if (!allowClient) {
+            if (critical) {
+                error_setg(errp,
+                           "Certificate %s purpose does not allow use "
+                           "with a TLS client", certFile);
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+static int
+qcrypto_tls_creds_check_cert(QCryptoTLSCredsX509 *creds,
+                             gnutls_x509_crt_t cert,
+                             const char *certFile,
+                             bool isServer,
+                             bool isCA,
+                             Error **errp)
+{
+    if (qcrypto_tls_creds_check_cert_times(cert, certFile,
+                                           isServer, isCA,
+                                           errp) < 0) {
+        return -1;
+    }
+
+#if LIBGNUTLS_VERSION_NUMBER >= 2
+    if (qcrypto_tls_creds_check_cert_basic_constraints(creds,
+                                                       cert, certFile,
+                                                       isServer, isCA,
+                                                       errp) < 0) {
+        return -1;
+    }
+#endif
+
+    if (qcrypto_tls_creds_check_cert_key_usage(creds,
+                                               cert, certFile,
+                                               isCA, errp) < 0) {
+        return -1;
+    }
+
+    if (!isCA &&
+        qcrypto_tls_creds_check_cert_key_purpose(creds,
+                                                 cert, certFile,
+                                                 isServer, errp) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+static int
+qcrypto_tls_creds_check_cert_pair(gnutls_x509_crt_t cert,
+                                  const char *certFile,
+                                  gnutls_x509_crt_t *cacerts,
+                                  size_t ncacerts,
+                                  const char *cacertFile,
+                                  bool isServer,
+                                  Error **errp)
+{
+    unsigned int status;
+
+    if (gnutls_x509_crt_list_verify(&cert, 1,
+                                    cacerts, ncacerts,
+                                    NULL, 0,
+                                    0, &status) < 0) {
+        error_setg(errp, isServer ?
+                   "Unable to verify server certificate %s against "
+                   "CA certificate %s" :
+                   "Unable to verify client certificate %s against "
+                   "CA certificate %s",
+                   certFile, cacertFile);
+        return -1;
+    }
+
+    if (status != 0) {
+        const char *reason = "Invalid certificate";
+
+        if (status & GNUTLS_CERT_INVALID) {
+            reason = "The certificate is not trusted";
+        }
+
+        if (status & GNUTLS_CERT_SIGNER_NOT_FOUND) {
+            reason = "The certificate hasn't got a known issuer";
+        }
+
+        if (status & GNUTLS_CERT_REVOKED) {
+            reason = "The certificate has been revoked";
+        }
+
+#ifndef GNUTLS_1_0_COMPAT
+        if (status & GNUTLS_CERT_INSECURE_ALGORITHM) {
+            reason = "The certificate uses an insecure algorithm";
+        }
+#endif
+
+        error_setg(errp,
+                   "Our own certificate %s failed validation against %s: %s",
+                   certFile, cacertFile, reason);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+static gnutls_x509_crt_t
+qcrypto_tls_creds_load_cert(QCryptoTLSCredsX509 *creds,
+                            const char *certFile,
+                            bool isServer,
+                            Error **errp)
+{
+    gnutls_datum_t data;
+    gnutls_x509_crt_t cert = NULL;
+    char *buf = NULL;
+    gsize buflen;
+    GError *gerr;
+    int ret = -1;
+
+    trace_qcrypto_tls_creds_x509_load_cert(creds, isServer, certFile);
+
+    if (gnutls_x509_crt_init(&cert) < 0) {
+        error_setg(errp, "Unable to initialize certificate");
+        goto cleanup;
+    }
+
+    if (!g_file_get_contents(certFile, &buf, &buflen, &gerr)) {
+        error_setg(errp, "Cannot load CA cert list %s: %s",
+                   certFile, gerr->message);
+        g_error_free(gerr);
+        goto cleanup;
+    }
+
+    data.data = (unsigned char *)buf;
+    data.size = strlen(buf);
+
+    if (gnutls_x509_crt_import(cert, &data, GNUTLS_X509_FMT_PEM) < 0) {
+        error_setg(errp, isServer ?
+                   "Unable to import server certificate %s" :
+                   "Unable to import client certificate %s",
+                   certFile);
+        goto cleanup;
+    }
+
+    ret = 0;
+
+ cleanup:
+    if (ret != 0) {
+        gnutls_x509_crt_deinit(cert);
+        cert = NULL;
+    }
+    g_free(buf);
+    return cert;
+}
+
+
+static int
+qcrypto_tls_creds_load_ca_cert_list(QCryptoTLSCredsX509 *creds,
+                                    const char *certFile,
+                                    gnutls_x509_crt_t *certs,
+                                    unsigned int certMax,
+                                    size_t *ncerts,
+                                    Error **errp)
+{
+    gnutls_datum_t data;
+    char *buf = NULL;
+    gsize buflen;
+    int ret = -1;
+    GError *gerr = NULL;
+
+    *ncerts = 0;
+    trace_qcrypto_tls_creds_x509_load_cert_list(creds, certFile);
+
+    if (!g_file_get_contents(certFile, &buf, &buflen, &gerr)) {
+        error_setg(errp, "Cannot load CA cert list %s: %s",
+                   certFile, gerr->message);
+        g_error_free(gerr);
+        goto cleanup;
+    }
+
+    data.data = (unsigned char *)buf;
+    data.size = strlen(buf);
+
+    if (gnutls_x509_crt_list_import(certs, &certMax, &data,
+                                    GNUTLS_X509_FMT_PEM, 0) < 0) {
+        error_setg(errp,
+                   "Unable to import CA certificate list %s",
+                   certFile);
+        goto cleanup;
+    }
+    *ncerts = certMax;
+
+    ret = 0;
+
+ cleanup:
+    g_free(buf);
+    return ret;
+}
+
+
+#define MAX_CERTS 16
+static int
+qcrypto_tls_creds_x509_sanity_check(QCryptoTLSCredsX509 *creds,
+                                    bool isServer,
+                                    const char *cacertFile,
+                                    const char *certFile,
+                                    Error **errp)
+{
+    gnutls_x509_crt_t cert = NULL;
+    gnutls_x509_crt_t cacerts[MAX_CERTS];
+    size_t ncacerts = 0;
+    size_t i;
+    int ret = -1;
+
+    memset(cacerts, 0, sizeof(cacerts));
+    if (access(certFile, R_OK) == 0) {
+        cert = qcrypto_tls_creds_load_cert(creds,
+                                           certFile, isServer,
+                                           errp);
+        if (!cert) {
+            goto cleanup;
+        }
+    }
+    if (access(cacertFile, R_OK) == 0) {
+        if (qcrypto_tls_creds_load_ca_cert_list(creds,
+                                                cacertFile, cacerts,
+                                                MAX_CERTS, &ncacerts,
+                                                errp) < 0) {
+            goto cleanup;
+        }
+    }
+
+    if (cert &&
+        qcrypto_tls_creds_check_cert(creds,
+                                     cert, certFile, isServer,
+                                     false, errp) < 0) {
+        goto cleanup;
+    }
+
+    for (i = 0; i < ncacerts; i++) {
+        if (qcrypto_tls_creds_check_cert(creds,
+                                         cacerts[i], cacertFile,
+                                         isServer, true, errp) < 0) {
+            goto cleanup;
+        }
+    }
+
+    if (cert && ncacerts &&
+        qcrypto_tls_creds_check_cert_pair(cert, certFile, cacerts,
+                                          ncacerts, cacertFile,
+                                          isServer, errp) < 0) {
+        goto cleanup;
+    }
+
+    ret = 0;
+
+ cleanup:
+    if (cert) {
+        gnutls_x509_crt_deinit(cert);
+    }
+    for (i = 0; i < ncacerts; i++) {
+        gnutls_x509_crt_deinit(cacerts[i]);
+    }
+    return ret;
+}
+
+
+static int
+qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds,
+                            Error **errp)
+{
+    char *cacert = NULL, *cacrl = NULL, *cert = NULL,
+        *key = NULL, *dhparams = NULL;
+    int ret;
+    int rv = -1;
+
+    trace_qcrypto_tls_creds_x509_load(creds,
+            creds->parent_obj.dir ? creds->parent_obj.dir : "<nodir>");
+
+    if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+        if (qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_CA_CERT,
+                                       true, &cacert, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_CA_CRL,
+                                       false, &cacrl, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_SERVER_CERT,
+                                       true, &cert, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_SERVER_KEY,
+                                       true, &key, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_DH_PARAMS,
+                                       false, &dhparams, errp) < 0) {
+            goto cleanup;
+        }
+    } else {
+        if (qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_CA_CERT,
+                                       true, &cacert, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_CLIENT_CERT,
+                                       false, &cert, errp) < 0 ||
+            qcrypto_tls_creds_get_path(&creds->parent_obj,
+                                       QCRYPTO_TLS_CREDS_X509_CLIENT_KEY,
+                                       false, &key, errp) < 0) {
+            goto cleanup;
+        }
+    }
+
+    if (creds->sanityCheck &&
+        qcrypto_tls_creds_x509_sanity_check(creds,
+            creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER,
+            cacert, cert, errp) < 0) {
+        goto cleanup;
+    }
+
+    ret = gnutls_certificate_allocate_credentials(&creds->data);
+    if (ret < 0) {
+        error_setg(errp, "Cannot allocate credentials: '%s'",
+                   gnutls_strerror(ret));
+        goto cleanup;
+    }
+
+    ret = gnutls_certificate_set_x509_trust_file(creds->data,
+                                                 cacert,
+                                                 GNUTLS_X509_FMT_PEM);
+    if (ret < 0) {
+        error_setg(errp, "Cannot load CA certificate '%s': %s",
+                   cacert, gnutls_strerror(ret));
+        goto cleanup;
+    }
+
+    if (cert != NULL && key != NULL) {
+        ret = gnutls_certificate_set_x509_key_file(creds->data,
+                                                   cert, key,
+                                                   GNUTLS_X509_FMT_PEM);
+        if (ret < 0) {
+            error_setg(errp, "Cannot load certificate '%s' & key '%s': %s",
+                       cert, key, gnutls_strerror(ret));
+            goto cleanup;
+        }
+    }
+
+    if (cacrl != NULL) {
+        ret = gnutls_certificate_set_x509_crl_file(creds->data,
+                                                   cacrl,
+                                                   GNUTLS_X509_FMT_PEM);
+        if (ret < 0) {
+            error_setg(errp, "Cannot load CRL '%s': %s",
+                       cacrl, gnutls_strerror(ret));
+            goto cleanup;
+        }
+    }
+
+    if (creds->parent_obj.endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+        if (qcrypto_tls_creds_get_dh_params_file(&creds->parent_obj, dhparams,
+                                                 &creds->parent_obj.dh_params,
+                                                 errp) < 0) {
+            goto cleanup;
+        }
+        gnutls_certificate_set_dh_params(creds->data,
+                                         creds->parent_obj.dh_params);
+    }
+
+    rv = 0;
+ cleanup:
+    g_free(cacert);
+    g_free(cacrl);
+    g_free(cert);
+    g_free(key);
+    g_free(dhparams);
+    return rv;
+}
+
+
+static void
+qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds)
+{
+    if (creds->data) {
+        gnutls_certificate_free_credentials(creds->data);
+        creds->data = NULL;
+    }
+}
+
+
+#else /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED,
+                            Error **errp)
+{
+    error_setg(errp, "TLS credentials support requires GNUTLS");
+}
+
+
+static void
+qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED)
+{
+    /* nada */
+}
+
+
+#endif /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_x509_prop_set_loaded(Object *obj,
+                                       bool value,
+                                       Error **errp)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    if (value) {
+        qcrypto_tls_creds_x509_load(creds, errp);
+    } else {
+        qcrypto_tls_creds_x509_unload(creds);
+    }
+}
+
+
+#ifdef CONFIG_GNUTLS
+
+
+static bool
+qcrypto_tls_creds_x509_prop_get_loaded(Object *obj,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    return creds->data != NULL;
+}
+
+
+#else /* ! CONFIG_GNUTLS */
+
+
+static bool
+qcrypto_tls_creds_x509_prop_get_loaded(Object *obj G_GNUC_UNUSED,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    return false;
+}
+
+
+#endif /* ! CONFIG_GNUTLS */
+
+
+static void
+qcrypto_tls_creds_x509_prop_set_sanity(Object *obj,
+                                       bool value,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    creds->sanityCheck = value;
+}
+
+
+static bool
+qcrypto_tls_creds_x509_prop_get_sanity(Object *obj,
+                                       Error **errp G_GNUC_UNUSED)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    return creds->sanityCheck;
+}
+
+
+static void
+qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp)
+{
+    object_property_set_bool(OBJECT(uc), true, "loaded", errp);
+}
+
+
+static void
+qcrypto_tls_creds_x509_init(Object *obj)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    creds->sanityCheck = true;
+
+    object_property_add_bool(obj, "loaded",
+                             qcrypto_tls_creds_x509_prop_get_loaded,
+                             qcrypto_tls_creds_x509_prop_set_loaded,
+                             NULL);
+    object_property_add_bool(obj, "sanity-check",
+                             qcrypto_tls_creds_x509_prop_get_sanity,
+                             qcrypto_tls_creds_x509_prop_set_sanity,
+                             NULL);
+}
+
+
+static void
+qcrypto_tls_creds_x509_finalize(Object *obj)
+{
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+
+    qcrypto_tls_creds_x509_unload(creds);
+}
+
+
+static void
+qcrypto_tls_creds_x509_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = qcrypto_tls_creds_x509_complete;
+}
+
+
+static const TypeInfo qcrypto_tls_creds_x509_info = {
+    .parent = TYPE_QCRYPTO_TLS_CREDS,
+    .name = TYPE_QCRYPTO_TLS_CREDS_X509,
+    .instance_size = sizeof(QCryptoTLSCredsX509),
+    .instance_init = qcrypto_tls_creds_x509_init,
+    .instance_finalize = qcrypto_tls_creds_x509_finalize,
+    .class_size = sizeof(QCryptoTLSCredsX509Class),
+    .class_init = qcrypto_tls_creds_x509_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+
+static void
+qcrypto_tls_creds_x509_register_types(void)
+{
+    type_register_static(&qcrypto_tls_creds_x509_info);
+}
+
+
+type_init(qcrypto_tls_creds_x509_register_types);
--- a/crypto/tlssession.c
+++ b/crypto/tlssession.c
@@ -0,0 +1,574 @@
+/*
+ * QEMU crypto TLS session support
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "crypto/tlssession.h"
+#include "crypto/tlscredsanon.h"
+#include "crypto/tlscredsx509.h"
+#include "qemu/acl.h"
+#include "trace.h"
+
+#ifdef CONFIG_GNUTLS
+
+
+#include <gnutls/x509.h>
+
+
+struct QCryptoTLSSession {
+    QCryptoTLSCreds *creds;
+    gnutls_session_t handle;
+    char *hostname;
+    char *aclname;
+    bool handshakeComplete;
+    QCryptoTLSSessionWriteFunc writeFunc;
+    QCryptoTLSSessionReadFunc readFunc;
+    void *opaque;
+    char *peername;
+};
+
+
+void
+qcrypto_tls_session_free(QCryptoTLSSession *session)
+{
+    if (!session) {
+        return;
+    }
+
+    gnutls_deinit(session->handle);
+    g_free(session->hostname);
+    g_free(session->peername);
+    g_free(session->aclname);
+    object_unref(OBJECT(session->creds));
+    g_free(session);
+}
+
+
+static ssize_t
+qcrypto_tls_session_push(void *opaque, const void *buf, size_t len)
+{
+    QCryptoTLSSession *session = opaque;
+
+    if (!session->writeFunc) {
+        errno = EIO;
+        return -1;
+    };
+
+    return session->writeFunc(buf, len, session->opaque);
+}
+
+
+static ssize_t
+qcrypto_tls_session_pull(void *opaque, void *buf, size_t len)
+{
+    QCryptoTLSSession *session = opaque;
+
+    if (!session->readFunc) {
+        errno = EIO;
+        return -1;
+    };
+
+    return session->readFunc(buf, len, session->opaque);
+}
+
+
+QCryptoTLSSession *
+qcrypto_tls_session_new(QCryptoTLSCreds *creds,
+                        const char *hostname,
+                        const char *aclname,
+                        QCryptoTLSCredsEndpoint endpoint,
+                        Error **errp)
+{
+    QCryptoTLSSession *session;
+    int ret;
+
+    session = g_new0(QCryptoTLSSession, 1);
+    trace_qcrypto_tls_session_new(
+        session, creds, hostname ? hostname : "<none>",
+        aclname ? aclname : "<none>", endpoint);
+
+    if (hostname) {
+        session->hostname = g_strdup(hostname);
+    }
+    if (aclname) {
+        session->aclname = g_strdup(aclname);
+    }
+    session->creds = creds;
+    object_ref(OBJECT(creds));
+
+    if (creds->endpoint != endpoint) {
+        error_setg(errp, "Credentials endpoint doesn't match session");
+        goto error;
+    }
+
+    if (endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+        ret = gnutls_init(&session->handle, GNUTLS_SERVER);
+    } else {
+        ret = gnutls_init(&session->handle, GNUTLS_CLIENT);
+    }
+    if (ret < 0) {
+        error_setg(errp, "Cannot initialize TLS session: %s",
+                   gnutls_strerror(ret));
+        goto error;
+    }
+
+    if (object_dynamic_cast(OBJECT(creds),
+                            TYPE_QCRYPTO_TLS_CREDS_ANON)) {
+        QCryptoTLSCredsAnon *acreds = QCRYPTO_TLS_CREDS_ANON(creds);
+
+        ret = gnutls_priority_set_direct(session->handle,
+                                         "NORMAL:+ANON-DH", NULL);
+        if (ret < 0) {
+            error_setg(errp, "Unable to set TLS session priority: %s",
+                       gnutls_strerror(ret));
+            goto error;
+        }
+        if (creds->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+            ret = gnutls_credentials_set(session->handle,
+                                         GNUTLS_CRD_ANON,
+                                         acreds->data.server);
+        } else {
+            ret = gnutls_credentials_set(session->handle,
+                                         GNUTLS_CRD_ANON,
+                                         acreds->data.client);
+        }
+        if (ret < 0) {
+            error_setg(errp, "Cannot set session credentials: %s",
+                       gnutls_strerror(ret));
+            goto error;
+        }
+    } else if (object_dynamic_cast(OBJECT(creds),
+                                   TYPE_QCRYPTO_TLS_CREDS_X509)) {
+        QCryptoTLSCredsX509 *tcreds = QCRYPTO_TLS_CREDS_X509(creds);
+
+        ret = gnutls_set_default_priority(session->handle);
+        if (ret < 0) {
+            error_setg(errp, "Cannot set default TLS session priority: %s",
+                       gnutls_strerror(ret));
+            goto error;
+        }
+        ret = gnutls_credentials_set(session->handle,
+                                     GNUTLS_CRD_CERTIFICATE,
+                                     tcreds->data);
+        if (ret < 0) {
+            error_setg(errp, "Cannot set session credentials: %s",
+                       gnutls_strerror(ret));
+            goto error;
+        }
+
+        if (creds->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+            /* This requests, but does not enforce a client cert.
+             * The cert checking code later does enforcement */
+            gnutls_certificate_server_set_request(session->handle,
+                                                  GNUTLS_CERT_REQUEST);
+        }
+    } else {
+        error_setg(errp, "Unsupported TLS credentials type %s",
+                   object_get_typename(OBJECT(creds)));
+        goto error;
+    }
+
+    gnutls_transport_set_ptr(session->handle, session);
+    gnutls_transport_set_push_function(session->handle,
+                                       qcrypto_tls_session_push);
+    gnutls_transport_set_pull_function(session->handle,
+                                       qcrypto_tls_session_pull);
+
+    return session;
+
+ error:
+    qcrypto_tls_session_free(session);
+    return NULL;
+}
+
+static int
+qcrypto_tls_session_check_certificate(QCryptoTLSSession *session,
+                                      Error **errp)
+{
+    int ret;
+    unsigned int status;
+    const gnutls_datum_t *certs;
+    unsigned int nCerts, i;
+    time_t now;
+    gnutls_x509_crt_t cert = NULL;
+
+    now = time(NULL);
+    if (now == ((time_t)-1)) {
+        error_setg_errno(errp, errno, "Cannot get current time");
+        return -1;
+    }
+
+    ret = gnutls_certificate_verify_peers2(session->handle, &status);
+    if (ret < 0) {
+        error_setg(errp, "Verify failed: %s", gnutls_strerror(ret));
+        return -1;
+    }
+
+    if (status != 0) {
+        const char *reason = "Invalid certificate";
+
+        if (status & GNUTLS_CERT_INVALID) {
+            reason = "The certificate is not trusted";
+        }
+
+        if (status & GNUTLS_CERT_SIGNER_NOT_FOUND) {
+            reason = "The certificate hasn't got a known issuer";
+        }
+
+        if (status & GNUTLS_CERT_REVOKED) {
+            reason = "The certificate has been revoked";
+        }
+
+        if (status & GNUTLS_CERT_INSECURE_ALGORITHM) {
+            reason = "The certificate uses an insecure algorithm";
+        }
+
+        error_setg(errp, "%s", reason);
+        return -1;
+    }
+
+    certs = gnutls_certificate_get_peers(session->handle, &nCerts);
+    if (!certs) {
+        error_setg(errp, "No certificate peers");
+        return -1;
+    }
+
+    for (i = 0; i < nCerts; i++) {
+        ret = gnutls_x509_crt_init(&cert);
+        if (ret < 0) {
+            error_setg(errp, "Cannot initialize certificate: %s",
+                       gnutls_strerror(ret));
+            return -1;
+        }
+
+        ret = gnutls_x509_crt_import(cert, &certs[i], GNUTLS_X509_FMT_DER);
+        if (ret < 0) {
+            error_setg(errp, "Cannot import certificate: %s",
+                       gnutls_strerror(ret));
+            goto error;
+        }
+
+        if (gnutls_x509_crt_get_expiration_time(cert) < now) {
+            error_setg(errp, "The certificate has expired");
+            goto error;
+        }
+
+        if (gnutls_x509_crt_get_activation_time(cert) > now) {
+            error_setg(errp, "The certificate is not yet activated");
+            goto error;
+        }
+
+        if (gnutls_x509_crt_get_activation_time(cert) > now) {
+            error_setg(errp, "The certificate is not yet activated");
+            goto error;
+        }
+
+        if (i == 0) {
+            size_t dnameSize = 1024;
+            session->peername = g_malloc(dnameSize);
+        requery:
+            ret = gnutls_x509_crt_get_dn(cert, session->peername, &dnameSize);
+            if (ret < 0) {
+                if (ret == GNUTLS_E_SHORT_MEMORY_BUFFER) {
+                    session->peername = g_realloc(session->peername,
+                                                  dnameSize);
+                    goto requery;
+                }
+                error_setg(errp, "Cannot get client distinguished name: %s",
+                           gnutls_strerror(ret));
+                goto error;
+            }
+            if (session->aclname) {
+                qemu_acl *acl = qemu_acl_find(session->aclname);
+                int allow;
+                if (!acl) {
+                    error_setg(errp, "Cannot find ACL %s",
+                               session->aclname);
+                    goto error;
+                }
+
+                allow = qemu_acl_party_is_allowed(acl, session->peername);
+
+                error_setg(errp, "TLS x509 ACL check for %s is %s",
+                           session->peername, allow ? "allowed" : "denied");
+                if (!allow) {
+                    goto error;
+                }
+            }
+            if (session->hostname) {
+                if (!gnutls_x509_crt_check_hostname(cert, session->hostname)) {
+                    error_setg(errp,
+                               "Certificate does not match the hostname %s",
+                               session->hostname);
+                    goto error;
+                }
+            }
+        }
+
+        gnutls_x509_crt_deinit(cert);
+    }
+
+    return 0;
+
+ error:
+    gnutls_x509_crt_deinit(cert);
+    return -1;
+}
+
+
+int
+qcrypto_tls_session_check_credentials(QCryptoTLSSession *session,
+                                      Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(session->creds),
+                            TYPE_QCRYPTO_TLS_CREDS_ANON)) {
+        return 0;
+    } else if (object_dynamic_cast(OBJECT(session->creds),
+                            TYPE_QCRYPTO_TLS_CREDS_X509)) {
+        if (session->creds->verifyPeer) {
+            return qcrypto_tls_session_check_certificate(session,
+                                                         errp);
+        } else {
+            return 0;
+        }
+    } else {
+        error_setg(errp, "Unexpected credential type %s",
+                   object_get_typename(OBJECT(session->creds)));
+        return -1;
+    }
+}
+
+
+void
+qcrypto_tls_session_set_callbacks(QCryptoTLSSession *session,
+                                  QCryptoTLSSessionWriteFunc writeFunc,
+                                  QCryptoTLSSessionReadFunc readFunc,
+                                  void *opaque)
+{
+    session->writeFunc = writeFunc;
+    session->readFunc = readFunc;
+    session->opaque = opaque;
+}
+
+
+ssize_t
+qcrypto_tls_session_write(QCryptoTLSSession *session,
+                          const char *buf,
+                          size_t len)
+{
+    ssize_t ret = gnutls_record_send(session->handle, buf, len);
+
+    if (ret < 0) {
+        switch (ret) {
+        case GNUTLS_E_AGAIN:
+            errno = EAGAIN;
+            break;
+        case GNUTLS_E_INTERRUPTED:
+            errno = EINTR;
+            break;
+        default:
+            errno = EIO;
+            break;
+        }
+        ret = -1;
+    }
+
+    return ret;
+}
+
+
+ssize_t
+qcrypto_tls_session_read(QCryptoTLSSession *session,
+                         char *buf,
+                         size_t len)
+{
+    ssize_t ret = gnutls_record_recv(session->handle, buf, len);
+
+    if (ret < 0) {
+        switch (ret) {
+        case GNUTLS_E_AGAIN:
+            errno = EAGAIN;
+            break;
+        case GNUTLS_E_INTERRUPTED:
+            errno = EINTR;
+            break;
+        default:
+            errno = EIO;
+            break;
+        }
+        ret = -1;
+    }
+
+    return ret;
+}
+
+
+int
+qcrypto_tls_session_handshake(QCryptoTLSSession *session,
+                              Error **errp)
+{
+    int ret = gnutls_handshake(session->handle);
+    if (ret == 0) {
+        session->handshakeComplete = true;
+    } else {
+        if (ret == GNUTLS_E_INTERRUPTED ||
+            ret == GNUTLS_E_AGAIN) {
+            ret = 1;
+        } else {
+            error_setg(errp, "TLS handshake failed: %s",
+                       gnutls_strerror(ret));
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+
+QCryptoTLSSessionHandshakeStatus
+qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *session)
+{
+    if (session->handshakeComplete) {
+        return QCRYPTO_TLS_HANDSHAKE_COMPLETE;
+    } else if (gnutls_record_get_direction(session->handle) == 0) {
+        return QCRYPTO_TLS_HANDSHAKE_RECVING;
+    } else {
+        return QCRYPTO_TLS_HANDSHAKE_SENDING;
+    }
+}
+
+
+int
+qcrypto_tls_session_get_key_size(QCryptoTLSSession *session,
+                                 Error **errp)
+{
+    gnutls_cipher_algorithm_t cipher;
+    int ssf;
+
+    cipher = gnutls_cipher_get(session->handle);
+    ssf = gnutls_cipher_get_key_size(cipher);
+    if (!ssf) {
+        error_setg(errp, "Cannot get TLS cipher key size");
+        return -1;
+    }
+    return ssf;
+}
+
+
+char *
+qcrypto_tls_session_get_peer_name(QCryptoTLSSession *session)
+{
+    if (session->peername) {
+        return g_strdup(session->peername);
+    }
+    return NULL;
+}
+
+
+#else /* ! CONFIG_GNUTLS */
+
+
+QCryptoTLSSession *
+qcrypto_tls_session_new(QCryptoTLSCreds *creds G_GNUC_UNUSED,
+                        const char *hostname G_GNUC_UNUSED,
+                        const char *aclname G_GNUC_UNUSED,
+                        QCryptoTLSCredsEndpoint endpoint G_GNUC_UNUSED,
+                        Error **errp)
+{
+    error_setg(errp, "TLS requires GNUTLS support");
+    return NULL;
+}
+
+
+void
+qcrypto_tls_session_free(QCryptoTLSSession *sess G_GNUC_UNUSED)
+{
+}
+
+
+int
+qcrypto_tls_session_check_credentials(QCryptoTLSSession *sess G_GNUC_UNUSED,
+                                      Error **errp)
+{
+    error_setg(errp, "TLS requires GNUTLS support");
+    return -1;
+}
+
+
+void
+qcrypto_tls_session_set_callbacks(
+    QCryptoTLSSession *sess G_GNUC_UNUSED,
+    QCryptoTLSSessionWriteFunc writeFunc G_GNUC_UNUSED,
+    QCryptoTLSSessionReadFunc readFunc G_GNUC_UNUSED,
+    void *opaque G_GNUC_UNUSED)
+{
+}
+
+
+ssize_t
+qcrypto_tls_session_write(QCryptoTLSSession *sess,
+                          const char *buf,
+                          size_t len)
+{
+    errno = -EIO;
+    return -1;
+}
+
+
+ssize_t
+qcrypto_tls_session_read(QCryptoTLSSession *sess,
+                         char *buf,
+                         size_t len)
+{
+    errno = -EIO;
+    return -1;
+}
+
+
+int
+qcrypto_tls_session_handshake(QCryptoTLSSession *sess,
+                              Error **errp)
+{
+    error_setg(errp, "TLS requires GNUTLS support");
+    return -1;
+}
+
+
+QCryptoTLSSessionHandshakeStatus
+qcrypto_tls_session_get_handshake_status(QCryptoTLSSession *sess)
+{
+    return QCRYPTO_TLS_HANDSHAKE_COMPLETE;
+}
+
+
+int
+qcrypto_tls_session_get_key_size(QCryptoTLSSession *sess,
+                                 Error **errp)
+{
+    error_setg(errp, "TLS requires GNUTLS support");
+    return -1;
+}
+
+
+char *
+qcrypto_tls_session_get_peer_name(QCryptoTLSSession *sess)
+{
+    return NULL;
+}
+
+#endif
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -52,3 +52,4 @@ CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
 # For PReP
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
+CONFIG_MEM_HOTPLUG=y
--- a/default-configs/tilegx-linux-user.mak
+++ b/default-configs/tilegx-linux-user.mak
@@ -0,0 +1 @@
+# Default configuration for tilegx-linux-user
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
--- a/disas.c
+++ b/disas.c
@@ -392,16 +392,6 @@ monitor_read_memory (bfd_vma memaddr, bfd_byte *myaddr, int length,
    return 0;
 }

-static int GCC_FMT_ATTR(2, 3)
-monitor_fprintf(FILE *stream, const char *fmt, ...)
-{
-    va_list ap;
-    va_start(ap, fmt);
-    monitor_vprintf((Monitor *)stream, fmt, ap);
-    va_end(ap);
-    return 0;
-}
-
 /* Disassembler for the monitor.
   See target_disas for a description of flags. */
 void monitor_disas(Monitor *mon, CPUState *cpu,
--- a/disas/cris.c
+++ b/disas/cris.c
@@ -2492,7 +2492,7 @@ print_with_operands (const struct cris_opcode *opcodep,
 	  = spec_reg_info ((insn >> 12) & 15, disdata->distype);

 	if (sregp->name == NULL)
-	  /* Should have been caught as a non-match eariler.  */
+	  /* Should have been caught as a non-match earlier.  */
 	  *tp++ = '?';
 	else
 	  {
--- a/docs/build-system.txt
+++ b/docs/build-system.txt
@@ -0,0 +1,507 @@
+    The QEMU build system architecture
+    ==================================
+
+This document aims to help developers understand the architecture of the
+QEMU build system. As with projects using GNU autotools, the QEMU build
+system has two stages, first the developer runs the "configure" script
+to determine the local build environment characteristics, then they run
+"make" to build the project. There is about where the similarities with
+GNU autotools end, so try to forget what you know about them.
+
+
+Stage 1: configure
+==================
+
+The QEMU configure script is written directly in shell, and should be
+compatible with any POSIX shell, hence it uses #!/bin/sh. An important
+implication of this is that it is important to avoid using bash-isms on
+development platforms where bash is the primary host.
+
+In contrast to autoconf scripts, QEMU's configure is expected to be
+silent while it is checking for features. It will only display output
+when an error occurs, or to show the final feature enablement summary
+on completion.
+
+Adding new checks to the configure script usually comprises the
+following tasks:
+
+ - Initialize one or more variables with the default feature state.
+
+   Ideally features should auto-detect whether they are present,
+   so try to avoid hardcoding the initial state to either enabled
+   or disabled, as that forces the user to pass a --enable-XXX
+   / --disable-XXX flag on every invocation of configure.
+
+ - Add support to the command line arg parser to handle any new
+   --enable-XXX / --disable-XXX flags required by the feature XXX.
+
+ - Add information to the help output message to report on the new
+   feature flag.
+
+ - Add code to perform the actual feature check. As noted above, try to
+   be fully dynamic in checking enablement/disablement.
+
+ - Add code to print out the feature status in the configure summary
+   upon completion.
+
+ - Add any new makefile variables to $config_host_mak on completion.
+
+
+Taking (a simplified version of) the probe for gnutls from configure,
+we have the following pieces:
+
+  # Initial variable state
+  gnutls=""
+
+  ..snip..
+
+  # Configure flag processing
+  --disable-gnutls) gnutls="no"
+  ;;
+  --enable-gnutls) gnutls="yes"
+  ;;
+
+  ..snip..
+
+  # Help output feature message
+  gnutls          GNUTLS cryptography support
+
+  ..snip..
+
+  # Test for gnutls
+  if test "$gnutls" != "no"; then
+     if ! $pkg_config --exists "gnutls"; then
+        gnutls_cflags=`$pkg_config --cflags gnutls`
+        gnutls_libs=`$pkg_config --libs gnutls`
+        libs_softmmu="$gnutls_libs $libs_softmmu"
+        libs_tools="$gnutls_libs $libs_tools"
+        QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
+        gnutls="yes"
+     elif test "$gnutls" = "yes"; then
+        feature_not_found "gnutls" "Install gnutls devel"
+     else
+        gnutls="no"
+     fi
+  fi
+
+  ..snip..
+
+  # Completion feature summary
+  echo "GNUTLS support    $gnutls"
+
+  ..snip..
+
+  # Define make variables
+  if test "$gnutls" = "yes" ; then
+     echo "CONFIG_GNUTLS=y" >> $config_host_mak
+  fi
+
+
+Helper functions
+----------------
+
+The configure script provides a variety of helper functions to assist
+developers in checking for system features:
+
+ - do_cc $ARGS...
+
+   Attempt to run the system C compiler passing it $ARGS...
+
+ - do_cxx $ARGS...
+
+   Attempt to run the system C++ compiler passing it $ARGS...
+
+ - compile_object $CFLAGS
+
+   Attempt to compile a test program with the system C compiler using
+   $CFLAGS. The test program must have been previously written to a file
+   called $TMPC.
+
+ - compile_prog $CFLAGS $LDFLAGS
+
+   Attempt to compile a test program with the system C compiler using
+   $CFLAGS and link it with the system linker using $LDFLAGS. The test
+   program must have been previously written to a file called $TMPC.
+
+ - has $COMMAND
+
+   Determine if $COMMAND exists in the current environment, either as a
+   shell builtin, or executable binary, returning 0 on success.
+
+ - path_of $COMMAND
+
+   Return the fully qualified path of $COMMAND, printing it to stdout,
+   and returning 0 on success.
+
+ - check_define $NAME
+
+   Determine if the macro $NAME is defined by the system C compiler
+
+ - check_include $NAME
+
+   Determine if the include $NAME file is available to the system C
+   compiler
+
+ - write_c_skeleton
+
+   Write a minimal C program main() function to the temporary file
+   indicated by $TMPC
+
+ - feature_not_found $NAME $REMEDY
+
+   Print a message to stderr that the feature $NAME was not available
+   on the system, suggesting the user try $REMEDY to address the
+   problem.
+
+ - error_exit $MESSAGE $MORE...
+
+   Print $MESSAGE to stderr, followed by $MORE... and then exit from the
+   configure script with non-zero status
+
+ - query_pkg_config $ARGS...
+
+   Run pkg-config passing it $ARGS. If QEMU is doing a static build,
+   then --static will be automatically added to $ARGS
+
+
+Stage 2: makefiles
+==================
+
+The use of GNU make is required with the QEMU build system.
+
+Although the source code is spread across multiple subdirectories, the
+build system should be considered largely non-recursive in nature, in
+contrast to common practices seen with automake. There is some recursive
+invocation of make, but this is related to the things being built,
+rather than the source directory structure.
+
+QEMU currently supports both VPATH and non-VPATH builds, so there are
+three general ways to invoke configure & perform a build.
+
+ - VPATH, build artifacts outside of QEMU source tree entirely
+
+     cd ../
+     mkdir build
+     cd build
+     ../qemu/configure
+     make
+
+ - VPATH, build artifacts in a subdir of QEMU source tree
+
+     mkdir build
+     cd build
+     ../configure
+     make
+
+ - non-VPATH, build artifacts everywhere
+
+     ./configure
+     make
+
+The QEMU maintainers generally recommend that a VPATH build is used by
+developers. Patches to QEMU are expected to ensure VPATH build still
+works.
+
+
+Module structure
+----------------
+
+There are a number of key outputs of the QEMU build system:
+
+ - Tools - qemu-img, qemu-nbd, qga (guest agent), etc
+ - System emulators - qemu-system-$ARCH
+ - Userspace emulators - qemu-$ARCH
+ - Unit tests
+
+The source code is highly modularized, split across many files to
+facilitate building of all of these components with as little duplicated
+compilation as possible. There can be considered to be two distinct
+groups of files, those which are independent of the QEMU emulation
+target and those which are dependent on the QEMU emulation target.
+
+In the target-independent set lives various general purpose helper code,
+such as error handling infrastructure, standard data structures,
+platform portability wrapper functions, etc. This code can be compiled
+once only and the .o files linked into all output binaries.
+
+In the target-dependent set lives CPU emulation, device emulation and
+much glue code. This sometimes also has to be compiled multiple times,
+once for each target being built.
+
+The utility code that is used by all binaries is built into a
+static archive called libqemuutil.a, which is then linked to all the
+binaries. In order to provide hooks that are only needed by some of the
+binaries, code in libqemuutil.a may depend on other functions that are
+not fully implemented by all QEMU binaries. To deal with this there is a
+second library called libqemustub.a which provides dummy stubs for all
+these functions. These will get lazy linked into the binary if the real
+implementation is not present. In this way, the libqemustub.a static
+library can be thought of as a portable implementation of the weak
+symbols concept. All binaries should link to both libqemuutil.a and
+libqemustub.a. e.g.
+
+ qemu-img$(EXESUF): qemu-img.o ..snip.. libqemuutil.a libqemustub.a
+
+
+Windows platform portability
+----------------------------
+
+On Windows, all binaries have the suffix '.exe', so all Makefile rules
+which create binaries must include the $(EXESUF) variable on the binary
+name. e.g.
+
+ qemu-img$(EXESUF): qemu-img.o ..snip..
+
+This expands to '.exe' on Windows, or '' on other platforms.
+
+A further complication for the system emulator binaries is that
+two separate binaries need to be generated.
+
+The main binary (e.g. qemu-system-x86_64.exe) is linked against the
+Windows console runtime subsystem. These are expected to be run from a
+command prompt window, and so will print stderr to the console that
+launched them.
+
+The second binary generated has a 'w' on the end of its name (e.g.
+qemu-system-x86_64w.exe) and is linked against the Windows graphical
+runtime subsystem. These are expected to be run directly from the
+desktop and will open up a dedicated console window for stderr output.
+
+The Makefile.target will generate the binary for the graphical subsystem
+first, and then use objcopy to relink it against the console subsystem
+to generate the second binary.
+
+
+Object variable naming
+----------------------
+
+The QEMU convention is to define variables to list different groups of
+object files. These are named with the convention $PREFIX-obj-y. For
+example the libqemuutil.a file will be linked with all objects listed
+in a variable 'util-obj-y'. So, for example, util/Makefile.obj will
+contain a set of definitions looking like
+
+  util-obj-y += bitmap.o bitops.o hbitmap.o
+  util-obj-y += fifo8.o
+  util-obj-y += acl.o
+  util-obj-y += error.o qemu-error.o
+
+When there is an object file which needs to be conditionally built based
+on some characteristic of the host system, the configure script will
+define a variable for the conditional. For example, on Windows it will
+define $(CONFIG_POSIX) with a value of 'n' and $(CONFIG_WIN32) with a
+value of 'y'. It is now possible to use the config variables when
+listing object files. For example,
+
+  util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
+  util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
+
+On Windows this expands to
+
+  util-obj-y += oslib-win32.o qemu-thread-win32.o
+  util-obj-n += oslib-posix.o qemu-thread-posix.o
+
+Since libqemutil.a links in $(util-obj-y), the POSIX specific files
+listed against $(util-obj-n) are ignored on the Windows platform builds.
+
+
+CFLAGS / LDFLAGS / LIBS handling
+--------------------------------
+
+There are many different binaries being built with differing purposes,
+and some of them might even be 3rd party libraries pulled in via git
+submodules. As such the use of the global CFLAGS variable is generally
+avoided in QEMU, since it would apply to too many build targets.
+
+Flags that are needed by any QEMU code (i.e. everything *except* GIT
+submodule projects) are put in $(QEMU_CFLAGS) variable. For linker
+flags the $(LIBS) variable is sometimes used, but a couple of more
+targeted variables are preferred. $(libs_softmmu) is used for
+libraries that must be linked to system emulator targets, $(LIBS_TOOLS)
+is used for tools like qemu-img, qemu-nbd, etc and $(LIBS_QGA) is used
+for the QEMU guest agent. There is currently no specific variable for
+the userspace emulator targets as the global $(LIBS), or more targeted
+variables shown below, are sufficient.
+
+In addition to these variables, it is possible to provide cflags and
+libs against individual source code files, by defining variables of the
+form $FILENAME-cflags and $FILENAME-libs. For example, the curl block
+driver needs to link to the libcurl library, so block/Makefile defines
+some variables:
+
+  curl.o-cflags      := $(CURL_CFLAGS)
+  curl.o-libs        := $(CURL_LIBS)
+
+The scope is a little different between the two variables. The libs get
+used when linking any target binary that includes the curl.o object
+file, while the cflags get used when compiling the curl.c file only.
+
+
+Statically defined files
+------------------------
+
+The following key files are statically defined in the source tree, with
+the rules needed to build QEMU. Their behaviour is influenced by a
+number of dynamically created files listed later.
+
+- Makefile
+
+The main entry point used when invoking make to build all the components
+of QEMU. The default 'all' target will naturally result in the build of
+every component. The various tools and helper binaries are built
+directly via a non-recursive set of rules.
+
+Each system/userspace emulation target needs to have a slightly
+different set of make rules / variables. Thus, make will be recursively
+invoked for each of the emulation targets.
+
+The recursive invocation will end up processing the toplevel
+Makefile.target file (more on that later).
+
+
+- */Makefile.objs
+
+Since the source code is spread across multiple directories, the rules
+for each file are similarly modularized. Thus each subdirectory
+containing .c files will usually also contain a Makefile.objs file.
+These files are not directly invoked by a recursive make, but instead
+they are imported by the top level Makefile and/or Makefile.target
+
+Each Makefile.objs usually just declares a set of variables listing the
+.o files that need building from the source files in the directory. They
+will also define any custom linker or compiler flags. For example in
+block/Makefile.objs
+
+  block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+  block-obj-$(CONFIG_CURL) += curl.o
+
+  ..snip...
+
+  iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
+  iscsi.o-libs       := $(LIBISCSI_LIBS)
+  curl.o-cflags      := $(CURL_CFLAGS)
+  curl.o-libs        := $(CURL_LIBS)
+
+If there are any rules defined in the Makefile.objs file, they should
+all use $(obj) as a prefix to the target, e.g.
+
+  $(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp
+
+
+- Makefile.target
+
+This file provides the entry point used to build each individual system
+or userspace emulator target. Each enabled target has its own
+subdirectory. For example if configure is run with the argument
+'--target-list=x86_64-softmmu', then a sub-directory 'x86_64-softmu'
+will be created, containing a 'Makefile' which symlinks back to
+Makefile.target
+
+So when the recursive '$(MAKE) -C x86_64-softmmu' is invoked, it ends up
+using Makefile.target for the build rules.
+
+
+- rules.mak
+
+This file provides the generic helper rules for invoking build tools, in
+particular the compiler and linker. This also contains the magic (hairy)
+'unnest-vars' function which is used to merge the variable definitions
+from all Makefile.objs in the source tree down into the main Makefile
+context.
+
+
+- default-configs/*.mak
+
+The files under default-configs/ control what emulated hardware is built
+into each QEMU system and userspace emulator targets. They merely
+contain a long list of config variable definitions. For example,
+default-configs/x86_64-softmmu.mak has:
+
+  include pci.mak
+  include sound.mak
+  include usb.mak
+  CONFIG_QXL=$(CONFIG_SPICE)
+  CONFIG_VGA_ISA=y
+  CONFIG_VGA_CIRRUS=y
+  CONFIG_VMWARE_VGA=y
+  CONFIG_VIRTIO_VGA=y
+  ...snip...
+
+These files rarely need changing unless new devices / hardware need to
+be enabled for a particular system/userspace emulation target
+
+
+- tests/Makefile
+
+Rules for building the unit tests. This file is included directly by the
+top level Makefile, so anything defined in this file will influence the
+entire build system. Care needs to be taken when writing rules for tests
+to ensure they only apply to the unit test execution / build.
+
+
+- po/Makefile
+
+Rules for building and installing the binary message catalogs from the
+text .po file sources. This almost never needs changing for any reason.
+
+
+Dynamically created files
+-------------------------
+
+The following files are generated dynamically by configure in order to
+control the behaviour of the statically defined makefiles. This avoids
+the need for QEMU makefiles to go through any pre-processing as seen
+with autotools, where Makefile.am generates Makefile.in which generates
+Makefile.
+
+
+- config-host.mak
+
+When configure has determined the characteristics of the build host it
+will write a long list of variables to config-host.mak file. This
+provides the various install directories, compiler / linker flags and a
+variety of CONFIG_* variables related to optionally enabled features.
+This is imported by the top level Makefile in order to tailor the build
+output.
+
+The variables defined here are those which are applicable to all QEMU
+build outputs. Variables which are potentially different for each
+emulator target are defined by the next file...
+
+It is also used as a dependency checking mechanism. If make sees that
+the modification timestamp on configure is newer than that on
+config-host.mak, then configure will be re-run.
+
+
+- config-host.h
+
+The config-host.h file is used by source code to determine what features
+are enabled. It is generated from the contents of config-host.mak using
+the scripts/create_config program. This extracts all the CONFIG_* variables,
+most of the HOST_* variables and a few other misc variables from
+config-host.mak, formatting them as C preprocessor macros.
+
+
+- $TARGET-NAME/config-target.mak
+
+TARGET-NAME is the name of a system or userspace emulator, for example,
+x86_64-softmmu denotes the system emulator for the x86_64 architecture.
+This file contains the variables which need to vary on a per-target
+basis. For example, it will indicate whether KVM or Xen are enabled for
+the target and any other potential custom libraries needed for linking
+the target.
+
+
+- $TARGET-NAME/config-devices.mak
+
+TARGET-NAME is again the name of a system or userspace emulator. The
+config-devices.mak file is automatically generated by make using the
+scripts/make_device_config.sh program, feeding it the
+default-configs/$TARGET-NAME file as input.
+
+
+- $TARGET-NAME/Makefile
+
+This is the entrypoint used when make recurses to build a single system
+or userspace emulator target. It is merely a symlink back to the
+Makefile.target in the top level.
--- a/docs/libcacard.txt
+++ b/docs/libcacard.txt
@@ -1,483 +0,0 @@
-This file documents the CAC (Common Access Card) library in the libcacard
-subdirectory.
-
-Virtual Smart Card Emulator
-
-This emulator is designed to provide emulation of actual smart cards to a
-virtual card reader running in a guest virtual machine. The emulated smart
-cards can be representations of real smart cards, where the necessary functions
-such as signing, card removal/insertion, etc. are mapped to real, physical
-cards which are shared with the client machine the emulator is running on, or
-the cards could be pure software constructs.
-
-The emulator is structured to allow multiple replaceable or additional pieces,
-so it can be easily modified for future requirements. The primary envisioned
-modifications are:
-
-1) The socket connection to the virtual card reader (presumably a CCID reader,
-but other ISO-7816 compatible readers could be used). The code that handles
-this is in vscclient.c.
-
-2) The virtual card low level emulation. This is currently supplied by using
-NSS. This emulation could be replaced by implementations based on other
-security libraries, including but not limitted to openssl+pkcs#11 library,
-raw pkcs#11, Microsoft CAPI, direct opensc calls, etc. The code that handles
-this is in vcard_emul_nss.c.
-
-3) Emulation for new types of cards. The current implementation emulates the
-original DoD CAC standard with separate pki containers. This emulator lives in
-cac.c. More than one card type emulator could be included. Other cards could
-be emulated as well, including PIV, newer versions of CAC, PKCS #15, etc.
-
--------------------
-Replacing the Socket Based Virtual Reader Interface.
-
-The current implementation contains a replaceable module vscclient.c. The
-current vscclient.c implements a sockets interface to the virtual ccid reader
-on the guest. CCID commands that are pertinent to emulation are passed
-across the socket, and their responses are passed back along that same socket.
-The protocol that vscclient uses is defined in vscard_common.h and connects
-to a qemu ccid usb device. Since this socket runs as a client, vscclient.c
-implements a program with a main entry. It also handles argument parsing for
-the emulator.
-
-An application that wants to use the virtual reader can replace vscclient.c
-with its own implementation that connects to its own CCID reader.  The calls
-that the CCID reader can call are:
-
-      VReaderList * vreader_get_reader_list();
-
-  This function returns a list of virtual readers.  These readers may map to
-  physical devices, or simulated devices depending on vcard the back end. Each
-  reader in the list should represent a reader to the virtual machine. Virtual
-  USB address mapping is left to the CCID reader front end. This call can be
-  made any time to get an updated list. The returned list is a copy of the
-  internal list that can be referenced by the caller without locking. This copy
-  must be freed by the caller with vreader_list_delete when it is no longer
-  needed.
-
-      VReaderListEntry *vreader_list_get_first(VReaderList *);
-
-  This function gets the first entry on the reader list. Along with
-  vreader_list_get_next(), vreader_list_get_first() can be used to walk the
-  reader list returned from vreader_get_reader_list(). VReaderListEntries are
-  part of the list themselves and do not need to be freed separately from the
-  list. If there are no entries on the list, it will return NULL.
-
-      VReaderListEntry *vreader_list_get_next(VReaderListEntry *);
-
-  This function gets the next entry in the list. If there are no more entries
-  it will return NULL.
-
-      VReader * vreader_list_get_reader(VReaderListEntry *)
-
-  This function returns the reader stored in the reader List entry. Caller gets
-  a new reference to a reader. The caller must free its reference when it is
-  finished with vreader_free().
-
-      void vreader_free(VReader *reader);
-
-   This function frees a reference to a reader. Readers are reference counted
-   and are automatically deleted when the last reference is freed.
-
-      void vreader_list_delete(VReaderList *list);
-
-   This function frees the list, all the elements on the list, and all the
-   reader references held by the list.
-
-      VReaderStatus vreader_power_on(VReader *reader, char *atr, int *len);
-
-  This function simulates a card power on. A virtual card does not care about
-  the actual voltage and other physical parameters, but it does care that the
-  card is actually on or off. Cycling the card causes the card to reset. If
-  the caller provides enough space, vreader_power_on will return the ATR of
-  the virtual card. The amount of space provided in atr should be indicated
-  in *len. The function modifies *len to be the actual length of of the
-  returned ATR.
-
-      VReaderStatus vreader_power_off(VReader *reader);
-
-  This function simulates a power off of a virtual card.
-
-      VReaderStatus vreader_xfer_bytes(VReader *reader, unsigne char *send_buf,
-                                       int send_buf_len,
-                                       unsigned char *receive_buf,
-                                       int receive_buf_len);
-
-  This function sends a raw apdu to a card and returns the card's response.
-  The CCID front end should return the response back. Most of the emulation
-  is driven from these APDUs.
-
-      VReaderStatus vreader_card_is_present(VReader *reader);
-
-  This function returns whether or not the reader has a card inserted. The
-  vreader_power_on, vreader_power_off, and vreader_xfer_bytes will return
-  VREADER_NO_CARD.
-
-       const char *vreader_get_name(VReader *reader);
-
-  This function returns the name of the reader. The name comes from the card
-  emulator level and is usually related to the name of the physical reader.
-
-       VReaderID vreader_get_id(VReader *reader);
-
-  This function returns the id of a reader. All readers start out with an id
-  of -1. The application can set the id with vreader_set_id.
-
-       VReaderStatus vreader_get_id(VReader *reader, VReaderID id);
-
-  This function sets the reader id. The application is responsible for making
-  sure that the id is unique for all readers it is actively using.
-
-       VReader *vreader_find_reader_by_id(VReaderID id);
-
-  This function returns the reader which matches the id. If two readers match,
-  only one is returned. The function returns NULL if the id is -1.
-
-       Event *vevent_wait_next_vevent();
-
-  This function blocks waiting for reader and card insertion events. There
-  will be one event for each card insertion, each card removal, each reader
-  insertion and each reader removal. At start up, events are created for all
-  the initial readers found, as well as all the cards that are inserted.
-
-       Event *vevent_get_next_vevent();
-
-  This function returns a pending event if it exists, otherwise it returns
-  NULL. It does not block.
-
----------------
-Card Type Emulator: Adding a New Virtual Card Type
-
-The ISO 7816 card spec describes 2 types of cards:
- 1) File system cards, where the smartcard is managed by reading and writing
-data to files in a file system. There is currently only boiler plate
-implemented for file system cards.
- 2) VM cards, where the card has loadable applets which perform the card
-functions. The current implementation supports VM cards.
-
-In the case of VM cards, the difference between various types of cards is
-really what applets have been installed in that card. This structure is
-mirrored in card type emulators. The 7816 emulator already handles the basic
-ISO 7186 commands. Card type emulators simply need to add the virtual applets
-which emulate the real card applets. Card type emulators have exactly one
-public entry point:
-
-       VCARDStatus xxx_card_init(VCard *card, const char *flags,
-                               const unsigned char *cert[],
-                               int cert_len[],
-                               VCardKey *key[],
-                               int cert_count);
-
-  The parameters for this are:
-  card       - the virtual card structure which will represent this card.
-  flags      - option flags that may be specific to this card type.
-  cert       - array of binary certificates.
-  cert_len   - array of lengths of each of the certificates specified in cert.
-  key        - array of opaque key structures representing the private keys on
-               the card.
-  cert_count - number of entries in cert, cert_len, and key arrays.
-
-  Any cert, cert_len, or key with the same index are matching sets. That is
-  cert[0] is cert_len[0] long and has the corresponding private key of key[0].
-
-The card type emulator is expected to own the VCardKeys, but it should copy
-any raw cert data it wants to save. It can create new applets and add them to
-the card using the following functions:
-
-       VCardApplet *vcard_new_applet(VCardProcessAPDU apdu_func,
-                                     VCardResetApplet reset_func,
-                                     const unsigned char *aid,
-                                     int aid_len);
-
-  This function creates a new applet. Applet structures store the following
-  information:
-     1) the AID of the applet (set by aid and aid_len).
-     2) a function to handle APDUs for this applet. (set by apdu_func, more on
-        this below).
-     3) a function to reset the applet state when the applet is selected.
-        (set by reset_func, more on this below).
-     3) applet private data, a data pointer used by the card type emulator to
-        store any data or state it needs to complete requests. (set by a
-        separate call).
-     4) applet private data free, a function used to free the applet private
-        data when the applet itself is destroyed.
-  The created applet can be added to the card with vcard_add_applet below.
-
-        void vcard_set_applet_private(VCardApplet *applet,
-                                      VCardAppletPrivate *private,
-                                      VCardAppletPrivateFree private_free);
-  This function sets the private data and the corresponding free function.
-  VCardAppletPrivate is an opaque data structure to the rest of the emulator.
-  The card type emulator can define it any way it wants by defining
-  struct VCardAppletPrivateStruct {};. If there is already a private data
-  structure on the applet, the old one is freed before the new one is set up.
-  passing two NULL clear any existing private data.
-
-         VCardStatus vcard_add_applet(VCard *card, VCardApplet *applet);
-
-  Add an applet onto the list of applets attached to the card. Once an applet
-  has been added, it can be selected by its AID, and then commands will be
-  routed to it VCardProcessAPDU function. This function adopts the applet that
-  is passed into it. Note: 2 applets with the same AID should not be added to
-  the same card. It is permissible to add more than one applet. Multiple applets
-  may have the same VCardPRocessAPDU entry point.
-
-The certs and keys should be attached to private data associated with one or
-more appropriate applets for that card. Control will come to the card type
-emulators once one of its applets are selected through the VCardProcessAPDU
-function it specified when it created the applet.
-
-The signature of VCardResetApplet is:
-        VCardStatus (*VCardResetApplet) (VCard *card, int channel);
-  This function will reset the any internal applet state that needs to be
-  cleared after a select applet call. It should return VCARD_DONE;
-
-The signature of VCardProcessAPDU is:
-        VCardStatus (*VCardProcessAPDU)(VCard *card, VCardAPDU *apdu,
-                                         VCardResponse **response);
-  This function examines the APDU and determines whether it should process
-  the apdu directly, reject the apdu as invalid, or pass the apdu on to
-  the basic 7816 emulator for processing.
-      If the 7816 emulator should process the apdu, then the VCardProcessAPDU
-  should return VCARD_NEXT.
-      If there is an error, then VCardProcessAPDU should return an error
-  response using vcard_make_response and the appropriate 7816 error code
-  (see card_7816t.h) or vcard_make_response with a card type specific error
-  code. It should then return VCARD_DONE.
-      If the apdu can be processed correctly, VCardProcessAPDU should do so,
-  set the response value appropriately for that APDU, and return VCARD_DONE.
-  VCardProcessAPDU should always set the response if it returns VCARD_DONE.
-  It should always either return VCARD_DONE or VCARD_NEXT.
-
-Parsing the APDU --
-
-Prior to processing calling the card type emulator's VCardProcessAPDU function, the emulator has already decoded the APDU header and set several fields:
-
-   apdu->a_data - The raw apdu data bytes.
-   apdu->a_len  - The len of the raw apdu data.
-   apdu->a_body - The start of any post header parameter data.
-   apdu->a_Lc   - The parameter length value.
-   apdu->a_Le   - The expected length of any returned data.
-   apdu->a_cla  - The raw apdu class.
-   apdu->a_channel - The channel (decoded from the class).
-   apdu->a_secure_messaging_type - The decoded secure messaging type
-                                   (from class).
-   apdu->a_type - The decode class type.
-   apdu->a_gen_type - the generic class type (7816, PROPRIETARY, RFU, PTS).
-   apdu->a_ins  - The instruction byte.
-   apdu->a_p1   - Parameter 1.
-   apdu->a_p2   - Parameter 2.
-
-Creating a Response --
-
-The expected result of any APDU call is a response. The card type emulator must
-set *response with an appropriate VCardResponse value if it returns VCARD_DONE.
-Responses could be as simple as returning a 2 byte status word response, to as
-complex as returning a block of data along with a 2 byte response. Which is
-returned will depend on the semantics of the APDU. The following functions will
-create card responses.
-
-        VCardResponse *vcard_make_response(VCard7816Status status);
-
-    This is the most basic function to get a response. This function will
-    return a response the consists solely one 2 byte status code. If that status
-    code is defined in card_7816t.h, then this function is guaranteed to
-    return a response with that status. If a cart type specific status code
-    is passed and vcard_make_response fails to allocate the appropriate memory
-    for that response, then vcard_make_response will return a VCardResponse
-    of VCARD7816_STATUS_EXC_ERROR_MEMORY. In any case, this function is
-    guaranteed to return a valid VCardResponse.
-
-        VCardResponse *vcard_response_new(unsigned char *buf, int len,
-                                          VCard7816Status status);
-
-    This function is similar to vcard_make_response except it includes some
-    returned data with the response. It could also fail to allocate enough
-    memory, in which case it will return NULL.
-
-        VCardResponse *vcard_response_new_status_bytes(unsigned char sw1,
-                                                       unsigned char sw2);
-
-    Sometimes in 7816 the response bytes are treated as two separate bytes with
-    split meanings. This function allows you to create a response based on
-    two separate bytes. This function could fail, in which case it will return
-    NULL.
-
-       VCardResponse *vcard_response_new_bytes(unsigned char *buf, int len,
-                                               unsigned char sw1,
-                                               unsigned char sw2);
-
-    This function is the same as vcard_response_new except you may specify
-    the status as two separate bytes like vcard_response_new_status_bytes.
-
-
-Implementing functionality ---
-
-The following helper functions access information about the current card
-and applet.
-
-        VCARDAppletPrivate *vcard_get_current_applet_private(VCard *card,
-                                                             int channel);
-
-    This function returns any private data set by the card type emulator on
-    the currently selected applet. The card type emulator keeps track of the
-    current applet state in this data structure. Any certs and keys associated
-    with a particular applet is also stored here.
-
-        int vcard_emul_get_login_count(VCard *card);
-
-    This function returns the number of remaining login attempts for this
-    card. If the card emulator does not know, or the card does not have a
-    way of giving this information, this function returns -1.
-
-
-         VCard7816Status vcard_emul_login(VCard *card, unsigned char *pin,
-                                          int pin_len);
-
-    This function logs into the card and returns the standard 7816 status
-    word depending on the success or failure of the call.
-
-         void vcard_emul_delete_key(VCardKey *key);
-
-     This function frees the VCardKey passed in to xxxx_card_init. The card
-     type emulator is responsible for freeing this key when it no longer needs
-     it.
-
-         VCard7816Status vcard_emul_rsa_op(VCard *card, VCardKey *key,
-                                           unsigned char *buffer,
-                                           int buffer_size);
-
-     This function does a raw rsa op on the buffer with the given key.
-
-The sample card type emulator is found in cac.c. It implements the cac specific
-applets.  Only those applets needed by the coolkey pkcs#11 driver on the guest
-have been implemented. To support the full range CAC middleware, a complete CAC
-card according to the CAC specs should be implemented here.
-
------------------------------
-Virtual Card Emulator
-
-This code accesses both real smart cards and simulated smart cards through
-services provided on the client. The current implementation uses NSS, which
-already knows how to talk to various PKCS #11 modules on the client, and is
-portable to most operating systems. A particular emulator can have only one
-virtual card implementation at a time.
-
-The virtual card emulator consists of a series of virtual card services. In
-addition to the services describe above (services starting with
-vcard_emul_xxxx), the virtual card emulator also provides the following
-functions:
-
-    VCardEmulError vcard_emul_init(cont VCardEmulOptions *options);
-
-  The options structure is built by another function in the virtual card
-  interface where a string of virtual card emulator specific strings are
-  mapped to the options. The actual structure is defined by the virtual card
-  emulator and is used to determine the configuration of soft cards, or to
-  determine which physical cards to present to the guest.
-
-  The vcard_emul_init function will build up sets of readers, create any
-  threads that are needed to watch for changes in the reader state. If readers
-  have cards present in them, they are also initialized.
-
-  Readers are created with the function.
-
-          VReader *vreader_new(VReaderEmul *reader_emul,
-                               VReaderEmulFree reader_emul_free);
-
-      The freeFunc is used to free the VReaderEmul * when the reader is
-      destroyed.  The VReaderEmul structure is an opaque structure to the
-      rest of the code, but defined by the virtual card emulator, which can
-      use it to store any reader specific state.
-
-  Once the reader has been created, it can be added to the front end with the
-  call:
-
-           VReaderStatus vreader_add_reader(VReader *reader);
-
-      This function will automatically generate the appropriate new reader
-      events and add the reader to the list.
-
-  To create a new card, the virtual card emulator will call a similar
-  function.
-
-           VCard *vcard_new(VCardEmul *card_emul,
-                            VCardEmulFree card_emul_free);
-
-      Like vreader_new, this function takes a virtual card emulator specific
-      structure which it uses to keep track of the card state.
-
-  Once the card is created, it is attached to a card type emulator with the
-  following function:
-
-            VCardStatus vcard_init(VCard *vcard, VCardEmulType type,
-                                   const char *flags,
-                                   unsigned char *const *certs,
-                                   int *cert_len,
-                                   VCardKey *key[],
-                                   int cert_count);
-
-      The vcard is the value returned from vcard_new. The type is the
-      card type emulator that this card should presented to the guest as.
-      The flags are card type emulator specific options. The certs,
-      cert_len, and keys are all arrays of length cert_count. These are
-      the same of the parameters xxxx_card_init() accepts.
-
-   Finally the card is associated with its reader by the call:
-
-            VReaderStatus vreader_insert_card(VReader *vreader, VCard *vcard);
-
-      This function, like vreader_add_reader, will take care of any event
-      notification for the card insert.
-
-
-    VCardEmulError vcard_emul_force_card_remove(VReader *vreader);
-
-  Force a card that is present to appear to be removed to the guest, even if
-  that card is a physical card and is present.
-
-
-    VCardEmulError vcard_emul_force_card_insert(VReader *reader);
-
-  Force a card that has been removed by vcard_emul_force_card_remove to be
-  reinserted from the point of view of the guest. This will only work if the
-  card is physically present (which is always true fro a soft card).
-
-     void vcard_emul_get_atr(Vcard *card, unsigned char *atr, int *atr_len);
-
-  Return the virtual ATR for the card. By convention this should be the value
-  VCARD_ATR_PREFIX(size) followed by several ascii bytes related to this
-  particular emulator. For instance the NSS emulator returns
-  {VCARD_ATR_PREFIX(3), 'N', 'S', 'S' }. Do ot return more data then *atr_len;
-
-     void vcard_emul_reset(VCard *card, VCardPower power)
-
-   Set the state of 'card' to the current power level and reset its internal
-   state (logout, etc).
-
-------------------------------------------------------
-List of files and their function:
-README - This file
-card_7816.c - emulate basic 7816 functionality. Parse APDUs.
-card_7816.h - apdu and response services definitions.
-card_7816t.h - 7816 specific structures, types and definitions.
-event.c - event handling code.
-event.h - event handling services definitions.
-eventt.h - event handling structures and types
-vcard.c - handle common virtual card services like creation, destruction, and
-          applet management.
-vcard.h - common virtual card services function definitions.
-vcardt.h - comon virtual card types
-vreader.c - common virtual reader services.
-vreader.h - common virtual reader services definitions.
-vreadert.h - comon virtual reader types.
-vcard_emul_type.c - manage the card type emulators.
-vcard_emul_type.h - definitions for card type emulators.
-cac.c - card type emulator for CAC cards
-vcard_emul.h - virtual card emulator service definitions.
-vcard_emul_nss.c - virtual card emulator implementation for nss.
-vscclient.c - socket connection to guest qemu usb driver.
-vscard_common.h - common header with the guest qemu usb driver.
-mutex.h - header file for machine independent mutexes.
-link_test.c - static test to make sure all the symbols are properly defined.
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -111,10 +111,7 @@ and field names within a type, should be all lower case with words
 separated by a hyphen.  However, some existing older commands and
 complex types use underscore; when extending such expressions,
 consistency is preferred over blindly avoiding underscore.  Event
-names should be ALL_CAPS with words separated by underscore.  The
-special string '**' appears for some commands that manually perform
-their own type checking rather than relying on the type-safe code
-produced by the qapi code generators.
+names should be ALL_CAPS with words separated by underscore.

 Any name (command, event, type, field, or enum value) beginning with
 "x-" is marked experimental, and may be withdrawn or changed
@@ -140,17 +137,25 @@ must have a value that forms a struct name.

 === Built-in Types ===

-The following types are built-in to the parser:
-  'str' - arbitrary UTF-8 string
-  'int' - 64-bit signed integer (although the C code may place further
-          restrictions on acceptable range)
-  'number' - floating point number
-  'bool' - JSON value of true or false
-  'int8', 'int16', 'int32', 'int64' - like 'int', but enforce maximum
-                                      bit size
-  'uint8', 'uint16', 'uint32', 'uint64' - unsigned counterparts
-  'size' - like 'uint64', but allows scaled suffix from command line
-           visitor
+The following types are predefined, and map to C as follows:
+
+  Schema    C          JSON
+  str       char *     any JSON string, UTF-8
+  number    double     any JSON number
+  int       int64_t    a JSON number without fractional part
+                       that fits into the C integer type
+  int8      int8_t     likewise
+  int16     int16_t    likewise
+  int32     int32_t    likewise
+  int64     int64_t    likewise
+  uint8     uint8_t    likewise
+  uint16    uint16_t   likewise
+  uint32    uint32_t   likewise
+  uint64    uint64_t   likewise
+  size      uint64_t   like uint64_t, except StringInputVisitor
+                       accepts size suffixes
+  bool      bool       JSON true or false
+  any       QObject *  any JSON value


 === Includes ===
@@ -236,6 +241,7 @@ both fields like this:
 === Enumeration types ===

 Usage: { 'enum': STRING, 'data': ARRAY-OF-STRING }
+       { 'enum': STRING, '*prefix': STRING, 'data': ARRAY-OF-STRING }

 An enumeration type is a dictionary containing a single 'data' key
 whose value is a list of strings.  An example enumeration is:
@@ -247,6 +253,13 @@ useful.  The list of strings should be lower case; if an enum name
 represents multiple words, use '-' between words.  The string 'max' is
 not allowed as an enum value, and values should not be repeated.

+The enum constants will be named by using a heuristic to turn the
+type name into a set of underscore separated words. For the example
+above, 'MyEnum' will turn into 'MY_ENUM' giving a constant name
+of 'MY_ENUM_VALUE1' for the first value. If the default heuristic
+does not result in a desirable name, the optional 'prefix' field
+can be used when defining the enum.
+
 The enumeration values are passed as strings over the Client JSON
 Protocol, but are encoded as C enum integral values in generated code.
 While the C code starts numbering at 0, it is better to use explicit
@@ -445,17 +458,14 @@ which would validate this Client JSON Protocol transaction:
 <= { "return": [ { "value": "one" }, { } ] }

 In rare cases, QAPI cannot express a type-safe representation of a
-corresponding Client JSON Protocol command.  In these cases, if the
-command expression includes the key 'gen' with boolean value false,
-then the 'data' or 'returns' member that intends to bypass generated
-type-safety and do its own manual validation should use an inline
-dictionary definition, with a value of '**' rather than a valid type
-name for the keys that the generated code will not validate.  Please
-try to avoid adding new commands that rely on this, and instead use
-type-safe unions.  For an example of bypass usage:
+corresponding Client JSON Protocol command.  You then have to suppress
+generation of a marshalling function by including a key 'gen' with
+boolean value false, and instead write your own function.  Please try
+to avoid adding new commands that rely on this, and instead use
+type-safe unions.  For an example of this usage:

 { 'command': 'netdev_add',
-   'data': {'type': 'str', 'id': 'str', '*props': '**'},
+   'data': {'type': 'str', 'id': 'str'},
   'gen': false }

 Normally, the QAPI schema is used to describe synchronous exchanges,
@@ -492,13 +502,204 @@ Resulting in this JSON object:
  "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }


+== Client JSON Protocol introspection ==
+
+Clients of a Client JSON Protocol commonly need to figure out what
+exactly the server (QEMU) supports.
+
+For this purpose, QMP provides introspection via command
+query-qmp-schema.  QGA currently doesn't support introspection.
+
+query-qmp-schema returns a JSON array of SchemaInfo objects.  These
+objects together describe the wire ABI, as defined in the QAPI schema.
+
+However, the SchemaInfo can't reflect all the rules and restrictions
+that apply to QMP.  It's interface introspection (figuring out what's
+there), not interface specification.  The specification is in the QAPI
+schema.  To understand how QMP is to be used, you need to study the
+QAPI schema.
+
+Like any other command, query-qmp-schema is itself defined in the QAPI
+schema, along with the SchemaInfo type.  This text attempts to give an
+overview how things work.  For details you need to consult the QAPI
+schema.
+
+SchemaInfo objects have common members "name" and "meta-type", and
+additional variant members depending on the value of meta-type.
+
+Each SchemaInfo object describes a wire ABI entity of a certain
+meta-type: a command, event or one of several kinds of type.
+
+SchemaInfo for commands and events have the same name as in the QAPI
+schema.
+
+Command and event names are part of the wire ABI, but type names are
+not.  Therefore, the SchemaInfo for types have auto-generated
+meaningless names.  For readability, the examples in this section use
+meaningful type names instead.
+
+To examine a type, start with a command or event using it, then follow
+references by name.
+
+QAPI schema definitions not reachable that way are omitted.
+
+The SchemaInfo for a command has meta-type "command", and variant
+members "arg-type" and "ret-type".  On the wire, the "arguments"
+member of a client's "execute" command must conform to the object type
+named by "arg-type".  The "return" member that the server passes in a
+success response conforms to the type named by "ret-type".
+
+If the command takes no arguments, "arg-type" names an object type
+without members.  Likewise, if the command returns nothing, "ret-type"
+names an object type without members.
+
+Example: the SchemaInfo for command query-qmp-schema
+
+    { "name": "query-qmp-schema", "meta-type": "command",
+      "arg-type": ":empty", "ret-type": "SchemaInfoList" }
+
+    Type ":empty" is an object type without members, and type
+    "SchemaInfoList" is the array of SchemaInfo type.
+
+The SchemaInfo for an event has meta-type "event", and variant member
+"arg-type".  On the wire, a "data" member that the server passes in an
+event conforms to the object type named by "arg-type".
+
+If the event carries no additional information, "arg-type" names an
+object type without members.  The event may not have a data member on
+the wire then.
+
+Each command or event defined with dictionary-valued 'data' in the
+QAPI schema implicitly defines an object type.
+
+Example: the SchemaInfo for EVENT_C from section Events
+
+    { "name": "EVENT_C", "meta-type": "event",
+      "arg-type": ":obj-EVENT_C-arg" }
+
+    Type ":obj-EVENT_C-arg" is an implicitly defined object type with
+    the two members from the event's definition.
+
+The SchemaInfo for struct and union types has meta-type "object".
+
+The SchemaInfo for a struct type has variant member "members".
+
+The SchemaInfo for a union type additionally has variant members "tag"
+and "variants".
+
+"members" is a JSON array describing the object's common members, if
+any.  Each element is a JSON object with members "name" (the member's
+name), "type" (the name of its type), and optionally "default".  The
+member is optional if "default" is present.  Currently, "default" can
+only have value null.  Other values are reserved for future
+extensions.
+
+Example: the SchemaInfo for MyType from section Struct types
+
+    { "name": "MyType", "meta-type": "object",
+      "members": [
+          { "name": "member1", "type": "str" },
+          { "name": "member2", "type": "int" },
+          { "name": "member3", "type": "str", "default": null } ] }
+
+"tag" is the name of the common member serving as type tag.
+"variants" is a JSON array describing the object's variant members.
+Each element is a JSON object with members "case" (the value of type
+tag this element applies to) and "type" (the name of an object type
+that provides the variant members for this type tag value).
+
+Example: the SchemaInfo for flat union BlockdevOptions from section
+Union types
+
+    { "name": "BlockdevOptions", "meta-type": "object",
+      "members": [
+          { "name": "driver", "type": "BlockdevDriver" },
+          { "name": "readonly", "type": "bool"} ],
+      "tag": "driver",
+      "variants": [
+          { "case": "file", "type": "FileOptions" },
+          { "case": "qcow2", "type": "Qcow2Options" } ] }
+
+Note that base types are "flattened": its members are included in the
+"members" array.
+
+A simple union implicitly defines an enumeration type for its implicit
+discriminator (called "type" on the wire, see section Union types).
+
+A simple union implicitly defines an object type for each of its
+variants.
+
+Example: the SchemaInfo for simple union BlockdevOptions from section
+Union types
+
+    { "name": "BlockdevOptions", "meta-type": "object",
+      "members": [
+          { "name": "kind", "type": "BlockdevOptionsKind" } ],
+      "tag": "type",
+      "variants": [
+          { "case": "file", "type": ":obj-FileOptions-wrapper" },
+          { "case": "qcow2", "type": ":obj-Qcow2Options-wrapper" } ] }
+
+    Enumeration type "BlockdevOptionsKind" and the object types
+    ":obj-FileOptions-wrapper", ":obj-Qcow2Options-wrapper" are
+    implicitly defined.
+
+The SchemaInfo for an alternate type has meta-type "alternate", and
+variant member "members".  "members" is a JSON array.  Each element is
+a JSON object with member "type", which names a type.  Values of the
+alternate type conform to exactly one of its member types.
+
+Example: the SchemaInfo for BlockRef from section Alternate types
+
+    { "name": "BlockRef", "meta-type": "alternate",
+      "members": [
+          { "type": "BlockdevOptions" },
+          { "type": "str" } ] }
+
+The SchemaInfo for an array type has meta-type "array", and variant
+member "element-type", which names the array's element type.  Array
+types are implicitly defined.
+
+Example: the SchemaInfo for ['str']
+
+    { "name": "strList", "meta-type": "array",
+      "element-type": "str" }
+
+The SchemaInfo for an enumeration type has meta-type "enum" and
+variant member "values".
+
+Example: the SchemaInfo for MyEnum from section Enumeration types
+
+    { "name": "MyEnum", "meta-type": "enum",
+      "values": [ "value1", "value2", "value3" ] }
+
+The SchemaInfo for a built-in type has the same name as the type in
+the QAPI schema (see section Built-in Types), with one exception
+detailed below.  It has variant member "json-type" that shows how
+values of this type are encoded on the wire.
+
+Example: the SchemaInfo for str
+
+    { "name": "str", "meta-type": "builtin", "json-type": "string" }
+
+The QAPI schema supports a number of integer types that only differ in
+how they map to C.  They are identical as far as SchemaInfo is
+concerned.  Therefore, they get all mapped to a single type "int" in
+SchemaInfo.
+
+As explained above, type names are not part of the wire ABI.  Not even
+the names of built-in types.  Clients should examine member
+"json-type" instead of hard-coding names of built-in types.
+
+
 == Code generation ==

-Schemas are fed into 3 scripts to generate all the code/files that, paired
-with the core QAPI libraries, comprise everything required to take JSON
-commands read in by a Client JSON Protocol server, unmarshal the arguments into
-the underlying C types, call into the corresponding C function, and map the
-response back to a Client JSON Protocol response to be returned to the user.
+Schemas are fed into four scripts to generate all the code/files that,
+paired with the core QAPI libraries, comprise everything required to
+take JSON commands read in by a Client JSON Protocol server, unmarshal
+the arguments into the underlying C types, call into the corresponding
+C function, and map the response back to a Client JSON Protocol
+response to be returned to the user.

 As an example, we'll use the following schema, which describes a single
 complex user-defined type (which will produce a C struct, along with a list
@@ -537,35 +738,34 @@ Example:
    $ cat qapi-generated/example-qapi-types.c
 [Uninteresting stuff omitted...]

-    void qapi_free_UserDefOneList(UserDefOneList *obj)
-    {
-        QapiDeallocVisitor *md;
-        Visitor *v;
-
-        if (!obj) {
-            return;
-        }
-
-        md = qapi_dealloc_visitor_new();
-        v = qapi_dealloc_get_visitor(md);
-        visit_type_UserDefOneList(v, &obj, NULL, NULL);
-        qapi_dealloc_visitor_cleanup(md);
-    }
-
-
    void qapi_free_UserDefOne(UserDefOne *obj)
    {
-        QapiDeallocVisitor *md;
+        QapiDeallocVisitor *qdv;
        Visitor *v;

        if (!obj) {
            return;
        }

-        md = qapi_dealloc_visitor_new();
-        v = qapi_dealloc_get_visitor(md);
+        qdv = qapi_dealloc_visitor_new();
+        v = qapi_dealloc_get_visitor(qdv);
        visit_type_UserDefOne(v, &obj, NULL, NULL);
-        qapi_dealloc_visitor_cleanup(md);
+        qapi_dealloc_visitor_cleanup(qdv);
+    }
+
+    void qapi_free_UserDefOneList(UserDefOneList *obj)
+    {
+        QapiDeallocVisitor *qdv;
+        Visitor *v;
+
+        if (!obj) {
+            return;
+        }
+
+        qdv = qapi_dealloc_visitor_new();
+        v = qapi_dealloc_get_visitor(qdv);
+        visit_type_UserDefOneList(v, &obj, NULL, NULL);
+        qapi_dealloc_visitor_cleanup(qdv);
    }
    $ cat qapi-generated/example-qapi-types.h
 [Uninteresting stuff omitted...]
@@ -577,25 +777,25 @@ Example:

    typedef struct UserDefOne UserDefOne;

-    typedef struct UserDefOneList {
-        union {
-            UserDefOne *value;
-            uint64_t padding;
-        };
-        struct UserDefOneList *next;
-    } UserDefOneList;
-
-
-[Functions on built-in types omitted...]
+    typedef struct UserDefOneList UserDefOneList;

    struct UserDefOne {
        int64_t integer;
        char *string;
    };

-    void qapi_free_UserDefOneList(UserDefOneList *obj);
    void qapi_free_UserDefOne(UserDefOne *obj);

+    struct UserDefOneList {
+        union {
+            UserDefOne *value;
+            uint64_t padding;
+        };
+        UserDefOneList *next;
+    };
+
+    void qapi_free_UserDefOneList(UserDefOneList *obj);
+
    #endif

 === scripts/qapi-visit.py ===
@@ -623,15 +823,15 @@ Example:
    $ cat qapi-generated/example-qapi-visit.c
 [Uninteresting stuff omitted...]

-    static void visit_type_UserDefOne_fields(Visitor *m, UserDefOne **obj, Error **errp)
+    static void visit_type_UserDefOne_fields(Visitor *v, UserDefOne **obj, Error **errp)
    {
        Error *err = NULL;

-        visit_type_int(m, &(*obj)->integer, "integer", &err);
+        visit_type_int(v, &(*obj)->integer, "integer", &err);
        if (err) {
            goto out;
        }
-        visit_type_str(m, &(*obj)->string, "string", &err);
+        visit_type_str(v, &(*obj)->string, "string", &err);
        if (err) {
            goto out;
        }
@@ -640,40 +840,40 @@ Example:
        error_propagate(errp, err);
    }

-    void visit_type_UserDefOne(Visitor *m, UserDefOne **obj, const char *name, Error **errp)
+    void visit_type_UserDefOne(Visitor *v, UserDefOne **obj, const char *name, Error **errp)
    {
        Error *err = NULL;

-        visit_start_struct(m, (void **)obj, "UserDefOne", name, sizeof(UserDefOne), &err);
+        visit_start_struct(v, (void **)obj, "UserDefOne", name, sizeof(UserDefOne), &err);
        if (!err) {
            if (*obj) {
-                visit_type_UserDefOne_fields(m, obj, errp);
+                visit_type_UserDefOne_fields(v, obj, errp);
            }
-            visit_end_struct(m, &err);
+            visit_end_struct(v, &err);
        }
        error_propagate(errp, err);
    }

-    void visit_type_UserDefOneList(Visitor *m, UserDefOneList **obj, const char *name, Error **errp)
+    void visit_type_UserDefOneList(Visitor *v, UserDefOneList **obj, const char *name, Error **errp)
    {
        Error *err = NULL;
        GenericList *i, **prev;

-        visit_start_list(m, name, &err);
+        visit_start_list(v, name, &err);
        if (err) {
            goto out;
        }

        for (prev = (GenericList **)obj;
-             !err && (i = visit_next_list(m, prev, &err)) != NULL;
+             !err && (i = visit_next_list(v, prev, &err)) != NULL;
             prev = &i) {
            UserDefOneList *native_i = (UserDefOneList *)i;
-            visit_type_UserDefOne(m, &native_i->value, NULL, &err);
+            visit_type_UserDefOne(v, &native_i->value, NULL, &err);
        }

        error_propagate(errp, err);
        err = NULL;
-        visit_end_list(m, &err);
+        visit_end_list(v, &err);
    out:
        error_propagate(errp, err);
    }
@@ -685,8 +885,8 @@ Example:

 [Visitors for built-in types omitted...]

-    void visit_type_UserDefOne(Visitor *m, UserDefOne **obj, const char *name, Error **errp);
-    void visit_type_UserDefOneList(Visitor *m, UserDefOneList **obj, const char *name, Error **errp);
+    void visit_type_UserDefOne(Visitor *v, UserDefOne **obj, const char *name, Error **errp);
+    void visit_type_UserDefOneList(Visitor *v, UserDefOneList **obj, const char *name, Error **errp);

    #endif

@@ -714,63 +914,63 @@ Example:
    $ cat qapi-generated/example-qmp-marshal.c
 [Uninteresting stuff omitted...]

-    static void qmp_marshal_output_my_command(UserDefOne *ret_in, QObject **ret_out, Error **errp)
+    static void qmp_marshal_output_UserDefOne(UserDefOne *ret_in, QObject **ret_out, Error **errp)
    {
-        Error *local_err = NULL;
-        QmpOutputVisitor *mo = qmp_output_visitor_new();
-        QapiDeallocVisitor *md;
+        Error *err = NULL;
+        QmpOutputVisitor *qov = qmp_output_visitor_new();
+        QapiDeallocVisitor *qdv;
        Visitor *v;

-        v = qmp_output_get_visitor(mo);
-        visit_type_UserDefOne(v, &ret_in, "unused", &local_err);
-        if (local_err) {
+        v = qmp_output_get_visitor(qov);
+        visit_type_UserDefOne(v, &ret_in, "unused", &err);
+        if (err) {
            goto out;
        }
-        *ret_out = qmp_output_get_qobject(mo);
+        *ret_out = qmp_output_get_qobject(qov);

    out:
-        error_propagate(errp, local_err);
-        qmp_output_visitor_cleanup(mo);
-        md = qapi_dealloc_visitor_new();
-        v = qapi_dealloc_get_visitor(md);
+        error_propagate(errp, err);
+        qmp_output_visitor_cleanup(qov);
+        qdv = qapi_dealloc_visitor_new();
+        v = qapi_dealloc_get_visitor(qdv);
        visit_type_UserDefOne(v, &ret_in, "unused", NULL);
-        qapi_dealloc_visitor_cleanup(md);
+        qapi_dealloc_visitor_cleanup(qdv);
    }

-    static void qmp_marshal_input_my_command(QDict *args, QObject **ret, Error **errp)
+    static void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp)
    {
-        Error *local_err = NULL;
+        Error *err = NULL;
        UserDefOne *retval;
-        QmpInputVisitor *mi = qmp_input_visitor_new_strict(QOBJECT(args));
-        QapiDeallocVisitor *md;
+        QmpInputVisitor *qiv = qmp_input_visitor_new_strict(QOBJECT(args));
+        QapiDeallocVisitor *qdv;
        Visitor *v;
        UserDefOne *arg1 = NULL;

-        v = qmp_input_get_visitor(mi);
-        visit_type_UserDefOne(v, &arg1, "arg1", &local_err);
-        if (local_err) {
+        v = qmp_input_get_visitor(qiv);
+        visit_type_UserDefOne(v, &arg1, "arg1", &err);
+        if (err) {
            goto out;
        }

-        retval = qmp_my_command(arg1, &local_err);
-        if (local_err) {
+        retval = qmp_my_command(arg1, &err);
+        if (err) {
            goto out;
        }

-        qmp_marshal_output_my_command(retval, ret, &local_err);
+        qmp_marshal_output_UserDefOne(retval, ret, &err);

    out:
-        error_propagate(errp, local_err);
-        qmp_input_visitor_cleanup(mi);
-        md = qapi_dealloc_visitor_new();
-        v = qapi_dealloc_get_visitor(md);
+        error_propagate(errp, err);
+        qmp_input_visitor_cleanup(qiv);
+        qdv = qapi_dealloc_visitor_new();
+        v = qapi_dealloc_get_visitor(qdv);
        visit_type_UserDefOne(v, &arg1, "arg1", NULL);
-        qapi_dealloc_visitor_cleanup(md);
+        qapi_dealloc_visitor_cleanup(qdv);
    }

    static void qmp_init_marshal(void)
    {
-        qmp_register_command("my-command", qmp_marshal_input_my_command, QCO_NO_OPTIONS);
+        qmp_register_command("my-command", qmp_marshal_my_command, QCO_NO_OPTIONS);
    }

    qapi_init(qmp_init_marshal);
@@ -807,7 +1007,7 @@ Example:
    void qapi_event_send_my_event(Error **errp)
    {
        QDict *qmp;
-        Error *local_err = NULL;
+        Error *err = NULL;
        QMPEventFuncEmit emit;
        emit = qmp_event_get_func_emit();
        if (!emit) {
@@ -816,15 +1016,15 @@ Example:

        qmp = qmp_event_build_dict("MY_EVENT");

-        emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp, &local_err);
+        emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp, &err);

-        error_propagate(errp, local_err);
+        error_propagate(errp, err);
        QDECREF(qmp);
    }

-    const char *example_QAPIEvent_lookup[] = {
-        "MY_EVENT",
-        NULL,
+    const char *const example_QAPIEvent_lookup[] = {
+        [EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT",
+        [EXAMPLE_QAPI_EVENT_MAX] = NULL,
    };
    $ cat qapi-generated/example-qapi-event.h
 [Uninteresting stuff omitted...]
@@ -839,10 +1039,45 @@ Example:

    void qapi_event_send_my_event(Error **errp);

-    extern const char *example_QAPIEvent_lookup[];
    typedef enum example_QAPIEvent {
        EXAMPLE_QAPI_EVENT_MY_EVENT = 0,
        EXAMPLE_QAPI_EVENT_MAX = 1,
    } example_QAPIEvent;

+    extern const char *const example_QAPIEvent_lookup[];
+
+    #endif
+
+=== scripts/qapi-introspect.py ===
+
+Used to generate the introspection C code for a schema. The following
+files are created:
+
+$(prefix)qmp-introspect.c - Defines a string holding a JSON
+                            description of the schema.
+$(prefix)qmp-introspect.h - Declares the above string.
+
+Example:
+
+    $ python scripts/qapi-introspect.py --output-dir="qapi-generated"
+    --prefix="example-" example-schema.json
+    $ cat qapi-generated/example-qmp-introspect.c
+[Uninteresting stuff omitted...]
+
+    const char example_qmp_schema_json[] = "["
+        "{\"arg-type\": \"0\", \"meta-type\": \"event\", \"name\": \"MY_EVENT\"}, "
+        "{\"arg-type\": \"1\", \"meta-type\": \"command\", \"name\": \"my-command\", \"ret-type\": \"2\"}, "
+        "{\"members\": [], \"meta-type\": \"object\", \"name\": \"0\"}, "
+        "{\"members\": [{\"name\": \"arg1\", \"type\": \"2\"}], \"meta-type\": \"object\", \"name\": \"1\"}, "
+        "{\"members\": [{\"name\": \"integer\", \"type\": \"int\"}, {\"name\": \"string\", \"type\": \"str\"}], \"meta-type\": \"object\", \"name\": \"2\"}, "
+        "{\"json-type\": \"int\", \"meta-type\": \"builtin\", \"name\": \"int\"}, "
+        "{\"json-type\": \"string\", \"meta-type\": \"builtin\", \"name\": \"str\"}]";
+    $ cat qapi-generated/example-qmp-introspect.h
+[Uninteresting stuff omitted...]
+
+    #ifndef EXAMPLE_QMP_INTROSPECT_H
+    #define EXAMPLE_QMP_INTROSPECT_H
+
+    extern const char example_qmp_schema_json[];
+
    #endif
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
--- a/docs/qmp-intro.txt
+++ b/docs/qmp-intro.txt
--- a/docs/qmp/qmp-spec.txt
+++ b/docs/qmp/qmp-spec.txt
--- a/docs/rcu.txt
+++ b/docs/rcu.txt
@@ -128,7 +128,7 @@ The core RCU API is small:
        the callback function is g_free, in particular, g_free_rcu can be
        used.  In the above case, one could have written simply:

-            g_free_rcu(foo_reclaim, rcu);
+            g_free_rcu(&foo, rcu);

     typeof(*p) atomic_rcu_read(p);

--- a/docs/specs/ppc-spapr-hotplug.txt
+++ b/docs/specs/ppc-spapr-hotplug.txt
@@ -302,4 +302,52 @@ consisting of <phys>, <size> and <maxcpus>.
 pseries guests use this property to note the maximum allowed CPUs for the
 guest.

+== ibm,dynamic-reconfiguration-memory ==
+
+ibm,dynamic-reconfiguration-memory is a device tree node that represents
+dynamically reconfigurable logical memory blocks (LMB). This node
+is generated only when the guest advertises the support for it via
+ibm,client-architecture-support call. Memory that is not dynamically
+reconfigurable is represented by /memory nodes. The properties of this
+node that are of interest to the sPAPR memory hotplug implementation
+in QEMU are described here.
+
+ibm,lmb-size
+
+This 64bit integer defines the size of each dynamically reconfigurable LMB.
+
+ibm,associativity-lookup-arrays
+
+This property defines a lookup array in which the NUMA associativity
+information for each LMB can be found. It is a property encoded array
+that begins with an integer M, the number of associativity lists followed
+by an integer N, the number of entries per associativity list and terminated
+by M associativity lists each of length N integers.
+
+This property provides the same information as given by ibm,associativity
+property in a /memory node. Each assigned LMB has an index value between
+0 and M-1 which is used as an index into this table to select which
+associativity list to use for the LMB. This index value for each LMB
+is defined in ibm,dynamic-memory property.
+
+ibm,dynamic-memory
+
+This property describes the dynamically reconfigurable memory. It is a
+property encoded array that has an integer N, the number of LMBs followed
+by N LMB list entires.
+
+Each LMB list entry consists of the following elements:
+
+- Logical address of the start of the LMB encoded as a 64bit integer. This
+  corresponds to reg property in /memory node.
+- DRC index of the LMB that corresponds to ibm,my-drc-index property
+  in a /memory node.
+- Four bytes reserved for expansion.
+- Associativity list index for the LMB that is used as an index into
+  ibm,associativity-lookup-arrays property described earlier. This
+  is used to retrieve the right associativity list to be used for this
+  LMB.
+- A 32bit flags word. The bit at bit position 0x00000008 defines whether
+  the LMB is assigned to the the partition as of boot time.
+
 [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -113,6 +113,7 @@ message replies. Most of the requests don't require replies. Here is a list of
 the ones that do:

 * VHOST_GET_FEATURES
+ * VHOST_GET_PROTOCOL_FEATURES
 * VHOST_GET_VRING_BASE

 There are several messages that the master sends with file descriptors passed
@@ -127,6 +128,30 @@ in the ancillary data:
 If Master is unable to send the full message or receives a wrong reply it will
 close the connection. An optional reconnection mechanism can be implemented.

+Any protocol extensions are gated by protocol feature bits,
+which allows full backwards compatibility on both master
+and slave.
+As older slaves don't support negotiating protocol features,
+a feature bit was dedicated for this purpose:
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+Multiple queue support
+----------------------
+
+Multiple queue is treated as a protocol extension, hence the slave has to
+implement protocol features first. The multiple queues feature is supported
+only when the protocol feature VHOST_USER_PROTOCOL_F_MQ (bit 0) is set:
+#define VHOST_USER_PROTOCOL_F_MQ    0
+
+The max number of queues the slave supports can be queried with message
+VHOST_USER_GET_PROTOCOL_FEATURES. Master should stop when the number of
+requested queues is bigger than that.
+
+As all queues share one connection, the master uses a unique index for each
+queue in the sent message to identify a specified queue. One queue pair
+is enabled initially. More queues are enabled dynamically, by sending
+message VHOST_USER_SET_VRING_ENABLE.
+
 Message types
 -------------

@@ -138,6 +163,8 @@ Message types
      Slave payload: u64

      Get from the underlying vhost implementation the features bitmask.
+      Feature bit VHOST_USER_F_PROTOCOL_FEATURES signals slave support for
+      VHOST_USER_GET_PROTOCOL_FEATURES and VHOST_USER_SET_PROTOCOL_FEATURES.

 * VHOST_USER_SET_FEATURES

@@ -146,6 +173,33 @@ Message types
      Master payload: u64

      Enable features in the underlying vhost implementation using a bitmask.
+      Feature bit VHOST_USER_F_PROTOCOL_FEATURES signals slave support for
+      VHOST_USER_GET_PROTOCOL_FEATURES and VHOST_USER_SET_PROTOCOL_FEATURES.
+
+ * VHOST_USER_GET_PROTOCOL_FEATURES
+
+      Id: 15
+      Equivalent ioctl: VHOST_GET_FEATURES
+      Master payload: N/A
+      Slave payload: u64
+
+      Get the protocol feature bitmask from the underlying vhost implementation.
+      Only legal if feature bit VHOST_USER_F_PROTOCOL_FEATURES is present in
+      VHOST_USER_GET_FEATURES.
+      Note: slave that reported VHOST_USER_F_PROTOCOL_FEATURES must support
+      this message even before VHOST_USER_SET_FEATURES was called.
+
+ * VHOST_USER_SET_PROTOCOL_FEATURES
+
+      Id: 16
+      Ioctl: VHOST_SET_FEATURES
+      Master payload: u64
+
+      Enable protocol features in the underlying vhost implementation.
+      Only legal if feature bit VHOST_USER_F_PROTOCOL_FEATURES is present in
+      VHOST_USER_GET_FEATURES.
+      Note: slave that reported VHOST_USER_F_PROTOCOL_FEATURES must support
+      this message even before VHOST_USER_SET_FEATURES was called.

 * VHOST_USER_SET_OWNER

@@ -157,10 +211,10 @@ Message types
      as an owner of the session. This can be used on the Slave as a
      "session start" flag.

- * VHOST_USER_RESET_OWNER
+ * VHOST_USER_RESET_DEVICE

      Id: 4
-      Equivalent ioctl: VHOST_RESET_OWNER
+      Equivalent ioctl: VHOST_RESET_DEVICE
      Master payload: N/A

      Issued when a new connection is about to be closed. The Master will no
@@ -264,3 +318,22 @@ Message types
      Bits (0-7) of the payload contain the vring index. Bit 8 is the
      invalid FD flag. This flag is set when there is no file descriptor
      in the ancillary data.
+
+ * VHOST_USER_GET_QUEUE_NUM
+
+      Id: 17
+      Equivalent ioctl: N/A
+      Master payload: N/A
+      Slave payload: u64
+
+      Query how many queues the backend supports. This request should be
+      sent only when VHOST_USER_PROTOCOL_F_MQ is set in quried protocol
+      features by VHOST_USER_GET_PROTOCOL_FEATURES.
+
+ * VHOST_USER_SET_VRING_ENABLE
+
+      Id: 18
+      Equivalent ioctl: N/A
+      Master payload: vring state description
+
+      Signal slave to enable or disable corresponding vring.
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -258,11 +258,11 @@ is generated to make use in scripts more convenient.  This step can also be
 performed manually after a build in order to change the binary name in the .stp
 probes:

-    scripts/tracetool --dtrace --stap \
-                      --binary path/to/qemu-binary \
-                      --target-type system \
-                      --target-name x86_64 \
-                      <trace-events >qemu.stp
+    scripts/tracetool.py --backends=dtrace --format=stap \
+                         --binary path/to/qemu-binary \
+                         --target-type system \
+                         --target-name x86_64 \
+                         <trace-events >qemu.stp

 == Trace event properties ==

--- a/docs/win32-qemu-event.promela
+++ b/docs/win32-qemu-event.promela
@@ -0,0 +1,98 @@
+/*
+ * This model describes the implementation of QemuEvent in
+ * util/qemu-thread-win32.c.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This file is in the public domain.  If you really want a license,
+ * the WTFPL will do.
+ *
+ * To verify it:
+ *     spin -a docs/event.promela
+ *     gcc -O2 pan.c -DSAFETY
+ *     ./a.out
+ */
+
+bool event;
+int value;
+
+/* Primitives for a Win32 event */
+#define RAW_RESET event = false
+#define RAW_SET   event = true
+#define RAW_WAIT  do :: event -> break; od
+
+#if 0
+/* Basic sanity checking: test the Win32 event primitives */
+#define RESET     RAW_RESET
+#define SET       RAW_SET
+#define WAIT      RAW_WAIT
+#else
+/* Full model: layer a userspace-only fast path on top of the RAW_*
+ * primitives.  SET/RESET/WAIT have exactly the same semantics as
+ * RAW_SET/RAW_RESET/RAW_WAIT, but try to avoid invoking them.
+ */
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+int state = EV_FREE;
+
+int xchg_result;
+#define SET   if :: state != EV_SET ->                                      \
+                    atomic { /* xchg_result=xchg(state, EV_SET) */          \
+                        xchg_result = state;                                \
+                        state = EV_SET;                                     \
+                    }                                                       \
+                    if :: xchg_result == EV_BUSY -> RAW_SET;                \
+                       :: else -> skip;                                     \
+                    fi;                                                     \
+                 :: else -> skip;                                           \
+              fi
+
+#define RESET if :: state == EV_SET -> atomic { state = state | EV_FREE; }  \
+                 :: else            -> skip;                                \
+              fi
+
+int tmp1, tmp2;
+#define WAIT  tmp1 = state;                                                 \
+              if :: tmp1 != EV_SET ->                                       \
+                    if :: tmp1 == EV_FREE ->                                \
+                          RAW_RESET;                                        \
+                          atomic { /* tmp2=cas(state, EV_FREE, EV_BUSY) */  \
+                              tmp2 = state;                                 \
+                              if :: tmp2 == EV_FREE -> state = EV_BUSY;     \
+                                 :: else            -> skip;                \
+                              fi;                                           \
+                          }                                                 \
+                          if :: tmp2 == EV_SET -> tmp1 = EV_SET;            \
+                             :: else           -> tmp1 = EV_BUSY;           \
+                          fi;                                               \
+                       :: else -> skip;                                     \
+                    fi;                                                     \
+                    assert(tmp1 != EV_FREE);                                \
+                    if :: tmp1 == EV_BUSY -> RAW_WAIT;                      \
+                       :: else -> skip;                                     \
+                    fi;                                                     \
+                 :: else -> skip;                                           \
+              fi
+#endif
+
+active proctype waiter()
+{
+     if
+         :: !value ->
+             RESET;
+             if
+                 :: !value -> WAIT;
+                 :: else   -> skip;
+             fi;
+        :: else -> skip;
+    fi;
+    assert(value);
+}
+
+active proctype notifier()
+{
+    value = true;
+    SET;
+}
--- a/docs/writing-qmp-commands.txt
+++ b/docs/writing-qmp-commands.txt
@@ -127,7 +127,7 @@ following at the bottom:
    {
        .name       = "hello-world",
        .args_type  = "",
-        .mhandler.cmd_new = qmp_marshal_input_hello_world,
+        .mhandler.cmd_new = qmp_marshal_hello_world,
    },

 You're done. Now build qemu, run it as suggested in the "Testing" section,
@@ -179,7 +179,7 @@ The last step is to update the qmp-commands.hx file:
    {
        .name       = "hello-world",
        .args_type  = "message:s?",
-        .mhandler.cmd_new = qmp_marshal_input_hello_world,
+        .mhandler.cmd_new = qmp_marshal_hello_world,
    },

 Notice that the "args_type" member got our "message" argument. The character
@@ -461,7 +461,7 @@ The last step is to add the correspoding entry in the qmp-commands.hx file:
    {
        .name       = "query-alarm-clock",
        .args_type  = "",
-        .mhandler.cmd_new = qmp_marshal_input_query_alarm_clock,
+        .mhandler.cmd_new = qmp_marshal_query_alarm_clock,
    },

 Time to test the new command. Build qemu, run it as described in the "Testing"
@@ -607,7 +607,7 @@ To test this you have to add the corresponding qmp-commands.hx entry:
    {
        .name       = "query-alarm-methods",
        .args_type  = "",
-        .mhandler.cmd_new = qmp_marshal_input_query_alarm_methods,
+        .mhandler.cmd_new = qmp_marshal_query_alarm_methods,
    },

 Now Build qemu, run it as explained in the "Testing" section and try our new
--- a/exec.c
+++ b/exec.c
@@ -49,7 +49,6 @@
 #include "exec/cpu-all.h"
 #include "qemu/rcu_queue.h"
 #include "qemu/main-loop.h"
-#include "exec/cputlb.h"
 #include "translate-all.h"

 #include "exec/memory-internal.h"
@@ -85,6 +84,9 @@ static MemoryRegion io_mem_unassigned;
 */
 #define RAM_RESIZEABLE (1 << 2)

+/* An extra page is mapped on top of this RAM.
+ */
+#define RAM_EXTRA (1 << 3)
 #endif

 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -159,6 +161,21 @@ static void memory_map_init(void);
 static void tcg_commit(MemoryListener *listener);

 static MemoryRegion io_mem_watch;
+
+/**
+ * CPUAddressSpace: all the information a CPU needs about an AddressSpace
+ * @cpu: the CPU whose AddressSpace this is
+ * @as: the AddressSpace itself
+ * @memory_dispatch: its dispatch pointer (cached, RCU protected)
+ * @tcg_as_listener: listener for tracking changes to the AddressSpace
+ */
+struct CPUAddressSpace {
+    CPUState *cpu;
+    AddressSpace *as;
+    struct AddressSpaceDispatch *memory_dispatch;
+    MemoryListener tcg_as_listener;
+};
+
 #endif

 #if !defined(CONFIG_USER_ONLY)
@@ -429,7 +446,7 @@ address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
 {
    MemoryRegionSection *section;
-    section = address_space_translate_internal(cpu->memory_dispatch,
+    section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
@@ -478,6 +495,24 @@ static const VMStateDescription vmstate_cpu_common_exception_index = {
    }
 };

+static bool cpu_common_crash_occurred_needed(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    return cpu->crash_occurred;
+}
+
+static const VMStateDescription vmstate_cpu_common_crash_occurred = {
+    .name = "cpu_common/crash_occurred",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = cpu_common_crash_occurred_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(crash_occurred, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
@@ -491,6 +526,7 @@ const VMStateDescription vmstate_cpu_common = {
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
+        &vmstate_cpu_common_crash_occurred,
        NULL
    }
 };
@@ -516,13 +552,16 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

-    if (cpu->tcg_as_listener) {
-        memory_listener_unregister(cpu->tcg_as_listener);
-    } else {
-        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
+    if (cpu->cpu_ases) {
+        /* We've already registered the listener for our only AS */
+        return;
    }
-    cpu->tcg_as_listener->commit = tcg_commit;
-    memory_listener_register(cpu->tcg_as_listener, as);
+
+    cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
+    cpu->cpu_ases[0].cpu = cpu;
+    cpu->cpu_ases[0].as = as;
+    cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
+    memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
 }
 #endif

@@ -580,7 +619,6 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
 #ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
-    cpu_reload_memory_map(cpu);
 #endif

 #if defined(CONFIG_USER_ONLY)
@@ -894,6 +932,7 @@ found:

 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 {
+    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;
@@ -905,7 +944,9 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
-    cpu_tlb_reset_dirty_all(start1, length);
+    CPU_FOREACH(cpu) {
+        tlb_reset_dirty(cpu, start1, length);
+    }
    rcu_read_unlock();
 }

@@ -1164,10 +1205,13 @@ static void *file_ram_alloc(RAMBlock *block,
    char *filename;
    char *sanitized_name;
    char *c;
+    void *ptr;
    void *area = NULL;
    int fd;
    uint64_t hpagesize;
+    uint64_t total;
    Error *local_err = NULL;
+    size_t offset;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
@@ -1211,6 +1255,7 @@ static void *file_ram_alloc(RAMBlock *block,
    g_free(filename);

    memory = ROUND_UP(memory, hpagesize);
+    total = memory + hpagesize;

    /*
     * ftruncate is not supported by hugetlbfs in older
@@ -1222,16 +1267,40 @@ static void *file_ram_alloc(RAMBlock *block,
        perror("ftruncate");
    }

-    area = mmap(0, memory, PROT_READ | PROT_WRITE,
-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
+    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+                -1, 0);
+    if (ptr == MAP_FAILED) {
+        error_setg_errno(errp, errno,
+                         "unable to allocate memory range for hugepages");
+        close(fd);
+        goto error;
+    }
+
+    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
+
+    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
+                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
+                MAP_FIXED,
                fd, 0);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
+        munmap(ptr, total);
        close(fd);
        goto error;
    }

+    if (offset > 0) {
+        munmap(ptr, offset);
+    }
+    ptr += offset;
+    total -= offset;
+
+    if (total > memory + getpagesize()) {
+        munmap(ptr + memory + getpagesize(),
+               total - memory - getpagesize());
+    }
+
    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }
@@ -1549,6 +1618,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
+    new_block->flags |= RAM_EXTRA;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
@@ -1650,7 +1720,11 @@ static void reclaim_ramblock(RAMBlock *block)
        xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
    } else if (block->fd >= 0) {
-        munmap(block->host, block->max_length);
+        if (block->flags & RAM_EXTRA) {
+            munmap(block->host, block->max_length + getpagesize());
+        } else {
+            munmap(block->host, block->max_length);
+        }
        close(block->fd);
 #endif
    } else {
@@ -1900,8 +1974,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
-        CPUArchState *env = current_cpu->env_ptr;
-        tlb_set_dirty(env, current_cpu->mem_io_vaddr);
+        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
 }

@@ -2163,7 +2236,8 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,

 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
 {
-    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
+    CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
+    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
@@ -2222,19 +2296,20 @@ static void mem_commit(MemoryListener *listener)

 static void tcg_commit(MemoryListener *listener)
 {
-    CPUState *cpu;
+    CPUAddressSpace *cpuas;
+    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
-    /* XXX: slow ! */
-    CPU_FOREACH(cpu) {
-        /* FIXME: Disentangle the cpu.h circular files deps so we can
-           directly get the right CPU from listener.  */
-        if (cpu->tcg_as_listener != listener) {
-            continue;
-        }
-        cpu_reload_memory_map(cpu);
-    }
+    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+    cpu_reloading_memory_map();
+    /* The CPU and TLB are protected by the iothread lock.
+     * We reload the dispatch pointer now because cpu_reloading_memory_map()
+     * may have split the RCU critical section.
+     */
+    d = atomic_rcu_read(&cpuas->as->dispatch);
+    cpuas->memory_dispatch = d;
+    tlb_flush(cpuas->cpu, 1);
 }

 void address_space_init_dispatch(AddressSpace *as)
--- a/gdb-xml/s390-virt.xml
+++ b/gdb-xml/s390-virt.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright 2015 IBM Corp.
+
+     This work is licensed under the terms of the GNU GPL, version 2 or
+     (at your option) any later version. See the COPYING file in the
+     top-level directory. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.virt">
+  <reg name="ckc" bitsize="64" type="uint64" group="system"/>
+  <reg name="cputm" bitsize="64" type="uint64" group="system"/>
+  <reg name="last_break" bitsize="64" type="code_ptr" group="system"/>
+  <reg name="prefix" bitsize="64" type="data_ptr" group="system"/>
+  <reg name="pp" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_token" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_select" bitsize="64" type="uint64" group="system"/>
+  <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/>
+</feature>
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -0,0 +1,795 @@
+HXCOMM Use DEFHEADING() to define headings in both help text and texi
+HXCOMM Text between STEXI and ETEXI are copied to texi version and
+HXCOMM discarded from C version
+HXCOMM DEF(command, args, callback, arg_string, help) is used to construct
+HXCOMM monitor info commands
+HXCOMM HXCOMM can be used for comments, discarded from both texi and C
+
+STEXI
+@table @option
+@item info @var{subcommand}
+@findex info
+Show various information about the system state.
+@table @option
+ETEXI
+
+    {
+        .name       = "version",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the version of QEMU",
+        .mhandler.cmd = hmp_info_version,
+    },
+
+STEXI
+@item info version
+@findex version
+Show the version of QEMU.
+ETEXI
+
+    {
+        .name       = "network",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the network state",
+        .mhandler.cmd = hmp_info_network,
+    },
+
+STEXI
+@item info network
+@findex network
+Show the network state.
+ETEXI
+
+    {
+        .name       = "chardev",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the character devices",
+        .mhandler.cmd = hmp_info_chardev,
+    },
+
+STEXI
+@item info chardev
+@findex chardev
+Show the character devices.
+ETEXI
+
+    {
+        .name       = "block",
+        .args_type  = "nodes:-n,verbose:-v,device:B?",
+        .params     = "[-n] [-v] [device]",
+        .help       = "show info of one block device or all block devices "
+                      "(-n: show named nodes; -v: show details)",
+        .mhandler.cmd = hmp_info_block,
+    },
+
+STEXI
+@item info block
+@findex block
+Show info of one block device or all block devices.
+ETEXI
+
+    {
+        .name       = "blockstats",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show block device statistics",
+        .mhandler.cmd = hmp_info_blockstats,
+    },
+
+STEXI
+@item info blockstats
+@findex blockstats
+Show block device statistics.
+ETEXI
+
+    {
+        .name       = "block-jobs",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show progress of ongoing block device operations",
+        .mhandler.cmd = hmp_info_block_jobs,
+    },
+
+STEXI
+@item info block-jobs
+@findex block-jobs
+Show progress of ongoing block device operations.
+ETEXI
+
+    {
+        .name       = "registers",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the cpu registers",
+        .mhandler.cmd = hmp_info_registers,
+    },
+
+STEXI
+@item info registers
+@findex registers
+Show the cpu registers.
+ETEXI
+
+#if defined(TARGET_I386)
+    {
+        .name       = "lapic",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show local apic state",
+        .mhandler.cmd = hmp_info_local_apic,
+    },
+#endif
+
+STEXI
+@item info lapic
+@findex lapic
+Show local APIC state
+ETEXI
+
+#if defined(TARGET_I386)
+    {
+        .name       = "ioapic",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show io apic state",
+        .mhandler.cmd = hmp_info_io_apic,
+    },
+#endif
+
+STEXI
+@item info ioapic
+@findex ioapic
+Show io APIC state
+ETEXI
+
+    {
+        .name       = "cpus",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show infos for each CPU",
+        .mhandler.cmd = hmp_info_cpus,
+    },
+
+STEXI
+@item info cpus
+@findex cpus
+Show infos for each CPU.
+ETEXI
+
+    {
+        .name       = "history",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the command line history",
+        .mhandler.cmd = hmp_info_history,
+    },
+
+STEXI
+@item info history
+@findex history
+Show the command line history.
+ETEXI
+
+#if defined(TARGET_I386) || defined(TARGET_PPC) || defined(TARGET_MIPS) || \
+    defined(TARGET_LM32) || (defined(TARGET_SPARC) && !defined(TARGET_SPARC64))
+    {
+        .name       = "irq",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the interrupts statistics (if available)",
+#ifdef TARGET_SPARC
+        .mhandler.cmd = sun4m_hmp_info_irq,
+#elif defined(TARGET_LM32)
+        .mhandler.cmd = lm32_hmp_info_irq,
+#else
+        .mhandler.cmd = hmp_info_irq,
+#endif
+    },
+
+STEXI
+@item info irq
+@findex irq
+Show the interrupts statistics (if available).
+ETEXI
+
+    {
+        .name       = "pic",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show i8259 (PIC) state",
+#ifdef TARGET_SPARC
+        .mhandler.cmd = sun4m_hmp_info_pic,
+#elif defined(TARGET_LM32)
+        .mhandler.cmd = lm32_hmp_info_pic,
+#else
+        .mhandler.cmd = hmp_info_pic,
+#endif
+    },
+#endif
+
+STEXI
+@item info pic
+@findex pic
+Show i8259 (PIC) state.
+ETEXI
+
+    {
+        .name       = "pci",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show PCI info",
+        .mhandler.cmd = hmp_info_pci,
+    },
+
+STEXI
+@item info pci
+@findex pci
+Show PCI information.
+ETEXI
+
+#if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC) || \
+    defined(TARGET_PPC) || defined(TARGET_XTENSA)
+    {
+        .name       = "tlb",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show virtual to physical memory mappings",
+        .mhandler.cmd = hmp_info_tlb,
+    },
+#endif
+
+STEXI
+@item info tlb
+@findex tlb
+Show virtual to physical memory mappings.
+ETEXI
+
+#if defined(TARGET_I386)
+    {
+        .name       = "mem",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the active virtual memory mappings",
+        .mhandler.cmd = hmp_info_mem,
+    },
+#endif
+
+STEXI
+@item info mem
+@findex mem
+Show the active virtual memory mappings.
+ETEXI
+
+    {
+        .name       = "mtree",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show memory tree",
+        .mhandler.cmd = hmp_info_mtree,
+    },
+
+STEXI
+@item info mtree
+@findex mtree
+Show memory tree.
+ETEXI
+
+    {
+        .name       = "jit",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show dynamic compiler info",
+        .mhandler.cmd = hmp_info_jit,
+    },
+
+STEXI
+@item info jit
+@findex jit
+Show dynamic compiler info.
+ETEXI
+
+    {
+        .name       = "opcount",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show dynamic compiler opcode counters",
+        .mhandler.cmd = hmp_info_opcount,
+    },
+
+STEXI
+@item info opcount
+@findex opcount
+Show dynamic compiler opcode counters
+ETEXI
+
+    {
+        .name       = "kvm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show KVM information",
+        .mhandler.cmd = hmp_info_kvm,
+    },
+
+STEXI
+@item info kvm
+@findex kvm
+Show KVM information.
+ETEXI
+
+    {
+        .name       = "numa",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show NUMA information",
+        .mhandler.cmd = hmp_info_numa,
+    },
+
+STEXI
+@item info numa
+@findex numa
+Show NUMA information.
+ETEXI
+
+    {
+        .name       = "usb",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show guest USB devices",
+        .mhandler.cmd = hmp_info_usb,
+    },
+
+STEXI
+@item info usb
+@findex usb
+Show guest USB devices.
+ETEXI
+
+    {
+        .name       = "usbhost",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show host USB devices",
+        .mhandler.cmd = hmp_info_usbhost,
+    },
+
+STEXI
+@item info usbhost
+@findex usbhost
+Show host USB devices.
+ETEXI
+
+    {
+        .name       = "profile",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show profiling information",
+        .mhandler.cmd = hmp_info_profile,
+    },
+
+STEXI
+@item info profile
+@findex profile
+Show profiling information.
+ETEXI
+
+    {
+        .name       = "capture",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show capture information",
+        .mhandler.cmd = hmp_info_capture,
+    },
+
+STEXI
+@item info capture
+@findex capture
+Show capture information.
+ETEXI
+
+    {
+        .name       = "snapshots",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the currently saved VM snapshots",
+        .mhandler.cmd = hmp_info_snapshots,
+    },
+
+STEXI
+@item info snapshots
+@findex snapshots
+Show the currently saved VM snapshots.
+ETEXI
+
+    {
+        .name       = "status",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the current VM status (running|paused)",
+        .mhandler.cmd = hmp_info_status,
+    },
+
+STEXI
+@item info status
+@findex status
+Show the current VM status (running|paused).
+ETEXI
+
+    {
+        .name       = "mice",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show which guest mouse is receiving events",
+        .mhandler.cmd = hmp_info_mice,
+    },
+
+STEXI
+@item info mice
+@findex mice
+Show which guest mouse is receiving events.
+ETEXI
+
+    {
+        .name       = "vnc",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the vnc server status",
+        .mhandler.cmd = hmp_info_vnc,
+    },
+
+STEXI
+@item info vnc
+@findex vnc
+Show the vnc server status.
+ETEXI
+
+#if defined(CONFIG_SPICE)
+    {
+        .name       = "spice",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the spice server status",
+        .mhandler.cmd = hmp_info_spice,
+    },
+#endif
+
+STEXI
+@item info spice
+@findex spice
+Show the spice server status.
+ETEXI
+
+    {
+        .name       = "name",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the current VM name",
+        .mhandler.cmd = hmp_info_name,
+    },
+
+STEXI
+@item info name
+@findex name
+Show the current VM name.
+ETEXI
+
+    {
+        .name       = "uuid",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the current VM UUID",
+        .mhandler.cmd = hmp_info_uuid,
+    },
+
+STEXI
+@item info uuid
+@findex uuid
+Show the current VM UUID.
+ETEXI
+
+    {
+        .name       = "cpustats",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show CPU statistics",
+        .mhandler.cmd = hmp_info_cpustats,
+    },
+
+STEXI
+@item info cpustats
+@findex cpustats
+Show CPU statistics.
+ETEXI
+
+#if defined(CONFIG_SLIRP)
+    {
+        .name       = "usernet",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show user network stack connection states",
+        .mhandler.cmd = hmp_info_usernet,
+    },
+#endif
+
+STEXI
+@item info usernet
+@findex usernet
+Show user network stack connection states.
+ETEXI
+
+    {
+        .name       = "migrate",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show migration status",
+        .mhandler.cmd = hmp_info_migrate,
+    },
+
+STEXI
+@item info migrate
+@findex migrate
+Show migration status.
+ETEXI
+
+    {
+        .name       = "migrate_capabilities",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show current migration capabilities",
+        .mhandler.cmd = hmp_info_migrate_capabilities,
+    },
+
+STEXI
+@item info migrate_capabilities
+@findex migrate_capabilities
+Show current migration capabilities.
+ETEXI
+
+    {
+        .name       = "migrate_parameters",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show current migration parameters",
+        .mhandler.cmd = hmp_info_migrate_parameters,
+    },
+
+STEXI
+@item info migrate_parameters
+@findex migrate_parameters
+Show current migration parameters.
+ETEXI
+
+    {
+        .name       = "migrate_cache_size",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show current migration xbzrle cache size",
+        .mhandler.cmd = hmp_info_migrate_cache_size,
+    },
+
+STEXI
+@item info migrate_cache_size
+@findex migrate_cache_size
+Show current migration xbzrle cache size.
+ETEXI
+
+    {
+        .name       = "balloon",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show balloon information",
+        .mhandler.cmd = hmp_info_balloon,
+    },
+
+STEXI
+@item info balloon
+@findex balloon
+Show balloon information.
+ETEXI
+
+    {
+        .name       = "qtree",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show device tree",
+        .mhandler.cmd = hmp_info_qtree,
+    },
+
+STEXI
+@item info qtree
+@findex qtree
+Show device tree.
+ETEXI
+
+    {
+        .name       = "qdm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show qdev device model list",
+        .mhandler.cmd = hmp_info_qdm,
+    },
+
+STEXI
+@item info qdm
+@findex qdm
+Show qdev device model list.
+ETEXI
+
+    {
+        .name       = "qom-tree",
+        .args_type  = "path:s?",
+        .params     = "[path]",
+        .help       = "show QOM composition tree",
+        .mhandler.cmd = hmp_info_qom_tree,
+    },
+
+STEXI
+@item info qom-tree
+@findex qom-tree
+Show QOM composition tree.
+ETEXI
+
+    {
+        .name       = "roms",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show roms",
+        .mhandler.cmd = hmp_info_roms,
+    },
+
+STEXI
+@item info roms
+@findex roms
+Show roms.
+ETEXI
+
+    {
+        .name       = "trace-events",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show available trace-events & their state",
+        .mhandler.cmd = hmp_info_trace_events,
+    },
+
+STEXI
+@item info trace-events
+@findex trace-events
+Show available trace-events & their state.
+ETEXI
+
+    {
+        .name       = "tpm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show the TPM device",
+        .mhandler.cmd = hmp_info_tpm,
+    },
+
+STEXI
+@item info tpm
+@findex tpm
+Show the TPM device.
+ETEXI
+
+    {
+        .name       = "memdev",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show memory backends",
+        .mhandler.cmd = hmp_info_memdev,
+    },
+
+STEXI
+@item info memdev
+@findex memdev
+Show memory backends
+ETEXI
+
+    {
+        .name       = "memory-devices",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show memory devices",
+        .mhandler.cmd = hmp_info_memory_devices,
+    },
+
+STEXI
+@item info memory-devices
+@findex memory-devices
+Show memory devices.
+ETEXI
+
+    {
+        .name       = "iothreads",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show iothreads",
+        .mhandler.cmd = hmp_info_iothreads,
+    },
+
+STEXI
+@item info iothreads
+@findex iothreads
+Show iothread's identifiers.
+ETEXI
+
+    {
+        .name       = "rocker",
+        .args_type  = "name:s",
+        .params     = "name",
+        .help       = "Show rocker switch",
+        .mhandler.cmd = hmp_rocker,
+    },
+
+STEXI
+@item info rocker @var{name}
+@findex rocker
+Show rocker switch.
+ETEXI
+
+    {
+        .name       = "rocker-ports",
+        .args_type  = "name:s",
+        .params     = "name",
+        .help       = "Show rocker ports",
+        .mhandler.cmd = hmp_rocker_ports,
+    },
+
+STEXI
+@item info rocker_ports @var{name}-ports
+@findex ocker-ports
+Show rocker ports.
+ETEXI
+
+    {
+        .name       = "rocker-of-dpa-flows",
+        .args_type  = "name:s,tbl_id:i?",
+        .params     = "name [tbl_id]",
+        .help       = "Show rocker OF-DPA flow tables",
+        .mhandler.cmd = hmp_rocker_of_dpa_flows,
+    },
+
+STEXI
+@item info rocker_of_dpa_flows @var{name} [@var{tbl_id}]
+@findex rocker-of-dpa-flows
+Show rocker OF-DPA flow tables.
+ETEXI
+
+    {
+        .name       = "rocker-of-dpa-groups",
+        .args_type  = "name:s,type:i?",
+        .params     = "name [type]",
+        .help       = "Show rocker OF-DPA groups",
+        .mhandler.cmd = hmp_rocker_of_dpa_groups,
+    },
+
+STEXI
+@item info rocker-of-dpa-groups @var{name} [@var{type}]
+@findex rocker-of-dpa-groups
+Show rocker OF-DPA groups.
+ETEXI
+
+#if defined(TARGET_S390X)
+    {
+        .name       = "skeys",
+        .args_type  = "addr:l",
+        .params     = "address",
+        .help       = "Display the value of a storage key",
+        .mhandler.cmd = hmp_info_skeys,
+    },
+#endif
+
+STEXI
+@item info skeys @var{address}
+@findex skeys
+Display the value of a storage key (s390 only)
+ETEXI
+
+STEXI
+@end table
+ETEXI
+
+STEXI
+@end table
+ETEXI
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -676,7 +676,8 @@ ETEXI
 STEXI
@item device_del @var{id}
@findex device_del
-Remove device @var{id}.
+Remove device @var{id}. @var{id} may be a short ID
+or a QOM object path.
 ETEXI

    {
@@ -1723,126 +1724,6 @@ ETEXI
        .sub_table = info_cmds,
    },

-STEXI
-@item info @var{subcommand}
-@findex info
-Show various information about the system state.
-
-@table @option
-@item info version
-show the version of QEMU
-@item info network
-show the various VLANs and the associated devices
-@item info chardev
-show the character devices
-@item info block
-show the block devices
-@item info blockstats
-show block device statistics
-@item info registers
-show the cpu registers
-@item info cpus
-show infos for each CPU
-@item info history
-show the command line history
-@item info irq
-show the interrupts statistics (if available)
-@item info pic
-show i8259 (PIC) state
-@item info pci
-show emulated PCI device info
-@item info tlb
-show virtual to physical memory mappings (i386, SH4, SPARC, PPC, and Xtensa only)
-@item info mem
-show the active virtual memory mappings (i386 only)
-@item info jit
-show dynamic compiler info
-@item info numa
-show NUMA information
-@item info kvm
-show KVM information
-@item info usb
-show USB devices plugged on the virtual USB hub
-@item info usbhost
-show all USB host devices
-@item info profile
-show profiling information
-@item info capture
-show information about active capturing
-@item info snapshots
-show list of VM snapshots
-@item info status
-show the current VM status (running|paused)
-@item info mice
-show which guest mouse is receiving events
-@item info vnc
-show the vnc server status
-@item info name
-show the current VM name
-@item info uuid
-show the current VM UUID
-@item info cpustats
-show CPU statistics
-@item info usernet
-show user network stack connection states
-@item info migrate
-show migration status
-@item info migrate_capabilities
-show current migration capabilities
-@item info migrate_parameters
-show current migration parameters
-@item info migrate_cache_size
-show current migration XBZRLE cache size
-@item info balloon
-show balloon information
-@item info qtree
-show device tree
-@item info qdm
-show qdev device model list
-@item info qom-tree
-show object composition tree
-@item info roms
-show roms
-@item info tpm
-show the TPM device
-@item info memory-devices
-show the memory devices
-@item info skeys
-Display the value of a storage key (s390 only)
-@item info iothreads
-show iothreads
-@end table
-ETEXI
-
-STEXI
-@item info trace-events
-show available trace events and their state
-ETEXI
-
-STEXI
-@item rocker @var{name}
-@findex rocker
-Show Rocker(s)
-ETEXI
-
-STEXI
-@item rocker_ports @var{name}
-@findex rocker_ports
-Show Rocker ports
-ETEXI
-
-STEXI
-@item rocker_of_dpa_flows @var{name} [@var{tbl_id}]
-@findex rocker_of_dpa_flows
-Show Rocker OF-DPA flow tables
-ETEXI
-
-STEXI
-@item rocker_of_dpa_groups @var{name} [@var{type}]
-@findex rocker_of_dpa_groups
-Show Rocker OF-DPA groups
-ETEXI
-
 STEXI
@end table
 ETEXI
--- a/hmp.c
+++ b/hmp.c
@@ -232,6 +232,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                       info->xbzrle_cache->overflow);
    }

+    if (info->has_x_cpu_throttle_percentage) {
+        monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n",
+                       info->x_cpu_throttle_percentage);
+    }
+
    qapi_free_MigrationInfo(info);
    qapi_free_MigrationCapabilityStatusList(caps);
 }
@@ -272,6 +277,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
        monitor_printf(mon, " %s: %" PRId64,
            MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
            params->decompress_threads);
+        monitor_printf(mon, " %s: %" PRId64,
+            MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL],
+            params->x_cpu_throttle_initial);
+        monitor_printf(mon, " %s: %" PRId64,
+            MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT],
+            params->x_cpu_throttle_increment);
        monitor_printf(mon, "\n");
    }

@@ -1221,6 +1232,8 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
    bool has_compress_level = false;
    bool has_compress_threads = false;
    bool has_decompress_threads = false;
+    bool has_x_cpu_throttle_initial = false;
+    bool has_x_cpu_throttle_increment = false;
    int i;

    for (i = 0; i < MIGRATION_PARAMETER_MAX; i++) {
@@ -1235,10 +1248,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
            case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
                has_decompress_threads = true;
                break;
+            case MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL:
+                has_x_cpu_throttle_initial = true;
+                break;
+            case MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT:
+                has_x_cpu_throttle_increment = true;
+                break;
            }
            qmp_migrate_set_parameters(has_compress_level, value,
                                       has_compress_threads, value,
                                       has_decompress_threads, value,
+                                       has_x_cpu_throttle_initial, value,
+                                       has_x_cpu_throttle_increment, value,
                                       &err);
            break;
        }
--- a/hw/alpha/dp264.c
+++ b/hw/alpha/dp264.c
@@ -168,17 +168,12 @@ static void clipper_init(MachineState *machine)
    }
 }

-static QEMUMachine clipper_machine = {
-    .name = "clipper",
-    .desc = "Alpha DP264/CLIPPER",
-    .init = clipper_init,
-    .max_cpus = 4,
-    .is_default = 1,
-};
-
-static void clipper_machine_init(void)
+static void clipper_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&clipper_machine);
+    mc->desc = "Alpha DP264/CLIPPER";
+    mc->init = clipper_init;
+    mc->max_cpus = 4;
+    mc->is_default = 1;
 }

-machine_init(clipper_machine_init);
+DEFINE_MACHINE("clipper", clipper_machine_init)
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -103,6 +103,12 @@ static void aw_a10_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = aw_a10_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo aw_a10_type_info = {
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -215,7 +215,7 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,

    if (kernel_filename) {
        image_size = load_elf(kernel_filename, NULL, NULL, &entry, &lowaddr,
-                              NULL, big_endian, ELF_MACHINE, 1);
+                              NULL, big_endian, EM_ARM, 1);
        if (image_size < 0) {
            image_size = load_image_targphys(kernel_filename, 0, mem_size);
            lowaddr = 0;
@@ -229,7 +229,7 @@ qemu_irq *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
    /* Hack to map an additional page of ram at the top of the address
       space.  This stops qemu complaining about executing code outside RAM
       when returning from an exception.  */
-    memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_abort);
+    memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_fatal);
    vmstate_register_ram_global(hack);
    memory_region_add_subregion(system_memory, 0xfffff000, hack);

--- a/hw/arm/collie.c
+++ b/hw/arm/collie.c
@@ -58,15 +58,10 @@ static void collie_init(MachineState *machine)
    arm_load_kernel(s->cpu, &collie_binfo);
 }

-static QEMUMachine collie_machine = {
-    .name = "collie",
-    .desc = "Collie PDA (SA-1110)",
-    .init = collie_init,
-};
-
-static void collie_machine_init(void)
+static void collie_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&collie_machine);
+    mc->desc = "Sharp SL-5500 (Collie) PDA (SA-1110)";
+    mc->init = collie_init;
 }

-machine_init(collie_machine_init)
+DEFINE_MACHINE("collie", collie_machine_init)
--- a/hw/arm/cubieboard.c
+++ b/hw/arm/cubieboard.c
@@ -74,16 +74,10 @@ static void cubieboard_init(MachineState *machine)
    arm_load_kernel(&s->a10->cpu, &cubieboard_binfo);
 }

-static QEMUMachine cubieboard_machine = {
-    .name = "cubieboard",
-    .desc = "cubietech cubieboard",
-    .init = cubieboard_init,
-};
-
-
-static void cubieboard_machine_init(void)
+static void cubieboard_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&cubieboard_machine);
+    mc->desc = "cubietech cubieboard";
+    mc->init = cubieboard_init;
 }

-machine_init(cubieboard_machine_init)
+DEFINE_MACHINE("cubieboard", cubieboard_machine_init)
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -97,6 +97,12 @@ static void digic_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = digic_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo digic_type_info = {
--- a/hw/arm/digic_boards.c
+++ b/hw/arm/digic_boards.c
@@ -148,15 +148,10 @@ static void canon_a1100_init(MachineState *machine)
    digic4_board_init(&digic4_board_canon_a1100);
 }

-static QEMUMachine canon_a1100 = {
-    .name = "canon-a1100",
-    .desc = "Canon PowerShot A1100 IS",
-    .init = &canon_a1100_init,
-};
-
-static void digic_register_machines(void)
+static void canon_a1100_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&canon_a1100);
+    mc->desc = "Canon PowerShot A1100 IS";
+    mc->init = &canon_a1100_init;
 }

-machine_init(digic_register_machines)
+DEFINE_MACHINE("canon-a1100", canon_a1100_machine_init)
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -259,7 +259,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,

    /* Internal ROM */
    memory_region_init_ram(&s->irom_mem, NULL, "exynos4210.irom",
-                           EXYNOS4210_IROM_SIZE, &error_abort);
+                           EXYNOS4210_IROM_SIZE, &error_fatal);
    vmstate_register_ram_global(&s->irom_mem);
    memory_region_set_readonly(&s->irom_mem, true);
    memory_region_add_subregion(system_mem, EXYNOS4210_IROM_BASE_ADDR,
@@ -275,7 +275,7 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,

    /* Internal RAM */
    memory_region_init_ram(&s->iram_mem, NULL, "exynos4210.iram",
-                           EXYNOS4210_IRAM_SIZE, &error_abort);
+                           EXYNOS4210_IRAM_SIZE, &error_fatal);
    vmstate_register_ram_global(&s->iram_mem);
    memory_region_add_subregion(system_mem, EXYNOS4210_IRAM_BASE_ADDR,
                                &s->iram_mem);
@@ -284,14 +284,14 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
    mem_size = ram_size;
    if (mem_size > EXYNOS4210_DRAM_MAX_SIZE) {
        memory_region_init_ram(&s->dram1_mem, NULL, "exynos4210.dram1",
-                mem_size - EXYNOS4210_DRAM_MAX_SIZE, &error_abort);
+                mem_size - EXYNOS4210_DRAM_MAX_SIZE, &error_fatal);
        vmstate_register_ram_global(&s->dram1_mem);
        memory_region_add_subregion(system_mem, EXYNOS4210_DRAM1_BASE_ADDR,
                &s->dram1_mem);
        mem_size = EXYNOS4210_DRAM_MAX_SIZE;
    }
    memory_region_init_ram(&s->dram0_mem, NULL, "exynos4210.dram0", mem_size,
-                           &error_abort);
+                           &error_fatal);
    vmstate_register_ram_global(&s->dram0_mem);
    memory_region_add_subregion(system_mem, EXYNOS4210_DRAM0_BASE_ADDR,
            &s->dram0_mem);
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -74,8 +74,6 @@ static struct arm_boot_info exynos4_board_binfo = {
    .write_secondary_boot = exynos4210_write_secondary,
 };

-static QEMUMachine exynos4_machines[EXYNOS4_NUM_OF_BOARDS];
-
 static void lan9215_init(uint32_t base, qemu_irq irq)
 {
    DeviceState *dev;
@@ -97,11 +95,12 @@ static void lan9215_init(uint32_t base, qemu_irq irq)
 static Exynos4210State *exynos4_boards_init_common(MachineState *machine,
                                                   Exynos4BoardType board_type)
 {
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
    if (smp_cpus != EXYNOS4210_NCPUS && !qtest_enabled()) {
        fprintf(stderr, "%s board supports only %d CPU cores. Ignoring smp_cpus"
                " value.\n",
-                exynos4_machines[board_type].name,
-                exynos4_machines[board_type].max_cpus);
+                mc->name, EXYNOS4210_NCPUS);
    }

    exynos4_board_binfo.ram_size = exynos4_board_ram_size[board_type];
@@ -145,25 +144,40 @@ static void smdkc210_init(MachineState *machine)
    arm_load_kernel(ARM_CPU(first_cpu), &exynos4_board_binfo);
 }

-static QEMUMachine exynos4_machines[EXYNOS4_NUM_OF_BOARDS] = {
-    [EXYNOS4_BOARD_NURI] = {
-        .name = "nuri",
-        .desc = "Samsung NURI board (Exynos4210)",
-        .init = nuri_init,
-        .max_cpus = EXYNOS4210_NCPUS,
-    },
-    [EXYNOS4_BOARD_SMDKC210] = {
-        .name = "smdkc210",
-        .desc = "Samsung SMDKC210 board (Exynos4210)",
-        .init = smdkc210_init,
-        .max_cpus = EXYNOS4210_NCPUS,
-    },
-};
-
-static void exynos4_machine_init(void)
+static void nuri_class_init(ObjectClass *oc, void *data)
 {
-    qemu_register_machine(&exynos4_machines[EXYNOS4_BOARD_NURI]);
-    qemu_register_machine(&exynos4_machines[EXYNOS4_BOARD_SMDKC210]);
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Samsung NURI board (Exynos4210)";
+    mc->init = nuri_init;
+    mc->max_cpus = EXYNOS4210_NCPUS;
 }

-machine_init(exynos4_machine_init);
+static const TypeInfo nuri_type = {
+    .name = MACHINE_TYPE_NAME("nuri"),
+    .parent = TYPE_MACHINE,
+    .class_init = nuri_class_init,
+};
+
+static void smdkc210_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Samsung SMDKC210 board (Exynos4210)";
+    mc->init = smdkc210_init;
+    mc->max_cpus = EXYNOS4210_NCPUS;
+}
+
+static const TypeInfo smdkc210_type = {
+    .name = MACHINE_TYPE_NAME("smdkc210"),
+    .parent = TYPE_MACHINE,
+    .class_init = smdkc210_class_init,
+};
+
+static void exynos4_machines_init(void)
+{
+    type_register_static(&nuri_type);
+    type_register_static(&smdkc210_type);
+}
+
+machine_init(exynos4_machines_init)
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -284,6 +284,12 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = fsl_imx25_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo fsl_imx25_type_info = {
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -258,6 +258,12 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->realize = fsl_imx31_realize;
+
+    /*
+     * Reason: creates an ARM CPU, thus use after free(), see
+     * arm_cpu_class_init()
+     */
+    dc->cannot_destroy_with_object_finalize_yet = true;
 }

 static const TypeInfo fsl_imx31_type_info = {
--- a/hw/arm/gumstix.c
+++ b/hw/arm/gumstix.c
@@ -121,22 +121,38 @@ static void verdex_init(MachineState *machine)
                    qdev_get_gpio_in(cpu->gpio, 99));
 }

-static QEMUMachine connex_machine = {
-    .name = "connex",
-    .desc = "Gumstix Connex (PXA255)",
-    .init = connex_init,
+static void connex_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Gumstix Connex (PXA255)";
+    mc->init = connex_init;
+}
+
+static const TypeInfo connex_type = {
+    .name = MACHINE_TYPE_NAME("connex"),
+    .parent = TYPE_MACHINE,
+    .class_init = connex_class_init,
 };

-static QEMUMachine verdex_machine = {
-    .name = "verdex",
-    .desc = "Gumstix Verdex (PXA270)",
-    .init = verdex_init,
+static void verdex_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Gumstix Verdex (PXA270)";
+    mc->init = verdex_init;
+}
+
+static const TypeInfo verdex_type = {
+    .name = MACHINE_TYPE_NAME("verdex"),
+    .parent = TYPE_MACHINE,
+    .class_init = verdex_class_init,
 };

 static void gumstix_machine_init(void)
 {
-    qemu_register_machine(&connex_machine);
-    qemu_register_machine(&verdex_machine);
+    type_register_static(&connex_type);
+    type_register_static(&verdex_type);
 }

-machine_init(gumstix_machine_init);
+machine_init(gumstix_machine_init)
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -281,7 +281,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id)

    sysram = g_new(MemoryRegion, 1);
    memory_region_init_ram(sysram, NULL, "highbank.sysram", 0x8000,
-                           &error_abort);
+                           &error_fatal);
    memory_region_add_subregion(sysmem, 0xfff88000, sysram);
    if (bios_name != NULL) {
        sysboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
@@ -391,26 +391,42 @@ static void midway_init(MachineState *machine)
    calxeda_init(machine, CALXEDA_MIDWAY);
 }

-static QEMUMachine highbank_machine = {
-    .name = "highbank",
-    .desc = "Calxeda Highbank (ECX-1000)",
-    .init = highbank_init,
-    .block_default_type = IF_SCSI,
-    .max_cpus = 4,
+static void highbank_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Calxeda Highbank (ECX-1000)";
+    mc->init = highbank_init;
+    mc->block_default_type = IF_SCSI;
+    mc->max_cpus = 4;
+}
+
+static const TypeInfo highbank_type = {
+    .name = MACHINE_TYPE_NAME("highbank"),
+    .parent = TYPE_MACHINE,
+    .class_init = highbank_class_init,
 };

-static QEMUMachine midway_machine = {
-    .name = "midway",
-    .desc = "Calxeda Midway (ECX-2000)",
-    .init = midway_init,
-    .block_default_type = IF_SCSI,
-    .max_cpus = 4,
+static void midway_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->desc = "Calxeda Midway (ECX-2000)";
+    mc->init = midway_init;
+    mc->block_default_type = IF_SCSI;
+    mc->max_cpus = 4;
+}
+
+static const TypeInfo midway_type = {
+    .name = MACHINE_TYPE_NAME("midway"),
+    .parent = TYPE_MACHINE,
+    .class_init = midway_class_init,
 };

 static void calxeda_machines_init(void)
 {
-    qemu_register_machine(&highbank_machine);
-    qemu_register_machine(&midway_machine);
+    type_register_static(&highbank_type);
+    type_register_static(&midway_type);
 }

-machine_init(calxeda_machines_init);
+machine_init(calxeda_machines_init)
--- a/hw/arm/imx25_pdk.c
+++ b/hw/arm/imx25_pdk.c
@@ -145,15 +145,10 @@ static void imx25_pdk_init(MachineState *machine)
    }
 }

-static QEMUMachine imx25_pdk_machine = {
-    .name = "imx25_pdk",
-    .desc = "ARM i.MX25 PDK board (ARM926)",
-    .init = imx25_pdk_init,
-};
-
-static void imx25_pdk_machine_init(void)
+static void imx25_pdk_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&imx25_pdk_machine);
+    mc->desc = "ARM i.MX25 PDK board (ARM926)";
+    mc->init = imx25_pdk_init;
 }

-machine_init(imx25_pdk_machine_init)
+DEFINE_MACHINE("imx25-pdk", imx25_pdk_machine_init)
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -266,7 +266,7 @@ static int integratorcm_init(SysBusDevice *dev)
    s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24,
                                   1000);
    memory_region_init_ram(&s->flash, OBJECT(s), "integrator.flash", 0x100000,
-                           &error_abort);
+                           &error_fatal);
    vmstate_register_ram_global(&s->flash);

    memory_region_init_io(&s->iomem, OBJECT(s), &integratorcm_ops, s,
@@ -619,18 +619,13 @@ static void integratorcp_init(MachineState *machine)
    arm_load_kernel(cpu, &integrator_binfo);
 }

-static QEMUMachine integratorcp_machine = {
-    .name = "integratorcp",
-    .desc = "ARM Integrator/CP (ARM926EJ-S)",
-    .init = integratorcp_init,
-};
-
-static void integratorcp_machine_init(void)
+static void integratorcp_machine_init(MachineClass *mc)
 {
-    qemu_register_machine(&integratorcp_machine);
+    mc->desc = "ARM Integrator/CP (ARM926EJ-S)";
+    mc->init = integratorcp_init;
 }

-machine_init(integratorcp_machine_init);
+DEFINE_MACHINE("integratorcp", integratorcp_machine_init)

 static Property core_properties[] = {
    DEFINE_PROP_UINT32("memsz", IntegratorCMState, memsz, 0),
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`# Default configuration for tilegx-linux-user`