Update version for v2.9.0-rc3 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging
2017-04-04 18:36:51 +01:00 · 2017-04-04 18:00:23 +01:00 · 2017-04-04 17:27:32 +01:00 · 2017-04-04 18:06:01 +02:00 · 2017-04-04 18:06:01 +02:00 · 2017-04-04 18:32:25 +03:00
1306 changed files with 50628 additions and 24305 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,18 +6,12 @@
 /config.status
 /config-temp
 /trace-events-all
-/trace/generated-tracers.h
-/trace/generated-tracers.c
-/trace/generated-tracers-dtrace.h
-/trace/generated-tracers.dtrace
 /trace/generated-events.h
 /trace/generated-events.c
 /trace/generated-helpers-wrappers.h
 /trace/generated-helpers.h
 /trace/generated-helpers.c
 /trace/generated-tcg-tracers.h
-/trace/generated-ust-provider.h
-/trace/generated-ust.c
 /ui/shader/texture-blit-frag.h
 /ui/shader/texture-blit-vert.h
 *-timestamp
@@ -105,14 +99,15 @@
 /pc-bios/optionrom/kvmvapic.img
 /pc-bios/s390-ccw/s390-ccw.elf
 /pc-bios/s390-ccw/s390-ccw.img
+/docs/qemu-ga-qapi.texi
 /docs/qemu-ga-ref.html
+/docs/qemu-ga-ref.info*
 /docs/qemu-ga-ref.txt
+/docs/qemu-qmp-qapi.texi
 /docs/qemu-qmp-ref.html
+/docs/qemu-qmp-ref.info*
 /docs/qemu-qmp-ref.txt
-docs/qemu-ga-ref.info*
-docs/qemu-qmp-ref.info*
-/qemu-ga-qapi.texi
-/qemu-qapi.texi
+/docs/version.texi
 *.tps
 .stgit-*
 cscope.*
@@ -120,3 +115,19 @@ tags
 TAGS
 docker-src.*
 *~
+trace.h
+trace.c
+trace-ust.h
+trace-ust.h
+trace-dtrace.h
+trace-dtrace.dtrace
+trace-root.h
+trace-root.c
+trace-ust-root.h
+trace-ust-root.h
+trace-ust-all.h
+trace-ust-all.c
+trace-dtrace-root.h
+trace-dtrace-root.dtrace
+trace-ust-all.h
+trace-ust-all.c
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -0,0 +1,21 @@
+language: c
+env:
+  matrix:
+    - IMAGE=debian-armhf-cross
+      TARGET_LIST=arm-softmmu,arm-linux-user
+    - IMAGE=debian-arm64-cross
+      TARGET_LIST=aarch64-softmmu,aarch64-linux-user
+    - IMAGE=debian-s390x-cross
+      TARGET_LIST=s390x-softmmu,s390x-linux-user
+build:
+  pre_ci:
+    - make docker-image-${IMAGE}
+  pre_ci_boot:
+    image_name: qemu
+    image_tag: ${IMAGE}
+    pull: false
+    options: "-e HOME=/root"
+  ci:
+    - unset CC
+    - ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
+    - make -j2
--- a/.travis.yml
+++ b/.travis.yml
@@ -92,8 +92,8 @@ matrix:
    - env: CONFIG=""
      os: osx
      compiler: clang
-    # Plain Trusty Build
-    - env: CONFIG=""
+    # Plain Trusty System Build
+    - env: CONFIG="--disable-linux-user"
      sudo: required
      addons:
      dist: trusty
@@ -103,16 +103,45 @@ matrix:
        - sudo apt-get build-dep -qq qemu
        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
        - git submodule update --init --recursive
-    # Trusty build with latest stable clang
-    - env: CONFIG=""
+    # Plain Trusty Linux User Build
+    - env: CONFIG="--disable-system"
      sudo: required
+      addons:
+      dist: trusty
+      compiler: gcc
+      before_install:
+        - sudo apt-get update -qq
+        - sudo apt-get build-dep -qq qemu
+        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
+        - git submodule update --init --recursive
+    # Trusty System build with latest stable clang
+    - sudo: required
      addons:
      dist: trusty
      language: generic
      compiler: none
      env:
        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--cc=clang-3.9 --cxx=clang++-3.9"
+        - CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9"
+      before_install:
+        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
+        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
+        - sudo apt-get update -qq
+        - sudo apt-get install -qq -y clang-3.9
+        - sudo apt-get build-dep -qq qemu
+        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
+        - git submodule update --init --recursive
+      before_script:
+        - ./configure ${CONFIG} || cat config.log
+    # Trusty Linux User build with latest stable clang
+    - sudo: required
+      addons:
+      dist: trusty
+      language: generic
+      compiler: none
+      env:
+        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
+        - CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9"
      before_install:
        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
--- a/7
+++ b/7
@@ -116,3 +116,10 @@ if (a == 1) {
 Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
 Besides, good compilers already warn users when '==' is mis-typed as '=',
 even when the constant is on the right.
+
+7. Comment style
+
+We use traditional C-style /* */ comments and avoid // comments.
+
+Rationale: The // form is valid in C99, so this is purely a matter of
+consistency of style. The checkpatch script will warn you about this.
--- a/87
+++ b/87
@@ -323,7 +323,7 @@ Guest CPU Cores (Xen):
 X86
 M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
-L: xen-devel@lists.xensource.com
+L: xen-devel@lists.xenproject.org
 S: Supported
 F: xen-*
 F: */xen*
@@ -561,20 +561,19 @@ F: hw/lm32/milkymist.c
 M68K Machines
 -------------
 an5206
-S: Orphan
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
 F: hw/m68k/an5206.c
 F: hw/m68k/mcf5206.c

-dummy_m68k
-S: Orphan
-F: hw/m68k/dummy_m68k.c
-
 mcf5208
-S: Orphan
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
 F: hw/m68k/mcf5208.c
 F: hw/m68k/mcf_intc.c
 F: hw/char/mcf_uart.c
 F: hw/net/mcf_fec.c
+F: include/hw/m68k/mcf*.h

 MicroBlaze Machines
 -------------------
@@ -601,15 +600,28 @@ S: Maintained
 F: hw/mips/mips_malta.c

 Mipssim
-L: qemu-devel@nongnu.org
-S: Orphan
+M: Yongbok Kim <yongbok.kim@imgtec.com>
+S: Odd Fixes
 F: hw/mips/mips_mipssim.c
+F: hw/net/mipsnet.c

 R4000
 M: Aurelien Jarno <aurelien@aurel32.net>
 S: Maintained
 F: hw/mips/mips_r4k.c

+Fulong 2E
+M: Yongbok Kim <yongbok.kim@imgtec.com>
+S: Odd Fixes
+F: hw/mips/mips_fulong2e.c
+
+Boston
+M: Paul Burton <paul.burton@imgtec.com>
+S: Maintained
+F: hw/core/loader-fit.c
+F: hw/mips/boston.c
+F: hw/pci-host/xilinx-pcie.c
+
 OpenRISC Machines
 -----------------
 or1k-sim
@@ -671,10 +683,13 @@ F: hw/misc/macio/
 F: hw/intc/heathrow_pic.c

 PReP
+M: Hervé Poussineau <hpoussin@reactos.org>
 L: qemu-devel@nongnu.org
 L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Maintained
 F: hw/ppc/prep.c
+F: hw/ppc/prep_systemio.c
+F: hw/ppc/rs6000_mc.c
 F: hw/pci-host/prep.[hc]
 F: hw/isa/pc87312.[hc]
 F: pc-bios/ppc_rom.bin
@@ -906,6 +921,8 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
+F: tests/bios-tables-test.c
+F: tests/acpi-utils.[hc]

 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -1031,7 +1048,7 @@ F: hw/input/virtio-input*.c
 F: include/hw/virtio/virtio-input.h

 virtio-serial
-M: Amit Shah <amit.shah@redhat.com>
+M: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/char/virtio-serial-bus.c
 F: hw/char/virtio-console.c
@@ -1040,7 +1057,7 @@ F: tests/virtio-console-test.c
 F: tests/virtio-serial-test.c

 virtio-rng
-M: Amit Shah <amit.shah@redhat.com>
+M: Amit Shah <amit@kernel.org>
 S: Supported
 F: hw/virtio/virtio-rng.c
 F: include/hw/virtio/virtio-rng.h
@@ -1120,6 +1137,15 @@ F: hw/nvram/chrp_nvram.c
 F: include/hw/nvram/chrp_nvram.h
 F: tests/prom-env-test.c

+VM Generation ID
+M: Ben Warren <ben@skyportsystems.com>
+S: Maintained
+F: hw/acpi/vmgenid.c
+F: include/hw/acpi/vmgenid.h
+F: docs/specs/vmgenid.txt
+F: tests/vmgenid-test.c
+F: stubs/vmgenid.c
+
 Subsystems
 ----------
 Audio
@@ -1194,8 +1220,9 @@ T: git git://github.com/jnsnow/qemu.git bitmaps

 Character device backends
 M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
 S: Maintained
-F: qemu-char.c
+F: chardev/
 F: backends/msmouse.c
 F: backends/testdev.c

@@ -1204,6 +1231,15 @@ M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 S: Maintained
 F: backends/baum.c

+Command line option argument parsing
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qemu/option.h
+F: tests/test-keyval.c
+F: tests/test-qemu-opts.c
+F: util/keyval.c
+F: util/qemu-option.c
+
 Coverity model
 M: Markus Armbruster <armbru@redhat.com>
 S: Supported
@@ -1338,7 +1374,9 @@ X: include/qapi/qmp/
 F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
 F: tests/test-*-visitor.c
+F: tests/test-qapi-*.c
 F: tests/test-qmp-*.c
+F: tests/test-visitor-serialization.c
 F: scripts/qapi*
 F: docs/qapi*
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1390,6 +1428,7 @@ F: qmp.c
 F: monitor.c
 F: docs/*qmp-*
 F: scripts/qmp/
+F: tests/qmp-test.c
 T: git git://repo.or.cz/qemu/armbru.git qapi-next

 Register API
@@ -1427,7 +1466,6 @@ F: scripts/checkpatch.pl

 Migration
 M: Juan Quintela <quintela@redhat.com>
-M: Amit Shah <amit.shah@redhat.com>
 M: Dr. David Alan Gilbert <dgilbert@redhat.com>
 S: Maintained
 F: include/migration/
@@ -1660,14 +1698,6 @@ S: Supported
 F: block/ssh.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block

-ARCHIPELAGO
-M: Chrysostomos Nanakos <chris@include.gr>
-M: Jeff Cody <jcody@redhat.com>
-L: qemu-block@nongnu.org
-S: Maintained
-F: block/archipelago.c
-T: git git://github.com/codyprime/qemu-kvm-jtc.git block
-
 CURL
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
@@ -1798,9 +1828,14 @@ F: docs/block-replication.txt
 Build and test automation
 -------------------------
 M: Alex Bennée <alex.bennee@linaro.org>
+M: Fam Zheng <famz@redhat.com>
 L: qemu-devel@nongnu.org
-S: Supported
+S: Maintained
 F: .travis.yml
+F: .shippable.yml
+F: tests/docker/
+W: https://travis-ci.org/qemu/qemu
+W: http://patchew.org/QEMU/

 Documentation
 -------------
@@ -1809,9 +1844,3 @@ M: Daniel P. Berrange <berrange@redhat.com>
 S: Odd Fixes
 F: docs/build-system.txt

-Docker testing
--------------
-Docker based testing framework and cases
-M: Fam Zheng <famz@redhat.com>
-S: Maintained
-F: tests/docker/
--- a/224
+++ b/224
@@ -26,6 +26,7 @@ endif

 CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
 CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
+CONFIG_XEN := $(CONFIG_XEN_BACKEND)
 CONFIG_ALL=y
 -include config-all-devices.mak
 -include config-all-disas.mak
@@ -50,30 +51,145 @@ endif

 include $(SRC_PATH)/rules.mak

-GENERATED_HEADERS = qemu-version.h config-host.h qemu-options.def
-GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
-GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
-GENERATED_HEADERS += qmp-introspect.h
-GENERATED_SOURCES += qmp-introspect.c
+GENERATED_FILES = qemu-version.h config-host.h qemu-options.def
+GENERATED_FILES += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
+GENERATED_FILES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
+GENERATED_FILES += qmp-introspect.h
+GENERATED_FILES += qmp-introspect.c

-GENERATED_HEADERS += trace/generated-tracers.h
-ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
-GENERATED_HEADERS += trace/generated-tracers-dtrace.h
-endif
-GENERATED_SOURCES += trace/generated-tracers.c
+GENERATED_FILES += trace/generated-tcg-tracers.h

-GENERATED_HEADERS += trace/generated-tcg-tracers.h
+GENERATED_FILES += trace/generated-helpers-wrappers.h
+GENERATED_FILES += trace/generated-helpers.h
+GENERATED_FILES += trace/generated-helpers.c

-GENERATED_HEADERS += trace/generated-helpers-wrappers.h
-GENERATED_HEADERS += trace/generated-helpers.h
-GENERATED_SOURCES += trace/generated-helpers.c
-
-ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
-GENERATED_HEADERS += trace/generated-ust-provider.h
-GENERATED_SOURCES += trace/generated-ust.c
+ifdef CONFIG_TRACE_UST
+GENERATED_FILES += trace-ust-all.h
+GENERATED_FILES += trace-ust-all.c
 endif

-GENERATED_HEADERS += module_block.h
+GENERATED_FILES += module_block.h
+
+TRACE_HEADERS = trace-root.h $(trace-events-subdirs:%=%/trace.h)
+TRACE_SOURCES = trace-root.c $(trace-events-subdirs:%=%/trace.c)
+TRACE_DTRACE =
+ifdef CONFIG_TRACE_DTRACE
+TRACE_HEADERS += trace-dtrace-root.h $(trace-events-subdirs:%=%/trace-dtrace.h)
+TRACE_DTRACE += trace-dtrace-root.dtrace $(trace-events-subdirs:%=%/trace-dtrace.dtrace)
+endif
+ifdef CONFIG_TRACE_UST
+TRACE_HEADERS += trace-ust-root.h $(trace-events-subdirs:%=%/trace-ust.h)
+endif
+
+GENERATED_FILES += $(TRACE_HEADERS)
+GENERATED_FILES += $(TRACE_SOURCES)
+GENERATED_FILES += $(BUILD_DIR)/trace-events-all
+
+trace-group-name = $(shell dirname $1 | sed -e 's/[^a-zA-Z0-9]/_/g')
+
+tracetool-y = $(SRC_PATH)/scripts/tracetool.py
+tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
+
+%/trace.h: %/trace.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace.c: %/trace.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace.c-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=c \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-ust.h: %/trace-ust.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace-ust.h-timestamp: $(SRC_PATH)/%/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-dtrace.dtrace: %/trace-dtrace.dtrace-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+%/trace-dtrace.dtrace-timestamp: $(SRC_PATH)/%/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=$(call trace-group-name,$@) \
+		--format=d \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+%/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y)
+	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
+
+%/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y)
+
+
+trace-root.h: trace-root.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-root.c: trace-root.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-root.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=c \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-root.h: trace-ust-root.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-root.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-all.h: trace-ust-all.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-all.h-timestamp: $(trace-events-files) $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
+		--format=ust-events-h \
+		--backends=$(TRACE_BACKENDS) \
+		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-ust-all.c: trace-ust-all.c-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-ust-all.c-timestamp: $(trace-events-files) $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
+		--format=ust-events-c \
+		--backends=$(TRACE_BACKENDS) \
+		$(trace-events-files) > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-dtrace-root.dtrace: trace-dtrace-root.dtrace-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
+	$(call quiet-command,$(TRACETOOL) \
+		--group=root \
+		--format=d \
+		--backends=$(TRACE_BACKENDS) \
+		$< > $@,"GEN","$(@:%-timestamp=%)")
+
+trace-dtrace-root.h: trace-dtrace-root.dtrace
+	$(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@")
+
+trace-dtrace-root.o: trace-dtrace-root.dtrace

 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
@@ -147,6 +263,7 @@ endif

 dummy := $(call unnest-vars,, \
                stub-obj-y \
+                chardev-obj-y \
                util-obj-y \
                qga-obj-y \
                ivshmem-client-obj-y \
@@ -160,7 +277,8 @@ dummy := $(call unnest-vars,, \
                qom-obj-y \
                io-obj-y \
                common-obj-y \
-                common-obj-m)
+                common-obj-m \
+                trace-obj-y)

 ifneq ($(wildcard config-host.mak),)
 include $(SRC_PATH)/tests/Makefile.include
@@ -186,7 +304,11 @@ qemu-version.h: FORCE
 				printf '""\n'; \
 			fi; \
 		fi) > $@.tmp)
-	$(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)
+	$(call quiet-command, if ! cmp -s $@ $@.tmp; then \
+	  mv $@.tmp $@; \
+	 else \
+	  rm $@.tmp; \
+	 fi)

 config-host.h: config-host.h-timestamp
 config-host.h-timestamp: config-host.mak
@@ -223,7 +345,8 @@ subdir-dtc:dtc/libfdt dtc/tests
 dtc/%:
 	mkdir -p $@

-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
+	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)

 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -247,15 +370,17 @@ libqemuutil.a: $(util-obj-y)

 ######################################################################

+COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
+
 qemu-img.o: qemu-img-cmds.h

-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)

-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)

-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap

 qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
@@ -267,7 +392,6 @@ qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated
 gen-out-type = $(subst .,-,$(suffix $@))

 qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
-qapi-py += $(SRC_PATH)/scripts/qapi2texi.py

 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -320,7 +444,7 @@ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
 QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
 $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)

-qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
+qemu-ga$(EXESUF): $(qga-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)

 ifdef QEMU_GA_MSI_ENABLED
@@ -345,9 +469,9 @@ ifneq ($(EXESUF),)
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif

-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
-ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)

 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
@@ -365,11 +489,10 @@ clean:
 	rm -f fsdev/*.pod
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
-	@# May not be present in GENERATED_HEADERS
+	@# May not be present in GENERATED_FILES
 	rm -f trace/generated-tracers-dtrace.dtrace*
 	rm -f trace/generated-tracers-dtrace.h*
-	rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp)
-	rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
+	rm -f $(foreach f,$(GENERATED_FILES),$(f) $(f)-timestamp)
 	rm -rf qapi-generated
 	rm -rf qga/qapi-generated
 	for d in $(ALL_SUBDIRS); do \
@@ -396,7 +519,7 @@ distclean: clean
 	rm -f qemu-doc.vr qemu-doc.txt
 	rm -f config.log
 	rm -f linux-headers/asm
-	rm -f qemu-ga-qapi.texi qemu-qapi.texi
+	rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi
 	rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7
 	rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt
 	rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf
@@ -473,8 +596,7 @@ endif
 endif


-install: all $(if $(BUILD_DOCS),install-doc) \
-install-datadir install-localstatedir
+install: all $(if $(BUILD_DOCS),install-doc) install-datadir install-localstatedir
 ifneq ($(TOOLS),)
 	$(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
 endif
@@ -543,8 +665,11 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \

 # documentation
 MAKEINFO=makeinfo
-MAKEINFOFLAGS=--no-split --number-sections -D 'VERSION $(VERSION)'
-TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
+MAKEINFOFLAGS=--no-split --number-sections -I docs
+TEXIFLAG=$(if $(V),,--quiet)
+
+docs/version.texi: $(SRC_PATH)/VERSION
+	$(call quiet-command,echo "@set VERSION $(VERSION)" > $@,"GEN","$@")

 %.html: %.texi
 	$(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \
@@ -558,7 +683,10 @@ TEXIFLAG=$(if $(V),,--quiet) --command='@set VERSION $(VERSION)'
 	--plaintext $< -o $@,"GEN","$@")

 %.pdf: %.texi
-	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I . $< -o $@,"GEN","$@")
+	$(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@")
+
+docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi
+docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi

 qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
@@ -572,10 +700,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")

-qemu-qapi.texi: $(qapi-modules) $(qapi-py)
+docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
+
+docs/qemu-qmp-qapi.texi: $(qapi-modules)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")

-qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
+docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")

 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
@@ -596,10 +726,10 @@ qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \
 	qemu-monitor-info.texi

 docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \
-docs/qemu-ga-ref.texi qemu-ga-qapi.texi
+docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi

 docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \
-docs/qemu-qmp-ref.texi qemu-qapi.texi
+docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi


 ifdef CONFIG_WIN32
@@ -661,9 +791,13 @@ endif # CONFIG_WIN
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
-Makefile: $(GENERATED_HEADERS)
+Makefile: $(GENERATED_FILES)
 endif

+.SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \
+	$(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \
+	$(TRACE_DTRACE) $(TRACE_DTRACE:%=%-timestamp)
+
 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
 -include $(wildcard *.d tests/*.d)
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -4,15 +4,13 @@ stub-obj-y = stubs/ crypto/
 util-obj-y = util/ qobject/ qapi/
 util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o

+chardev-obj-y = chardev/
+
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

-block-obj-y = async.o thread-pool.o
 block-obj-y += nbd/
 block-obj-y += block.o blockjob.o
-block-obj-y += main-loop.o iohandler.o qemu-timer.o
-block-obj-$(CONFIG_POSIX) += aio-posix.o
-block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -51,8 +49,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 common-obj-$(CONFIG_LINUX) += fsdev/

 common-obj-y += migration/
-common-obj-y += qemu-char.o #aio.o
-common-obj-y += page_cache.o
+common-obj-y += page_cache.o #aio.o

 common-obj-$(CONFIG_SPICE) += spice-qemu-char.o

@@ -118,47 +115,60 @@ ivshmem-server-obj-y = contrib/ivshmem-server/
 libvhost-user-obj-y = contrib/libvhost-user/

 ######################################################################
-trace-events-y = trace-events
-trace-events-y += util/trace-events
-trace-events-y += crypto/trace-events
-trace-events-y += io/trace-events
-trace-events-y += migration/trace-events
-trace-events-y += block/trace-events
-trace-events-y += hw/block/trace-events
-trace-events-y += hw/char/trace-events
-trace-events-y += hw/intc/trace-events
-trace-events-y += hw/net/trace-events
-trace-events-y += hw/virtio/trace-events
-trace-events-y += hw/audio/trace-events
-trace-events-y += hw/misc/trace-events
-trace-events-y += hw/usb/trace-events
-trace-events-y += hw/scsi/trace-events
-trace-events-y += hw/nvram/trace-events
-trace-events-y += hw/display/trace-events
-trace-events-y += hw/input/trace-events
-trace-events-y += hw/timer/trace-events
-trace-events-y += hw/dma/trace-events
-trace-events-y += hw/sparc/trace-events
-trace-events-y += hw/sd/trace-events
-trace-events-y += hw/isa/trace-events
-trace-events-y += hw/mem/trace-events
-trace-events-y += hw/i386/trace-events
-trace-events-y += hw/9pfs/trace-events
-trace-events-y += hw/ppc/trace-events
-trace-events-y += hw/pci/trace-events
-trace-events-y += hw/s390x/trace-events
-trace-events-y += hw/vfio/trace-events
-trace-events-y += hw/acpi/trace-events
-trace-events-y += hw/arm/trace-events
-trace-events-y += hw/alpha/trace-events
-trace-events-y += ui/trace-events
-trace-events-y += audio/trace-events
-trace-events-y += net/trace-events
-trace-events-y += target/arm/trace-events
-trace-events-y += target/i386/trace-events
-trace-events-y += target/sparc/trace-events
-trace-events-y += target/s390x/trace-events
-trace-events-y += target/ppc/trace-events
-trace-events-y += qom/trace-events
-trace-events-y += linux-user/trace-events
-trace-events-y += qapi/trace-events
+trace-events-subdirs =
+trace-events-subdirs += util
+trace-events-subdirs += crypto
+trace-events-subdirs += io
+trace-events-subdirs += migration
+trace-events-subdirs += block
+trace-events-subdirs += backends
+trace-events-subdirs += hw/block
+trace-events-subdirs += hw/block/dataplane
+trace-events-subdirs += hw/char
+trace-events-subdirs += hw/intc
+trace-events-subdirs += hw/net
+trace-events-subdirs += hw/virtio
+trace-events-subdirs += hw/audio
+trace-events-subdirs += hw/misc
+trace-events-subdirs += hw/usb
+trace-events-subdirs += hw/scsi
+trace-events-subdirs += hw/nvram
+trace-events-subdirs += hw/display
+trace-events-subdirs += hw/input
+trace-events-subdirs += hw/timer
+trace-events-subdirs += hw/dma
+trace-events-subdirs += hw/sparc
+trace-events-subdirs += hw/sd
+trace-events-subdirs += hw/isa
+trace-events-subdirs += hw/mem
+trace-events-subdirs += hw/i386
+trace-events-subdirs += hw/i386/xen
+trace-events-subdirs += hw/9pfs
+trace-events-subdirs += hw/ppc
+trace-events-subdirs += hw/pci
+trace-events-subdirs += hw/s390x
+trace-events-subdirs += hw/vfio
+trace-events-subdirs += hw/acpi
+trace-events-subdirs += hw/arm
+trace-events-subdirs += hw/alpha
+trace-events-subdirs += hw/xen
+trace-events-subdirs += ui
+trace-events-subdirs += audio
+trace-events-subdirs += net
+trace-events-subdirs += target/arm
+trace-events-subdirs += target/i386
+trace-events-subdirs += target/mips
+trace-events-subdirs += target/sparc
+trace-events-subdirs += target/s390x
+trace-events-subdirs += target/ppc
+trace-events-subdirs += qom
+trace-events-subdirs += linux-user
+trace-events-subdirs += qapi
+
+trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
+
+trace-obj-y = trace-root.o
+trace-obj-y += $(trace-events-subdirs:%=%/trace.o)
+trace-obj-$(CONFIG_TRACE_UST) += trace-ust-all.o
+trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace-root.o
+trace-obj-$(CONFIG_TRACE_DTRACE) += $(trace-events-subdirs:%=%/trace-dtrace.o)
--- a/Makefile.target
+++ b/Makefile.target
@@ -50,6 +50,7 @@ endif

 $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(bindir)/$(QEMU_PROG) \
@@ -59,6 +60,7 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=stap \
 		--backends=$(TRACE_BACKENDS) \
 		--binary=$(realpath .)/$(QEMU_PROG) \
@@ -68,6 +70,7 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all

 $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 	$(call quiet-command,$(TRACETOOL) \
+		--group=all \
 		--format=simpletrace-stap \
 		--backends=$(TRACE_BACKENDS) \
 		--probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \
@@ -159,7 +162,7 @@ else
 obj-y += hw/$(TARGET_BASE_ARCH)/
 endif

-GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h
+GENERATED_FILES += hmp-commands.h hmp-commands-info.h

 endif # CONFIG_SOFTMMU

@@ -172,31 +175,36 @@ all-obj-y := $(obj-y)
 target-obj-y :=
 block-obj-y :=
 common-obj-y :=
+chardev-obj-y :=
 include $(SRC_PATH)/Makefile.objs
 dummy := $(call unnest-vars,,target-obj-y)
 target-obj-y-save := $(target-obj-y)
 dummy := $(call unnest-vars,.., \
               block-obj-y \
               block-obj-m \
+               chardev-obj-y \
               crypto-obj-y \
               crypto-aes-obj-y \
               qom-obj-y \
               io-obj-y \
               common-obj-y \
-               common-obj-m)
+               common-obj-m \
+               trace-obj-y)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
 all-obj-y += $(qom-obj-y)
-all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y) $(chardev-obj-y)
 all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
 all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)

 $(QEMU_PROG_BUILD): config-devices.mak

+COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
+
 # build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
+$(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $(filter-out %.mak, $^))
 ifdef CONFIG_DARWIN
 	$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@")
@@ -230,5 +238,5 @@ ifdef CONFIG_TRACE_SYSTEMTAP
 	$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
 endif

-GENERATED_HEADERS += config-target.h
-Makefile: $(GENERATED_HEADERS)
+GENERATED_FILES += config-target.h
+Makefile: $(GENERATED_FILES)
--- a/2
+++ b/2
@@ -1 +1 @@
-2.8.50
+2.8.93
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -28,6 +28,7 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "qemu/cutils.h"
+#include "sysemu/replay.h"

 #define AUDIO_CAP "audio"
 #include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
    if (audio_is_timer_needed ()) {
-        timer_mod (s->ts,
+        timer_mod_anticipate_ns(s->ts,
            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
    }
    else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)

        prev_rpos = hw->rpos;
        played = hw->pcm_ops->run_out (hw, live);
+        replay_audio_out(&played);
        if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
            dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
                   hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)

    while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
        SWVoiceIn *sw;
-        int captured, min;
+        int captured = 0, min;

-        captured = hw->pcm_ops->run_in (hw);
+        if (replay_mode != REPLAY_MODE_PLAY) {
+            captured = hw->pcm_ops->run_in(hw);
+        }
+        replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);

        min = audio_pcm_hw_find_min_in (hw);
        hw->total_samples_captured += captured - min;
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
 bool audio_is_cleaning_up(void);
 void audio_cleanup(void);

+void audio_sample_to_uint64(void *samples, int pos,
+                            uint64_t *left, uint64_t *right);
+void audio_sample_from_uint64(void *samples, int pos,
+                            uint64_t left, uint64_t right);
+
 #endif /* QEMU_AUDIO_H */
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/bswap.h"
+#include "qemu/error-report.h"
 #include "audio.h"

 #define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
    }
 };

+
+void audio_sample_to_uint64(void *samples, int pos,
+                            uint64_t *left, uint64_t *right)
+{
+    struct st_sample *sample = samples;
+    sample += pos;
+#ifdef FLOAT_MIXENG
+    error_report(
+        "Coreaudio and floating point samples are not supported by replay yet");
+    abort();
+#else
+    *left = sample->l;
+    *right = sample->r;
+#endif
+}
+
+void audio_sample_from_uint64(void *samples, int pos,
+                            uint64_t left, uint64_t right)
+{
+    struct st_sample *sample = samples;
+    sample += pos;
+#ifdef FLOAT_MIXENG
+    error_report(
+        "Coreaudio and floating point samples are not supported by replay yet");
+    abort();
+#else
+    sample->l = left;
+    sample->r = right;
+#endif
+}
+
 /*
 * August 21, 1998
 * Copyright 1998 Fabrice Bellard.
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -38,10 +38,14 @@
 #define AUDIO_CAP "sdl"
 #include "audio_int.h"

+#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
+
 typedef struct SDLVoiceOut {
    HWVoiceOut hw;
    int live;
+#if USE_SEMAPHORE
    int rpos;
+#endif
    int decr;
 } SDLVoiceOut;

@@ -53,8 +57,10 @@ static struct {

 static struct SDLAudioState {
    int exit;
+#if USE_SEMAPHORE
    SDL_mutex *mutex;
    SDL_sem *sem;
+#endif
    int initialized;
    bool driver_created;
 } glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)

 static int sdl_lock (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
    if (SDL_LockMutex (s->mutex)) {
        sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
        return -1;
    }
+#else
+    SDL_LockAudio();
+#endif
+
    return 0;
 }

 static int sdl_unlock (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
    if (SDL_UnlockMutex (s->mutex)) {
        sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
        return -1;
    }
+#else
+    SDL_UnlockAudio();
+#endif
+
    return 0;
 }

 static int sdl_post (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
    if (SDL_SemPost (s->sem)) {
        sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
        return -1;
    }
+#endif
+
    return 0;
 }

+#if USE_SEMAPHORE
 static int sdl_wait (SDLAudioState *s, const char *forfn)
 {
    if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
    }
    return 0;
 }
+#endif

 static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
 {
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        int to_mix, decr;

        /* dolog ("in callback samples=%d\n", samples); */
+#if USE_SEMAPHORE
        sdl_wait (s, "sdl_callback");
        if (s->exit) {
            return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        if (!sdl->live) {
            goto again;
        }
+#else
+        if (s->exit || !sdl->live) {
+            break;
+        }
+#endif

        /* dolog ("in callback live=%d\n", live); */
        to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)

            /* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
            hw->clip (buf, src, chunk);
+#if USE_SEMAPHORE
            sdl->rpos = (sdl->rpos + chunk) % hw->samples;
+#else
+            hw->rpos = (hw->rpos + chunk) % hw->samples;
+#endif
            to_mix -= chunk;
            buf += chunk << hw->info.shift;
        }
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
        sdl->live -= decr;
        sdl->decr += decr;

+#if USE_SEMAPHORE
    again:
        if (sdl_unlock (s, "sdl_callback")) {
            return;
        }
+#endif
    }
    /* dolog ("done len=%d\n", len); */
+
+#if (SDL_MAJOR_VERSION >= 2)
+    /* SDL2 does not clear the remaining buffer for us, so do it on our own */
+    if (samples) {
+        memset(buf, 0, samples << hw->info.shift);
+    }
+#endif
 }

 static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
    decr = audio_MIN (sdl->decr, live);
    sdl->decr -= decr;

+#if USE_SEMAPHORE
    sdl->live = live - decr;
    hw->rpos = sdl->rpos;
+#else
+    sdl->live = live;
+#endif

    if (sdl->live > 0) {
        sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
        return NULL;
    }

+#if USE_SEMAPHORE
    s->mutex = SDL_CreateMutex ();
    if (!s->mutex) {
        sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
        SDL_QuitSubSystem (SDL_INIT_AUDIO);
        return NULL;
    }
+#endif

    s->driver_created = true;
    return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
 {
    SDLAudioState *s = opaque;
    sdl_close (s);
+#if USE_SEMAPHORE
    SDL_DestroySemaphore (s->sem);
    SDL_DestroyMutex (s->mutex);
+#endif
    SDL_QuitSubSystem (SDL_INIT_AUDIO);
    s->driver_created = false;
 }
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -1,7 +1,7 @@
 common-obj-y += rng.o rng-egd.o
 common-obj-$(CONFIG_POSIX) += rng-random.o

-common-obj-y += msmouse.o testdev.o
+common-obj-y += msmouse.o wctablet.o testdev.o
 common-obj-$(CONFIG_BRLAPI) += baum.o
 baum.o-cflags := $(SDL_CFLAGS)

--- a/backends/baum.c
+++ b/backends/baum.c
@@ -616,9 +616,9 @@ static void baum_chr_read(void *opaque)
    }
 }

-static void baum_chr_free(Chardev *chr)
+static void char_braille_finalize(Object *obj)
 {
-    BaumChardev *baum = BAUM_CHARDEV(chr);
+    BaumChardev *baum = BAUM_CHARDEV(obj);

    timer_free(baum->cellCount_timer);
    if (baum->brlapi) {
@@ -659,23 +659,18 @@ static void char_braille_class_init(ObjectClass *oc, void *data)
    cc->open = baum_chr_open;
    cc->chr_write = baum_chr_write;
    cc->chr_accept_input = baum_chr_accept_input;
-    cc->chr_free = baum_chr_free;
 }

 static const TypeInfo char_braille_type_info = {
    .name = TYPE_CHARDEV_BRAILLE,
    .parent = TYPE_CHARDEV,
    .instance_size = sizeof(BaumChardev),
+    .instance_finalize = char_braille_finalize,
    .class_init = char_braille_class_init,
 };

 static void register_types(void)
 {
-    static const CharDriver driver = {
-        .kind = CHARDEV_BACKEND_KIND_BRAILLE,
-    };
-
-    register_char_driver(&driver);
    type_register_static(&char_braille_type_info);
 }

--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -320,10 +320,12 @@ static int cryptodev_builtin_sym_operation(

    sess = builtin->sessions[op_info->session_id];

-    ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
-                               op_info->iv_len, errp);
-    if (ret < 0) {
-        return -VIRTIO_CRYPTO_ERR;
+    if (op_info->iv_len > 0) {
+        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+                                   op_info->iv_len, errp);
+        if (ret < 0) {
+            return -VIRTIO_CRYPTO_ERR;
+        }
    }

    if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
@@ -359,8 +361,6 @@ static void cryptodev_builtin_cleanup(
        }
    }

-    assert(queues == 1);
-
    for (i = 0; i < queues; i++) {
        cc = backend->conf.peers.ccs[i];
        if (cc) {
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -64,14 +64,6 @@ out:
    error_propagate(errp, local_err);
 }

-static uint16List **host_memory_append_node(uint16List **node,
-                                            unsigned long value)
-{
-     *node = g_malloc0(sizeof(**node));
-     (*node)->value = value;
-     return &(*node)->next;
-}
-
 static void
 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -82,23 +74,25 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
-
-    node = host_memory_append_node(node, value);
-
    if (value == MAX_NODES) {
-        goto out;
+        return;
    }

+    *node = g_malloc0(sizeof(**node));
+    (*node)->value = value;
+    node = &(*node)->next;
+
    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
        if (value == MAX_NODES) {
            break;
        }

-        node = host_memory_append_node(node, value);
+        *node = g_malloc0(sizeof(**node));
+        (*node)->value = value;
+        node = &(*node)->next;
    } while (true);

-out:
    visit_type_uint16List(v, name, &host_nodes, errp);
 }

@@ -224,7 +218,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

-        os_mem_prealloc(fd, ptr, sz, &local_err);
+        os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
@@ -328,7 +322,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
-                            &local_err);
+                            smp_cpus, &local_err);
            if (local_err) {
                goto out;
            }
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -139,9 +139,9 @@ static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len)
    return len;
 }

-static void msmouse_chr_free(struct Chardev *chr)
+static void char_msmouse_finalize(Object *obj)
 {
-    MouseChardev *mouse = MOUSE_CHARDEV(chr);
+    MouseChardev *mouse = MOUSE_CHARDEV(obj);

    qemu_input_handler_unregister(mouse->hs);
 }
@@ -172,23 +172,18 @@ static void char_msmouse_class_init(ObjectClass *oc, void *data)
    cc->open = msmouse_chr_open;
    cc->chr_write = msmouse_chr_write;
    cc->chr_accept_input = msmouse_chr_accept_input;
-    cc->chr_free = msmouse_chr_free;
 }

 static const TypeInfo char_msmouse_type_info = {
    .name = TYPE_CHARDEV_MSMOUSE,
    .parent = TYPE_CHARDEV,
    .instance_size = sizeof(MouseChardev),
+    .instance_finalize = char_msmouse_finalize,
    .class_init = char_msmouse_class_init,
 };

 static void register_types(void)
 {
-    static const CharDriver driver = {
-        .kind = CHARDEV_BACKEND_KIND_MSMOUSE,
-    };
-
-    register_char_driver(&driver);
    type_register_static(&char_msmouse_type_info);
 }

--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -123,11 +123,6 @@ static const TypeInfo char_testdev_type_info = {

 static void register_types(void)
 {
-    static const CharDriver driver = {
-        .kind = CHARDEV_BACKEND_KIND_TESTDEV,
-    };
-
-    register_char_driver(&driver);
    type_register_static(&char_testdev_type_info);
 }

--- a/backends/trace-events
+++ b/backends/trace-events
@@ -0,0 +1,10 @@
+# See docs/tracing.txt for syntax documentation.
+
+# backends/wctablet.c
+wct_init(void) ""
+wct_cmd_re(void) ""
+wct_cmd_st(void) ""
+wct_cmd_sp(void) ""
+wct_cmd_ts(int input) "0x%02x"
+wct_cmd_other(const char *cmd) "%s"
+wct_speed(int speed) "%d"
--- a/backends/wctablet.c
+++ b/backends/wctablet.c
@@ -0,0 +1,369 @@
+/*
+ * QEMU Wacom Penpartner serial tablet emulation
+ *
+ * some protocol details:
+ *   http://linuxwacom.sourceforge.net/wiki/index.php/Serial_Protocol_IV
+ *
+ * Copyright (c) 2016 Anatoli Huseu1
+ * Copyright (c) 2016,17 Gerd Hoffmann
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "ui/console.h"
+#include "ui/input.h"
+#include "trace.h"
+
+
+#define WC_OUTPUT_BUF_MAX_LEN 512
+#define WC_COMMAND_MAX_LEN 60
+
+#define WC_L7(n) ((n) & 127)
+#define WC_M7(n) (((n) >> 7) & 127)
+#define WC_H2(n) ((n) >> 14)
+
+#define WC_L4(n) ((n) & 15)
+#define WC_H4(n) (((n) >> 4) & 15)
+
+/* Model string and config string */
+#define WC_MODEL_STRING_LENGTH 18
+uint8_t WC_MODEL_STRING[WC_MODEL_STRING_LENGTH + 1] = "~#CT-0045R,V1.3-5,";
+
+#define WC_CONFIG_STRING_LENGTH 8
+uint8_t WC_CONFIG_STRING[WC_CONFIG_STRING_LENGTH + 1] = "96,N,8,0";
+
+#define WC_FULL_CONFIG_STRING_LENGTH 61
+uint8_t WC_FULL_CONFIG_STRING[WC_FULL_CONFIG_STRING_LENGTH + 1] = {
+    0x5c, 0x39, 0x36, 0x2c, 0x4e, 0x2c, 0x38, 0x2c,
+    0x31, 0x28, 0x01, 0x24, 0x57, 0x41, 0x43, 0x30,
+    0x30, 0x34, 0x35, 0x5c, 0x5c, 0x50, 0x45, 0x4e, 0x5c,
+    0x57, 0x41, 0x43, 0x30, 0x30, 0x30, 0x30, 0x5c,
+    0x54, 0x61, 0x62, 0x6c, 0x65, 0x74, 0x0d, 0x0a,
+    0x43, 0x54, 0x2d, 0x30, 0x30, 0x34, 0x35, 0x52,
+    0x2c, 0x56, 0x31, 0x2e, 0x33, 0x2d, 0x35, 0x0d,
+    0x0a, 0x45, 0x37, 0x29
+};
+
+/* This structure is used to save private info for Wacom Tablet. */
+typedef struct {
+    Chardev parent;
+    QemuInputHandlerState *hs;
+
+    /* Query string from serial */
+    uint8_t query[100];
+    int query_index;
+
+    /* Command to be sent to serial port */
+    uint8_t outbuf[WC_OUTPUT_BUF_MAX_LEN];
+    int outlen;
+
+    int line_speed;
+    bool send_events;
+    int axis[INPUT_AXIS__MAX];
+    bool btns[INPUT_BUTTON__MAX];
+
+} TabletChardev;
+
+#define TYPE_CHARDEV_WCTABLET "chardev-wctablet"
+#define WCTABLET_CHARDEV(obj)                                      \
+    OBJECT_CHECK(TabletChardev, (obj), TYPE_CHARDEV_WCTABLET)
+
+
+static void wctablet_chr_accept_input(Chardev *chr);
+
+static void wctablet_shift_input(TabletChardev *tablet, int count)
+{
+    tablet->query_index -= count;
+    memmove(tablet->query, tablet->query + count, tablet->query_index);
+    tablet->query[tablet->query_index] = 0;
+}
+
+static void wctablet_queue_output(TabletChardev *tablet, uint8_t *buf, int count)
+{
+    if (tablet->outlen + count > sizeof(tablet->outbuf)) {
+        return;
+    }
+
+    memcpy(tablet->outbuf + tablet->outlen, buf, count);
+    tablet->outlen += count;
+    wctablet_chr_accept_input(CHARDEV(tablet));
+}
+
+static void wctablet_reset(TabletChardev *tablet)
+{
+    /* clear buffers */
+    tablet->query_index = 0;
+    tablet->outlen = 0;
+    /* reset state */
+    tablet->send_events = false;
+}
+
+static void wctablet_queue_event(TabletChardev *tablet)
+{
+    uint8_t codes[8] = { 0xe0, 0, 0, 0, 0, 0, 0 };
+
+    if (tablet->line_speed != 9600) {
+        return;
+    }
+
+    int newX = tablet->axis[INPUT_AXIS_X] * 0.1537;
+    int nexY = tablet->axis[INPUT_AXIS_Y] * 0.1152;
+
+    codes[0] = codes[0] | WC_H2(newX);
+    codes[1] = codes[1] | WC_M7(newX);
+    codes[2] = codes[2] | WC_L7(newX);
+
+    codes[3] = codes[3] | WC_H2(nexY);
+    codes[4] = codes[4] | WC_M7(nexY);
+    codes[5] = codes[5] | WC_L7(nexY);
+
+    if (tablet->btns[INPUT_BUTTON_LEFT]) {
+        codes[0] = 0xa0;
+    }
+
+    wctablet_queue_output(tablet, codes, 7);
+}
+
+static void wctablet_input_event(DeviceState *dev, QemuConsole *src,
+                                InputEvent *evt)
+{
+    TabletChardev *tablet = (TabletChardev *)dev;
+    InputMoveEvent *move;
+    InputBtnEvent *btn;
+
+    switch (evt->type) {
+    case INPUT_EVENT_KIND_ABS:
+        move = evt->u.abs.data;
+        tablet->axis[move->axis] = move->value;
+        break;
+
+    case INPUT_EVENT_KIND_BTN:
+        btn = evt->u.btn.data;
+        tablet->btns[btn->button] = btn->down;
+        break;
+
+    default:
+        /* keep gcc happy */
+        break;
+    }
+}
+
+static void wctablet_input_sync(DeviceState *dev)
+{
+    TabletChardev *tablet = (TabletChardev *)dev;
+
+    if (tablet->send_events) {
+        wctablet_queue_event(tablet);
+    }
+}
+
+static QemuInputHandler wctablet_handler = {
+    .name  = "QEMU Wacome Pen Tablet",
+    .mask  = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS,
+    .event = wctablet_input_event,
+    .sync  = wctablet_input_sync,
+};
+
+static void wctablet_chr_accept_input(Chardev *chr)
+{
+    TabletChardev *tablet = WCTABLET_CHARDEV(chr);
+    int len, canWrite;
+
+    canWrite = qemu_chr_be_can_write(chr);
+    len = canWrite;
+    if (len > tablet->outlen) {
+        len = tablet->outlen;
+    }
+
+    if (len) {
+        qemu_chr_be_write(chr, tablet->outbuf, len);
+        tablet->outlen -= len;
+        if (tablet->outlen) {
+            memmove(tablet->outbuf, tablet->outbuf + len, tablet->outlen);
+        }
+    }
+}
+
+static int wctablet_chr_write(struct Chardev *chr,
+                              const uint8_t *buf, int len)
+{
+    TabletChardev *tablet = WCTABLET_CHARDEV(chr);
+    unsigned int i, clen;
+    char *pos;
+
+    if (tablet->line_speed != 9600) {
+        return len;
+    }
+    for (i = 0; i < len && tablet->query_index < sizeof(tablet->query) - 1; i++) {
+        tablet->query[tablet->query_index++] = buf[i];
+    }
+    tablet->query[tablet->query_index] = 0;
+
+    while (tablet->query_index > 0 && (tablet->query[0] == '@'  ||
+                                       tablet->query[0] == '\r' ||
+                                       tablet->query[0] == '\n')) {
+        wctablet_shift_input(tablet, 1);
+    }
+    if (!tablet->query_index) {
+        return len;
+    }
+
+    if (strncmp((char *)tablet->query, "~#", 2) == 0) {
+        /* init / detect sequence */
+        trace_wct_init();
+        wctablet_shift_input(tablet, 2);
+        wctablet_queue_output(tablet, WC_MODEL_STRING,
+                              WC_MODEL_STRING_LENGTH);
+        return len;
+    }
+
+    /* detect line */
+    pos = strchr((char *)tablet->query, '\r');
+    if (!pos) {
+        pos = strchr((char *)tablet->query, '\n');
+    }
+    if (!pos) {
+        return len;
+    }
+    clen = pos - (char *)tablet->query;
+
+    /* process commands */
+    if (strncmp((char *)tablet->query, "RE", 2) == 0 &&
+        clen == 2) {
+        trace_wct_cmd_re();
+        wctablet_shift_input(tablet, 3);
+        wctablet_queue_output(tablet, WC_CONFIG_STRING,
+                              WC_CONFIG_STRING_LENGTH);
+
+    } else if (strncmp((char *)tablet->query, "ST", 2) == 0 &&
+               clen == 2) {
+        trace_wct_cmd_st();
+        wctablet_shift_input(tablet, 3);
+        tablet->send_events = true;
+        wctablet_queue_event(tablet);
+
+    } else if (strncmp((char *)tablet->query, "SP", 2) == 0 &&
+               clen == 2) {
+        trace_wct_cmd_sp();
+        wctablet_shift_input(tablet, 3);
+        tablet->send_events = false;
+
+    } else if (strncmp((char *)tablet->query, "TS", 2) == 0 &&
+               clen == 3) {
+        unsigned int input = tablet->query[2];
+        uint8_t codes[7] = {
+            0xa3,
+            ((input & 0x80) == 0) ? 0x7e : 0x7f,
+            (((WC_H4(input) & 0x7) ^ 0x5) << 4) | (WC_L4(input) ^ 0x7),
+            0x03,
+            0x7f,
+            0x7f,
+            0x00,
+        };
+        trace_wct_cmd_ts(input);
+        wctablet_shift_input(tablet, 4);
+        wctablet_queue_output(tablet, codes, 7);
+
+    } else {
+        tablet->query[clen] = 0; /* terminate line for printing */
+        trace_wct_cmd_other((char *)tablet->query);
+        wctablet_shift_input(tablet, clen + 1);
+
+    }
+
+    return len;
+}
+
+static int wctablet_chr_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    TabletChardev *tablet = WCTABLET_CHARDEV(chr);
+    QEMUSerialSetParams *ssp;
+
+    switch (cmd) {
+    case CHR_IOCTL_SERIAL_SET_PARAMS:
+        ssp = arg;
+        if (tablet->line_speed != ssp->speed) {
+            trace_wct_speed(ssp->speed);
+            wctablet_reset(tablet);
+            tablet->line_speed = ssp->speed;
+        }
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+static void wctablet_chr_finalize(Object *obj)
+{
+    TabletChardev *tablet = WCTABLET_CHARDEV(obj);
+
+    qemu_input_handler_unregister(tablet->hs);
+    g_free(tablet);
+}
+
+static void wctablet_chr_open(Chardev *chr,
+                              ChardevBackend *backend,
+                              bool *be_opened,
+                              Error **errp)
+{
+    TabletChardev *tablet = WCTABLET_CHARDEV(chr);
+
+    *be_opened = true;
+
+    /* init state machine */
+    memcpy(tablet->outbuf, WC_FULL_CONFIG_STRING, WC_FULL_CONFIG_STRING_LENGTH);
+    tablet->outlen = WC_FULL_CONFIG_STRING_LENGTH;
+    tablet->query_index = 0;
+
+    tablet->hs = qemu_input_handler_register((DeviceState *)tablet,
+                                             &wctablet_handler);
+}
+
+static void wctablet_chr_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = wctablet_chr_open;
+    cc->chr_write = wctablet_chr_write;
+    cc->chr_ioctl = wctablet_chr_ioctl;
+    cc->chr_accept_input = wctablet_chr_accept_input;
+}
+
+static const TypeInfo wctablet_type_info = {
+    .name = TYPE_CHARDEV_WCTABLET,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(TabletChardev),
+    .instance_finalize = wctablet_chr_finalize,
+    .class_init = wctablet_chr_class_init,
+};
+
+static void register_types(void)
+{
+     type_register_static(&wctablet_type_info);
+}
+
+type_init(register_types);
--- a/balloon.c
+++ b/balloon.c
@@ -29,7 +29,7 @@
 #include "exec/cpu-common.h"
 #include "sysemu/kvm.h"
 #include "sysemu/balloon.h"
-#include "trace.h"
+#include "trace-root.h"
 #include "qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
--- a/block.c
+++ b/block.c
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,7 +19,6 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
@@ -41,7 +40,6 @@ gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
-archipelago.o-libs := $(ARCHIPELAGO_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
 dmg-bz2.o-libs     := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
--- a/block/archipelago.c
+++ b/block/archipelago.c
--- a/block/backup.c
+++ b/block/backup.c
@@ -24,6 +24,7 @@
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitmap.h"
+#include "qemu/error-report.h"

 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */
@@ -64,7 +65,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
        retry = false;
        QLIST_FOREACH(req, &job->inflight_reqs, list) {
            if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue);
+                qemu_co_queue_wait(&req->wait_queue, NULL);
                retry = true;
                break;
            }
@@ -467,13 +468,14 @@ static void coroutine_fn backup_run(void *opaque)
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (; start < end; start++) {
            bool error_is_read;
+            int alloced = 0;
+
            if (yield_and_check(job)) {
                break;
            }

            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
                int i, n;
-                int alloced = 0;

                /* Check to see if these blocks are already in the
                 * backing file. */
@@ -491,7 +493,7 @@ static void coroutine_fn backup_run(void *opaque)
                                sectors_per_cluster - i, &n);
                    i += n;

-                    if (alloced == 1 || n == 0) {
+                    if (alloced || n == 0) {
                        break;
                    }
                }
@@ -503,8 +505,13 @@ static void coroutine_fn backup_run(void *opaque)
                }
            }
            /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(job, start * sectors_per_cluster,
-                                sectors_per_cluster, &error_is_read, false);
+            if (alloced < 0) {
+                ret = alloced;
+            } else {
+                ret = backup_do_cow(job, start * sectors_per_cluster,
+                                    sectors_per_cluster, &error_is_read,
+                                    false);
+            }
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
@@ -618,14 +625,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

-    job = block_job_create(job_id, &backup_job_driver, bs, speed,
-                           creation_flags, cb, opaque, errp);
+    /* job->common.len is fixed, so we can't allow resize */
+    job = block_job_create(job_id, &backup_job_driver, bs,
+                           BLK_PERM_CONSISTENT_READ,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
+                           speed, creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

-    job->target = blk_new();
-    blk_insert_bs(job->target, target);
+    /* The target must match the source in size, so no resize here either */
+    job->target = blk_new(BLK_PERM_WRITE,
+                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                          BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
+    ret = blk_insert_bs(job->target, target, errp);
+    if (ret < 0) {
+        goto error;
+    }

    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
@@ -638,7 +655,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible. */
    ret = bdrv_get_info(target, &bdi);
-    if (ret < 0 && !target->backing) {
+    if (ret == -ENOTSUP && !target->backing) {
+        /* Cluster size is not defined */
+        error_report("WARNING: The target block device doesn't provide "
+                     "information about the block size and it doesn't have a "
+                     "backing file. The default block size of %u bytes is "
+                     "used. If the actual block size of the target exceeds "
+                     "this default, the backup may be unusable",
+                     BACKUP_CLUSTER_SIZE_DEFAULT);
+        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
@@ -652,7 +678,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
    }

-    block_job_add_bdrv(&job->common, target);
+    /* Required permissions are already taken with target's blk_new() */
+    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
+                       &error_abort);
    job->common.len = len;
    block_job_txn_add_job(txn, &job->common);

--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -405,12 +405,6 @@ out:
    return ret;
 }

-static void error_callback_bh(void *opaque)
-{
-    Coroutine *co = opaque;
-    qemu_coroutine_enter(co);
-}
-
 static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
 {
    BDRVBlkdebugState *s = bs->opaque;
@@ -423,8 +417,7 @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
    }

    if (!immediately) {
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
-                                qemu_coroutine_self());
+        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
        qemu_coroutine_yield();
    }

@@ -670,7 +663,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)

 static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file->bs, offset);
+    return bdrv_truncate(bs->file, offset);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -741,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_file_open         = blkdebug_open,
    .bdrv_close             = blkdebug_close,
    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
+    .bdrv_child_perm        = bdrv_filter_default_perms,
+
    .bdrv_getlength         = blkdebug_getlength,
    .bdrv_truncate          = blkdebug_truncate,
    .bdrv_refresh_filename  = blkdebug_refresh_filename,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -60,7 +60,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
    Request *req = opaque;
-    qemu_coroutine_enter(req->co);
+    aio_co_wake(req->co);
    qemu_bh_delete(req->bh);
    g_free(req);
 }
@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {

    .bdrv_file_open         = blkreplay_open,
    .bdrv_close             = blkreplay_close,
+    .bdrv_child_perm        = bdrv_filter_default_perms,
    .bdrv_getlength         = blkreplay_getlength,

    .bdrv_co_preadv         = blkreplay_co_preadv,
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
    .bdrv_parse_filename              = blkverify_parse_filename,
    .bdrv_file_open                   = blkverify_open,
    .bdrv_close                       = blkverify_close,
+    .bdrv_child_perm                  = bdrv_filter_default_perms,
    .bdrv_getlength                   = blkverify_getlength,
    .bdrv_refresh_filename            = blkverify_refresh_filename,

--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -59,9 +59,14 @@ struct BlockBackend {
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

+    uint64_t perm;
+    uint64_t shared_perm;
+
    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
+    int quiesce_counter;
 };

 typedef struct BlockBackendAIOCB {
@@ -77,6 +82,7 @@ static const AIOCBInfo block_backend_aiocb_info = {

 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);

 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +105,25 @@ static void blk_root_drained_end(BdrvChild *child);
 static void blk_root_change_media(BdrvChild *child, bool load);
 static void blk_root_resize(BdrvChild *child);

+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    char *dev_id;
+
+    if (blk->name) {
+        return g_strdup(blk->name);
+    }
+
+    dev_id = blk_get_attached_dev_id(blk);
+    if (*dev_id) {
+        return dev_id;
+    } else {
+        /* TODO Callback into the BB owner for something more detailed */
+        g_free(dev_id);
+        return g_strdup("a block device");
+    }
+}
+
 static const char *blk_root_get_name(BdrvChild *child)
 {
    return blk_name(child->opaque);
@@ -110,6 +135,7 @@ static const BdrvChildRole child_root = {
    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,
+    .get_parent_desc    = blk_root_get_parent_desc,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
@@ -117,15 +143,23 @@ static const BdrvChildRole child_root = {

 /*
 * Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
 * Return the new BlockBackend on success, null on failure.
 */
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
 {
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
+    blk->perm = perm;
+    blk->shared_perm = shared_perm;
    blk_set_enable_write_cache(blk, true);

    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +189,38 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
    BlockBackend *blk;
    BlockDriverState *bs;
+    uint64_t perm;

-    blk = blk_new();
+    /* blk_new_open() is mainly used in .bdrv_create implementations and the
+     * tools where sharing isn't a concern because the BDS stays private, so we
+     * just request permission according to the flags.
+     *
+     * The exceptions are xen_disk and blockdev_init(); in these cases, the
+     * caller of blk_new_open() doesn't make use of the permissions, but they
+     * shouldn't hurt either. We can still share everything here because the
+     * guest devices will add their own blockers if they can't share. */
+    perm = BLK_PERM_CONSISTENT_READ;
+    if (flags & BDRV_O_RDWR) {
+        perm |= BLK_PERM_WRITE;
+    }
+    if (flags & BDRV_O_RESIZE) {
+        perm |= BLK_PERM_RESIZE;
+    }
+
+    blk = blk_new(perm, BLK_PERM_ALL);
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

-    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+                                       perm, BLK_PERM_ALL, blk, errp);
+    if (!blk->root) {
+        bdrv_unref(bs);
+        blk_unref(blk);
+        return NULL;
+    }

    return blk;
 }
@@ -495,16 +552,49 @@ void blk_remove_bs(BlockBackend *blk)
 /*
 * Associates a new BlockDriverState with @blk.
 */
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+                                       blk->perm, blk->shared_perm, blk, errp);
+    if (blk->root == NULL) {
+        return -EPERM;
+    }
    bdrv_ref(bs);
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
+
+    return 0;
+}
+
+/*
+ * Sets the permission bitmasks that the user of the BlockBackend needs.
+ */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp)
+{
+    int ret;
+
+    if (blk->root) {
+        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    blk->perm = perm;
+    blk->shared_perm = shared_perm;
+
+    return 0;
+}
+
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+    *perm = blk->perm;
+    *shared_perm = blk->shared_perm;
 }

 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +643,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
+    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(blk);
 }

@@ -610,29 +701,44 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
 {
    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
-     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
-     * is set. */
+     * it that way, so we can assume blk->dev, if present, is a DeviceState if
+     * blk->dev_ops is set. Non-device users may use dev_ops without device. */
    assert(!blk->legacy_dev);

    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
+
+    /* Are we currently quiesced? Should we enforce this right now? */
+    if (blk->quiesce_counter && ops->drained_begin) {
+        ops->drained_begin(opaque);
+    }
 }

 /*
 * Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded. @errp is set then.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
 {
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;
+        Error *local_err = NULL;

        assert(!blk->legacy_dev);

        tray_was_open = blk_dev_is_tray_open(blk);
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+        if (local_err) {
+            assert(load == true);
+            error_propagate(errp, local_err);
+            return;
+        }
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
@@ -646,7 +752,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)

 static void blk_root_change_media(BdrvChild *child, bool load)
 {
-    blk_dev_change_media_cb(child->opaque, load);
+    blk_dev_change_media_cb(child->opaque, load, NULL);
 }

 /*
@@ -880,7 +986,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
 {
    QEMUIOVector qiov;
    struct iovec iov;
-    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
@@ -897,9 +1002,14 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
        .ret    = NOT_DONE,
    };

-    co = qemu_coroutine_create(co_entry, &rwco);
-    qemu_coroutine_enter(co);
-    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        co_entry(&rwco);
+    } else {
+        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+        qemu_coroutine_enter(co);
+        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    }

    return rwco.ret;
 }
@@ -979,7 +1089,6 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 static void blk_aio_complete_bh(void *opaque)
 {
    BlkAioEmAIOCB *acb = opaque;
-
    assert(acb->has_returned);
    blk_aio_complete(acb);
 }
@@ -1602,7 +1711,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk_bs(blk), offset);
+    return bdrv_truncate(blk->root, offset);
 }

 static void blk_pdiscard_entry(void *opaque)
@@ -1768,6 +1877,12 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;

+    if (++blk->quiesce_counter == 1) {
+        if (blk->dev_ops && blk->dev_ops->drained_begin) {
+            blk->dev_ops->drained_begin(blk->dev_opaque);
+        }
+    }
+
    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

@@ -1779,7 +1894,14 @@ static void blk_root_drained_begin(BdrvChild *child)
 static void blk_root_drained_end(BdrvChild *child)
 {
    BlockBackend *blk = child->opaque;
+    assert(blk->quiesce_counter);

    assert(blk->public.io_limits_disabled);
    --blk->public.io_limits_disabled;
+
+    if (--blk->quiesce_counter == 0) {
+        if (blk->dev_ops && blk->dev_ops->drained_end) {
+            blk->dev_ops->drained_end(blk->dev_opaque);
+        }
+    }
 }
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,6 +104,12 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
    struct bochs_header bochs;
    int ret;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    bs->read_only = true; /* no write support yet */

    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
@@ -287,6 +293,7 @@ static BlockDriver bdrv_bochs = {
    .instance_size	= sizeof(BDRVBochsState),
    .bdrv_probe		= bochs_probe,
    .bdrv_open		= bochs_open,
+    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = bochs_refresh_limits,
    .bdrv_co_preadv = bochs_co_preadv,
    .bdrv_close		= bochs_close,
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,6 +66,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
    uint32_t offsets_size, max_compressed_block_size = 1, i;
    int ret;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    bs->read_only = true;

    /* read header */
@@ -284,6 +290,7 @@ static BlockDriver bdrv_cloop = {
    .instance_size  = sizeof(BDRVCloopState),
    .bdrv_probe     = cloop_probe,
    .bdrv_open      = cloop_open,
+    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_refresh_limits = cloop_refresh_limits,
    .bdrv_co_preadv = cloop_co_preadv,
    .bdrv_close     = cloop_close,
--- a/block/commit.c
+++ b/block/commit.c
@@ -13,6 +13,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
@@ -36,6 +37,7 @@ typedef struct CommitBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *active;
+    BlockDriverState *commit_top_bs;
    BlockBackend *top;
    BlockBackend *base;
    BlockdevOnError on_error;
@@ -83,12 +85,23 @@ static void commit_complete(BlockJob *job, void *opaque)
    BlockDriverState *active = s->active;
    BlockDriverState *top = blk_bs(s->top);
    BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
+    BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
    int ret = data->ret;
+    bool remove_commit_top_bs = false;
+
+    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
+     * the normal backing chain can be restored. */
+    blk_unref(s->base);

    if (!block_job_is_cancelled(&s->common) && ret == 0) {
        /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+        ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+                                     s->backing_file_str);
+    } else if (overlay_bs) {
+        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
+         * after the failed/cancelled commit job is gone? If we already wrote
+         * something to base, the intermediate images aren't valid any more. */
+        remove_commit_top_bs = true;
    }

    /* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +115,15 @@ static void commit_complete(BlockJob *job, void *opaque)
    }
    g_free(s->backing_file_str);
    blk_unref(s->top);
-    blk_unref(s->base);
    block_job_completed(&s->common, ret);
    g_free(data);
+
+    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
+     * filter driver from the backing chain. Do this as the final step so that
+     * the 'consistent read' permission can be granted.  */
+    if (remove_commit_top_bs) {
+        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+    }
 }

 static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +227,57 @@ static const BlockJobDriver commit_job_driver = {
    .start         = commit_run,
 };

+static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int64_t coroutine_fn bdrv_commit_top_get_block_status(
+    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+    BlockDriverState **file)
+{
+    *pnum = nb_sectors;
+    *file = bs->backing->bs;
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+           (sector_num << BDRV_SECTOR_BITS);
+}
+
+static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+    bdrv_refresh_filename(bs->backing->bs);
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->backing->bs->filename);
+}
+
+static void bdrv_commit_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = 0;
+    *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_commit_top = {
+    .format_name                = "commit_top",
+    .bdrv_co_preadv             = bdrv_commit_top_preadv,
+    .bdrv_co_get_block_status   = bdrv_commit_top_get_block_status,
+    .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
+    .bdrv_close                 = bdrv_commit_top_close,
+    .bdrv_child_perm            = bdrv_commit_top_child_perm,
+};
+
 void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
-                  Error **errp)
+                  const char *filter_node_name, Error **errp)
 {
    CommitBlockJob *s;
    BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +285,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    int orig_base_flags;
    BlockDriverState *iter;
    BlockDriverState *overlay_bs;
+    BlockDriverState *commit_top_bs = NULL;
    Error *local_err = NULL;
+    int ret;

    assert(top != bs);
    if (top == base) {
@@ -234,8 +302,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        return;
    }

-    s = block_job_create(job_id, &commit_job_driver, bs, speed,
-                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
+                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -256,30 +324,82 @@ void commit_start(const char *job_id, BlockDriverState *bs,
        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
        if (local_err != NULL) {
            error_propagate(errp, local_err);
-            block_job_unref(&s->common);
-            return;
+            goto fail;
        }
    }

+    /* Insert commit_top block node above top, so we can block consistent read
+     * on the backing chain below it */
+    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
+                                         errp);
+    if (commit_top_bs == NULL) {
+        goto fail;
+    }
+
+    bdrv_set_backing_hd(commit_top_bs, top, &local_err);
+    if (local_err) {
+        bdrv_unref(commit_top_bs);
+        commit_top_bs = NULL;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+    bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
+    if (local_err) {
+        bdrv_unref(commit_top_bs);
+        commit_top_bs = NULL;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    s->commit_top_bs = commit_top_bs;
+    bdrv_unref(commit_top_bs);

    /* Block all nodes between top and base, because they will
     * disappear from the chain after this operation. */
    assert(bdrv_chain_contains(top, base));
-    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
-        block_job_add_bdrv(&s->common, iter);
+    for (iter = top; iter != base; iter = backing_bs(iter)) {
+        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
+         * at s->base (if writes are blocked for a node, they are also blocked
+         * for its backing file). The other options would be a second filter
+         * driver above s->base. */
+        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+                                 errp);
+        if (ret < 0) {
+            goto fail;
+        }
    }
+
+    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
+    if (ret < 0) {
+        goto fail;
+    }
+
    /* overlay_bs must be blocked because it needs to be modified to
-     * update the backing image string, but if it's the root node then
-     * don't block it again */
-    if (bs != overlay_bs) {
-        block_job_add_bdrv(&s->common, overlay_bs);
+     * update the backing image string. */
+    ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
+                             BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
+    if (ret < 0) {
+        goto fail;
    }

-    s->base = blk_new();
-    blk_insert_bs(s->base, base);
+    s->base = blk_new(BLK_PERM_CONSISTENT_READ
+                      | BLK_PERM_WRITE
+                      | BLK_PERM_RESIZE,
+                      BLK_PERM_CONSISTENT_READ
+                      | BLK_PERM_GRAPH_MOD
+                      | BLK_PERM_WRITE_UNCHANGED);
+    ret = blk_insert_bs(s->base, base, errp);
+    if (ret < 0) {
+        goto fail;
+    }

-    s->top = blk_new();
-    blk_insert_bs(s->top, top);
+    /* Required permissions are already taken with block_job_add_bdrv() */
+    s->top = blk_new(0, BLK_PERM_ALL);
+    ret = blk_insert_bs(s->top, top, errp);
+    if (ret < 0) {
+        goto fail;
+    }

    s->active = bs;

@@ -292,6 +412,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,

    trace_commit_start(bs, base, top, s);
    block_job_start(&s->common);
+    return;
+
+fail:
+    if (s->base) {
+        blk_unref(s->base);
+    }
+    if (s->top) {
+        blk_unref(s->top);
+    }
+    if (commit_top_bs) {
+        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+    }
+    block_job_unref(&s->common);
 }


@@ -301,11 +434,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 int bdrv_commit(BlockDriverState *bs)
 {
    BlockBackend *src, *backing;
+    BlockDriverState *backing_file_bs = NULL;
+    BlockDriverState *commit_top_bs = NULL;
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;
+    Error *local_err = NULL;

    if (!drv)
        return -ENOMEDIUM;
@@ -328,11 +464,33 @@ int bdrv_commit(BlockDriverState *bs)
        }
    }

-    src = blk_new();
-    blk_insert_bs(src, bs);
+    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);

-    backing = blk_new();
-    blk_insert_bs(backing, bs->backing->bs);
+    ret = blk_insert_bs(src, bs, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }
+
+    /* Insert commit_top block node above backing, so we can write to it */
+    backing_file_bs = backing_bs(bs);
+
+    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
+                                         &local_err);
+    if (commit_top_bs == NULL) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }
+
+    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
+    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
+
+    ret = blk_insert_bs(backing, backing_file_bs, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }

    length = blk_getlength(src);
    if (length < 0) {
@@ -404,8 +562,12 @@ int bdrv_commit(BlockDriverState *bs)
 ro_cleanup:
    qemu_vfree(buf);

-    blk_unref(src);
    blk_unref(backing);
+    if (backing_file_bs) {
+        bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
+    }
+    bdrv_unref(commit_top_bs);
+    blk_unref(src);

    if (ro) {
        /* ignoring error return here */
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -300,6 +300,12 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    QCryptoBlockOpenOptions *open_opts = NULL;
    unsigned int cflags = 0;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -383,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)

    offset += payload_offset;

-    return bdrv_truncate(bs->file->bs, offset);
+    return bdrv_truncate(bs->file, offset);
 }

 static void block_crypto_close(BlockDriverState *bs)
@@ -622,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
    .bdrv_probe         = block_crypto_probe_luks,
    .bdrv_open          = block_crypto_open_luks,
    .bdrv_close         = block_crypto_close,
+    .bdrv_child_perm    = bdrv_format_default_perms,
    .bdrv_create        = block_crypto_create_luks,
    .bdrv_truncate      = block_crypto_truncate,
    .create_opts        = &block_crypto_create_opts_luks,
--- a/block/curl.c
+++ b/block/curl.c
@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
    char *cookie;
    bool accept_range;
    AioContext *aio_context;
+    QemuMutex mutex;
    char *username;
    char *password;
    char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
    return FIND_RET_NONE;
 }

+/* Called with s->mutex held.  */
 static void curl_multi_check_completion(BDRVCURLState *s)
 {
    int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                        continue;
                    }

-                    acb->common.cb(acb->common.opaque, -EPROTO);
+                    qemu_mutex_unlock(&s->mutex);
+                    acb->common.cb(acb->common.opaque, -EIO);
+                    qemu_mutex_lock(&s->mutex);
                    qemu_aio_unref(acb);
                    state->acb[i] = NULL;
                }
@@ -386,9 +390,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
    }
 }

-static void curl_multi_do(void *arg)
+/* Called with s->mutex held.  */
+static void curl_multi_do_locked(CURLState *s)
 {
-    CURLState *s = (CURLState *)arg;
    CURLSocket *socket, *next_socket;
    int running;
    int r;
@@ -406,12 +410,23 @@ static void curl_multi_do(void *arg)
    }
 }

+static void curl_multi_do(void *arg)
+{
+    CURLState *s = (CURLState *)arg;
+
+    qemu_mutex_lock(&s->s->mutex);
+    curl_multi_do_locked(s);
+    qemu_mutex_unlock(&s->s->mutex);
+}
+
 static void curl_multi_read(void *arg)
 {
    CURLState *s = (CURLState *)arg;

-    curl_multi_do(arg);
+    qemu_mutex_lock(&s->s->mutex);
+    curl_multi_do_locked(s);
    curl_multi_check_completion(s->s);
+    qemu_mutex_unlock(&s->s->mutex);
 }

 static void curl_multi_timeout_do(void *arg)
@@ -424,9 +439,11 @@ static void curl_multi_timeout_do(void *arg)
        return;
    }

+    qemu_mutex_lock(&s->mutex);
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);

    curl_multi_check_completion(s);
+    qemu_mutex_unlock(&s->mutex);
 #else
    abort();
 #endif
@@ -642,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    const char *cookie;
    double d;
    const char *secretid;
+    const char *protocol_delimiter;

    static int inited = 0;

@@ -683,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
        goto out_noclean;
    }

+    if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
+        !strstart(protocol_delimiter, "://", NULL))
+    {
+        error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
+                   "start with '%s://')", bs->drv->protocol_name, file,
+                   bs->drv->protocol_name);
+        goto out_noclean;
+    }
+
    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);

@@ -759,6 +786,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    curl_easy_cleanup(state->curl);
    state->curl = NULL;

+    qemu_mutex_init(&s->mutex);
    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));

    qemu_opts_del(opts);
@@ -784,13 +812,17 @@ static void curl_readv_bh_cb(void *p)
 {
    CURLState *state;
    int running;
+    int ret = -EINPROGRESS;

    CURLAIOCB *acb = p;
-    BDRVCURLState *s = acb->common.bs->opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVCURLState *s = bs->opaque;

    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
    size_t end;

+    qemu_mutex_lock(&s->mutex);
+
    // In case we have the requested data already (e.g. read-ahead),
    // we can just call the callback and be done.
    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -798,7 +830,7 @@ static void curl_readv_bh_cb(void *p)
            qemu_aio_unref(acb);
            // fall through
        case FIND_RET_WAIT:
-            return;
+            goto out;
        default:
            break;
    }
@@ -806,9 +838,8 @@ static void curl_readv_bh_cb(void *p)
    // No cache found, so let's start a new request
    state = curl_init_state(acb->common.bs, s);
    if (!state) {
-        acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_unref(acb);
-        return;
+        ret = -EIO;
+        goto out;
    }

    acb->start = 0;
@@ -822,9 +853,8 @@ static void curl_readv_bh_cb(void *p)
    state->orig_buf = g_try_malloc(state->buf_len);
    if (state->buf_len && state->orig_buf == NULL) {
        curl_clean_state(state);
-        acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_unref(acb);
-        return;
+        ret = -ENOMEM;
+        goto out;
    }
    state->acb[0] = acb;

@@ -837,6 +867,13 @@ static void curl_readv_bh_cb(void *p)

    /* Tell curl it needs to kick things off */
    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
+
+out:
+    qemu_mutex_unlock(&s->mutex);
+    if (ret != -EINPROGRESS) {
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_unref(acb);
+    }
 }

 static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
@@ -861,6 +898,7 @@ static void curl_close(BlockDriverState *bs)

    DPRINTF("CURL: Close\n");
    curl_detach_aio_context(bs);
+    qemu_mutex_destroy(&s->mutex);

    g_free(s->cookie);
    g_free(s->url);
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -413,6 +413,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
    int64_t offset;
    int ret;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    block_module_load_one("dmg-bz2");
    bs->read_only = true;

@@ -691,6 +697,7 @@ static BlockDriver bdrv_dmg = {
    .bdrv_probe     = dmg_probe,
    .bdrv_open      = dmg_open,
    .bdrv_refresh_limits = dmg_refresh_limits,
+    .bdrv_child_perm     = bdrv_format_default_perms,
    .bdrv_co_preadv = dmg_co_preadv,
    .bdrv_close     = dmg_close,
 };
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -144,6 +144,7 @@ typedef struct BDRVRawState {
    bool has_write_zeroes:1;
    bool discard_zeroes:1;
    bool use_linux_aio:1;
+    bool page_cache_inconsistent:1;
    bool has_fallocate;
    bool needs_alignment;
 } BDRVRawState;
@@ -219,28 +220,28 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
 {
    unsigned int sector_size;
    bool success = false;
+    int i;

    errno = ENOTSUP;
-
-    /* Try a few ioctls to get the right size */
+    static const unsigned long ioctl_list[] = {
 #ifdef BLKSSZGET
-    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        BLKSSZGET,
 #endif
 #ifdef DKIOCGETBLOCKSIZE
-    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        DKIOCGETBLOCKSIZE,
 #endif
 #ifdef DIOCGSECTORSIZE
-    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        DIOCGSECTORSIZE,
 #endif
+    };
+
+    /* Try a few ioctls to get the right size */
+    for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) {
+        if (ioctl(fd, ioctl_list[i], &sector_size) >= 0) {
+            *sector_size_p = sector_size;
+            success = true;
+        }
+    }

    return success ? 0 : -errno;
 }
@@ -668,6 +669,51 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
 #endif
 }

+static int hdev_get_max_segments(const struct stat *st)
+{
+#ifdef CONFIG_LINUX
+    char buf[32];
+    const char *end;
+    char *sysfspath;
+    int ret;
+    int fd = -1;
+    long max_segments;
+
+    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
+                                major(st->st_rdev), minor(st->st_rdev));
+    fd = open(sysfspath, O_RDONLY);
+    if (fd == -1) {
+        ret = -errno;
+        goto out;
+    }
+    do {
+        ret = read(fd, buf, sizeof(buf) - 1);
+    } while (ret == -1 && errno == EINTR);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    } else if (ret == 0) {
+        ret = -EIO;
+        goto out;
+    }
+    buf[ret] = 0;
+    /* The file is ended with '\n', pass 'end' to accept that. */
+    ret = qemu_strtol(buf, &end, 10, &max_segments);
+    if (ret == 0 && end && *end == '\n') {
+        ret = max_segments;
+    }
+
+out:
+    if (fd != -1) {
+        close(fd);
+    }
+    g_free(sysfspath);
+    return ret;
+#else
+    return -ENOTSUP;
+#endif
+}
+
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
@@ -679,6 +725,11 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
                bs->bl.max_transfer = pow2floor(ret);
            }
+            ret = hdev_get_max_segments(&st);
+            if (ret > 0) {
+                bs->bl.max_transfer = MIN(bs->bl.max_transfer,
+                                          ret * getpagesize());
+            }
        }
    }

@@ -774,10 +825,31 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)

 static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
 {
+    BDRVRawState *s = aiocb->bs->opaque;
    int ret;

+    if (s->page_cache_inconsistent) {
+        return -EIO;
+    }
+
    ret = qemu_fdatasync(aiocb->aio_fildes);
    if (ret == -1) {
+        /* There is no clear definition of the semantics of a failing fsync(),
+         * so we may have to assume the worst. The sad truth is that this
+         * assumption is correct for Linux. Some pages are now probably marked
+         * clean in the page cache even though they are inconsistent with the
+         * on-disk contents. The next fdatasync() call would succeed, but no
+         * further writeback attempt will be made. We can't get back to a state
+         * in which we know what is on disk (we would have to rewrite
+         * everything that was touched since the last fdatasync() at least), so
+         * make bdrv_flush() fail permanently. Given that the behaviour isn't
+         * really defined, I have little hope that other OSes are doing better.
+         *
+         * Obviously, this doesn't affect O_DIRECT, which bypasses the page
+         * cache. */
+        if ((s->open_flags & O_DIRECT) == 0) {
+            s->page_cache_inconsistent = true;
+        }
        return -errno;
    }
    return 0;
@@ -1591,18 +1663,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    }

-    if (ftruncate(fd, total_size) != 0) {
-        result = -errno;
-        error_setg_errno(errp, -result, "Could not resize file");
-        goto out_close;
-    }
-
    switch (prealloc) {
 #ifdef CONFIG_POSIX_FALLOCATE
    case PREALLOC_MODE_FALLOC:
-        /* posix_fallocate() doesn't set errno. */
+        /*
+         * Truncating before posix_fallocate() makes it about twice slower on
+         * file systems that do not support fallocate(), trying to check if a
+         * block is allocated before allocating it, so don't do that here.
+         */
        result = -posix_fallocate(fd, 0, total_size);
        if (result != 0) {
+            /* posix_fallocate() doesn't set errno. */
            error_setg_errno(errp, -result,
                             "Could not preallocate data for the new file");
        }
@@ -1610,6 +1681,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    case PREALLOC_MODE_FULL:
    {
+        /*
+         * Knowing the final size from the beginning could allow the file
+         * system driver to do less allocations and possibly avoid
+         * fragmentation of the file.
+         */
+        if (ftruncate(fd, total_size) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+            goto out_close;
+        }
+
        int64_t num = 0, left = total_size;
        buf = g_malloc0(65536);

@@ -1636,6 +1718,10 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
        break;
    }
    case PREALLOC_MODE_OFF:
+        if (ftruncate(fd, total_size) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+        }
        break;
    default:
        result = -EINVAL;
@@ -2107,6 +2193,12 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;

 #if defined(__APPLE__) && defined(__MACH__)
+    /*
+     * Caution: while qdict_get_str() is fine, getting non-string types
+     * would require more care.  When @options come from -blockdev or
+     * blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
    const char *filename = qdict_get_str(options, "filename");
    char bsd_path[MAXPATHLEN] = "";
    bool error_occurred = false;
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -12,6 +12,7 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/util.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
@@ -151,7 +152,7 @@ static QemuOptsList runtime_type_opts = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "tcp|unix",
+            .help = "inet|unix",
        },
        { /* end of list */ }
    },
@@ -170,14 +171,14 @@ static QemuOptsList runtime_unix_opts = {
    },
 };

-static QemuOptsList runtime_tcp_opts = {
-    .name = "gluster_tcp",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+static QemuOptsList runtime_inet_opts = {
+    .name = "gluster_inet",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_inet_opts.head),
    .desc = {
        {
            .name = GLUSTER_OPT_TYPE,
            .type = QEMU_OPT_STRING,
-            .help = "tcp|unix",
+            .help = "inet|unix",
        },
        {
            .name = GLUSTER_OPT_HOST,
@@ -320,7 +321,7 @@ static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
 static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
                                  const char *filename)
 {
-    GlusterServer *gsconf;
+    SocketAddressFlat *gsconf;
    URI *uri;
    QueryParams *qp = NULL;
    bool is_unix = false;
@@ -331,19 +332,19 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        return -EINVAL;
    }

-    gconf->server = g_new0(GlusterServerList, 1);
-    gconf->server->value = gsconf = g_new0(GlusterServer, 1);
+    gconf->server = g_new0(SocketAddressFlatList, 1);
+    gconf->server->value = gsconf = g_new0(SocketAddressFlat, 1);

    /* transport */
    if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
-        gsconf->type = GLUSTER_TRANSPORT_TCP;
+        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
-        gsconf->type = GLUSTER_TRANSPORT_TCP;
+        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
    } else if (!strcmp(uri->scheme, "gluster+unix")) {
-        gsconf->type = GLUSTER_TRANSPORT_UNIX;
+        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_UNIX;
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
-        gsconf->type = GLUSTER_TRANSPORT_TCP;
+        gsconf->type = SOCKET_ADDRESS_FLAT_TYPE_INET;
        error_report("Warning: rdma feature is not supported, falling "
                     "back to tcp");
    } else {
@@ -373,11 +374,11 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        }
        gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
    } else {
-        gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
+        gsconf->u.inet.host = g_strdup(uri->server ? uri->server : "localhost");
        if (uri->port) {
-            gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
+            gsconf->u.inet.port = g_strdup_printf("%d", uri->port);
        } else {
-            gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+            gsconf->u.inet.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
        }
    }

@@ -395,7 +396,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    struct glfs *glfs;
    int ret;
    int old_errno;
-    GlusterServerList *server;
+    SocketAddressFlatList *server;
    unsigned long long port;

    glfs = glfs_find_preopened(gconf->volume);
@@ -411,22 +412,27 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
    glfs_set_preopened(gconf->volume, glfs);

    for (server = gconf->server; server; server = server->next) {
-        if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
-            ret = glfs_set_volfile_server(glfs,
-                                   GlusterTransport_lookup[server->value->type],
+        switch (server->value->type) {
+        case SOCKET_ADDRESS_FLAT_TYPE_UNIX:
+            ret = glfs_set_volfile_server(glfs, "unix",
                                   server->value->u.q_unix.path, 0);
-        } else {
-            if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 ||
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_INET:
+            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
                port > 65535) {
                error_setg(errp, "'%s' is not a valid port number",
-                           server->value->u.tcp.port);
+                           server->value->u.inet.port);
                errno = EINVAL;
                goto out;
            }
-            ret = glfs_set_volfile_server(glfs,
-                                   GlusterTransport_lookup[server->value->type],
-                                   server->value->u.tcp.host,
+            ret = glfs_set_volfile_server(glfs, "tcp",
+                                   server->value->u.inet.host,
                                   (int)port);
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_VSOCK:
+        case SOCKET_ADDRESS_FLAT_TYPE_FD:
+        default:
+            abort();
        }

        if (ret < 0) {
@@ -444,13 +450,13 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
        error_setg(errp, "Gluster connection for volume %s, path %s failed"
                         " to connect", gconf->volume, gconf->path);
        for (server = gconf->server; server; server = server->next) {
-            if (server->value->type  == GLUSTER_TRANSPORT_UNIX) {
+            if (server->value->type  == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
                error_append_hint(errp, "hint: failed on socket %s ",
                                  server->value->u.q_unix.path);
            } else {
                error_append_hint(errp, "hint: failed on host %s and port %s ",
-                                  server->value->u.tcp.host,
-                                  server->value->u.tcp.port);
+                                  server->value->u.inet.host,
+                                  server->value->u.inet.port);
            }
        }

@@ -474,23 +480,6 @@ out:
    return NULL;
 }

-static int qapi_enum_parse(const char *opt)
-{
-    int i;
-
-    if (!opt) {
-        return GLUSTER_TRANSPORT__MAX;
-    }
-
-    for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) {
-        if (!strcmp(opt, GlusterTransport_lookup[i])) {
-            return i;
-        }
-    }
-
-    return i;
-}
-
 /*
 * Convert the json formatted command line into qapi.
 */
@@ -498,14 +487,14 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                                  QDict *options, Error **errp)
 {
    QemuOpts *opts;
-    GlusterServer *gsconf;
-    GlusterServerList *curr = NULL;
+    SocketAddressFlat *gsconf = NULL;
+    SocketAddressFlatList *curr = NULL;
    QDict *backing_options = NULL;
    Error *local_err = NULL;
    char *str = NULL;
    const char *ptr;
    size_t num_servers;
-    int i;
+    int i, type;

    /* create opts info from runtime_json_opts list */
    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -547,25 +536,32 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
-        gsconf = g_new0(GlusterServer, 1);
-        gsconf->type = qapi_enum_parse(ptr);
        if (!ptr) {
            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;

        }
-        if (gsconf->type == GLUSTER_TRANSPORT__MAX) {
-            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
-                       GLUSTER_OPT_TYPE, "tcp or unix");
+        gsconf = g_new0(SocketAddressFlat, 1);
+        if (!strcmp(ptr, "tcp")) {
+            ptr = "inet";       /* accept legacy "tcp" */
+        }
+        type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
+                               SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, NULL);
+        if (type != SOCKET_ADDRESS_FLAT_TYPE_INET
+            && type != SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+            error_setg(&local_err,
+                       "Parameter '%s' may be 'inet' or 'unix'",
+                       GLUSTER_OPT_TYPE);
            error_append_hint(&local_err, GERR_INDEX_HINT, i);
            goto out;
        }
+        gsconf->type = type;
        qemu_opts_del(opts);

-        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
-            /* create opts info from runtime_tcp_opts list */
-            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+        if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+            /* create opts info from runtime_inet_opts list */
+            opts = qemu_opts_create(&runtime_inet_opts, NULL, 0, &error_abort);
            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
            if (local_err) {
                goto out;
@@ -578,7 +574,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.tcp.host = g_strdup(ptr);
+            gsconf->u.inet.host = g_strdup(ptr);
            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
            if (!ptr) {
                error_setg(&local_err, QERR_MISSING_PARAMETER,
@@ -586,28 +582,28 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
                error_append_hint(&local_err, GERR_INDEX_HINT, i);
                goto out;
            }
-            gsconf->u.tcp.port = g_strdup(ptr);
+            gsconf->u.inet.port = g_strdup(ptr);

            /* defend for unsupported fields in InetSocketAddress,
             * i.e. @ipv4, @ipv6  and @to
             */
            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
            if (ptr) {
-                gsconf->u.tcp.has_to = true;
+                gsconf->u.inet.has_to = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
            if (ptr) {
-                gsconf->u.tcp.has_ipv4 = true;
+                gsconf->u.inet.has_ipv4 = true;
            }
            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
            if (ptr) {
-                gsconf->u.tcp.has_ipv6 = true;
+                gsconf->u.inet.has_ipv6 = true;
            }
-            if (gsconf->u.tcp.has_to) {
+            if (gsconf->u.inet.has_to) {
                error_setg(&local_err, "Parameter 'to' not supported");
                goto out;
            }
-            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
+            if (gsconf->u.inet.has_ipv4 || gsconf->u.inet.has_ipv6) {
                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
                goto out;
            }
@@ -632,16 +628,18 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }

        if (gconf->server == NULL) {
-            gconf->server = g_new0(GlusterServerList, 1);
+            gconf->server = g_new0(SocketAddressFlatList, 1);
            gconf->server->value = gsconf;
            curr = gconf->server;
        } else {
-            curr->next = g_new0(GlusterServerList, 1);
+            curr->next = g_new0(SocketAddressFlatList, 1);
            curr->next->value = gsconf;
            curr = curr->next;
        }
+        gsconf = NULL;

-        qdict_del(backing_options, str);
+        QDECREF(backing_options);
+        backing_options = NULL;
        g_free(str);
        str = NULL;
    }
@@ -650,11 +648,10 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,

 out:
    error_propagate(errp, local_err);
+    qapi_free_SocketAddressFlat(gsconf);
    qemu_opts_del(opts);
-    if (str) {
-        qdict_del(backing_options, str);
-        g_free(str);
-    }
+    g_free(str);
+    QDECREF(backing_options);
    errno = EINVAL;
    return -errno;
 }
@@ -683,7 +680,7 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
                             "file.volume=testvol,file.path=/path/a.qcow2"
                             "[,file.debug=9]"
                             "[,file.logfile=/path/filename.log],"
-                             "file.server.0.type=tcp,"
+                             "file.server.0.type=inet,"
                             "file.server.0.host=1.2.3.4,"
                             "file.server.0.port=24007,"
                             "file.server.1.transport=unix,"
@@ -698,13 +695,6 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
    return qemu_gluster_glfs_init(gconf, errp);
 }

-static void qemu_gluster_complete_aio(void *opaque)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
-
-    qemu_coroutine_enter(acb->coroutine);
-}
-
 /*
 * AIO callback routine called from GlusterFS thread.
 */
@@ -720,7 +710,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
        acb->ret = -EIO; /* Partial read/write - fail it */
    }

-    aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
+    aio_co_schedule(acb->aio_context, acb->coroutine);
 }

 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
--- a/block/io.c
+++ b/block/io.c
@@ -189,7 +189,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
    bdrv_dec_in_flight(bs);
    bdrv_drained_begin(bs);
    data->done = true;
-    qemu_coroutine_enter(co);
+    aio_co_wake(co);
 }

 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -539,7 +539,7 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue);
+                    qemu_co_queue_wait(&req->wait_queue, NULL);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
@@ -813,7 +813,7 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
-    qemu_coroutine_enter(co->coroutine);
+    aio_co_wake(co->coroutine);
 }

 static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
    return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
 }

-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
 {
+    BlockDriverState *bs = child->bs;
+
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
    size_t skip_bytes;
    int ret;

+    assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+
    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
@@ -1001,10 +1005,11 @@ err:
 * handles copy on read, zeroing after EOF, and fragmentation of large
 * reads; any other features must be implemented by the caller.
 */
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
 {
+    BlockDriverState *bs = child->bs;
    int64_t total_bytes, max_bytes;
    int ret = 0;
    uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
        }

        if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
+            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
            goto out;
        }
    }
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    }

    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
-    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
 * Forwards an already correctly aligned write request to the BlockDriver,
 * after possibly fragmenting it.
 */
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
 {
+    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+    assert(child->perm & BLK_PERM_WRITE);
+    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    return ret;
 }

-static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                int64_t offset,
                                                unsigned int bytes,
                                                BdrvRequestFlags flags,
                                                BdrvTrackedRequest *req)
 {
+    BlockDriverState *bs = child->bs;
    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        memset(buf + head_padding_bytes, 0, zero_bytes);
-        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
                                   align, &local_qiov,
                                   flags & ~BDRV_REQ_ZERO_WRITE);
        if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
    if (bytes >= align) {
        /* Write the aligned part in the middle. */
        uint64_t aligned_bytes = bytes & ~(align - 1);
-        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
+        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                   NULL, flags);
        if (ret < 0) {
            goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        mark_request_serialising(req, align);
        wait_serialising_requests(req);
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, req, offset, align,
+        ret = bdrv_aligned_preadv(child, req, offset, align,
                                  align, &local_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        memset(buf, 0, bytes);
-        ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
+        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
    }
 fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

    if (!qiov) {
-        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
        goto out;
    }

@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
-                                  align, &tail_qiov, 0);
+        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
+                                  align, align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
        bytes = ROUND_UP(bytes, align);
    }

-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
+    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

@@ -1751,7 +1760,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+        ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
                                    *pnum, pnum, file);
        goto out;
    }
@@ -2080,6 +2089,11 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
+            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
+             * assert that we're not using an I/O thread.  Thread-safe
+             * code should use bdrv_aio_cancel_async exclusively.
+             */
+            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            abort();
@@ -2239,35 +2253,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
    return &acb->common;
 }

-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
-                   BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCB *acb;
-
-    acb = g_malloc(aiocb_info->aiocb_size);
-    acb->aiocb_info = aiocb_info;
-    acb->bs = bs;
-    acb->cb = cb;
-    acb->opaque = opaque;
-    acb->refcnt = 1;
-    return acb;
-}
-
-void qemu_aio_ref(void *p)
-{
-    BlockAIOCB *acb = p;
-    acb->refcnt++;
-}
-
-void qemu_aio_unref(void *p)
-{
-    BlockAIOCB *acb = p;
-    assert(acb->refcnt > 0);
-    if (--acb->refcnt == 0) {
-        g_free(acb);
-    }
-}
-
 /**************************************************************/
 /* Coroutine block device emulation */

@@ -2299,7 +2284,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)

    /* Wait until any previous flushes are completed */
    while (bs->active_flush_req) {
-        qemu_co_queue_wait(&bs->flush_queue);
+        qemu_co_queue_wait(&bs->flush_queue, NULL);
    }

    bs->active_flush_req = true;
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -58,6 +58,7 @@ typedef struct IscsiLun {
    int events;
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;
+    QemuMutex mutex;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
@@ -102,7 +103,6 @@ typedef struct IscsiTask {

 typedef struct IscsiAIOCB {
    BlockAIOCB common;
-    QEMUIOVector *qiov;
    QEMUBH *bh;
    IscsiLun *iscsilun;
    struct scsi_task *task;
@@ -165,8 +165,9 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
 static void iscsi_co_generic_bh_cb(void *opaque)
 {
    struct IscsiTask *iTask = opaque;
+
    iTask->complete = 1;
-    qemu_coroutine_enter(iTask->co);
+    aio_co_wake(iTask->co);
 }

 static void iscsi_retry_timer_expired(void *opaque)
@@ -174,7 +175,7 @@ static void iscsi_retry_timer_expired(void *opaque)
    struct IscsiTask *iTask = opaque;
    iTask->complete = 1;
    if (iTask->co) {
-        qemu_coroutine_enter(iTask->co);
+        aio_co_wake(iTask->co);
    }
 }

@@ -251,6 +252,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
    return ret;
 }

+/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
@@ -351,6 +353,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
 static void iscsi_process_read(void *arg);
 static void iscsi_process_write(void *arg);

+/* Called with QemuMutex held.  */
 static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
@@ -394,8 +397,10 @@ iscsi_process_read(void *arg)
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

+    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLIN);
    iscsi_set_events(iscsilun);
+    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static void
@@ -404,8 +409,10 @@ iscsi_process_write(void *arg)
    IscsiLun *iscsilun = arg;
    struct iscsi_context *iscsi = iscsilun->iscsi;

+    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_service(iscsi, POLLOUT);
    iscsi_set_events(iscsilun);
+    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -584,6 +591,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    uint64_t lba;
    uint32_t num_sectors;
    bool fua = flags & BDRV_REQ_FUA;
+    int r = 0;

    if (fua) {
        assert(iscsilun->dpofua);
@@ -599,6 +607,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -627,6 +636,7 @@ retry:
    }
 #endif
    if (iTask.task == NULL) {
+        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
@@ -635,7 +645,9 @@ retry:
 #endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -650,12 +662,15 @@ retry:

    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
    }

    iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);

-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }


@@ -688,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
        goto out;
    }

+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
-        goto out;
+        goto out_unlock;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.do_retry) {
@@ -716,20 +734,20 @@ retry:
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
-        goto out;
+        goto out_unlock;
    }

    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL) {
        ret = -EIO;
-        goto out;
+        goto out_unlock;
    }

    lbasd = &lbas->descriptors[0];

    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
        ret = -EIO;
-        goto out;
+        goto out_unlock;
    }

    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -751,6 +769,8 @@ retry:
    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
 out:
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
@@ -813,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -843,6 +864,7 @@ retry:
    }
 #endif
    if (iTask.task == NULL) {
+        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }
 #if LIBISCSI_API_VERSION < (20160603)
@@ -850,7 +872,9 @@ retry:
 #endif
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -862,6 +886,7 @@ retry:
        iTask.complete = 0;
        goto retry;
    }
+    qemu_mutex_unlock(&iscsilun->mutex);

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
@@ -876,15 +901,19 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
    struct IscsiTask iTask;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
+        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -896,6 +925,7 @@ retry:
        iTask.complete = 0;
        goto retry;
    }
+    qemu_mutex_unlock(&iscsilun->mutex);

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
@@ -905,6 +935,7 @@ retry:
 }

 #ifdef __linux__
+/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
                     void *command_data, void *opaque)
@@ -1029,6 +1060,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    acb->task->expxferlen = acb->ioh->dxfer_len;

    data.size = 0;
+    qemu_mutex_lock(&iscsilun->mutex);
    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
        if (acb->ioh->iovec_count == 0) {
            data.data = acb->ioh->dxferp;
@@ -1044,6 +1076,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                 iscsi_aio_ioctl_cb,
                                 (data.size > 0) ? &data : NULL,
                                 acb) != 0) {
+        qemu_mutex_unlock(&iscsilun->mutex);
        scsi_free_scsi_task(acb->task);
        qemu_aio_unref(acb);
        return NULL;
@@ -1063,6 +1096,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
    }

    iscsi_set_events(iscsilun);
+    qemu_mutex_unlock(&iscsilun->mutex);

    return &acb->common;
 }
@@ -1087,6 +1121,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;
+    int r = 0;

    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
        return -ENOTSUP;
@@ -1101,15 +1136,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    list.num = count / iscsilun->block_size;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                         iscsi_co_generic_cb, &iTask) == NULL) {
-        return -ENOMEM;
+        r = -ENOMEM;
+        goto out_unlock;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.task != NULL) {
@@ -1126,17 +1165,20 @@ retry:
        /* the target might fail with a check condition if it
           is not happy with the alignment of the UNMAP request
           we silently fail in this case */
-        return 0;
+        goto out_unlock;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
    }

    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                               count >> BDRV_SECTOR_BITS);

-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }

 static int
@@ -1148,6 +1190,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    uint64_t lba;
    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;
+    int r = 0;

    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
        return -ENOTSUP;
@@ -1181,6 +1224,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
        }
    }

+    qemu_mutex_lock(&iscsilun->mutex);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (use_16_for_ws) {
@@ -1195,12 +1239,15 @@ retry:
                                            0, 0, iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
+        qemu_mutex_unlock(&iscsilun->mutex);
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
    }

    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1210,7 +1257,8 @@ retry:
        /* WRITE SAME is not supported by the target */
        iscsilun->has_write_same = false;
        scsi_free_scsi_task(iTask.task);
-        return -ENOTSUP;
+        r = -ENOTSUP;
+        goto out_unlock;
    }

    if (iTask.task != NULL) {
@@ -1226,7 +1274,8 @@ retry:
    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                                   count >> BDRV_SECTOR_BITS);
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1237,32 +1286,19 @@ retry:
                                     count >> BDRV_SECTOR_BITS);
    }

-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }

-static void parse_chap(struct iscsi_context *iscsi, const char *target,
+static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
                       Error **errp)
 {
-    QemuOptsList *list;
-    QemuOpts *opts;
    const char *user = NULL;
    const char *password = NULL;
    const char *secretid;
    char *secret = NULL;

-    list = qemu_find_opts("iscsi");
-    if (!list) {
-        return;
-    }
-
-    opts = qemu_opts_find(list, target);
-    if (opts == NULL) {
-        opts = QTAILQ_FIRST(&list->head);
-        if (!opts) {
-            return;
-        }
-    }
-
    user = qemu_opt_get(opts, "user");
    if (!user) {
        return;
@@ -1293,64 +1329,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
    g_free(secret);
 }

-static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
+static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
                                Error **errp)
 {
-    QemuOptsList *list;
-    QemuOpts *opts;
    const char *digest = NULL;

-    list = qemu_find_opts("iscsi");
-    if (!list) {
-        return;
-    }
-
-    opts = qemu_opts_find(list, target);
-    if (opts == NULL) {
-        opts = QTAILQ_FIRST(&list->head);
-        if (!opts) {
-            return;
-        }
-    }
-
    digest = qemu_opt_get(opts, "header-digest");
    if (!digest) {
-        return;
-    }
-
-    if (!strcmp(digest, "CRC32C")) {
+        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+    } else if (!strcmp(digest, "crc32c")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
-    } else if (!strcmp(digest, "NONE")) {
+    } else if (!strcmp(digest, "none")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
-    } else if (!strcmp(digest, "CRC32C-NONE")) {
+    } else if (!strcmp(digest, "crc32c-none")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
-    } else if (!strcmp(digest, "NONE-CRC32C")) {
+    } else if (!strcmp(digest, "none-crc32c")) {
        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
    } else {
        error_setg(errp, "Invalid header-digest setting : %s", digest);
    }
 }

-static char *parse_initiator_name(const char *target)
+static char *get_initiator_name(QemuOpts *opts)
 {
-    QemuOptsList *list;
-    QemuOpts *opts;
    const char *name;
    char *iscsi_name;
    UuidInfo *uuid_info;

-    list = qemu_find_opts("iscsi");
-    if (list) {
-        opts = qemu_opts_find(list, target);
-        if (!opts) {
-            opts = QTAILQ_FIRST(&list->head);
-        }
-        if (opts) {
-            name = qemu_opt_get(opts, "initiator-name");
-            if (name) {
-                return g_strdup(name);
-            }
-        }
+    name = qemu_opt_get(opts, "initiator-name");
+    if (name) {
+        return g_strdup(name);
    }

    uuid_info = qmp_query_uuid(NULL);
@@ -1365,43 +1373,24 @@ static char *parse_initiator_name(const char *target)
    return iscsi_name;
 }

-static int parse_timeout(const char *target)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *timeout;
-
-    list = qemu_find_opts("iscsi");
-    if (list) {
-        opts = qemu_opts_find(list, target);
-        if (!opts) {
-            opts = QTAILQ_FIRST(&list->head);
-        }
-        if (opts) {
-            timeout = qemu_opt_get(opts, "timeout");
-            if (timeout) {
-                return atoi(timeout);
-            }
-        }
-    }
-
-    return 0;
-}
-
 static void iscsi_nop_timed_event(void *opaque)
 {
    IscsiLun *iscsilun = opaque;

+    qemu_mutex_lock(&iscsilun->mutex);
    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout. Reconnecting...");
        iscsilun->request_timed_out = true;
    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
-        return;
+        goto out;
    }

    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
    iscsi_set_events(iscsilun);
+
+out:
+    qemu_mutex_unlock(&iscsilun->mutex);
 }

 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1474,20 +1463,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
    }
 }

-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the iscsi image",
-        },
-        { /* end of list */ }
-    },
-};
-
 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
                                          int evpd, int pc, void **inq, Error **errp)
 {
@@ -1605,24 +1580,178 @@ out:
    }
 }

+static void iscsi_parse_iscsi_option(const char *target, QDict *options)
+{
+    QemuOptsList *list;
+    QemuOpts *opts;
+    const char *user, *password, *password_secret, *initiator_name,
+               *header_digest, *timeout;
+
+    list = qemu_find_opts("iscsi");
+    if (!list) {
+        return;
+    }
+
+    opts = qemu_opts_find(list, target);
+    if (opts == NULL) {
+        opts = QTAILQ_FIRST(&list->head);
+        if (!opts) {
+            return;
+        }
+    }
+
+    user = qemu_opt_get(opts, "user");
+    if (user) {
+        qdict_set_default_str(options, "user", user);
+    }
+
+    password = qemu_opt_get(opts, "password");
+    if (password) {
+        qdict_set_default_str(options, "password", password);
+    }
+
+    password_secret = qemu_opt_get(opts, "password-secret");
+    if (password_secret) {
+        qdict_set_default_str(options, "password-secret", password_secret);
+    }
+
+    initiator_name = qemu_opt_get(opts, "initiator-name");
+    if (initiator_name) {
+        qdict_set_default_str(options, "initiator-name", initiator_name);
+    }
+
+    header_digest = qemu_opt_get(opts, "header-digest");
+    if (header_digest) {
+        /* -iscsi takes upper case values, but QAPI only supports lower case
+         * enum constant names, so we have to convert here. */
+        char *qapi_value = g_ascii_strdown(header_digest, -1);
+        qdict_set_default_str(options, "header-digest", qapi_value);
+        g_free(qapi_value);
+    }
+
+    timeout = qemu_opt_get(opts, "timeout");
+    if (timeout) {
+        qdict_set_default_str(options, "timeout", timeout);
+    }
+}
+
 /*
 * We support iscsi url's on the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
 */
+static void iscsi_parse_filename(const char *filename, QDict *options,
+                                 Error **errp)
+{
+    struct iscsi_url *iscsi_url;
+    const char *transport_name;
+    char *lun_str;
+
+    iscsi_url = iscsi_parse_full_url(NULL, filename);
+    if (iscsi_url == NULL) {
+        error_setg(errp, "Failed to parse URL : %s", filename);
+        return;
+    }
+
+#if LIBISCSI_API_VERSION >= (20160603)
+    switch (iscsi_url->transport) {
+    case TCP_TRANSPORT:
+        transport_name = "tcp";
+        break;
+    case ISER_TRANSPORT:
+        transport_name = "iser";
+        break;
+    default:
+        error_setg(errp, "Unknown transport type (%d)",
+                   iscsi_url->transport);
+        return;
+    }
+#else
+    transport_name = "tcp";
+#endif
+
+    qdict_set_default_str(options, "transport", transport_name);
+    qdict_set_default_str(options, "portal", iscsi_url->portal);
+    qdict_set_default_str(options, "target", iscsi_url->target);
+
+    lun_str = g_strdup_printf("%d", iscsi_url->lun);
+    qdict_set_default_str(options, "lun", lun_str);
+    g_free(lun_str);
+
+    /* User/password from -iscsi take precedence over those from the URL */
+    iscsi_parse_iscsi_option(iscsi_url->target, options);
+
+    if (iscsi_url->user[0] != '\0') {
+        qdict_set_default_str(options, "user", iscsi_url->user);
+        qdict_set_default_str(options, "password", iscsi_url->passwd);
+    }
+
+    iscsi_destroy_url(iscsi_url);
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "iscsi",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "transport",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "portal",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "target",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "password",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "lun",
+            .type = QEMU_OPT_NUMBER,
+        },
+        {
+            .name = "initiator-name",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "header-digest",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "timeout",
+            .type = QEMU_OPT_NUMBER,
+        },
+        { /* end of list */ }
+    },
+};
+
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct iscsi_context *iscsi = NULL;
-    struct iscsi_url *iscsi_url = NULL;
    struct scsi_task *task = NULL;
    struct scsi_inquiry_standard *inq = NULL;
    struct scsi_inquiry_supported_pages *inq_vpd;
    char *initiator_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename;
-    int i, ret = 0, timeout = 0;
+    const char *transport_name, *portal, *target;
+#if LIBISCSI_API_VERSION >= (20160603)
+    enum iscsi_transport_type transport;
+#endif
+    int i, ret = 0, timeout = 0, lun;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1632,18 +1761,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    filename = qemu_opt_get(opts, "filename");
+    transport_name = qemu_opt_get(opts, "transport");
+    portal = qemu_opt_get(opts, "portal");
+    target = qemu_opt_get(opts, "target");
+    lun = qemu_opt_get_number(opts, "lun", 0);

-    iscsi_url = iscsi_parse_full_url(iscsi, filename);
-    if (iscsi_url == NULL) {
-        error_setg(errp, "Failed to parse URL : %s", filename);
+    if (!transport_name || !portal || !target) {
+        error_setg(errp, "Need all of transport, portal and target options");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (!strcmp(transport_name, "tcp")) {
+#if LIBISCSI_API_VERSION >= (20160603)
+        transport = TCP_TRANSPORT;
+    } else if (!strcmp(transport_name, "iser")) {
+        transport = ISER_TRANSPORT;
+#else
+        /* TCP is what older libiscsi versions always use */
+#endif
+    } else {
+        error_setg(errp, "Unknown transport: %s", transport_name);
        ret = -EINVAL;
        goto out;
    }

    memset(iscsilun, 0, sizeof(IscsiLun));

-    initiator_name = parse_initiator_name(iscsi_url->target);
+    initiator_name = get_initiator_name(opts);

    iscsi = iscsi_create_context(initiator_name);
    if (iscsi == NULL) {
@@ -1652,30 +1797,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }
 #if LIBISCSI_API_VERSION >= (20160603)
-    if (iscsi_init_transport(iscsi, iscsi_url->transport)) {
+    if (iscsi_init_transport(iscsi, transport)) {
        error_setg(errp, ("Error initializing transport."));
        ret = -EINVAL;
        goto out;
    }
 #endif
-    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
+    if (iscsi_set_targetname(iscsi, target)) {
        error_setg(errp, "iSCSI: Failed to set target name.");
        ret = -EINVAL;
        goto out;
    }

-    if (iscsi_url->user[0] != '\0') {
-        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
-                                              iscsi_url->passwd);
-        if (ret != 0) {
-            error_setg(errp, "Failed to set initiator username and password");
-            ret = -EINVAL;
-            goto out;
-        }
-    }
-
    /* check if we got CHAP username/password via the options */
-    parse_chap(iscsi, iscsi_url->target, &local_err);
+    apply_chap(iscsi, opts, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
@@ -1688,10 +1823,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
-
    /* check if we got HEADER_DIGEST via the options */
-    parse_header_digest(iscsi, iscsi_url->target, &local_err);
+    apply_header_digest(iscsi, opts, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
@@ -1699,7 +1832,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* timeout handling is broken in libiscsi before 1.15.0 */
-    timeout = parse_timeout(iscsi_url->target);
+    timeout = qemu_opt_get_number(opts, "timeout", 0);
 #if LIBISCSI_API_VERSION >= 20150621
    iscsi_set_timeout(iscsi, timeout);
 #else
@@ -1708,7 +1841,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    }
 #endif

-    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
+    if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
            iscsi_get_error(iscsi));
        ret = -EINVAL;
@@ -1717,7 +1850,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,

    iscsilun->iscsi = iscsi;
    iscsilun->aio_context = bdrv_get_aio_context(bs);
-    iscsilun->lun   = iscsi_url->lun;
+    iscsilun->lun = lun;
    iscsilun->has_write_same = true;

    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
@@ -1803,6 +1936,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    scsi_free_scsi_task(task);
    task = NULL;

+    qemu_mutex_init(&iscsilun->mutex);
    iscsi_attach_aio_context(bs, iscsilun->aio_context);

    /* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1820,9 +1954,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
 out:
    qemu_opts_del(opts);
    g_free(initiator_name);
-    if (iscsi_url != NULL) {
-        iscsi_destroy_url(iscsi_url);
-    }
    if (task != NULL) {
        scsi_free_scsi_task(task);
    }
@@ -1851,6 +1982,7 @@ static void iscsi_close(BlockDriverState *bs)
    iscsi_destroy_context(iscsi);
    g_free(iscsilun->zeroblock);
    iscsi_allocmap_free(iscsilun);
+    qemu_mutex_destroy(&iscsilun->mutex);
    memset(iscsilun, 0, sizeof(IscsiLun));
 }

@@ -2031,15 +2163,15 @@ static BlockDriver bdrv_iscsi = {
    .format_name     = "iscsi",
    .protocol_name   = "iscsi",

-    .instance_size   = sizeof(IscsiLun),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open  = iscsi_open,
-    .bdrv_close      = iscsi_close,
-    .bdrv_create     = iscsi_create,
-    .create_opts     = &iscsi_create_opts,
-    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
-    .bdrv_reopen_commit    = iscsi_reopen_commit,
-    .bdrv_invalidate_cache = iscsi_invalidate_cache,
+    .instance_size          = sizeof(IscsiLun),
+    .bdrv_parse_filename    = iscsi_parse_filename,
+    .bdrv_file_open         = iscsi_open,
+    .bdrv_close             = iscsi_close,
+    .bdrv_create            = iscsi_create,
+    .create_opts            = &iscsi_create_opts,
+    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
+    .bdrv_reopen_commit     = iscsi_reopen_commit,
+    .bdrv_invalidate_cache  = iscsi_invalidate_cache,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
@@ -2066,15 +2198,15 @@ static BlockDriver bdrv_iser = {
    .format_name     = "iser",
    .protocol_name   = "iser",

-    .instance_size   = sizeof(IscsiLun),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open  = iscsi_open,
-    .bdrv_close      = iscsi_close,
-    .bdrv_create     = iscsi_create,
-    .create_opts     = &iscsi_create_opts,
-    .bdrv_reopen_prepare   = iscsi_reopen_prepare,
-    .bdrv_reopen_commit    = iscsi_reopen_commit,
-    .bdrv_invalidate_cache = iscsi_invalidate_cache,
+    .instance_size          = sizeof(IscsiLun),
+    .bdrv_parse_filename    = iscsi_parse_filename,
+    .bdrv_file_open         = iscsi_open,
+    .bdrv_close             = iscsi_close,
+    .bdrv_create            = iscsi_create,
+    .create_opts            = &iscsi_create_opts,
+    .bdrv_reopen_prepare    = iscsi_reopen_prepare,
+    .bdrv_reopen_commit     = iscsi_reopen_commit,
+    .bdrv_invalidate_cache  = iscsi_invalidate_cache,

    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -54,10 +54,10 @@ struct LinuxAioState {
    io_context_t ctx;
    EventNotifier e;

-    /* io queue for submit at batch */
+    /* io queue for submit at batch.  Protected by AioContext lock. */
    LaioQueue io_q;

-    /* I/O completion processing */
+    /* I/O completion processing.  Only runs in I/O thread.  */
    QEMUBH *completion_bh;
    int event_idx;
    int event_max;
@@ -100,7 +100,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
         * that!
         */
        if (!qemu_coroutine_entered(laiocb->co)) {
-            qemu_coroutine_enter(laiocb->co);
+            aio_co_wake(laiocb->co);
        }
    } else {
        laiocb->common.cb(laiocb->common.opaque, ret);
@@ -234,9 +234,12 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
    qemu_laio_process_completions(s);
+
+    aio_context_acquire(s->aio_context);
    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
+    aio_context_release(s->aio_context);
 }

 static void qemu_laio_completion_bh(void *opaque)
@@ -455,6 +458,7 @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
    qemu_bh_delete(s->completion_bh);
+    s->aio_context = NULL;
 }

 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -12,6 +12,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -38,7 +39,10 @@ typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockBackend *target;
+    BlockDriverState *mirror_top_bs;
+    BlockDriverState *source;
    BlockDriverState *base;
+
    /* The name of the graph node to replace */
    char *replaces;
    /* The BDS to replace */
@@ -69,6 +73,7 @@ typedef struct MirrorBlockJob {
    bool waiting_for_io;
    int target_cluster_sectors;
    int max_iov;
+    bool initial_zeroing_ongoing;
 } MirrorBlockJob;

 typedef struct MirrorOp {
@@ -117,9 +122,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
        if (s->cow_bitmap) {
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
-        s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
+        if (!s->initial_zeroing_ongoing) {
+            s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
+        }
    }
-
    qemu_iovec_destroy(&op->qiov);
    g_free(op);

@@ -132,6 +138,8 @@ static void mirror_write_complete(void *opaque, int ret)
 {
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
+
+    aio_context_acquire(blk_get_aio_context(s->common.blk));
    if (ret < 0) {
        BlockErrorAction action;

@@ -142,12 +150,15 @@ static void mirror_write_complete(void *opaque, int ret)
        }
    }
    mirror_iteration_done(op, ret);
+    aio_context_release(blk_get_aio_context(s->common.blk));
 }

 static void mirror_read_complete(void *opaque, int ret)
 {
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
+
+    aio_context_acquire(blk_get_aio_context(s->common.blk));
    if (ret < 0) {
        BlockErrorAction action;

@@ -158,10 +169,11 @@ static void mirror_read_complete(void *opaque, int ret)
        }

        mirror_iteration_done(op, ret);
-        return;
+    } else {
+        blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                        0, mirror_write_complete, op);
    }
-    blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
-                    0, mirror_write_complete, op);
+    aio_context_release(blk_get_aio_context(s->common.blk));
 }

 static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -319,7 +331,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,

 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = blk_bs(s->common.blk);
+    BlockDriverState *source = s->source;
    int64_t sector_num, first_chunk;
    uint64_t delay_ns = 0;
    /* At least the first dirty chunk is mirrored in one iteration. */
@@ -378,7 +390,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
                            nb_chunks * sectors_per_chunk);
    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
    while (nb_chunks > 0 && sector_num < end) {
-        int ret;
+        int64_t ret;
        int io_sectors, io_sectors_acct;
        BlockDriverState *file;
        enum MirrorMethod {
@@ -489,12 +501,37 @@ static void mirror_exit(BlockJob *job, void *opaque)
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    MirrorExitData *data = opaque;
    AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = blk_bs(s->common.blk);
+    BlockDriverState *src = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
+    BlockDriverState *mirror_top_bs = s->mirror_top_bs;
+    Error *local_err = NULL;

    /* Make sure that the source BDS doesn't go away before we called
     * block_job_completed(). */
    bdrv_ref(src);
+    bdrv_ref(mirror_top_bs);
+    bdrv_ref(target_bs);
+
+    /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
+     * inserting target_bs at s->to_replace, where we might not be able to get
+     * these permissions. */
+    blk_unref(s->target);
+    s->target = NULL;
+
+    /* We don't access the source any more. Dropping any WRITE/RESIZE is
+     * required before it could become a backing file of target_bs. */
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
+    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+        BlockDriverState *backing = s->is_none_mode ? src : s->base;
+        if (backing_bs(target_bs) != backing) {
+            bdrv_set_backing_hd(target_bs, backing, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                data->ret = -EPERM;
+            }
+        }
+    }

    if (s->to_replace) {
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -514,12 +551,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
        /* The mirror job has no requests in flight any more, but we need to
         * drain potential other users of the BDS before changing the graph. */
        bdrv_drained_begin(target_bs);
-        bdrv_replace_in_backing_chain(to_replace, target_bs);
+        bdrv_replace_node(to_replace, target_bs, &local_err);
        bdrv_drained_end(target_bs);
-
-        /* We just changed the BDS the job BB refers to */
-        blk_remove_bs(job->blk);
-        blk_insert_bs(job->blk, src);
+        if (local_err) {
+            error_report_err(local_err);
+            data->ret = -EPERM;
+        }
    }
    if (s->to_replace) {
        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -530,11 +567,29 @@ static void mirror_exit(BlockJob *job, void *opaque)
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
-    blk_unref(s->target);
-    s->target = NULL;
+    bdrv_unref(target_bs);
+
+    /* Remove the mirror filter driver from the graph. Before this, get rid of
+     * the blockers on the intermediate nodes so that the resulting state is
+     * valid. Also give up permissions on mirror_top_bs->backing, which might
+     * block the removal. */
+    block_job_remove_all_bdrv(job);
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
+    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+
+    /* We just changed the BDS the job BB refers to (with either or both of the
+     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
+     * the right thing. We don't need any permissions any more now. */
+    blk_remove_bs(job->blk);
+    blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
+    blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
+
    block_job_completed(&s->common, data->ret);
+
    g_free(data);
    bdrv_drained_end(src);
+    bdrv_unref(mirror_top_bs);
    bdrv_unref(src);
 }

@@ -554,7 +609,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
    int64_t sector_num, end;
    BlockDriverState *base = s->base;
-    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *bs = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
    int ret, n;

@@ -566,6 +621,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            return 0;
        }

+        s->initial_zeroing_ongoing = true;
        for (sector_num = 0; sector_num < end; ) {
            int nb_sectors = MIN(end - sector_num,
                QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
@@ -573,11 +629,13 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            mirror_throttle(s);

            if (block_job_is_cancelled(&s->common)) {
+                s->initial_zeroing_ongoing = false;
                return 0;
            }

            if (s->in_flight >= MAX_IN_FLIGHT) {
-                trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+                trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
+                                   s->in_flight);
                mirror_wait_for_io(s);
                continue;
            }
@@ -587,6 +645,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
        }

        mirror_wait_for_all_io(s);
+        s->initial_zeroing_ongoing = false;
    }

    /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -633,7 +692,7 @@ static void coroutine_fn mirror_run(void *opaque)
 {
    MirrorBlockJob *s = opaque;
    MirrorExitData *data;
-    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *bs = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
    bool need_drain = true;
    int64_t length;
@@ -651,7 +710,28 @@ static void coroutine_fn mirror_run(void *opaque)
    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
-    } else if (s->bdev_length == 0) {
+    }
+
+    /* Active commit must resize the base image if its size differs from the
+     * active layer. */
+    if (s->base == blk_bs(s->target)) {
+        int64_t base_length;
+
+        base_length = blk_getlength(s->target);
+        if (base_length < 0) {
+            ret = base_length;
+            goto immediate_exit;
+        }
+
+        if (s->bdev_length > base_length) {
+            ret = blk_truncate(s->target, s->bdev_length);
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+        }
+    }
+
+    if (s->bdev_length == 0) {
        /* Report BLOCK_JOB_READY and wait for complete. */
        block_job_event_ready(&s->common);
        s->synced = true;
@@ -730,7 +810,7 @@ static void coroutine_fn mirror_run(void *opaque)
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
+                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
@@ -844,9 +924,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void mirror_complete(BlockJob *job, Error **errp)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    BlockDriverState *src, *target;
+    BlockDriverState *target;

-    src = blk_bs(job->blk);
    target = blk_bs(s->target);

    if (!s->synced) {
@@ -878,6 +957,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);

+        /* TODO Translate this into permission system. Current definition of
+         * GRAPH_MOD would require to request it for the parents; they might
+         * not even be BlockDriverStates, however, so a BdrvChild can't address
+         * them. May need redefinition of GRAPH_MOD. */
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -886,13 +969,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
        aio_context_release(replace_aio_context);
    }

-    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
-        BlockDriverState *backing = s->is_none_mode ? src : s->base;
-        if (backing_bs(target) != backing) {
-            bdrv_set_backing_hd(target, backing);
-        }
-    }
-
    s->should_complete = true;
    block_job_enter(&s->common);
 }
@@ -948,6 +1024,85 @@ static const BlockJobDriver commit_active_job_driver = {
    .drain                  = mirror_drain,
 };

+static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+{
+    return bdrv_co_flush(bs->backing->bs);
+}
+
+static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
+    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+    BlockDriverState **file)
+{
+    *pnum = nb_sectors;
+    *file = bs->backing->bs;
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+           (sector_num << BDRV_SECTOR_BITS);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags)
+{
+    return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
+    int64_t offset, int count)
+{
+    return bdrv_co_pdiscard(bs->backing->bs, offset, count);
+}
+
+static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+    bdrv_refresh_filename(bs->backing->bs);
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->backing->bs->filename);
+}
+
+static void bdrv_mirror_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    /* Must be able to forward guest writes to the real image */
+    *nperm = 0;
+    if (perm & BLK_PERM_WRITE) {
+        *nperm |= BLK_PERM_WRITE;
+    }
+
+    *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_mirror_top = {
+    .format_name                = "mirror_top",
+    .bdrv_co_preadv             = bdrv_mirror_top_preadv,
+    .bdrv_co_pwritev            = bdrv_mirror_top_pwritev,
+    .bdrv_co_pwrite_zeroes      = bdrv_mirror_top_pwrite_zeroes,
+    .bdrv_co_pdiscard           = bdrv_mirror_top_pdiscard,
+    .bdrv_co_flush              = bdrv_mirror_top_flush,
+    .bdrv_co_get_block_status   = bdrv_mirror_top_get_block_status,
+    .bdrv_refresh_filename      = bdrv_mirror_top_refresh_filename,
+    .bdrv_close                 = bdrv_mirror_top_close,
+    .bdrv_child_perm            = bdrv_mirror_top_child_perm,
+};
+
 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             int creation_flags, BlockDriverState *target,
                             const char *replaces, int64_t speed,
@@ -960,9 +1115,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                             void *opaque, Error **errp,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete)
+                             bool auto_complete, const char *filter_node_name)
 {
    MirrorBlockJob *s;
+    BlockDriverState *mirror_top_bs;
+    bool target_graph_mod;
+    bool target_is_backing;
+    Error *local_err = NULL;
+    int ret;

    if (granularity == 0) {
        granularity = bdrv_get_default_bitmap_granularity(target);
@@ -979,14 +1139,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }

-    s = block_job_create(job_id, driver, bs, speed, creation_flags,
-                         cb, opaque, errp);
-    if (!s) {
+    /* In the case of active commit, add dummy driver to provide consistent
+     * reads on the top, while disabling it in the intermediate nodes, and make
+     * the backing chain writable. */
+    mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
+                                         BDRV_O_RDWR, errp);
+    if (mirror_top_bs == NULL) {
+        return;
+    }
+    mirror_top_bs->total_sectors = bs->total_sectors;
+
+    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
+     * it alive until block_job_create() even if bs has no parent. */
+    bdrv_ref(mirror_top_bs);
+    bdrv_drained_begin(bs);
+    bdrv_append(mirror_top_bs, bs, &local_err);
+    bdrv_drained_end(bs);
+
+    if (local_err) {
+        bdrv_unref(mirror_top_bs);
+        error_propagate(errp, local_err);
        return;
    }

-    s->target = blk_new();
-    blk_insert_bs(s->target, target);
+    /* Make sure that the source is not resized while the job is running */
+    s = block_job_create(job_id, driver, mirror_top_bs,
+                         BLK_PERM_CONSISTENT_READ,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
+                         creation_flags, cb, opaque, errp);
+    bdrv_unref(mirror_top_bs);
+    if (!s) {
+        goto fail;
+    }
+    s->source = bs;
+    s->mirror_top_bs = mirror_top_bs;
+
+    /* No resize for the target either; while the mirror is still running, a
+     * consistent read isn't necessarily possible. We could possibly allow
+     * writes and graph modifications, though it would likely defeat the
+     * purpose of a mirror, so leave them blocked for now.
+     *
+     * In the case of active commit, things look a bit different, though,
+     * because the target is an already populated backing file in active use.
+     * We can allow anything except resize there.*/
+    target_is_backing = bdrv_chain_contains(bs, target);
+    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+    s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
+                        BLK_PERM_WRITE_UNCHANGED |
+                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
+                                             BLK_PERM_WRITE |
+                                             BLK_PERM_GRAPH_MOD : 0));
+    ret = blk_insert_bs(s->target, target, errp);
+    if (ret < 0) {
+        goto fail;
+    }

    s->replaces = g_strdup(replaces);
    s->on_source_error = on_source_error;
@@ -1003,24 +1211,45 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,

    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
    if (!s->dirty_bitmap) {
-        g_free(s->replaces);
-        blk_unref(s->target);
-        block_job_unref(&s->common);
-        return;
+        goto fail;
    }

-    block_job_add_bdrv(&s->common, target);
+    /* Required permissions are already taken with blk_new() */
+    block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
+                       &error_abort);
+
    /* In commit_active_start() all intermediate nodes disappear, so
     * any jobs in them must be blocked */
-    if (bdrv_chain_contains(bs, target)) {
+    if (target_is_backing) {
        BlockDriverState *iter;
        for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
-            block_job_add_bdrv(&s->common, iter);
+            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
+             * ourselves at s->base (if writes are blocked for a node, they are
+             * also blocked for its backing file). The other options would be a
+             * second filter driver above s->base (== target). */
+            ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+                                     errp);
+            if (ret < 0) {
+                goto fail;
+            }
        }
    }

    trace_mirror_start(bs, s, opaque);
    block_job_start(&s->common);
+    return;
+
+fail:
+    if (s) {
+        g_free(s->replaces);
+        blk_unref(s->target);
+        block_job_unref(&s->common);
+    }
+
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
+    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
 }

 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1029,7 +1258,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
-                  bool unmap, Error **errp)
+                  bool unmap, const char *filter_node_name, Error **errp)
 {
    bool is_none_mode;
    BlockDriverState *base;
@@ -1043,18 +1272,18 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
-                     &mirror_job_driver, is_none_mode, base, false);
+                     &mirror_job_driver, is_none_mode, base, false,
+                     filter_node_name);
 }

 void commit_active_start(const char *job_id, BlockDriverState *bs,
                         BlockDriverState *base, int creation_flags,
                         int64_t speed, BlockdevOnError on_error,
+                         const char *filter_node_name,
                         BlockCompletionFunc *cb, void *opaque, Error **errp,
                         bool auto_complete)
 {
-    int64_t length, base_length;
    int orig_base_flags;
-    int ret;
    Error *local_err = NULL;

    orig_base_flags = bdrv_get_flags(base);
@@ -1063,35 +1292,11 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
        return;
    }

-    length = bdrv_getlength(bs);
-    if (length < 0) {
-        error_setg_errno(errp, -length,
-                         "Unable to determine length of %s", bs->filename);
-        goto error_restore_flags;
-    }
-
-    base_length = bdrv_getlength(base);
-    if (base_length < 0) {
-        error_setg_errno(errp, -base_length,
-                         "Unable to determine length of %s", base->filename);
-        goto error_restore_flags;
-    }
-
-    if (length > base_length) {
-        ret = bdrv_truncate(base, length);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret,
-                            "Top image %s is larger than base image %s, and "
-                             "resize of base image failed",
-                             bs->filename, base->filename);
-            goto error_restore_flags;
-        }
-    }
-
    mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN,
                     on_error, on_error, true, cb, opaque, &local_err,
-                     &commit_active_job_driver, false, base, auto_complete);
+                     &commit_active_job_driver, false, base, auto_complete,
+                     filter_node_name);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error_restore_flags;
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -39,7 +39,7 @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i]);
+            aio_co_wake(s->recv_coroutine[i]);
        }
    }
 }
@@ -56,7 +56,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    qio_channel_shutdown(client->ioc,
                         QIO_CHANNEL_SHUTDOWN_BOTH,
                         NULL);
-    nbd_recv_coroutines_enter_all(client);
+    BDRV_POLL_WHILE(bs, client->read_reply_co);

    nbd_client_detach_aio_context(bs);
    object_unref(OBJECT(client->sioc));
@@ -65,54 +65,45 @@ static void nbd_teardown_connection(BlockDriverState *bs)
    client->ioc = NULL;
 }

-static void nbd_reply_ready(void *opaque)
+static coroutine_fn void nbd_read_reply_entry(void *opaque)
 {
-    BlockDriverState *bs = opaque;
-    NBDClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = opaque;
    uint64_t i;
    int ret;

-    if (!s->ioc) { /* Already closed */
-        return;
-    }
-
-    if (s->reply.handle == 0) {
-        /* No reply already in flight.  Fetch a header.  It is possible
-         * that another thread has done the same thing in parallel, so
-         * the socket is not readable anymore.
-         */
+    for (;;) {
+        assert(s->reply.handle == 0);
        ret = nbd_receive_reply(s->ioc, &s->reply);
-        if (ret == -EAGAIN) {
-            return;
+        if (ret <= 0) {
+            break;
        }
-        if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
+
+        /* There's no need for a mutex on the receive side, because the
+         * handler acts as a synchronization point and ensures that only
+         * one coroutine is called until the reply finishes.
+         */
+        i = HANDLE_TO_INDEX(s, s->reply.handle);
+        if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
+            break;
        }
+
+        /* We're woken up by the recv_coroutine itself.  Note that there
+         * is no race between yielding and reentering read_reply_co.  This
+         * is because:
+         *
+         * - if recv_coroutine[i] runs on the same AioContext, it is only
+         *   entered after we yield
+         *
+         * - if recv_coroutine[i] runs on a different AioContext, reentering
+         *   read_reply_co happens through a bottom half, which can only
+         *   run after we yield.
+         */
+        aio_co_wake(s->recv_coroutine[i]);
+        qemu_coroutine_yield();
    }

-    /* There's no need for a mutex on the receive side, because the
-     * handler acts as a synchronization point and ensures that only
-     * one coroutine is called until the reply finishes.  */
-    i = HANDLE_TO_INDEX(s, s->reply.handle);
-    if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
-    }
-
-    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i]);
-        return;
-    }
-
-fail:
-    nbd_teardown_connection(bs);
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-
-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
+    nbd_recv_coroutines_enter_all(s);
+    s->read_reply_co = NULL;
 }

 static int nbd_co_send_request(BlockDriverState *bs,
@@ -120,7 +111,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
                               QEMUIOVector *qiov)
 {
    NBDClientSession *s = nbd_get_client_session(bs);
-    AioContext *aio_context;
    int rc, ret, i;

    qemu_co_mutex_lock(&s->send_mutex);
@@ -141,11 +131,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
        return -EPIPE;
    }

-    s->send_coroutine = qemu_coroutine_self();
-    aio_context = bdrv_get_aio_context(bs);
-
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, NULL, bs);
    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
@@ -160,9 +145,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
    } else {
        rc = nbd_send_request(s->ioc, request);
    }
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, NULL, bs);
-    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
 }
@@ -174,8 +156,7 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 {
    int ret;

-    /* Wait until we're woken up by the read handler.  TODO: perhaps
-     * peek at the next reply and avoid yielding if it's ours?  */
+    /* Wait until we're woken up by nbd_read_reply_entry.  */
    qemu_coroutine_yield();
    *reply = s->reply;
    if (reply->handle != request->handle ||
@@ -201,7 +182,7 @@ static void nbd_coroutine_start(NBDClientSession *s,
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
    if (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_queue_wait(&s->free_sema);
+        qemu_co_queue_wait(&s->free_sema, NULL);
        assert(s->in_flight < MAX_NBD_REQUESTS);
    }
    s->in_flight++;
@@ -209,13 +190,19 @@ static void nbd_coroutine_start(NBDClientSession *s,
    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
 }

-static void nbd_coroutine_end(NBDClientSession *s,
+static void nbd_coroutine_end(BlockDriverState *bs,
                              NBDRequest *request)
 {
+    NBDClientSession *s = nbd_get_client_session(bs);
    int i = HANDLE_TO_INDEX(s, request->handle);
+
    s->recv_coroutine[i] = NULL;
-    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_queue_next(&s->free_sema);
+    s->in_flight--;
+    qemu_co_queue_next(&s->free_sema);
+
+    /* Kick the read_reply_co to get the next reply.  */
+    if (s->read_reply_co) {
+        aio_co_wake(s->read_reply_co);
    }
 }

@@ -241,7 +228,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
    } else {
        nbd_co_receive_reply(client, &request, &reply, qiov);
    }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
    return -reply.error;
 }

@@ -271,7 +258,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
    return -reply.error;
 }

@@ -306,7 +293,7 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
    return -reply.error;
 }

@@ -331,7 +318,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
    return -reply.error;
 }

@@ -357,23 +344,23 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL);
    }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
    return -reply.error;

 }

 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL, NULL);
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
-    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, NULL, bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
+    aio_co_schedule(new_context, client->read_reply_co);
 }

 void nbd_client_close(BlockDriverState *bs)
@@ -434,7 +421,7 @@ int nbd_client_init(BlockDriverState *bs,
    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-
+    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -25,13 +25,11 @@ typedef struct NBDClientSession {

    CoMutex send_mutex;
    CoQueue free_sema;
-    Coroutine *send_coroutine;
+    Coroutine *read_reply_co;
    int in_flight;

    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
    NBDReply reply;
-
-    bool is_unix;
 } NBDClientSession;

 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -47,7 +47,7 @@ typedef struct BDRVNBDState {
    NBDClientSession client;

    /* For nbd_refresh_filename() */
-    SocketAddress *saddr;
+    SocketAddressFlat *saddr;
    char *export, *tlscredsid;
 } BDRVNBDState;

@@ -95,7 +95,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
            goto out;
        }
        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path",
+        qdict_put(options, "server.path",
                  qstring_from_str(qp->p[0].value));
    } else {
        QString *host;
@@ -116,10 +116,10 @@ static int nbd_parse_uri(const char *filename, QDict *options)
        }

        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", host);
+        qdict_put(options, "server.host", host);

        port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put(options, "server.data.port", qstring_from_str(port_str));
+        qdict_put(options, "server.port", qstring_from_str(port_str));
        g_free(port_str);
    }

@@ -197,7 +197,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
    /* are we a UNIX or TCP socket? */
    if (strstart(host_spec, "unix:", &unixpath)) {
        qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
+        qdict_put(options, "server.path", qstring_from_str(unixpath));
    } else {
        InetSocketAddress *addr = NULL;

@@ -207,8 +207,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,
        }

        qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
-        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
+        qdict_put(options, "server.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.port", qstring_from_str(addr->port));
        qapi_free_InetSocketAddress(addr);
    }

@@ -248,20 +248,21 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
        }

        qdict_put(output_options, "server.type", qstring_from_str("unix"));
-        qdict_put(output_options, "server.data.path", qstring_from_str(path));
+        qdict_put(output_options, "server.path", qstring_from_str(path));
    } else if (host) {
        qdict_put(output_options, "server.type", qstring_from_str("inet"));
-        qdict_put(output_options, "server.data.host", qstring_from_str(host));
-        qdict_put(output_options, "server.data.port",
+        qdict_put(output_options, "server.host", qstring_from_str(host));
+        qdict_put(output_options, "server.port",
                  qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
    }

    return true;
 }

-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
+static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options,
+                                     Error **errp)
 {
-    SocketAddress *saddr = NULL;
+    SocketAddressFlat *saddr = NULL;
    QDict *addr = NULL;
    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
@@ -278,15 +279,21 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
        goto done;
    }

-    iv = qobject_input_visitor_new(crumpled_addr, true);
-    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
+     * server.type=inet.  .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
+    iv = qobject_input_visitor_new(crumpled_addr);
+    visit_type_SocketAddressFlat(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto done;
    }

-    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
-
 done:
    QDECREF(addr);
    qobject_decref(crumpled_addr);
@@ -300,9 +307,10 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
    return &s->client;
 }

-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat,
                                                  Error **errp)
 {
+    SocketAddress *saddr = socket_address_crumple(saddr_flat);
    QIOChannelSocket *sioc;
    Error *local_err = NULL;

@@ -312,7 +320,9 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
    qio_channel_socket_connect_sync(sioc,
                                    saddr,
                                    &local_err);
+    qapi_free_SocketAddress(saddr);
    if (local_err) {
+        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
        return NULL;
    }
@@ -403,7 +413,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        goto error;
    }

-    /* Translate @host, @port, and @path to a SocketAddress */
+    /* Translate @host, @port, and @path to a SocketAddressFlat */
    if (!nbd_process_legacy_socket_options(options, opts, errp)) {
        goto error;
    }
@@ -423,11 +433,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
            goto error;
        }

-        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
+        if (s->saddr->type != SOCKET_ADDRESS_FLAT_TYPE_INET) {
            error_setg(errp, "TLS only supported over IP sockets");
            goto error;
        }
-        hostname = s->saddr->u.inet.data->host;
+        hostname = s->saddr->u.inet.host;
    }

    /* establish TCP connection, return error if it fails
@@ -450,7 +461,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
        object_unref(OBJECT(tlscreds));
    }
    if (ret < 0) {
-        qapi_free_SocketAddress(s->saddr);
+        qapi_free_SocketAddressFlat(s->saddr);
        g_free(s->export);
        g_free(s->tlscredsid);
    }
@@ -476,7 +487,7 @@ static void nbd_close(BlockDriverState *bs)

    nbd_client_close(bs);

-    qapi_free_SocketAddress(s->saddr);
+    qapi_free_SocketAddressFlat(s->saddr);
    g_free(s->export);
    g_free(s->tlscredsid);
 }
@@ -507,15 +518,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    Visitor *ov;
    const char *host = NULL, *port = NULL, *path = NULL;

-    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
-        const InetSocketAddress *inet = s->saddr->u.inet.data;
+    if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+        const InetSocketAddress *inet = &s->saddr->u.inet;
        if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
            host = inet->host;
            port = inet->port;
        }
-    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
-        path = s->saddr->u.q_unix.data->path;
-    }
+    } else if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        path = s->saddr->u.q_unix.path;
+    } /* else can't represent as pseudo-filename */

    qdict_put(opts, "driver", qstring_from_str("nbd"));

@@ -534,11 +545,9 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    ov = qobject_output_visitor_new(&saddr_qdict);
-    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
+    visit_type_SocketAddressFlat(ov, NULL, &s->saddr, &error_abort);
    visit_complete(ov, &saddr_qdict);
    visit_free(ov);
-    assert(qobject_type(saddr_qdict) == QTYPE_QDICT);
-
    qdict_put_obj(opts, "server", saddr_qdict);

    if (s->export) {
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -54,6 +54,7 @@ typedef struct NFSClient {
    int events;
    bool has_zero_init;
    AioContext *aio_context;
+    QemuMutex mutex;
    blkcnt_t st_blocks;
    bool cache_used;
    NFSServer *server;
@@ -108,12 +109,13 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
    qdict_put(options, "path", qstring_from_str(uri->path));

    for (i = 0; i < qp->n; i++) {
+        unsigned long long val;
        if (!qp->p[i].value) {
            error_setg(errp, "Value for NFS parameter expected: %s",
                       qp->p[i].name);
            goto out;
        }
-        if (parse_uint_full(qp->p[i].value, NULL, 0)) {
+        if (parse_uint_full(qp->p[i].value, &val, 0)) {
            error_setg(errp, "Illegal value for NFS parameter: %s",
                       qp->p[i].name);
            goto out;
@@ -190,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options,
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);

+/* Called with QemuMutex held.  */
 static void nfs_set_events(NFSClient *client)
 {
    int ev = nfs_which_events(client->context);
@@ -207,15 +210,21 @@ static void nfs_set_events(NFSClient *client)
 static void nfs_process_read(void *arg)
 {
    NFSClient *client = arg;
+
+    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLIN);
    nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_process_write(void *arg)
 {
    NFSClient *client = arg;
+
+    qemu_mutex_lock(&client->mutex);
    nfs_service(client->context, POLLOUT);
    nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
 }

 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -230,10 +239,12 @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 static void nfs_co_generic_bh_cb(void *opaque)
 {
    NFSRPC *task = opaque;
+
    task->complete = 1;
-    qemu_coroutine_enter(task->co);
+    aio_co_wake(task->co);
 }

+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                  void *private_data)
@@ -255,9 +266,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                            nfs_co_generic_bh_cb, task);
 }

-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
-                                     int64_t sector_num, int nb_sectors,
-                                     QEMUIOVector *iov)
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                      uint64_t bytes, QEMUIOVector *iov,
+                                      int flags)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
@@ -265,14 +276,15 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
    nfs_co_init_task(bs, &task);
    task.iov = iov;

+    qemu_mutex_lock(&client->mutex);
    if (nfs_pread_async(client->context, client->fh,
-                        sector_num * BDRV_SECTOR_SIZE,
-                        nb_sectors * BDRV_SECTOR_SIZE,
-                        nfs_co_generic_cb, &task) != 0) {
+                        offset, bytes, nfs_co_generic_cb, &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
        return -ENOMEM;
    }

    nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -289,39 +301,50 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
    return 0;
 }

-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
-                                        int64_t sector_num, int nb_sectors,
-                                        QEMUIOVector *iov)
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *iov,
+                                       int flags)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
    char *buf = NULL;
+    bool my_buffer = false;

    nfs_co_init_task(bs, &task);

-    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
-    if (nb_sectors && buf == NULL) {
-        return -ENOMEM;
+    if (iov->niov != 1) {
+        buf = g_try_malloc(bytes);
+        if (bytes && buf == NULL) {
+            return -ENOMEM;
+        }
+        qemu_iovec_to_buf(iov, 0, buf, bytes);
+        my_buffer = true;
+    } else {
+        buf = iov->iov[0].iov_base;
    }

-    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
-
+    qemu_mutex_lock(&client->mutex);
    if (nfs_pwrite_async(client->context, client->fh,
-                         sector_num * BDRV_SECTOR_SIZE,
-                         nb_sectors * BDRV_SECTOR_SIZE,
-                         buf, nfs_co_generic_cb, &task) != 0) {
-        g_free(buf);
+                         offset, bytes, buf,
+                         nfs_co_generic_cb, &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
+        if (my_buffer) {
+            g_free(buf);
+        }
        return -ENOMEM;
    }

    nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }

-    g_free(buf);
+    if (my_buffer) {
+        g_free(buf);
+    }

-    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
+    if (task.ret != bytes) {
        return task.ret < 0 ? task.ret : -EIO;
    }

@@ -335,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)

    nfs_co_init_task(bs, &task);

+    qemu_mutex_lock(&client->mutex);
    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                        &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
        return -ENOMEM;
    }

    nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
    while (!task.complete) {
        qemu_coroutine_yield();
    }
@@ -358,27 +384,27 @@ static QemuOptsList runtime_opts = {
            .help = "Path of the image on the host",
        },
        {
-            .name = "uid",
+            .name = "user",
            .type = QEMU_OPT_NUMBER,
            .help = "UID value to use when talking to the server",
        },
        {
-            .name = "gid",
+            .name = "group",
            .type = QEMU_OPT_NUMBER,
            .help = "GID value to use when talking to the server",
        },
        {
-            .name = "tcp-syncnt",
+            .name = "tcp-syn-count",
            .type = QEMU_OPT_NUMBER,
            .help = "Number of SYNs to send during the session establish",
        },
        {
-            .name = "readahead",
+            .name = "readahead-size",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the readahead size in bytes",
        },
        {
-            .name = "pagecache",
+            .name = "page-cache-size",
            .type = QEMU_OPT_NUMBER,
            .help = "Set the pagecache size in bytes",
        },
@@ -426,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs)
 {
    NFSClient *client = bs->opaque;
    nfs_client_close(client);
+    qemu_mutex_destroy(&client->mutex);
 }

 static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -447,7 +474,14 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
        goto out;
    }

-    iv = qobject_input_visitor_new(crumpled_addr, true);
+    /*
+     * Caution: this works only because all scalar members of
+     * NFSServer are QString in @crumpled_addr.  The visitor expects
+     * @crumpled_addr to be typed according to the QAPI schema.  It
+     * is when @options come from -blockdev or blockdev_add.  But when
+     * they come from -drive, they're all QString.
+     */
+    iv = qobject_input_visitor_new(crumpled_addr);
    visit_type_NFSServer(iv, NULL, &server, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
@@ -507,29 +541,29 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        goto fail;
    }

-    if (qemu_opt_get(opts, "uid")) {
-        client->uid = qemu_opt_get_number(opts, "uid", 0);
+    if (qemu_opt_get(opts, "user")) {
+        client->uid = qemu_opt_get_number(opts, "user", 0);
        nfs_set_uid(client->context, client->uid);
    }

-    if (qemu_opt_get(opts, "gid")) {
-        client->gid = qemu_opt_get_number(opts, "gid", 0);
+    if (qemu_opt_get(opts, "group")) {
+        client->gid = qemu_opt_get_number(opts, "group", 0);
        nfs_set_gid(client->context, client->gid);
    }

-    if (qemu_opt_get(opts, "tcp-syncnt")) {
-        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0);
+    if (qemu_opt_get(opts, "tcp-syn-count")) {
+        client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syn-count", 0);
        nfs_set_tcp_syncnt(client->context, client->tcp_syncnt);
    }

 #ifdef LIBNFS_FEATURE_READAHEAD
-    if (qemu_opt_get(opts, "readahead")) {
+    if (qemu_opt_get(opts, "readahead-size")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS readahead "
                             "if cache.direct = on");
            goto fail;
        }
-        client->readahead = qemu_opt_get_number(opts, "readahead", 0);
+        client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
            error_report("NFS Warning: Truncating NFS readahead "
                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
@@ -544,13 +578,13 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
 #endif

 #ifdef LIBNFS_FEATURE_PAGECACHE
-    if (qemu_opt_get(opts, "pagecache")) {
+    if (qemu_opt_get(opts, "page-cache-size")) {
        if (open_flags & BDRV_O_NOCACHE) {
            error_setg(errp, "Cannot enable NFS pagecache "
                             "if cache.direct = on");
            goto fail;
        }
-        client->pagecache = qemu_opt_get_number(opts, "pagecache", 0);
+        client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
            error_report("NFS Warning: Truncating NFS pagecache "
                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
@@ -633,6 +667,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
    if (ret < 0) {
        return ret;
    }
+    qemu_mutex_init(&client->mutex);
    bs->total_sectors = ret;
    ret = 0;
    return ret;
@@ -688,6 +723,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
    return client->has_zero_init;
 }

+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
                               void *private_data)
@@ -797,28 +833,26 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options)
    ov = qobject_output_visitor_new(&server_qdict);
    visit_type_NFSServer(ov, NULL, &client->server, &error_abort);
    visit_complete(ov, &server_qdict);
-    assert(qobject_type(server_qdict) == QTYPE_QDICT);
-
    qdict_put_obj(opts, "server", server_qdict);
    qdict_put(opts, "path", qstring_from_str(client->path));

    if (client->uid) {
-        qdict_put(opts, "uid", qint_from_int(client->uid));
+        qdict_put(opts, "user", qint_from_int(client->uid));
    }
    if (client->gid) {
-        qdict_put(opts, "gid", qint_from_int(client->gid));
+        qdict_put(opts, "group", qint_from_int(client->gid));
    }
    if (client->tcp_syncnt) {
-        qdict_put(opts, "tcp-syncnt",
-                      qint_from_int(client->tcp_syncnt));
+        qdict_put(opts, "tcp-syn-cnt",
+                  qint_from_int(client->tcp_syncnt));
    }
    if (client->readahead) {
-        qdict_put(opts, "readahead",
-                      qint_from_int(client->readahead));
+        qdict_put(opts, "readahead-size",
+                  qint_from_int(client->readahead));
    }
    if (client->pagecache) {
-        qdict_put(opts, "pagecache",
-                      qint_from_int(client->pagecache));
+        qdict_put(opts, "page-cache-size",
+                  qint_from_int(client->pagecache));
    }
    if (client->debug) {
        qdict_put(opts, "debug", qint_from_int(client->debug));
@@ -855,8 +889,8 @@ static BlockDriver bdrv_nfs = {
    .bdrv_create                    = nfs_file_create,
    .bdrv_reopen_prepare            = nfs_reopen_prepare,

-    .bdrv_co_readv                  = nfs_co_readv,
-    .bdrv_co_writev                 = nfs_co_writev,
+    .bdrv_co_preadv                 = nfs_co_preadv,
+    .bdrv_co_pwritev                = nfs_co_pwritev,
    .bdrv_co_flush_to_disk          = nfs_co_flush,

    .bdrv_detach_aio_context        = nfs_detach_aio_context,
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -114,7 +114,7 @@ static QemuOptsList parallels_runtime_opts = {
            .name = PARALLELS_OPT_PREALLOC_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Preallocation size on image expansion",
-            .def_value_str = "128MiB",
+            .def_value_str = "128M",
        },
        {
            .name = PARALLELS_OPT_PREALLOC_MODE,
@@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors, int *pnum)
 {
    BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
+    int64_t pos, space, idx, to_allocate, i;

    pos = block_status(s, sector_num, nb_sectors, pnum);
    if (pos > 0) {
@@ -201,11 +200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
    }

    idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
+
+    /* This function is called only by parallels_co_writev(), which will never
+     * pass a sector_num at or beyond the end of the image (because the block
+     * layer never passes such a sector_num to that function). Therefore, idx
+     * is always below s->bat_size.
+     * block_status() will limit *pnum so that sector_num + *pnum will not
+     * exceed the image end. Therefore, idx + to_allocate cannot exceed
+     * s->bat_size.
+     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
+     * will always fit into a uint32_t. */
+    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
+
    space = to_allocate * s->tracks;
    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
        int ret;
@@ -215,7 +222,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                     s->data_end << BDRV_SECTOR_BITS,
                                     space << BDRV_SECTOR_BITS, 0);
        } else {
-            ret = bdrv_truncate(bs->file->bs,
+            ret = bdrv_truncate(bs->file,
                                (s->data_end + space) << BDRV_SECTOR_BITS);
        }
        if (ret < 0) {
@@ -449,7 +456,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                size - res->image_end_offset);
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
+            ret = bdrv_truncate(bs->file, res->image_end_offset);
            if (ret < 0) {
                res->check_errors++;
                return ret;
@@ -488,7 +495,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    file = blk_new_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                        &local_err);
    if (file == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -581,6 +589,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    Error *local_err = NULL;
    char *buf;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
    if (ret < 0) {
        goto fail;
@@ -680,8 +694,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    if (local_err != NULL) {
        goto fail_options;
    }
-    if (!bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
+
+    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -724,7 +739,7 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
    }

    g_free(s->bat_dirty_bmap);
@@ -756,6 +771,7 @@ static BlockDriver bdrv_parallels = {
    .bdrv_probe		= parallels_probe,
    .bdrv_open		= parallels_open,
    .bdrv_close		= parallels_close,
+    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_co_get_block_status = parallels_co_get_block_status,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -237,8 +237,8 @@ void bdrv_query_image_info(BlockDriverState *bs,

    size = bdrv_getlength(bs);
    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get size of device '%s'",
-                         bdrv_get_device_name(bs));
+        error_setg_errno(errp, -size, "Can't get image size '%s'",
+                         bs->exact_filename);
        goto out;
    }

@@ -357,10 +357,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
    qapi_free_BlockInfo(info);
 }

-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing);
-
 static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
 {
    BlockAcctStats *stats = blk_get_stats(blk);
@@ -428,9 +424,18 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
    }
 }

-static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+static BlockStats *bdrv_query_bds_stats(const BlockDriverState *bs,
                                 bool query_backing)
 {
+    BlockStats *s = NULL;
+
+    s = g_malloc0(sizeof(*s));
+    s->stats = g_malloc0(sizeof(*s->stats));
+
+    if (!bs) {
+        return s;
+    }
+
    if (bdrv_get_node_name(bs)[0]) {
        s->has_node_name = true;
        s->node_name = g_strdup(bdrv_get_node_name(bs));
@@ -440,32 +445,12 @@ static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,

    if (bs->file) {
        s->has_parent = true;
-        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
+        s->parent = bdrv_query_bds_stats(bs->file->bs, query_backing);
    }

    if (query_backing && bs->backing) {
        s->has_backing = true;
-        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
-    }
-
-}
-
-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing)
-{
-    BlockStats *s;
-
-    s = g_malloc0(sizeof(*s));
-    s->stats = g_malloc0(sizeof(*s->stats));
-
-    if (blk) {
-        s->has_device = true;
-        s->device = g_strdup(blk_name(blk));
-        bdrv_query_blk_stats(s->stats, blk);
-    }
-    if (bs) {
-        bdrv_query_bds_stats(s, bs, query_backing);
+        s->backing = bdrv_query_bds_stats(bs->backing->bs, query_backing);
    }

    return s;
@@ -494,42 +479,44 @@ BlockInfoList *qmp_query_block(Error **errp)
    return head;
 }

-static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
-                           bool query_nodes)
-{
-    if (query_nodes) {
-        *bs = bdrv_next_node(*bs);
-        return !!*bs;
-    }
-
-    *blk = blk_next(*blk);
-    *bs = *blk ? blk_bs(*blk) : NULL;
-
-    return !!*blk;
-}
-
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                     bool query_nodes,
                                     Error **errp)
 {
    BlockStatsList *head = NULL, **p_next = &head;
-    BlockBackend *blk = NULL;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk;
+    BlockDriverState *bs;

    /* Just to be safe if query_nodes is not always initialized */
-    query_nodes = has_query_nodes && query_nodes;
+    if (has_query_nodes && query_nodes) {
+        for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
+            BlockStatsList *info = g_malloc0(sizeof(*info));
+            AioContext *ctx = bdrv_get_aio_context(bs);

-    while (next_query_bds(&blk, &bs, query_nodes)) {
-        BlockStatsList *info = g_malloc0(sizeof(*info));
-        AioContext *ctx = blk ? blk_get_aio_context(blk)
-                              : bdrv_get_aio_context(bs);
+            aio_context_acquire(ctx);
+            info->value = bdrv_query_bds_stats(bs, false);
+            aio_context_release(ctx);

-        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(blk, bs, !query_nodes);
-        aio_context_release(ctx);
+            *p_next = info;
+            p_next = &info->next;
+        }
+    } else {
+        for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+            BlockStatsList *info = g_malloc0(sizeof(*info));
+            AioContext *ctx = blk_get_aio_context(blk);
+            BlockStats *s;

-        *p_next = info;
-        p_next = &info->next;
+            aio_context_acquire(ctx);
+            s = bdrv_query_bds_stats(blk_bs(blk), true);
+            s->has_device = true;
+            s->device = g_strdup(blk_name(blk));
+            bdrv_query_blk_stats(s->stats, blk);
+            aio_context_release(ctx);
+
+            info->value = s;
+            *p_next = info;
+            p_next = &info->next;
+        }
    }

    return head;
@@ -695,7 +682,6 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,

    visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
    visit_complete(v, &obj);
-    assert(qobject_type(obj) == QTYPE_QDICT);
    data = qdict_get(qobject_to_qdict(obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
    qobject_decref(obj);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -106,6 +106,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    QCowHeader header;
    Error *local_err = NULL;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
@@ -467,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
@@ -817,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    qcow_blk = blk_new_open(filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                            &local_err);
    if (qcow_blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -909,7 +916,7 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

@@ -1046,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
    .bdrv_probe		= qcow_probe,
    .bdrv_open		= qcow_open,
    .bdrv_close		= qcow_close,
+    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_reopen_prepare    = qcow_reopen_prepare,
    .bdrv_create            = qcow_create,
    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -932,9 +932,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
-                qemu_co_mutex_unlock(&s->lock);
-                qemu_co_queue_wait(&old_alloc->dependent_requests);
-                qemu_co_mutex_lock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
                return -EAGAIN;
            }
        }
@@ -1521,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,

    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

-    /* Round start up and end down */
-    offset = align_offset(offset, s->cluster_size);
-    end_offset = start_of_cluster(s, end_offset);
-
-    if (offset > end_offset) {
-        return 0;
+    /* The caller must cluster-align start; round end down except at EOF */
+    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
+        end_offset = start_of_cluster(s, end_offset);
    }

    nb_clusters = size_to_clusters(s, end_offset - offset);
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -83,6 +83,16 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
 /*********************************************************/
 /* refcount handling */

+static void update_max_refcount_table_index(BDRVQcow2State *s)
+{
+    unsigned i = s->refcount_table_size - 1;
+    while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
+        i--;
+    }
+    /* Set s->max_refcount_table_index to the index of the last used entry */
+    s->max_refcount_table_index = i;
+}
+
 int qcow2_refcount_init(BlockDriverState *bs)
 {
    BDRVQcow2State *s = bs->opaque;
@@ -111,6 +121,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
        }
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
+        update_max_refcount_table_index(s);
    }
    return 0;
 fail:
@@ -439,6 +450,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
        }

        s->refcount_table[refcount_table_index] = new_block;
+        /* If there's a hole in s->refcount_table then it can happen
+         * that refcount_table_index < s->max_refcount_table_index */
+        s->max_refcount_table_index =
+            MAX(s->max_refcount_table_index, refcount_table_index);

        /* The new refcount block may be where the caller intended to put its
         * data, so let it restart the search. */
@@ -580,6 +595,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    s->refcount_table = new_table;
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;
+    update_max_refcount_table_index(s);

    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
@@ -1718,7 +1734,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                    goto resize_fail;
                }

-                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
+                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
                if (ret < 0) {
                    goto resize_fail;
                }
@@ -2171,6 +2187,7 @@ write_refblocks:
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
    s->refcount_table_size = reftable_size;
+    update_max_refcount_table_index(s);

    return 0;

@@ -2383,7 +2400,11 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
    }

    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
-        for (i = 0; i < s->refcount_table_size; i++) {
+        unsigned last_entry = s->max_refcount_table_index;
+        assert(last_entry < s->refcount_table_size);
+        assert(last_entry + 1 == s->refcount_table_size ||
+               (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
+        for (i = 0; i <= last_entry; i++) {
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
                s->cluster_size)) {
@@ -2871,6 +2892,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
    /* Now update the rest of the in-memory information */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
+    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options,
    return ret;
 }

-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
+static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
@@ -1205,6 +1205,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
    return ret;
 }

+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
+                      Error **errp)
+{
+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
+    return qcow2_do_open(bs, options, flags, errp);
+}
+
 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
@@ -1785,7 +1797,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
    options = qdict_clone_shallow(bs->options);

    flags &= ~BDRV_O_INACTIVE;
-    ret = qcow2_open(bs, options, flags, &local_err);
+    ret = qcow2_do_open(bs, options, flags, &local_err);
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -2190,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -2254,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
    options = qdict_new();
    qdict_put(options, "driver", qstring_from_str("qcow2"));
    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -2570,7 +2584,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
        /* align end of file to a sector boundary to ease reading with
           sector based I/Os */
        cluster_offset = bdrv_getlength(bs->file->bs);
-        return bdrv_truncate(bs->file->bs, cluster_offset);
+        return bdrv_truncate(bs->file, cluster_offset);
    }

    buf = qemu_blockalign(bs, s->cluster_size);
@@ -2743,6 +2757,7 @@ static int make_completely_empty(BlockDriverState *bs)

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
+    s->max_refcount_table_index = 0;

    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
@@ -2783,7 +2798,7 @@ static int make_completely_empty(BlockDriverState *bs)
        goto fail;
    }

-    ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
+    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
    if (ret < 0) {
        goto fail;
    }
@@ -3100,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    uint64_t cluster_size = s->cluster_size;
    bool encrypt;
    int refcount_bits = s->refcount_bits;
+    Error *local_err = NULL;
    int ret;
    QemuOptDesc *desc = opts->list->desc;
    Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3249,7 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    }

    if (new_size) {
-        ret = bdrv_truncate(bs, new_size);
+        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+        ret = blk_insert_bs(blk, bs, &local_err);
+        if (ret < 0) {
+            error_report_err(local_err);
+            blk_unref(blk);
+            return ret;
+        }
+
+        ret = blk_truncate(blk, new_size);
+        blk_unref(blk);
        if (ret < 0) {
            return ret;
        }
@@ -3386,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
    .bdrv_reopen_commit   = qcow2_reopen_commit,
    .bdrv_reopen_abort    = qcow2_reopen_abort,
    .bdrv_join_options    = qcow2_join_options,
+    .bdrv_child_perm      = bdrv_format_default_perms,
    .bdrv_create        = qcow2_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = qcow2_co_get_block_status,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -251,6 +251,7 @@ typedef struct BDRVQcow2State {
    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;
+    uint32_t max_refcount_table_index; /* Last used entry in refcount_table */
    uint64_t free_cluster_index;
    uint64_t free_byte_offset;

--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -83,6 +83,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)
    unsigned int index;
    unsigned int n;

+    qed_acquire(s);
    if (ret) {
        goto out;
    }
@@ -109,6 +110,7 @@ static void qed_find_cluster_cb(void *opaque, int ret)

 out:
    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
+    qed_release(s);
    g_free(find_cluster_cb);
 }

--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -31,6 +31,7 @@ static void qed_read_table_cb(void *opaque, int ret)
 {
    QEDReadTableCB *read_table_cb = opaque;
    QEDTable *table = read_table_cb->table;
+    BDRVQEDState *s = read_table_cb->s;
    int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
    int i;

@@ -40,13 +41,15 @@ static void qed_read_table_cb(void *opaque, int ret)
    }

    /* Byteswap offsets */
+    qed_acquire(s);
    for (i = 0; i < noffsets; i++) {
        table->offsets[i] = le64_to_cpu(table->offsets[i]);
    }
+    qed_release(s);

 out:
    /* Completion */
-    trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
+    trace_qed_read_table_cb(s, read_table_cb->table, ret);
    gencb_complete(&read_table_cb->gencb, ret);
 }

@@ -84,8 +87,9 @@ typedef struct {
 static void qed_write_table_cb(void *opaque, int ret)
 {
    QEDWriteTableCB *write_table_cb = opaque;
+    BDRVQEDState *s = write_table_cb->s;

-    trace_qed_write_table_cb(write_table_cb->s,
+    trace_qed_write_table_cb(s,
                             write_table_cb->orig_table,
                             write_table_cb->flush,
                             ret);
@@ -97,8 +101,10 @@ static void qed_write_table_cb(void *opaque, int ret)
    if (write_table_cb->flush) {
        /* We still need to flush first */
        write_table_cb->flush = false;
+        qed_acquire(s);
        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
                       write_table_cb);
+        qed_release(s);
        return;
    }

@@ -213,6 +219,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
    CachedL2Table *l2_table = request->l2_table;
    uint64_t l2_offset = read_l2_table_cb->l2_offset;

+    qed_acquire(s);
    if (ret) {
        /* can't trust loaded L2 table anymore */
        qed_unref_l2_cache_entry(l2_table);
@@ -228,6 +235,7 @@ static void qed_read_l2_table_cb(void *opaque, int ret)
        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
        assert(request->l2_table != NULL);
    }
+    qed_release(s);

    gencb_complete(&read_l2_table_cb->gencb, ret);
 }
--- a/block/qed.c
+++ b/block/qed.c
@@ -273,7 +273,19 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
    return l2_table;
 }

-static void qed_aio_next_io(void *opaque, int ret);
+static void qed_aio_next_io(QEDAIOCB *acb, int ret);
+
+static void qed_aio_start_io(QEDAIOCB *acb)
+{
+    qed_aio_next_io(acb, 0);
+}
+
+static void qed_aio_next_io_cb(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+
+    qed_aio_next_io(acb, ret);
+}

 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
@@ -292,7 +304,7 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)

    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
    if (acb) {
-        qed_aio_next_io(acb, 0);
+        qed_aio_start_io(acb);
    }
 }

@@ -333,10 +345,22 @@ static void qed_need_check_timer_cb(void *opaque)

    trace_qed_need_check_timer_cb(s);

+    qed_acquire(s);
    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
    bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
+    qed_release(s);
+}
+
+void qed_acquire(BDRVQEDState *s)
+{
+    aio_context_acquire(bdrv_get_aio_context(s->bs));
+}
+
+void qed_release(BDRVQEDState *s)
+{
+    aio_context_release(bdrv_get_aio_context(s->bs));
 }

 static void qed_start_need_check_timer(BDRVQEDState *s)
@@ -391,8 +415,8 @@ static void bdrv_qed_drain(BlockDriverState *bs)
    }
 }

-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
+static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
+                            Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
    QEDHeader le_header;
@@ -526,6 +550,18 @@ out:
    return ret;
 }

+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp)
+{
+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
+    return bdrv_qed_do_open(bs, options, flags, errp);
+}
+
 static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    BDRVQEDState *s = bs->opaque;
@@ -589,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        return -EIO;
@@ -721,7 +758,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
    }

    if (cb->co) {
-        qemu_coroutine_enter(cb->co);
+        aio_co_wake(cb->co);
    }
 }

@@ -918,6 +955,7 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
 static void qed_aio_complete_bh(void *opaque)
 {
    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
    BlockCompletionFunc *cb = acb->common.cb;
    void *user_opaque = acb->common.opaque;
    int ret = acb->bh_ret;
@@ -925,7 +963,9 @@ static void qed_aio_complete_bh(void *opaque)
    qemu_aio_unref(acb);

    /* Invoke callback */
+    qed_acquire(s);
    cb(user_opaque, ret);
+    qed_release(s);
 }

 static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -959,7 +999,7 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
        if (acb) {
-            qed_aio_next_io(acb, 0);
+            qed_aio_start_io(acb);
        } else if (s->header.features & QED_F_NEED_CHECK) {
            qed_start_need_check_timer(s);
        }
@@ -984,7 +1024,7 @@ static void qed_commit_l2_update(void *opaque, int ret)
    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
    assert(acb->request.l2_table != NULL);

-    qed_aio_next_io(opaque, ret);
+    qed_aio_next_io(acb, ret);
 }

 /**
@@ -1032,11 +1072,11 @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
    if (need_alloc) {
        /* Write out the whole new L2 table */
        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
-                            qed_aio_write_l1_update, acb);
+                           qed_aio_write_l1_update, acb);
    } else {
        /* Write out only the updated part of the L2 table */
        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
-                            qed_aio_next_io, acb);
+                           qed_aio_next_io_cb, acb);
    }
    return;

@@ -1088,7 +1128,7 @@ static void qed_aio_write_main(void *opaque, int ret)
    }

    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-        next_fn = qed_aio_next_io;
+        next_fn = qed_aio_next_io_cb;
    } else {
        if (s->bs->backing) {
            next_fn = qed_aio_write_flush_before_l2_update;
@@ -1201,7 +1241,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
    if (acb->flags & QED_AIOCB_ZERO) {
        /* Skip ahead if the clusters are already zero */
        if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-            qed_aio_next_io(acb, 0);
+            qed_aio_start_io(acb);
            return;
        }

@@ -1321,18 +1361,18 @@ static void qed_aio_read_data(void *opaque, int ret,
    /* Handle zero cluster and backing file reads */
    if (ret == QED_CLUSTER_ZERO) {
        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        qed_aio_next_io(acb, 0);
+        qed_aio_start_io(acb);
        return;
    } else if (ret != QED_CLUSTER_FOUND) {
        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              &acb->backing_qiov, qed_aio_next_io, acb);
+                              &acb->backing_qiov, qed_aio_next_io_cb, acb);
        return;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                   qed_aio_next_io, acb);
+                   qed_aio_next_io_cb, acb);
    return;

 err:
@@ -1342,9 +1382,8 @@ err:
 /**
 * Begin next I/O or complete the request
 */
-static void qed_aio_next_io(void *opaque, int ret)
+static void qed_aio_next_io(QEDAIOCB *acb, int ret)
 {
-    QEDAIOCB *acb = opaque;
    BDRVQEDState *s = acb_to_s(acb);
    QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
                                qed_aio_write_data : qed_aio_read_data;
@@ -1400,7 +1439,7 @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
    qemu_iovec_init(&acb->cur_qiov, qiov->niov);

    /* Start request */
-    qed_aio_next_io(acb, 0);
+    qed_aio_start_io(acb);
    return &acb->common;
 }

@@ -1436,7 +1475,7 @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
    cb->done = true;
    cb->ret = ret;
    if (cb->co) {
-        qemu_coroutine_enter(cb->co);
+        aio_co_wake(cb->co);
    }
 }

@@ -1603,7 +1642,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
    bdrv_qed_close(bs);

    memset(s, 0, sizeof(BDRVQEDState));
-    ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
+    ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        error_prepend(errp, "Could not reopen qed layer: ");
@@ -1666,6 +1705,7 @@ static BlockDriver bdrv_qed = {
    .bdrv_open                = bdrv_qed_open,
    .bdrv_close               = bdrv_qed_close,
    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
+    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_create              = bdrv_qed_create,
    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
--- a/block/qed.h
+++ b/block/qed.h
@@ -198,6 +198,9 @@ enum {
 */
 typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);

+void qed_acquire(BDRVQEDState *s);
+void qed_release(BDRVQEDState *s);
+
 /**
 * Generic callback for chaining async callbacks
 */
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,

    /* We can safely add the child now */
    bdrv_ref(child_bs);
-    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+
+    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
+    if (child == NULL) {
+        s->next_child_index--;
+        bdrv_unref(child_bs);
+        goto out;
+    }
    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
    s->children[s->num_children++] = child;

+out:
    bdrv_drained_end(bs);
 }

@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
    .bdrv_add_child                     = quorum_add_child,
    .bdrv_del_child                     = quorum_del_child,

+    .bdrv_child_perm                    = bdrv_filter_default_perms,
+
    .is_filter                          = true,
    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file->bs, offset);
+    return bdrv_truncate(bs->file, offset);
 }

 static int raw_media_changed(BlockDriverState *bs)
@@ -384,6 +384,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVRawState *s = bs->opaque;
    int ret;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    bs->sg = bs->file->bs->sg;
    bs->supported_write_flags = BDRV_REQ_FUA &
        bs->file->bs->supported_write_flags;
@@ -461,6 +467,7 @@ BlockDriver bdrv_raw = {
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
+    .bdrv_child_perm      = bdrv_filter_default_perms,
    .bdrv_create          = &raw_create,
    .bdrv_co_preadv       = &raw_co_preadv,
    .bdrv_co_pwritev      = &raw_co_pwritev,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,13 +13,14 @@

 #include "qemu/osdep.h"

+#include <rbd/librbd.h>
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
-
-#include <rbd/librbd.h>
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"

 /*
 * When specifying the image filename use:
@@ -55,13 +56,15 @@

 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)

-#define RBD_MAX_CONF_NAME_SIZE 128
-#define RBD_MAX_CONF_VAL_SIZE 512
-#define RBD_MAX_CONF_SIZE 1024
-#define RBD_MAX_POOL_NAME_SIZE 128
-#define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100

+/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
+#ifdef LIBRBD_SUPPORTS_IOVEC
+#define LIBRBD_USE_IOVEC 1
+#else
+#define LIBRBD_USE_IOVEC 0
+#endif
+
 typedef enum {
    RBD_AIO_READ,
    RBD_AIO_WRITE,
@@ -91,46 +94,29 @@ typedef struct BDRVRBDState {
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
+    char *name;
    char *snap;
 } BDRVRBDState;

-static int qemu_rbd_next_tok(char *dst, int dst_len,
-                             char *src, char delim,
-                             const char *name,
-                             char **p, Error **errp)
+static char *qemu_rbd_next_tok(char *src, char delim, char **p)
 {
-    int l;
    char *end;

    *p = NULL;

-    if (delim != '\0') {
-        for (end = src; *end; ++end) {
-            if (*end == delim) {
-                break;
-            }
-            if (*end == '\\' && end[1] != '\0') {
-                end++;
-            }
-        }
+    for (end = src; *end; ++end) {
        if (*end == delim) {
-            *p = end + 1;
-            *end = '\0';
+            break;
+        }
+        if (*end == '\\' && end[1] != '\0') {
+            end++;
        }
    }
-    l = strlen(src);
-    if (l >= dst_len) {
-        error_setg(errp, "%s too long", name);
-        return -EINVAL;
-    } else if (l == 0) {
-        error_setg(errp, "%s too short", name);
-        return -EINVAL;
+    if (*end == delim) {
+        *p = end + 1;
+        *end = '\0';
    }
-
-    pstrcpy(dst, dst_len, src);
-
-    return 0;
+    return src;
 }

 static void qemu_rbd_unescape(char *src)
@@ -146,87 +132,92 @@ static void qemu_rbd_unescape(char *src)
    *p = '\0';
 }

-static int qemu_rbd_parsename(const char *filename,
-                              char *pool, int pool_len,
-                              char *snap, int snap_len,
-                              char *name, int name_len,
-                              char *conf, int conf_len,
-                              Error **errp)
+static void qemu_rbd_parse_filename(const char *filename, QDict *options,
+                                    Error **errp)
 {
    const char *start;
    char *p, *buf;
-    int ret;
+    QList *keypairs = NULL;
+    char *found_str;

    if (!strstart(filename, "rbd:", &start)) {
        error_setg(errp, "File name must start with 'rbd:'");
-        return -EINVAL;
+        return;
    }

    buf = g_strdup(start);
    p = buf;
-    *snap = '\0';
-    *conf = '\0';

-    ret = qemu_rbd_next_tok(pool, pool_len, p,
-                            '/', "pool name", &p, errp);
-    if (ret < 0 || !p) {
-        ret = -EINVAL;
+    found_str = qemu_rbd_next_tok(p, '/', &p);
+    if (!p) {
+        error_setg(errp, "Pool name is required");
        goto done;
    }
-    qemu_rbd_unescape(pool);
+    qemu_rbd_unescape(found_str);
+    qdict_put(options, "pool", qstring_from_str(found_str));

    if (strchr(p, '@')) {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                '@', "object name", &p, errp);
-        if (ret < 0) {
-            goto done;
-        }
-        ret = qemu_rbd_next_tok(snap, snap_len, p,
-                                ':', "snap name", &p, errp);
-        qemu_rbd_unescape(snap);
+        found_str = qemu_rbd_next_tok(p, '@', &p);
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "image", qstring_from_str(found_str));
+
+        found_str = qemu_rbd_next_tok(p, ':', &p);
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "snapshot", qstring_from_str(found_str));
    } else {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                ':', "object name", &p, errp);
+        found_str = qemu_rbd_next_tok(p, ':', &p);
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "image", qstring_from_str(found_str));
    }
-    qemu_rbd_unescape(name);
-    if (ret < 0 || !p) {
+    if (!p) {
        goto done;
    }

-    ret = qemu_rbd_next_tok(conf, conf_len, p,
-                            '\0', "configuration", &p, errp);
+    /* The following are essentially all key/value pairs, and we treat
+     * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
+    while (p) {
+        char *name, *value;
+        name = qemu_rbd_next_tok(p, '=', &p);
+        if (!p) {
+            error_setg(errp, "conf option %s has no value", name);
+            break;
+        }
+
+        qemu_rbd_unescape(name);
+
+        value = qemu_rbd_next_tok(p, ':', &p);
+        qemu_rbd_unescape(value);
+
+        if (!strcmp(name, "conf")) {
+            qdict_put(options, "conf", qstring_from_str(value));
+        } else if (!strcmp(name, "id")) {
+            qdict_put(options, "user" , qstring_from_str(value));
+        } else {
+            /*
+             * We pass these internally to qemu_rbd_set_keypairs(), so
+             * we can get away with the simpler list of [ "key1",
+             * "value1", "key2", "value2" ] rather than a raw dict
+             * { "key1": "value1", "key2": "value2" } where we can't
+             * guarantee order, or even a more correct but complex
+             * [ { "key1": "value1" }, { "key2": "value2" } ]
+             */
+            if (!keypairs) {
+                keypairs = qlist_new();
+            }
+            qlist_append(keypairs, qstring_from_str(name));
+            qlist_append(keypairs, qstring_from_str(value));
+        }
+    }
+
+    if (keypairs) {
+        qdict_put(options, "=keyvalue-pairs",
+                  qobject_to_json(QOBJECT(keypairs)));
+    }

 done:
    g_free(buf);
-    return ret;
-}
-
-static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
-{
-    const char *p = conf;
-
-    while (*p) {
-        int len;
-        const char *end = strchr(p, ':');
-
-        if (end) {
-            len = end - p;
-        } else {
-            len = strlen(p);
-        }
-
-        if (strncmp(p, "id=", 3) == 0) {
-            len -= 3;
-            strncpy(clientname, p + 3, len);
-            clientname[len] = '\0';
-            return clientname;
-        }
-        if (end == NULL) {
-            break;
-        }
-        p = end + 1;
-    }
-    return NULL;
+    QDECREF(keypairs);
+    return;
 }


@@ -249,94 +240,125 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
    return 0;
 }

-
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
-                             bool only_read_conf_file,
-                             Error **errp)
+static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
+                                 Error **errp)
 {
-    char *p, *buf;
-    char name[RBD_MAX_CONF_NAME_SIZE];
-    char value[RBD_MAX_CONF_VAL_SIZE];
+    QList *keypairs;
+    QString *name;
+    QString *value;
+    const char *key;
+    size_t remaining;
    int ret = 0;

-    buf = g_strdup(conf);
-    p = buf;
+    if (!keypairs_json) {
+        return ret;
+    }
+    keypairs = qobject_to_qlist(qobject_from_json(keypairs_json,
+                                                  &error_abort));
+    remaining = qlist_size(keypairs) / 2;
+    assert(remaining);

-    while (p) {
-        ret = qemu_rbd_next_tok(name, sizeof(name), p,
-                                '=', "conf option name", &p, errp);
+    while (remaining--) {
+        name = qobject_to_qstring(qlist_pop(keypairs));
+        value = qobject_to_qstring(qlist_pop(keypairs));
+        assert(name && value);
+        key = qstring_get_str(name);
+
+        ret = rados_conf_set(cluster, key, qstring_get_str(value));
+        QDECREF(name);
+        QDECREF(value);
        if (ret < 0) {
-            break;
-        }
-        qemu_rbd_unescape(name);
-
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
+            error_setg_errno(errp, -ret, "invalid conf option %s", key);
            ret = -EINVAL;
            break;
        }
-
-        ret = qemu_rbd_next_tok(value, sizeof(value), p,
-                                ':', "conf option value", &p, errp);
-        if (ret < 0) {
-            break;
-        }
-        qemu_rbd_unescape(value);
-
-        if (strcmp(name, "conf") == 0) {
-            /* read the conf file alone, so it doesn't override more
-               specific settings for a particular device */
-            if (only_read_conf_file) {
-                ret = rados_conf_read_file(cluster, value);
-                if (ret < 0) {
-                    error_setg_errno(errp, -ret, "error reading conf file %s",
-                                     value);
-                    break;
-                }
-            }
-        } else if (strcmp(name, "id") == 0) {
-            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
-        } else if (!only_read_conf_file) {
-            ret = rados_conf_set(cluster, name, value);
-            if (ret < 0) {
-                error_setg_errno(errp, -ret, "invalid conf option %s", name);
-                ret = -EINVAL;
-                break;
-            }
-        }
    }

-    g_free(buf);
+    QDECREF(keypairs);
    return ret;
 }

+static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
+{
+    if (LIBRBD_USE_IOVEC) {
+        RBDAIOCB *acb = rcb->acb;
+        iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
+                   acb->qiov->size - offs);
+    } else {
+        memset(rcb->buf + offs, 0, rcb->size - offs);
+    }
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "rbd",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "pool",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados pool name",
+        },
+        {
+            .name = "image",
+            .type = QEMU_OPT_STRING,
+            .help = "Image name in the pool",
+        },
+        {
+            .name = "conf",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados config file location",
+        },
+        {
+            .name = "snapshot",
+            .type = QEMU_OPT_STRING,
+            .help = "Ceph snapshot name",
+        },
+        {
+            /* maps to 'id' in rados_create() */
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados id name",
+        },
+        /*
+         * server.* extracted manually, see qemu_rbd_mon_host()
+         */
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
+
+        /*
+         * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
+         */
+        {
+            /*
+             * HACK: name starts with '=' so that qemu_opts_parse()
+             * can't set it
+             */
+            .name = "=keyvalue-pairs",
+            .type = QEMU_OPT_STRING,
+            .help = "Legacy rados key/value option parameters",
+        },
+        { /* end of list */ }
+    },
+};
+
 static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 {
    Error *local_err = NULL;
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
+    const char *pool, *name, *conf, *clientname, *keypairs;
    const char *secretid;
    rados_t cluster;
    rados_ioctx_t io_ctx;
-    int ret;
+    QDict *options = NULL;
+    int ret = 0;

    secretid = qemu_opt_get(opts, "password-secret");

-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           name, sizeof(name),
-                           conf, sizeof(conf), &local_err) < 0) {
-        error_propagate(errp, local_err);
-        return -EINVAL;
-    }
-
    /* Read out options */
    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                     BDRV_SECTOR_SIZE);
@@ -344,35 +366,53 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
    if (objsize) {
        if ((objsize - 1) & objsize) {    /* not a power of 2? */
            error_setg(errp, "obj size needs to be power of 2");
-            return -EINVAL;
+            ret = -EINVAL;
+            goto exit;
        }
        if (objsize < 4096) {
            error_setg(errp, "obj size too small");
-            return -EINVAL;
+            ret = -EINVAL;
+            goto exit;
        }
        obj_order = ctz32(objsize);
    }

-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    options = qdict_new();
+    qemu_rbd_parse_filename(filename, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from -blockdev
+     * or blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
+    pool       = qdict_get_try_str(options, "pool");
+    conf       = qdict_get_try_str(options, "conf");
+    clientname = qdict_get_try_str(options, "user");
+    name       = qdict_get_try_str(options, "image");
+    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");
+
    ret = rados_create(&cluster, clientname);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "error initializing");
-        return ret;
+        goto exit;
    }

-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(cluster, NULL);
-    } else if (conf[0] != '\0' &&
-               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
-        error_propagate(errp, local_err);
+    /* try default location when conf=NULL, but ignore failure */
+    ret = rados_conf_read_file(cluster, conf);
+    if (conf && ret < 0) {
+        error_setg_errno(errp, -ret, "error reading conf file %s", conf);
        ret = -EIO;
        goto shutdown;
    }

-    if (conf[0] != '\0' &&
-        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
-        error_propagate(errp, local_err);
+    ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
+    if (ret < 0) {
        ret = -EIO;
        goto shutdown;
    }
@@ -403,6 +443,9 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)

 shutdown:
    rados_shutdown(cluster);
+
+exit:
+    QDECREF(options);
    return ret;
 }

@@ -426,11 +469,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
        }
    } else {
        if (r < 0) {
-            memset(rcb->buf, 0, rcb->size);
+            qemu_rbd_memset(rcb, 0);
            acb->ret = r;
            acb->error = 1;
        } else if (r < rcb->size) {
-            memset(rcb->buf + r, 0, rcb->size - r);
+            qemu_rbd_memset(rcb, r);
            if (!acb->error) {
                acb->ret = rcb->size;
            }
@@ -441,92 +484,122 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)

    g_free(rcb);

-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    if (!LIBRBD_USE_IOVEC) {
+        if (acb->cmd == RBD_AIO_READ) {
+            qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+        }
+        qemu_vfree(acb->bounce);
    }
-    qemu_vfree(acb->bounce);
+
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));

    qemu_aio_unref(acb);
 }

-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "rbd",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "Specification of the rbd image",
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-        { /* end of list */ }
-    },
-};
+static char *qemu_rbd_mon_host(QDict *options, Error **errp)
+{
+    const char **vals = g_new(const char *, qdict_size(options) + 1);
+    char keybuf[32];
+    const char *host, *port;
+    char *rados_str;
+    int i;
+
+    for (i = 0;; i++) {
+        sprintf(keybuf, "server.%d.host", i);
+        host = qdict_get_try_str(options, keybuf);
+        qdict_del(options, keybuf);
+        sprintf(keybuf, "server.%d.port", i);
+        port = qdict_get_try_str(options, keybuf);
+        qdict_del(options, keybuf);
+        if (!host && !port) {
+            break;
+        }
+        if (!host) {
+            error_setg(errp, "Parameter server.%d.host is missing", i);
+            rados_str = NULL;
+            goto out;
+        }
+
+        if (strchr(host, ':')) {
+            vals[i] = port ? g_strdup_printf("[%s]:%s", host, port)
+                : g_strdup_printf("[%s]", host);
+        } else {
+            vals[i] = port ? g_strdup_printf("%s:%s", host, port)
+                : g_strdup(host);
+        }
+    }
+    vals[i] = NULL;
+
+    rados_str = i ? g_strjoinv(";", (char **)vals) : NULL;
+out:
+    g_strfreev((char **)vals);
+    return rados_str;
+}

 static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                         Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
+    const char *pool, *snap, *conf, *clientname, *name, *keypairs;
    const char *secretid;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *filename;
+    char *mon_host = NULL;
    int r;

    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
-        qemu_opts_del(opts);
-        return -EINVAL;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-    secretid = qemu_opt_get(opts, "password-secret");
-
-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           s->name, sizeof(s->name),
-                           conf, sizeof(conf), errp) < 0) {
        r = -EINVAL;
        goto failed_opts;
    }

-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    mon_host = qemu_rbd_mon_host(options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        r = -EINVAL;
+        goto failed_opts;
+    }
+
+    secretid = qemu_opt_get(opts, "password-secret");
+
+    pool           = qemu_opt_get(opts, "pool");
+    conf           = qemu_opt_get(opts, "conf");
+    snap           = qemu_opt_get(opts, "snapshot");
+    clientname     = qemu_opt_get(opts, "user");
+    name           = qemu_opt_get(opts, "image");
+    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");
+
+    if (!pool || !name) {
+        error_setg(errp, "Parameters 'pool' and 'image' are required");
+        r = -EINVAL;
+        goto failed_opts;
+    }
+
    r = rados_create(&s->cluster, clientname);
    if (r < 0) {
        error_setg_errno(errp, -r, "error initializing");
        goto failed_opts;
    }

-    s->snap = NULL;
-    if (snap_buf[0] != '\0') {
-        s->snap = g_strdup(snap_buf);
+    s->snap = g_strdup(snap);
+    s->name = g_strdup(name);
+
+    /* try default location when conf=NULL, but ignore failure */
+    r = rados_conf_read_file(s->cluster, conf);
+    if (conf && r < 0) {
+        error_setg_errno(errp, -r, "error reading conf file %s", conf);
+        goto failed_shutdown;
    }

-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(s->cluster, NULL);
-    } else if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
+    r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
+    if (r < 0) {
+        goto failed_shutdown;
    }

-    if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+    if (mon_host) {
+        r = rados_conf_set(s->cluster, "mon_host", mon_host);
        if (r < 0) {
            goto failed_shutdown;
        }
@@ -578,8 +651,10 @@ failed_open:
 failed_shutdown:
    rados_shutdown(s->cluster);
    g_free(s->snap);
+    g_free(s->name);
 failed_opts:
    qemu_opts_del(opts);
+    g_free(mon_host);
    return r;
 }

@@ -590,6 +665,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
+    g_free(s->name);
    rados_shutdown(s->cluster);
 }

@@ -655,7 +731,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    RBDAIOCB *acb;
    RADOSCB *rcb = NULL;
    rbd_completion_t c;
-    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;
@@ -664,27 +739,29 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    acb->cmd = cmd;
    acb->qiov = qiov;
    assert(!qiov || qiov->size == size);
-    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-        acb->bounce = NULL;
-    } else {
-        acb->bounce = qemu_try_blockalign(bs, qiov->size);
-        if (acb->bounce == NULL) {
-            goto failed;
+
+    rcb = g_new(RADOSCB, 1);
+
+    if (!LIBRBD_USE_IOVEC) {
+        if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
+            acb->bounce = NULL;
+        } else {
+            acb->bounce = qemu_try_blockalign(bs, qiov->size);
+            if (acb->bounce == NULL) {
+                goto failed;
+            }
        }
+        if (cmd == RBD_AIO_WRITE) {
+            qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+        }
+        rcb->buf = acb->bounce;
    }
+
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;

-    if (cmd == RBD_AIO_WRITE) {
-        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-    }
-
-    buf = acb->bounce;
-
-    rcb = g_new(RADOSCB, 1);
    rcb->acb = acb;
-    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -694,10 +771,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

    switch (cmd) {
    case RBD_AIO_WRITE:
-        r = rbd_aio_write(s->image, off, size, buf, c);
+#ifdef LIBRBD_SUPPORTS_IOVEC
+            r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
+#else
+            r = rbd_aio_write(s->image, off, size, rcb->buf, c);
+#endif
        break;
    case RBD_AIO_READ:
-        r = rbd_aio_read(s->image, off, size, buf, c);
+#ifdef LIBRBD_SUPPORTS_IOVEC
+            r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+#else
+            r = rbd_aio_read(s->image, off, size, rcb->buf, c);
+#endif
        break;
    case RBD_AIO_DISCARD:
        r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -712,14 +797,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
    if (r < 0) {
        goto failed_completion;
    }
-
    return &acb->common;

 failed_completion:
    rbd_aio_release(c);
 failed:
    g_free(rcb);
-    qemu_vfree(acb->bounce);
+    if (!LIBRBD_USE_IOVEC) {
+        qemu_vfree(acb->bounce);
+    }
+
    qemu_aio_unref(acb);
    return NULL;
 }
@@ -972,18 +1059,18 @@ static QemuOptsList qemu_rbd_create_opts = {
 };

 static BlockDriver bdrv_rbd = {
-    .format_name        = "rbd",
-    .instance_size      = sizeof(BDRVRBDState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open     = qemu_rbd_open,
-    .bdrv_close         = qemu_rbd_close,
-    .bdrv_create        = qemu_rbd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_get_info      = qemu_rbd_getinfo,
-    .create_opts        = &qemu_rbd_create_opts,
-    .bdrv_getlength     = qemu_rbd_getlength,
-    .bdrv_truncate      = qemu_rbd_truncate,
-    .protocol_name      = "rbd",
+    .format_name            = "rbd",
+    .instance_size          = sizeof(BDRVRBDState),
+    .bdrv_parse_filename    = qemu_rbd_parse_filename,
+    .bdrv_file_open         = qemu_rbd_open,
+    .bdrv_close             = qemu_rbd_close,
+    .bdrv_create            = qemu_rbd_create,
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .bdrv_get_info          = qemu_rbd_getinfo,
+    .create_opts            = &qemu_rbd_create_opts,
+    .bdrv_getlength         = qemu_rbd_getlength,
+    .bdrv_truncate          = qemu_rbd_truncate,
+    .protocol_name          = "rbd",

    .bdrv_aio_readv         = qemu_rbd_aio_readv,
    .bdrv_aio_writev        = qemu_rbd_aio_writev,
--- a/block/replication.c
+++ b/block/replication.c
@@ -86,6 +86,12 @@ static int replication_open(BlockDriverState *bs, QDict *options,
    const char *mode;
    const char *top_id;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    ret = -EINVAL;
    opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -149,6 +155,18 @@ static void replication_close(BlockDriverState *bs)
    replication_remove(s->rs);
 }

+static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                   const BdrvChildRole *role,
+                                   uint64_t perm, uint64_t shared,
+                                   uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = *nshared = BLK_PERM_CONSISTENT_READ \
+                        | BLK_PERM_WRITE \
+                        | BLK_PERM_WRITE_UNCHANGED;
+
+    return;
+}
+
 static int64_t replication_getlength(BlockDriverState *bs)
 {
    return bdrv_getlength(bs->file->bs);
@@ -638,7 +656,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
        s->replication_state = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            replication_done, bs, errp, true);
+                            NULL, replication_done, bs, errp, true);
        break;
    default:
        aio_context_release(aio_context);
@@ -654,6 +672,7 @@ BlockDriver bdrv_replication = {

    .bdrv_open                  = replication_open,
    .bdrv_close                 = replication_close,
+    .bdrv_child_perm            = replication_child_perm,

    .bdrv_getlength             = replication_getlength,
    .bdrv_co_readv              = replication_co_readv,
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -601,7 +601,15 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp)
        goto out;
    }

-    iv = qobject_input_visitor_new(crumpled_addr, true);
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
+     * .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
+    iv = qobject_input_visitor_new(crumpled_addr);
    visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
    if (local_error) {
        error_propagate(errp, local_error);
@@ -889,10 +897,14 @@ static void restart_coroutine(void *opaque)

    DPRINTF("co=%p", co);

-    qemu_coroutine_enter(co);
+    aio_co_wake(co);
 }

-static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
+/* A non-blocking call returned EAGAIN, so yield, ensuring the
+ * handlers are set up so that we'll be rescheduled when there is an
+ * interesting event on the socket.
+ */
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
    int r;
    IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -912,25 +924,10 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
                       false, rd_handler, wr_handler, NULL, co);
-}
-
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
-                                          BlockDriverState *bs)
-{
-    DPRINTF("s->sock=%d", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, NULL, NULL, NULL, NULL);
-}
-
-/* A non-blocking call returned EAGAIN, so yield, ensuring the
- * handlers are set up so that we'll be rescheduled when there is an
- * interesting event on the socket.
- */
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
-{
-    set_fd_handler(s, bs);
    qemu_coroutine_yield();
-    clear_fd_handler(s, bs);
+    DPRINTF("s->sock=%d - back", s->sock);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
+                       NULL, NULL, NULL, NULL);
 }

 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
--- a/block/stream.c
+++ b/block/stream.c
@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
    StreamCompleteData *data = opaque;
    BlockDriverState *bs = blk_bs(job->blk);
    BlockDriverState *base = s->base;
+    Error *local_err = NULL;

    if (!block_job_is_cancelled(&s->common) && data->reached_end &&
        data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
            }
        }
        data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-        bdrv_set_backing_hd(bs, base);
+        bdrv_set_backing_hd(bs, base, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            data->ret = -EPERM;
+            goto out;
+        }
    }

+out:
    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_flags != bdrv_get_flags(bs)) {
+        /* Give up write permissions before making it read-only */
+        blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
        bdrv_reopen(bs, s->bs_flags, NULL);
    }

@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
    BlockDriverState *iter;
    int orig_bs_flags;

-    s = block_job_create(job_id, &stream_job_driver, bs, speed,
-                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
-    if (!s) {
-        return;
-    }
-
    /* Make sure that the image is opened in read-write mode */
    orig_bs_flags = bdrv_get_flags(bs);
    if (!(orig_bs_flags & BDRV_O_RDWR)) {
        if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
-            block_job_unref(&s->common);
            return;
        }
    }

-    /* Block all intermediate nodes between bs and base, because they
-     * will disappear from the chain after this operation */
+    /* Prevent concurrent jobs trying to modify the graph structure here, we
+     * already have our own plans. Also don't allow resize as the image size is
+     * queried only at the job start and then cached. */
+    s = block_job_create(job_id, &stream_job_driver, bs,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_GRAPH_MOD,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_WRITE,
+                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    if (!s) {
+        goto fail;
+    }
+
+    /* Block all intermediate nodes between bs and base, because they will
+     * disappear from the chain after this operation. The streaming job reads
+     * every block only once, assuming that it doesn't change, so block writes
+     * and resizes. */
    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
-        block_job_add_bdrv(&s->common, iter);
+        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
+                           &error_abort);
    }

    s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
    s->on_error = on_error;
    trace_stream_start(bs, base, s);
    block_job_start(&s->common);
+    return;
+
+fail:
+    if (orig_bs_flags != bdrv_get_flags(bs)) {
+        bdrv_reopen(bs, s->bs_flags, NULL);
+    }
 }
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -326,7 +326,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
    if (must_wait || blkp->pending_reqs[is_write]) {
        blkp->pending_reqs[is_write]++;
        qemu_mutex_unlock(&tg->lock);
-        qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
+        qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
        qemu_mutex_lock(&tg->lock);
        blkp->pending_reqs[is_write]--;
    }
@@ -416,7 +416,9 @@ static void timer_cb(BlockBackend *blk, bool is_write)
    qemu_mutex_unlock(&tg->lock);

    /* Run the request that was waiting for this timer */
+    aio_context_acquire(blk_get_aio_context(blk));
    empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
+    aio_context_release(blk_get_aio_context(blk));

    /* If the request queue was empty then we have to take care of
     * scheduling the next one */
--- a/block/trace-events
+++ b/block/trace-events
@@ -35,8 +35,6 @@ mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_n
 mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
 mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
 mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
-mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
-mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"

 # block/backup.c
 backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -363,6 +363,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    Error *local_err = NULL;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    logout("\n");

    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
@@ -757,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -885,6 +892,7 @@ static BlockDriver bdrv_vdi = {
    .bdrv_open = vdi_open,
    .bdrv_close = vdi_close,
    .bdrv_reopen_prepare = vdi_reopen_prepare,
+    .bdrv_child_perm          = bdrv_format_default_perms,
    .bdrv_create = vdi_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_get_block_status = vdi_co_get_block_status,
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file->bs, new_file_size);
+                bdrv_truncate(bs->file, new_file_size);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -898,6 +898,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
    uint64_t signature;
    Error *local_err = NULL;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    s->bat = NULL;
    s->first_visible_write = true;

@@ -1165,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
+    return bdrv_truncate(bs->file, *new_offset + s->block_size);
 }

 /*
@@ -1853,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -1977,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
    .bdrv_open              = vhdx_open,
    .bdrv_close             = vhdx_close,
    .bdrv_reopen_prepare    = vhdx_reopen_prepare,
+    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_co_readv          = vhdx_co_readv,
    .bdrv_co_writev         = vhdx_co_writev,
    .bdrv_create            = vhdx_create,
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -943,6 +943,12 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    uint32_t magic;
    Error *local_err = NULL;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    buf = vmdk_read_desc(bs->file, 0, errp);
    if (!buf) {
        return -EINVAL;
@@ -1361,8 +1367,8 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
            goto out;
        }

-        data->lba = offset >> BDRV_SECTOR_BITS;
-        data->size = buf_len;
+        data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
+        data->size = cpu_to_le32(buf_len);

        n_bytes = buf_len + sizeof(VmdkGrainMarker);
        iov = (struct iovec) {
@@ -1697,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -2065,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    new_blk = blk_new_open(filename, NULL, NULL,
-                           BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                           BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                           &local_err);
    if (new_blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -2353,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
    .bdrv_open                    = vmdk_open,
    .bdrv_check                   = vmdk_check,
    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
+    .bdrv_child_perm              = bdrv_format_default_perms,
    .bdrv_co_preadv               = vmdk_co_preadv,
    .bdrv_co_pwritev              = vmdk_co_pwritev,
    .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -220,6 +220,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
    int disk_type = VHD_DYNAMIC;
    int ret;

+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
+                               false, errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -909,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
@@ -1061,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
    .bdrv_open              = vpc_open,
    .bdrv_close             = vpc_close,
    .bdrv_reopen_prepare    = vpc_reopen_prepare,
+    .bdrv_child_perm        = bdrv_format_default_perms,
    .bdrv_create            = vpc_create,

    .bdrv_co_preadv             = vpc_co_preadv,
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1394,7 +1394,13 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
 	   return -1;
 	if (s->qcow) {
 	    int n;
-            if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
+            int ret;
+            ret = bdrv_is_allocated(s->qcow->bs, sector_num,
+                                    nb_sectors - i, &n);
+            if (ret < 0) {
+                return ret;
+            }
+            if (ret) {
                DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
                             (int)sector_num, n));
                if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
@@ -1668,7 +1674,8 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
    }
 }

-static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
+static inline bool cluster_was_modified(BDRVVVFATState *s,
+                                        uint32_t cluster_num)
 {
    int was_modified = 0;
    int i, dummy;
@@ -1683,7 +1690,13 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
                                         1, &dummy);
    }

-    return was_modified;
+    /*
+     * Note that this treats failures to learn allocation status the
+     * same as if an allocation has occurred.  It's as safe as
+     * anything else, given that a failure to learn allocation status
+     * will probably result in more failures.
+     */
+    return !!was_modified;
 }

 static const char* get_basename(const char* path)
@@ -1833,6 +1846,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
                    int res;

                    res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
+                    if (res < 0) {
+                        return -1;
+                    }
                    if (!res) {
                        res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
                        if (res) {
@@ -2968,6 +2984,7 @@ static void write_target_close(BlockDriverState *bs) {

 static BlockDriver vvfat_write_target = {
    .format_name        = "vvfat_write_target",
+    .instance_size      = sizeof(void*),
    .bdrv_co_pwritev    = write_target_commit,
    .bdrv_close         = write_target_close,
 };
@@ -3036,13 +3053,12 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    unlink(s->qcow_filename);
 #endif

-    backing = bdrv_new();
-    bdrv_set_backing_hd(s->bs, backing);
-    bdrv_unref(backing);
+    backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
+                                   &error_abort);
+    *(void**) backing->opaque = s;

-    s->bs->backing->bs->drv = &vvfat_write_target;
-    s->bs->backing->bs->opaque = g_new(void *, 1);
-    *(void**)s->bs->backing->bs->opaque = s;
+    bdrv_set_backing_hd(s->bs, backing, &error_abort);
+    bdrv_unref(backing);

    return 0;

@@ -3052,6 +3068,27 @@ err:
    return ret;
 }

+static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
+                             const BdrvChildRole *role,
+                             uint64_t perm, uint64_t shared,
+                             uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVVVFATState *s = bs->opaque;
+
+    assert(c == s->qcow || role == &child_backing);
+
+    if (c == s->qcow) {
+        /* This is a private node, nobody should try to attach to it */
+        *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+        *nshared = BLK_PERM_WRITE_UNCHANGED;
+    } else {
+        /* The backing file is there so 'commit' can use it. vvfat doesn't
+         * access it in any way. */
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+    }
+}
+
 static void vvfat_close(BlockDriverState *bs)
 {
    BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3114,7 @@ static BlockDriver bdrv_vvfat = {
    .bdrv_file_open         = vvfat_open,
    .bdrv_refresh_limits    = vvfat_refresh_limits,
    .bdrv_close             = vvfat_close,
+    .bdrv_child_perm        = vvfat_child_perm,

    .bdrv_co_preadv         = vvfat_co_preadv,
    .bdrv_co_pwritev        = vvfat_co_pwritev,
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -41,7 +41,7 @@ struct QEMUWin32AIOState {
    HANDLE hIOCP;
    EventNotifier e;
    int count;
-    bool is_aio_context_attached;
+    AioContext *aio_ctx;
 };

 typedef struct QEMUWin32AIOCB {
@@ -87,7 +87,6 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
        qemu_vfree(waiocb->buf);
    }

-
    waiocb->common.cb(waiocb->common.opaque, ret);
    qemu_aio_unref(waiocb);
 }
@@ -176,13 +175,13 @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *old_context)
 {
    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
-    aio->is_aio_context_attached = false;
+    aio->aio_ctx = NULL;
 }

 void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                  AioContext *new_context)
 {
-    aio->is_aio_context_attached = true;
+    aio->aio_ctx = new_context;
    aio_set_event_notifier(new_context, &aio->e, false,
                           win32_aio_completion_cb, NULL);
 }
@@ -212,7 +211,7 @@ out_free_state:

 void win32_aio_cleanup(QEMUWin32AIOState *aio)
 {
-    assert(!aio->is_aio_context_attached);
+    assert(!aio->aio_ctx);
    CloseHandle(aio->hIOCP);
    event_notifier_cleanup(&aio->e);
    g_free(aio);
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -16,7 +16,6 @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/sysemu.h"
 #include "qmp-commands.h"
-#include "trace.h"
 #include "block/nbd.h"
 #include "io/channel-socket.h"

@@ -125,6 +124,7 @@ void qmp_nbd_server_start(SocketAddress *addr,
            goto error;
        }

+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
        if (addr->type != SOCKET_ADDRESS_KIND_INET) {
            error_setg(errp, "TLS is only supported with IPv4/IPv6");
            goto error;
--- a/blockdev.c
+++ b/blockdev.c
@@ -48,10 +48,11 @@
 #include "sysemu/sysemu.h"
 #include "block/block_int.h"
 #include "qmp-commands.h"
-#include "trace.h"
+#include "block/trace.h"
 #include "sysemu/arch_init.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
+#include "qemu/throttle-options.h"

 static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -227,27 +228,30 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
    return NULL;
 }

-bool drive_check_orphaned(void)
+void drive_check_orphaned(void)
 {
    BlockBackend *blk;
    DriveInfo *dinfo;
-    bool rs = false;
+    Location loc;
+    bool orphans = false;

    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
        dinfo = blk_legacy_dinfo(blk);
-        /* If dinfo->bdrv->dev is NULL, it has no device attached. */
-        /* Unless this is a default drive, this may be an oversight. */
        if (!blk_get_attached_dev(blk) && !dinfo->is_default &&
            dinfo->type != IF_NONE) {
-            fprintf(stderr, "Warning: Orphaned drive without device: "
-                    "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
-                    blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
-                    if_name[dinfo->type], dinfo->bus, dinfo->unit);
-            rs = true;
+            loc_push_none(&loc);
+            qemu_opts_loc_restore(dinfo->opts);
+            error_report("machine type does not support"
+                         " if=%s,bus=%d,unit=%d",
+                         if_name[dinfo->type], dinfo->bus, dinfo->unit);
+            loc_pop(&loc);
+            orphans = true;
        }
    }

-    return rs;
+    if (orphans) {
+        exit(1);
+    }
 }

 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
@@ -554,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
    if ((!file || !*file) && !qdict_size(bs_opts)) {
        BlockBackendRootState *blk_rs;

-        blk = blk_new();
+        blk = blk_new(0, BLK_PERM_ALL);
        blk_rs = blk_get_root_state(blk);
        blk_rs->open_flags    = bdrv_flags;
        blk_rs->read_only     = read_only;
@@ -1610,6 +1614,7 @@ typedef struct ExternalSnapshotState {
    BlockDriverState *old_bs;
    BlockDriverState *new_bs;
    AioContext *aio_context;
+    bool overlay_appended;
 } ExternalSnapshotState;

 static void external_snapshot_prepare(BlkActionState *common,
@@ -1764,7 +1769,19 @@ static void external_snapshot_prepare(BlkActionState *common,

    if (!state->new_bs->drv->supports_backing) {
        error_setg(errp, "The snapshot does not support backing images");
+        return;
    }
+
+    /* This removes our old bs and adds the new bs. This is an operation that
+     * can fail, so we need to do it in .prepare; undoing it for abort is
+     * always possible. */
+    bdrv_ref(state->new_bs);
+    bdrv_append(state->new_bs, state->old_bs, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    state->overlay_appended = true;
 }

 static void external_snapshot_commit(BlkActionState *common)
@@ -1774,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common)

    bdrv_set_aio_context(state->new_bs, state->aio_context);

-    /* This removes our old bs and adds the new bs */
-    bdrv_append(state->new_bs, state->old_bs);
    /* We don't need (or want) to use the transactional
     * bdrv_reopen_multiple() across all the entries at once, because we
     * don't want to abort all of them if one of them fails the reopen */
@@ -1790,7 +1805,9 @@ static void external_snapshot_abort(BlkActionState *common)
    ExternalSnapshotState *state =
                             DO_UPCAST(ExternalSnapshotState, common, common);
    if (state->new_bs) {
-        bdrv_unref(state->new_bs);
+        if (state->overlay_appended) {
+            bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
+        }
    }
 }

@@ -1801,6 +1818,7 @@ static void external_snapshot_clean(BlkActionState *common)
    if (state->aio_context) {
        bdrv_drained_end(state->old_bs);
        aio_context_release(state->aio_context);
+        bdrv_unref(state->new_bs);
    }
 }

@@ -2029,7 +2047,9 @@ static void block_dirty_bitmap_clear_abort(BlkActionState *common)
    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
                                             common, common);

-    bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+    if (state->backup) {
+        bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+    }
 }

 static void block_dirty_bitmap_clear_commit(BlkActionState *common)
@@ -2307,7 +2327,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
    }

    if (!locked || force) {
-        blk_dev_change_media_cb(blk, false);
+        blk_dev_change_media_cb(blk, false, &error_abort);
    }

    if (locked && !force) {
@@ -2345,6 +2365,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
                             Error **errp)
 {
    BlockBackend *blk;
+    Error *local_err = NULL;

    device = has_device ? device : NULL;
    id = has_id ? id : NULL;
@@ -2368,7 +2389,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
        return;
    }

-    blk_dev_change_media_cb(blk, true);
+    blk_dev_change_media_cb(blk, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 }

 void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2421,7 +2446,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
         * called at all); therefore, the medium needs to be ejected here.
         * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
         * value passed here (i.e. false). */
-        blk_dev_change_media_cb(blk, false);
+        blk_dev_change_media_cb(blk, false, &error_abort);
    }

 out:
@@ -2431,7 +2456,9 @@ out:
 static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                            BlockDriverState *bs, Error **errp)
 {
+    Error *local_err = NULL;
    bool has_device;
+    int ret;

    /* For BBs without a device, we can exchange the BDS tree at will */
    has_device = blk_get_attached_dev(blk);
@@ -2451,7 +2478,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
        return;
    }

-    blk_insert_bs(blk, bs);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        return;
+    }

    if (!blk_dev_has_tray(blk)) {
        /* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2459,7 +2489,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
         * slot here.
         * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
         * value passed here (i.e. true). */
-        blk_dev_change_media_cb(blk, true);
+        blk_dev_change_media_cb(blk, true, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            blk_remove_bs(blk);
+            return;
+        }
    }
 }

@@ -2800,7 +2835,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)

    bs = bdrv_find_node(id);
    if (bs) {
-        qmp_x_blockdev_del(id, &local_err);
+        qmp_blockdev_del(id, &local_err);
        if (local_err) {
            error_report_err(local_err);
        }
@@ -2855,6 +2890,7 @@ void qmp_block_resize(bool has_device, const char *device,
                      int64_t size, Error **errp)
 {
    Error *local_err = NULL;
+    BlockBackend *blk = NULL;
    BlockDriverState *bs;
    AioContext *aio_context;
    int ret;
@@ -2885,10 +2921,16 @@ void qmp_block_resize(bool has_device, const char *device,
        goto out;
    }

+    blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        goto out;
+    }
+
    /* complete all in-flight operations before resizing the device */
    bdrv_drain_all();

-    ret = bdrv_truncate(bs, size);
+    ret = blk_truncate(blk, size);
    switch (ret) {
    case 0:
        break;
@@ -2910,6 +2952,7 @@ void qmp_block_resize(bool has_device, const char *device,
    }

 out:
+    blk_unref(blk);
    aio_context_release(aio_context);
 }

@@ -3005,6 +3048,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                      bool has_top, const char *top,
                      bool has_backing_file, const char *backing_file,
                      bool has_speed, int64_t speed,
+                      bool has_filter_node_name, const char *filter_node_name,
                      Error **errp)
 {
    BlockDriverState *bs;
@@ -3020,6 +3064,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
    if (!has_speed) {
        speed = 0;
    }
+    if (!has_filter_node_name) {
+        filter_node_name = NULL;
+    }

    /* Important Note:
     *  libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3094,8 +3141,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
            goto out;
        }
        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-                            BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
-                            &local_err, false);
+                            BLOCK_JOB_DEFAULT, speed, on_error,
+                            filter_node_name, NULL, NULL, &local_err, false);
    } else {
        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3103,7 +3150,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
        }
        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
                     on_error, has_backing_file ? backing_file : NULL,
-                     &local_err);
+                     filter_node_name, &local_err);
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
@@ -3339,6 +3386,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                   bool has_on_target_error,
                                   BlockdevOnError on_target_error,
                                   bool has_unmap, bool unmap,
+                                   bool has_filter_node_name,
+                                   const char *filter_node_name,
                                   Error **errp)
 {

@@ -3360,6 +3409,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
    if (!has_unmap) {
        unmap = true;
    }
+    if (!has_filter_node_name) {
+        filter_node_name = NULL;
+    }

    if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3389,7 +3441,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
    mirror_start(job_id, bs, target,
                 has_replaces ? replaces : NULL,
                 speed, granularity, buf_size, sync, backing_mode,
-                 on_source_error, on_target_error, unmap, errp);
+                 on_source_error, on_target_error, unmap, filter_node_name,
+                 errp);
 }

 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3527,6 +3580,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
                           arg->has_on_source_error, arg->on_source_error,
                           arg->has_on_target_error, arg->on_target_error,
                           arg->has_unmap, arg->unmap,
+                           false, NULL,
                           &local_err);
    bdrv_unref(target_bs);
    error_propagate(errp, local_err);
@@ -3545,6 +3599,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                         BlockdevOnError on_source_error,
                         bool has_on_target_error,
                         BlockdevOnError on_target_error,
+                         bool has_filter_node_name,
+                         const char *filter_node_name,
                         Error **errp)
 {
    BlockDriverState *bs;
@@ -3576,6 +3632,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                           has_on_source_error, on_source_error,
                           has_on_target_error, on_target_error,
                           true, true,
+                           has_filter_node_name, filter_node_name,
                           &local_err);
    error_propagate(errp, local_err);

@@ -3843,7 +3900,7 @@ fail:
    visit_free(v);
 }

-void qmp_x_blockdev_del(const char *node_name, Error **errp)
+void qmp_blockdev_del(const char *node_name, Error **errp)
 {
    AioContext *aio_context;
    BlockDriverState *bs;
@@ -3999,83 +4056,11 @@ QemuOptsList qemu_common_drive_opts = {
            .name = BDRV_OPT_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "open drive file as read-only",
-        },{
-            .name = "throttling.iops-total",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit total I/O operations per second",
-        },{
-            .name = "throttling.iops-read",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit read operations per second",
-        },{
-            .name = "throttling.iops-write",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit write operations per second",
-        },{
-            .name = "throttling.bps-total",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit total bytes per second",
-        },{
-            .name = "throttling.bps-read",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit read bytes per second",
-        },{
-            .name = "throttling.bps-write",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit write bytes per second",
-        },{
-            .name = "throttling.iops-total-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations burst",
-        },{
-            .name = "throttling.iops-read-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations read burst",
-        },{
-            .name = "throttling.iops-write-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations write burst",
-        },{
-            .name = "throttling.bps-total-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes burst",
-        },{
-            .name = "throttling.bps-read-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes read burst",
-        },{
-            .name = "throttling.bps-write-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes write burst",
-        },{
-            .name = "throttling.iops-total-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-total-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-read-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-read-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-write-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-write-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-total-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-total-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-read-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-read-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-write-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-write-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-size",
-            .type = QEMU_OPT_NUMBER,
-            .help = "when limiting by iops max size of an I/O in bytes",
-        },{
+        },
+
+        THROTTLE_OPTS,
+
+        {
            .name = "throttling.group",
            .type = QEMU_OPT_STRING,
            .help = "name of the block throttling group",
--- a/blockjob.c
+++ b/blockjob.c
@@ -25,7 +25,6 @@

 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "trace.h"
 #include "block/block.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
@@ -56,6 +55,36 @@ struct BlockJobTxn {

 static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);

+static char *child_job_get_parent_desc(BdrvChild *c)
+{
+    BlockJob *job = c->opaque;
+    return g_strdup_printf("%s job '%s'",
+                           BlockJobType_lookup[job->driver->job_type],
+                           job->id);
+}
+
+static const BdrvChildRole child_job = {
+    .get_parent_desc    = child_job_get_parent_desc,
+    .stay_at_node       = true,
+};
+
+static void block_job_drained_begin(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_pause(job);
+}
+
+static void block_job_drained_end(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_resume(job);
+}
+
+static const BlockDevOps block_job_dev_ops = {
+    .drained_begin = block_job_drained_begin,
+    .drained_end = block_job_drained_end,
+};
+
 BlockJob *block_job_next(BlockJob *job)
 {
    if (!job) {
@@ -116,19 +145,44 @@ static void block_job_detach_aio_context(void *opaque)
    block_job_unref(job);
 }

-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+void block_job_remove_all_bdrv(BlockJob *job)
 {
-    job->nodes = g_slist_prepend(job->nodes, bs);
+    GSList *l;
+    for (l = job->nodes; l; l = l->next) {
+        BdrvChild *c = l->data;
+        bdrv_op_unblock_all(c->bs, job->blocker);
+        bdrv_root_unref_child(c);
+    }
+    g_slist_free(job->nodes);
+    job->nodes = NULL;
+}
+
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+                       uint64_t perm, uint64_t shared_perm, Error **errp)
+{
+    BdrvChild *c;
+
+    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
+                               job, errp);
+    if (c == NULL) {
+        return -EPERM;
+    }
+
+    job->nodes = g_slist_prepend(job->nodes, c);
    bdrv_ref(bs);
    bdrv_op_block_all(bs, job->blocker);
+
+    return 0;
 }

 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockDriverState *bs, int64_t speed, int flags,
+                       BlockDriverState *bs, uint64_t perm,
+                       uint64_t shared_perm, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
    BlockBackend *blk;
    BlockJob *job;
+    int ret;

    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -160,15 +214,14 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
        }
    }

-    blk = blk_new();
-    blk_insert_bs(blk, bs);
+    blk = blk_new(perm, shared_perm);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        blk_unref(blk);
+        return NULL;
+    }

    job = g_malloc0(driver->instance_size);
-    error_setg(&job->blocker, "block device is in use by block job: %s",
-               BlockJobType_lookup[driver->job_type]);
-    block_job_add_bdrv(job, bs);
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
-
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->blk           = blk;
@@ -178,8 +231,15 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
    job->paused        = true;
    job->pause_count   = 1;
    job->refcnt        = 1;
+
+    error_setg(&job->blocker, "block device is in use by block job: %s",
+               BlockJobType_lookup[driver->job_type]);
+    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
    bs->job = job;

+    blk_set_dev_ops(blk, &block_job_dev_ops, job);
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
+
    QLIST_INSERT_HEAD(&block_jobs, job, job_list);

    blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
@@ -209,16 +269,28 @@ static bool block_job_started(BlockJob *job)
    return job->co;
 }

+/**
+ * All jobs must allow a pause point before entering their job proper. This
+ * ensures that jobs can be paused prior to being started, then resumed later.
+ */
+static void coroutine_fn block_job_co_entry(void *opaque)
+{
+    BlockJob *job = opaque;
+
+    assert(job && job->driver && job->driver->start);
+    block_job_pause_point(job);
+    job->driver->start(job);
+}
+
 void block_job_start(BlockJob *job)
 {
    assert(job && !block_job_started(job) && job->paused &&
-           !job->busy && job->driver->start);
-    job->co = qemu_coroutine_create(job->driver->start, job);
-    if (--job->pause_count == 0) {
-        job->paused = false;
-        job->busy = true;
-        qemu_coroutine_enter(job->co);
-    }
+           job->driver && job->driver->start);
+    job->co = qemu_coroutine_create(block_job_co_entry, job);
+    job->pause_count--;
+    job->busy = true;
+    job->paused = false;
+    qemu_coroutine_enter(job->co);
 }

 void block_job_ref(BlockJob *job)
@@ -229,15 +301,9 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
    if (--job->refcnt == 0) {
-        GSList *l;
        BlockDriverState *bs = blk_bs(job->blk);
        bs->job = NULL;
-        for (l = job->nodes; l; l = l->next) {
-            bs = l->data;
-            bdrv_op_unblock_all(bs, job->blocker);
-            bdrv_unref(bs);
-        }
-        g_slist_free(job->nodes);
+        block_job_remove_all_bdrv(job);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
@@ -720,12 +786,16 @@ static void block_job_defer_to_main_loop_bh(void *opaque)

    /* Fetch BDS AioContext again, in case it has changed */
    aio_context = blk_get_aio_context(data->job->blk);
-    aio_context_acquire(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_acquire(aio_context);
+    }

    data->job->deferred_to_main_loop = false;
    data->fn(data->job, data->opaque);

-    aio_context_release(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_release(aio_context);
+    }

    aio_context_release(data->aio_context);

--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -24,8 +24,7 @@

 //#define DEBUG_MMAP

-#if defined(CONFIG_USE_NPTL)
-pthread_mutex_t mmap_mutex;
+static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int __thread mmap_lock_count;

 void mmap_lock(void)
@@ -62,16 +61,6 @@ void mmap_fork_end(int child)
    else
        pthread_mutex_unlock(&mmap_mutex);
 }
-#else
-/* We aren't threadsafe to start with, so no need to worry about locking.  */
-void mmap_lock(void)
-{
-}
-
-void mmap_unlock(void)
-{
-}
-#endif

 /* NOTE: all the constants are the HOST ones, but addresses are target. */
 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
@@ -199,12 +188,7 @@ static int mmap_frag(abi_ulong real_start,
    return 0;
 }

-#if defined(__CYGWIN__)
-/* Cygwin doesn't have a whole lot of address space.  */
-static abi_ulong mmap_next_start = 0x18000000;
-#else
 static abi_ulong mmap_next_start = 0x40000000;
-#endif

 unsigned long last_brk;

--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -209,10 +209,8 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_addr);
 int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
-#if defined(CONFIG_USE_NPTL)
 void mmap_fork_start(void);
 void mmap_fork_end(int child);
-#endif

 /* main.c */
 extern unsigned long x86_stack_size;
--- a/chardev/Makefile.objs
+++ b/chardev/Makefile.objs
@@ -0,0 +1,17 @@
+chardev-obj-y += char.o
+chardev-obj-$(CONFIG_WIN32) += char-console.o
+chardev-obj-$(CONFIG_POSIX) += char-fd.o
+chardev-obj-y += char-file.o
+chardev-obj-y += char-io.o
+chardev-obj-y += char-mux.o
+chardev-obj-y += char-null.o
+chardev-obj-$(CONFIG_POSIX) += char-parallel.o
+chardev-obj-y += char-pipe.o
+chardev-obj-$(CONFIG_POSIX) += char-pty.o
+chardev-obj-y += char-ringbuf.o
+chardev-obj-y += char-serial.o
+chardev-obj-y += char-socket.o
+chardev-obj-y += char-stdio.o
+chardev-obj-y += char-udp.o
+chardev-obj-$(CONFIG_WIN32) += char-win.o
+chardev-obj-$(CONFIG_WIN32) += char-win-stdio.o
--- a/chardev/char-console.c
+++ b/chardev/char-console.c
@@ -0,0 +1,53 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "char-win.h"
+
+static void qemu_chr_open_win_con(Chardev *chr,
+                                  ChardevBackend *backend,
+                                  bool *be_opened,
+                                  Error **errp)
+{
+    qemu_chr_open_win_file(chr, GetStdHandle(STD_OUTPUT_HANDLE));
+}
+
+static void char_console_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = qemu_chr_open_win_con;
+}
+
+static const TypeInfo char_console_type_info = {
+    .name = TYPE_CHARDEV_CONSOLE,
+    .parent = TYPE_CHARDEV_WIN,
+    .class_init = char_console_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_console_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -0,0 +1,170 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "io/channel-file.h"
+
+#include "char-fd.h"
+#include "char-io.h"
+
+/* Called with chr_write_lock held.  */
+static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+
+    return io_channel_send(s->ioc_out, buf, len);
+}
+
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    FDChardev *s = FD_CHARDEV(opaque);
+    int len;
+    uint8_t buf[CHR_READ_BUF_LEN];
+    ssize_t ret;
+
+    len = sizeof(buf);
+    if (len > s->max_size) {
+        len = s->max_size;
+    }
+    if (len == 0) {
+        return TRUE;
+    }
+
+    ret = qio_channel_read(
+        chan, (gchar *)buf, len, NULL);
+    if (ret == 0) {
+        remove_fd_in_watch(chr, NULL);
+        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+        return FALSE;
+    }
+    if (ret > 0) {
+        qemu_chr_be_write(chr, buf, ret);
+    }
+
+    return TRUE;
+}
+
+static int fd_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    FDChardev *s = FD_CHARDEV(opaque);
+
+    s->max_size = qemu_chr_be_can_write(chr);
+    return s->max_size;
+}
+
+static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    return qio_channel_create_watch(s->ioc_out, cond);
+}
+
+static void fd_chr_update_read_handler(Chardev *chr,
+                                       GMainContext *context)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+
+    remove_fd_in_watch(chr, NULL);
+    if (s->ioc_in) {
+        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
+                                           fd_chr_read_poll,
+                                           fd_chr_read, chr,
+                                           context);
+    }
+}
+
+static void char_fd_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    FDChardev *s = FD_CHARDEV(obj);
+
+    remove_fd_in_watch(chr, NULL);
+    if (s->ioc_in) {
+        object_unref(OBJECT(s->ioc_in));
+    }
+    if (s->ioc_out) {
+        object_unref(OBJECT(s->ioc_out));
+    }
+
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+int qmp_chardev_open_file_source(char *src, int flags, Error **errp)
+{
+    int fd = -1;
+
+    TFR(fd = qemu_open(src, flags, 0666));
+    if (fd == -1) {
+        error_setg_file_open(errp, errno, src);
+    }
+    return fd;
+}
+
+/* open a character device to a unix fd */
+void qemu_chr_open_fd(Chardev *chr,
+                      int fd_in, int fd_out)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    char *name;
+
+    s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in));
+    name = g_strdup_printf("chardev-file-in-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name);
+    g_free(name);
+    s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out));
+    name = g_strdup_printf("chardev-file-out-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name);
+    g_free(name);
+    qemu_set_nonblock(fd_out);
+    s->chr = chr;
+}
+
+static void char_fd_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->chr_add_watch = fd_chr_add_watch;
+    cc->chr_write = fd_chr_write;
+    cc->chr_update_read_handler = fd_chr_update_read_handler;
+}
+
+static const TypeInfo char_fd_type_info = {
+    .name = TYPE_CHARDEV_FD,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(FDChardev),
+    .instance_finalize = char_fd_finalize,
+    .class_init = char_fd_class_init,
+    .abstract = true,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_fd_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-fd.h
+++ b/chardev/char-fd.h
@@ -0,0 +1,44 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_FD_H
+#define CHAR_FD_H
+
+#include "io/channel.h"
+#include "sysemu/char.h"
+
+typedef struct FDChardev {
+    Chardev parent;
+    Chardev *chr;
+    QIOChannel *ioc_in, *ioc_out;
+    int max_size;
+} FDChardev;
+
+#define TYPE_CHARDEV_FD "chardev-fd"
+
+#define FD_CHARDEV(obj) OBJECT_CHECK(FDChardev, (obj), TYPE_CHARDEV_FD)
+
+void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out);
+int qmp_chardev_open_file_source(char *src, int flags, Error **errp);
+
+#endif /* CHAR_FD_H */
--- a/chardev/char-file.c
+++ b/chardev/char-file.c
@@ -0,0 +1,139 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include "char-fd.h"
+#endif
+
+static void qmp_chardev_open_file(Chardev *chr,
+                                  ChardevBackend *backend,
+                                  bool *be_opened,
+                                  Error **errp)
+{
+    ChardevFile *file = backend->u.file.data;
+#ifdef _WIN32
+    HANDLE out;
+    DWORD accessmode;
+    DWORD flags;
+
+    if (file->has_in) {
+        error_setg(errp, "input file not supported");
+        return;
+    }
+
+    if (file->has_append && file->append) {
+        /* Append to file if it already exists. */
+        accessmode = FILE_GENERIC_WRITE & ~FILE_WRITE_DATA;
+        flags = OPEN_ALWAYS;
+    } else {
+        /* Truncate file if it already exists. */
+        accessmode = GENERIC_WRITE;
+        flags = CREATE_ALWAYS;
+    }
+
+    out = CreateFile(file->out, accessmode, FILE_SHARE_READ, NULL, flags,
+                     FILE_ATTRIBUTE_NORMAL, NULL);
+    if (out == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "open %s failed", file->out);
+        return;
+    }
+
+    qemu_chr_open_win_file(chr, out);
+#else
+    int flags, in = -1, out;
+
+    flags = O_WRONLY | O_CREAT | O_BINARY;
+    if (file->has_append && file->append) {
+        flags |= O_APPEND;
+    } else {
+        flags |= O_TRUNC;
+    }
+
+    out = qmp_chardev_open_file_source(file->out, flags, errp);
+    if (out < 0) {
+        return;
+    }
+
+    if (file->has_in) {
+        flags = O_RDONLY;
+        in = qmp_chardev_open_file_source(file->in, flags, errp);
+        if (in < 0) {
+            qemu_close(out);
+            return;
+        }
+    }
+
+    qemu_chr_open_fd(chr, in, out);
+#endif
+}
+
+static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend,
+                                    Error **errp)
+{
+    const char *path = qemu_opt_get(opts, "path");
+    ChardevFile *file;
+
+    backend->type = CHARDEV_BACKEND_KIND_FILE;
+    if (path == NULL) {
+        error_setg(errp, "chardev: file: no filename given");
+        return;
+    }
+    file = backend->u.file.data = g_new0(ChardevFile, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevFile_base(file));
+    file->out = g_strdup(path);
+
+    file->has_append = true;
+    file->append = qemu_opt_get_bool(opts, "append", false);
+}
+
+static void char_file_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_file_out;
+    cc->open = qmp_chardev_open_file;
+}
+
+static const TypeInfo char_file_type_info = {
+    .name = TYPE_CHARDEV_FILE,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .class_init = char_file_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_file_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -0,0 +1,192 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "char-io.h"
+
+typedef struct IOWatchPoll {
+    GSource parent;
+
+    QIOChannel *ioc;
+    GSource *src;
+
+    IOCanReadHandler *fd_can_read;
+    GSourceFunc fd_read;
+    void *opaque;
+    GMainContext *context;
+} IOWatchPoll;
+
+static IOWatchPoll *io_watch_poll_from_source(GSource *source)
+{
+    return container_of(source, IOWatchPoll, parent);
+}
+
+static gboolean io_watch_poll_prepare(GSource *source,
+                                      gint *timeout)
+{
+    IOWatchPoll *iwp = io_watch_poll_from_source(source);
+    bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
+    bool was_active = iwp->src != NULL;
+    if (was_active == now_active) {
+        return FALSE;
+    }
+
+    if (now_active) {
+        iwp->src = qio_channel_create_watch(
+            iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
+        g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
+        g_source_attach(iwp->src, iwp->context);
+    } else {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    return FALSE;
+}
+
+static gboolean io_watch_poll_check(GSource *source)
+{
+    return FALSE;
+}
+
+static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
+                                       gpointer user_data)
+{
+    abort();
+}
+
+static void io_watch_poll_finalize(GSource *source)
+{
+    /* Due to a glib bug, removing the last reference to a source
+     * inside a finalize callback causes recursive locking (and a
+     * deadlock).  This is not a problem inside other callbacks,
+     * including dispatch callbacks, so we call io_remove_watch_poll
+     * to remove this source.  At this point, iwp->src must
+     * be NULL, or we would leak it.
+     *
+     * This would be solved much more elegantly by child sources,
+     * but we support older glib versions that do not have them.
+     */
+    IOWatchPoll *iwp = io_watch_poll_from_source(source);
+    assert(iwp->src == NULL);
+}
+
+static GSourceFuncs io_watch_poll_funcs = {
+    .prepare = io_watch_poll_prepare,
+    .check = io_watch_poll_check,
+    .dispatch = io_watch_poll_dispatch,
+    .finalize = io_watch_poll_finalize,
+};
+
+guint io_add_watch_poll(Chardev *chr,
+                        QIOChannel *ioc,
+                        IOCanReadHandler *fd_can_read,
+                        QIOChannelFunc fd_read,
+                        gpointer user_data,
+                        GMainContext *context)
+{
+    IOWatchPoll *iwp;
+    int tag;
+    char *name;
+
+    iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
+                                       sizeof(IOWatchPoll));
+    iwp->fd_can_read = fd_can_read;
+    iwp->opaque = user_data;
+    iwp->ioc = ioc;
+    iwp->fd_read = (GSourceFunc) fd_read;
+    iwp->src = NULL;
+    iwp->context = context;
+
+    name = g_strdup_printf("chardev-iowatch-%s", chr->label);
+    g_source_set_name((GSource *)iwp, name);
+    g_free(name);
+
+    tag = g_source_attach(&iwp->parent, context);
+    g_source_unref(&iwp->parent);
+    return tag;
+}
+
+static void io_remove_watch_poll(guint tag, GMainContext *context)
+{
+    GSource *source;
+    IOWatchPoll *iwp;
+
+    g_return_if_fail(tag > 0);
+
+    source = g_main_context_find_source_by_id(context, tag);
+    g_return_if_fail(source != NULL);
+
+    iwp = io_watch_poll_from_source(source);
+    if (iwp->src) {
+        g_source_destroy(iwp->src);
+        g_source_unref(iwp->src);
+        iwp->src = NULL;
+    }
+    g_source_destroy(&iwp->parent);
+}
+
+void remove_fd_in_watch(Chardev *chr, GMainContext *context)
+{
+    if (chr->fd_in_tag) {
+        io_remove_watch_poll(chr->fd_in_tag, context);
+        chr->fd_in_tag = 0;
+    }
+}
+
+int io_channel_send_full(QIOChannel *ioc,
+                         const void *buf, size_t len,
+                         int *fds, size_t nfds)
+{
+    size_t offset = 0;
+
+    while (offset < len) {
+        ssize_t ret = 0;
+        struct iovec iov = { .iov_base = (char *)buf + offset,
+                             .iov_len = len - offset };
+
+        ret = qio_channel_writev_full(
+            ioc, &iov, 1,
+            fds, nfds, NULL);
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            if (offset) {
+                return offset;
+            }
+
+            errno = EAGAIN;
+            return -1;
+        } else if (ret < 0) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        offset += ret;
+    }
+
+    return offset;
+}
+
+int io_channel_send(QIOChannel *ioc, const void *buf, size_t len)
+{
+    return io_channel_send_full(ioc, buf, len, NULL, 0);
+}
--- a/chardev/char-io.h
+++ b/chardev/char-io.h
@@ -0,0 +1,46 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_IO_H
+#define CHAR_IO_H
+
+#include "qemu-common.h"
+#include "io/channel.h"
+#include "sysemu/char.h"
+
+/* Can only be used for read */
+guint io_add_watch_poll(Chardev *chr,
+                        QIOChannel *ioc,
+                        IOCanReadHandler *fd_can_read,
+                        QIOChannelFunc fd_read,
+                        gpointer user_data,
+                        GMainContext *context);
+
+void remove_fd_in_watch(Chardev *chr, GMainContext *context);
+
+int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);
+
+int io_channel_send_full(QIOChannel *ioc, const void *buf, size_t len,
+                         int *fds, size_t nfds);
+
+#endif /* CHAR_IO_H */
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "sysemu/block-backend.h"
+#include "char-mux.h"
+
+/* MUX driver for serial I/O splitting */
+
+/* Called with chr_write_lock held.  */
+static int mux_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+    int ret;
+    if (!d->timestamps) {
+        ret = qemu_chr_fe_write(&d->chr, buf, len);
+    } else {
+        int i;
+
+        ret = 0;
+        for (i = 0; i < len; i++) {
+            if (d->linestart) {
+                char buf1[64];
+                int64_t ti;
+                int secs;
+
+                ti = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+                if (d->timestamps_start == -1) {
+                    d->timestamps_start = ti;
+                }
+                ti -= d->timestamps_start;
+                secs = ti / 1000;
+                snprintf(buf1, sizeof(buf1),
+                         "[%02d:%02d:%02d.%03d] ",
+                         secs / 3600,
+                         (secs / 60) % 60,
+                         secs % 60,
+                         (int)(ti % 1000));
+                /* XXX this blocks entire thread. Rewrite to use
+                 * qemu_chr_fe_write and background I/O callbacks */
+                qemu_chr_fe_write_all(&d->chr,
+                                      (uint8_t *)buf1, strlen(buf1));
+                d->linestart = 0;
+            }
+            ret += qemu_chr_fe_write(&d->chr, buf + i, 1);
+            if (buf[i] == '\n') {
+                d->linestart = 1;
+            }
+        }
+    }
+    return ret;
+}
+
+static const char * const mux_help[] = {
+    "% h    print this help\n\r",
+    "% x    exit emulator\n\r",
+    "% s    save disk data back to file (if -snapshot)\n\r",
+    "% t    toggle console timestamps\n\r",
+    "% b    send break (magic sysrq)\n\r",
+    "% c    switch between console and monitor\n\r",
+    "% %  sends %\n\r",
+    NULL
+};
+
+int term_escape_char = 0x01; /* ctrl-a is used for escape */
+static void mux_print_help(Chardev *chr)
+{
+    int i, j;
+    char ebuf[15] = "Escape-Char";
+    char cbuf[50] = "\n\r";
+
+    if (term_escape_char > 0 && term_escape_char < 26) {
+        snprintf(cbuf, sizeof(cbuf), "\n\r");
+        snprintf(ebuf, sizeof(ebuf), "C-%c", term_escape_char - 1 + 'a');
+    } else {
+        snprintf(cbuf, sizeof(cbuf),
+                 "\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r",
+                 term_escape_char);
+    }
+    /* XXX this blocks entire thread. Rewrite to use
+     * qemu_chr_fe_write and background I/O callbacks */
+    qemu_chr_write_all(chr, (uint8_t *)cbuf, strlen(cbuf));
+    for (i = 0; mux_help[i] != NULL; i++) {
+        for (j = 0; mux_help[i][j] != '\0'; j++) {
+            if (mux_help[i][j] == '%') {
+                qemu_chr_write_all(chr, (uint8_t *)ebuf, strlen(ebuf));
+            } else {
+                qemu_chr_write_all(chr, (uint8_t *)&mux_help[i][j], 1);
+            }
+        }
+    }
+}
+
+void mux_chr_send_event(MuxChardev *d, int mux_nr, int event)
+{
+    CharBackend *be = d->backends[mux_nr];
+
+    if (be && be->chr_event) {
+        be->chr_event(be->opaque, event);
+    }
+}
+
+static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch)
+{
+    if (d->term_got_escape) {
+        d->term_got_escape = 0;
+        if (ch == term_escape_char) {
+            goto send_char;
+        }
+        switch (ch) {
+        case '?':
+        case 'h':
+            mux_print_help(chr);
+            break;
+        case 'x':
+            {
+                 const char *term =  "QEMU: Terminated\n\r";
+                 qemu_chr_write_all(chr, (uint8_t *)term, strlen(term));
+                 exit(0);
+                 break;
+            }
+        case 's':
+            blk_commit_all();
+            break;
+        case 'b':
+            qemu_chr_be_event(chr, CHR_EVENT_BREAK);
+            break;
+        case 'c':
+            assert(d->mux_cnt > 0); /* handler registered with first fe */
+            /* Switch to the next registered device */
+            mux_set_focus(chr, (d->focus + 1) % d->mux_cnt);
+            break;
+        case 't':
+            d->timestamps = !d->timestamps;
+            d->timestamps_start = -1;
+            d->linestart = 0;
+            break;
+        }
+    } else if (ch == term_escape_char) {
+        d->term_got_escape = 1;
+    } else {
+    send_char:
+        return 1;
+    }
+    return 0;
+}
+
+static void mux_chr_accept_input(Chardev *chr)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+    int m = d->focus;
+    CharBackend *be = d->backends[m];
+
+    while (be && d->prod[m] != d->cons[m] &&
+           be->chr_can_read && be->chr_can_read(be->opaque)) {
+        be->chr_read(be->opaque,
+                     &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
+    }
+}
+
+static int mux_chr_can_read(void *opaque)
+{
+    MuxChardev *d = MUX_CHARDEV(opaque);
+    int m = d->focus;
+    CharBackend *be = d->backends[m];
+
+    if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE) {
+        return 1;
+    }
+
+    if (be && be->chr_can_read) {
+        return be->chr_can_read(be->opaque);
+    }
+
+    return 0;
+}
+
+static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    Chardev *chr = CHARDEV(opaque);
+    MuxChardev *d = MUX_CHARDEV(opaque);
+    int m = d->focus;
+    CharBackend *be = d->backends[m];
+    int i;
+
+    mux_chr_accept_input(opaque);
+
+    for (i = 0; i < size; i++)
+        if (mux_proc_byte(chr, d, buf[i])) {
+            if (d->prod[m] == d->cons[m] &&
+                be && be->chr_can_read &&
+                be->chr_can_read(be->opaque)) {
+                be->chr_read(be->opaque, &buf[i], 1);
+            } else {
+                d->buffer[m][d->prod[m]++ & MUX_BUFFER_MASK] = buf[i];
+            }
+        }
+}
+
+bool muxes_realized;
+
+static void mux_chr_event(void *opaque, int event)
+{
+    MuxChardev *d = MUX_CHARDEV(opaque);
+    int i;
+
+    if (!muxes_realized) {
+        return;
+    }
+
+    /* Send the event to all registered listeners */
+    for (i = 0; i < d->mux_cnt; i++) {
+        mux_chr_send_event(d, i, event);
+    }
+}
+
+static GSource *mux_chr_add_watch(Chardev *s, GIOCondition cond)
+{
+    MuxChardev *d = MUX_CHARDEV(s);
+    Chardev *chr = qemu_chr_fe_get_driver(&d->chr);
+    ChardevClass *cc = CHARDEV_GET_CLASS(chr);
+
+    if (!cc->chr_add_watch) {
+        return NULL;
+    }
+
+    return cc->chr_add_watch(chr, cond);
+}
+
+static void char_mux_finalize(Object *obj)
+{
+    MuxChardev *d = MUX_CHARDEV(obj);
+    int i;
+
+    for (i = 0; i < d->mux_cnt; i++) {
+        CharBackend *be = d->backends[i];
+        if (be) {
+            be->chr = NULL;
+        }
+    }
+    qemu_chr_fe_deinit(&d->chr);
+}
+
+void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+
+    /* Fix up the real driver with mux routines */
+    qemu_chr_fe_set_handlers(&d->chr,
+                             mux_chr_can_read,
+                             mux_chr_read,
+                             mux_chr_event,
+                             chr,
+                             context, true);
+}
+
+void mux_set_focus(Chardev *chr, int focus)
+{
+    MuxChardev *d = MUX_CHARDEV(chr);
+
+    assert(focus >= 0);
+    assert(focus < d->mux_cnt);
+
+    if (d->focus != -1) {
+        mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
+    }
+
+    d->focus = focus;
+    mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
+}
+
+static void qemu_chr_open_mux(Chardev *chr,
+                              ChardevBackend *backend,
+                              bool *be_opened,
+                              Error **errp)
+{
+    ChardevMux *mux = backend->u.mux.data;
+    Chardev *drv;
+    MuxChardev *d = MUX_CHARDEV(chr);
+
+    drv = qemu_chr_find(mux->chardev);
+    if (drv == NULL) {
+        error_setg(errp, "mux: base chardev %s not found", mux->chardev);
+        return;
+    }
+
+    d->focus = -1;
+    /* only default to opened state if we've realized the initial
+     * set of muxes
+     */
+    *be_opened = muxes_realized;
+    qemu_chr_fe_init(&d->chr, drv, errp);
+}
+
+static void qemu_chr_parse_mux(QemuOpts *opts, ChardevBackend *backend,
+                               Error **errp)
+{
+    const char *chardev = qemu_opt_get(opts, "chardev");
+    ChardevMux *mux;
+
+    if (chardev == NULL) {
+        error_setg(errp, "chardev: mux: no chardev given");
+        return;
+    }
+    backend->type = CHARDEV_BACKEND_KIND_MUX;
+    mux = backend->u.mux.data = g_new0(ChardevMux, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevMux_base(mux));
+    mux->chardev = g_strdup(chardev);
+}
+
+static void char_mux_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_mux;
+    cc->open = qemu_chr_open_mux;
+    cc->chr_write = mux_chr_write;
+    cc->chr_accept_input = mux_chr_accept_input;
+    cc->chr_add_watch = mux_chr_add_watch;
+}
+
+static const TypeInfo char_mux_type_info = {
+    .name = TYPE_CHARDEV_MUX,
+    .parent = TYPE_CHARDEV,
+    .class_init = char_mux_class_init,
+    .instance_size = sizeof(MuxChardev),
+    .instance_finalize = char_mux_finalize,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_mux_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-mux.h
+++ b/chardev/char-mux.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_MUX_H
+#define CHAR_MUX_H
+
+#include "sysemu/char.h"
+
+extern bool muxes_realized;
+
+#define MAX_MUX 4
+#define MUX_BUFFER_SIZE 32 /* Must be a power of 2.  */
+#define MUX_BUFFER_MASK (MUX_BUFFER_SIZE - 1)
+typedef struct MuxChardev {
+    Chardev parent;
+    CharBackend *backends[MAX_MUX];
+    CharBackend chr;
+    int focus;
+    int mux_cnt;
+    int term_got_escape;
+    int max_size;
+    /* Intermediate input buffer catches escape sequences even if the
+       currently active device is not accepting any input - but only until it
+       is full as well. */
+    unsigned char buffer[MAX_MUX][MUX_BUFFER_SIZE];
+    int prod[MAX_MUX];
+    int cons[MAX_MUX];
+    int timestamps;
+
+    /* Protected by the Chardev chr_write_lock.  */
+    int linestart;
+    int64_t timestamps_start;
+} MuxChardev;
+
+#define MUX_CHARDEV(obj) OBJECT_CHECK(MuxChardev, (obj), TYPE_CHARDEV_MUX)
+#define CHARDEV_IS_MUX(chr)                             \
+    object_dynamic_cast(OBJECT(chr), TYPE_CHARDEV_MUX)
+
+void mux_chr_set_handlers(Chardev *chr, GMainContext *context);
+void mux_set_focus(Chardev *chr, int focus);
+void mux_chr_send_event(MuxChardev *d, int mux_nr, int event);
+
+#endif /* CHAR_MUX_H */
--- a/chardev/char-null.c
+++ b/chardev/char-null.c
@@ -0,0 +1,54 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+
+static void null_chr_open(Chardev *chr,
+                          ChardevBackend *backend,
+                          bool *be_opened,
+                          Error **errp)
+{
+    *be_opened = false;
+}
+
+static void char_null_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = null_chr_open;
+}
+
+static const TypeInfo char_null_type_info = {
+    .name = TYPE_CHARDEV_NULL,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(Chardev),
+    .class_init = char_null_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_null_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-parallel.c
+++ b/chardev/char-parallel.c
@@ -0,0 +1,316 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "qapi/error.h"
+#include <sys/ioctl.h>
+
+#ifdef CONFIG_BSD
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <dev/ppbus/ppi.h>
+#include <dev/ppbus/ppbconf.h>
+#elif defined(__DragonFly__)
+#include <dev/misc/ppi/ppi.h>
+#include <bus/ppbus/ppbconf.h>
+#endif
+#else
+#ifdef __linux__
+#include <linux/ppdev.h>
+#include <linux/parport.h>
+#endif
+#endif
+
+#include "char-fd.h"
+#include "char-parallel.h"
+
+#if defined(__linux__)
+
+typedef struct {
+    Chardev parent;
+    int fd;
+    int mode;
+} ParallelChardev;
+
+#define PARALLEL_CHARDEV(obj) \
+    OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
+
+static int pp_hw_mode(ParallelChardev *s, uint16_t mode)
+{
+    if (s->mode != mode) {
+        int m = mode;
+        if (ioctl(s->fd, PPSETMODE, &m) < 0) {
+            return 0;
+        }
+        s->mode = mode;
+    }
+    return 1;
+}
+
+static int pp_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+    int fd = drv->fd;
+    uint8_t b;
+
+    switch (cmd) {
+    case CHR_IOCTL_PP_READ_DATA:
+        if (ioctl(fd, PPRDATA, &b) < 0) {
+            return -ENOTSUP;
+        }
+        *(uint8_t *)arg = b;
+        break;
+    case CHR_IOCTL_PP_WRITE_DATA:
+        b = *(uint8_t *)arg;
+        if (ioctl(fd, PPWDATA, &b) < 0) {
+            return -ENOTSUP;
+        }
+        break;
+    case CHR_IOCTL_PP_READ_CONTROL:
+        if (ioctl(fd, PPRCONTROL, &b) < 0) {
+            return -ENOTSUP;
+        }
+        /* Linux gives only the lowest bits, and no way to know data
+           direction! For better compatibility set the fixed upper
+           bits. */
+        *(uint8_t *)arg = b | 0xc0;
+        break;
+    case CHR_IOCTL_PP_WRITE_CONTROL:
+        b = *(uint8_t *)arg;
+        if (ioctl(fd, PPWCONTROL, &b) < 0) {
+            return -ENOTSUP;
+        }
+        break;
+    case CHR_IOCTL_PP_READ_STATUS:
+        if (ioctl(fd, PPRSTATUS, &b) < 0) {
+            return -ENOTSUP;
+        }
+        *(uint8_t *)arg = b;
+        break;
+    case CHR_IOCTL_PP_DATA_DIR:
+        if (ioctl(fd, PPDATADIR, (int *)arg) < 0) {
+            return -ENOTSUP;
+        }
+        break;
+    case CHR_IOCTL_PP_EPP_READ_ADDR:
+        if (pp_hw_mode(drv, IEEE1284_MODE_EPP | IEEE1284_ADDR)) {
+            struct ParallelIOArg *parg = arg;
+            int n = read(fd, parg->buffer, parg->count);
+            if (n != parg->count) {
+                return -EIO;
+            }
+        }
+        break;
+    case CHR_IOCTL_PP_EPP_READ:
+        if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
+            struct ParallelIOArg *parg = arg;
+            int n = read(fd, parg->buffer, parg->count);
+            if (n != parg->count) {
+                return -EIO;
+            }
+        }
+        break;
+    case CHR_IOCTL_PP_EPP_WRITE_ADDR:
+        if (pp_hw_mode(drv, IEEE1284_MODE_EPP | IEEE1284_ADDR)) {
+            struct ParallelIOArg *parg = arg;
+            int n = write(fd, parg->buffer, parg->count);
+            if (n != parg->count) {
+                return -EIO;
+            }
+        }
+        break;
+    case CHR_IOCTL_PP_EPP_WRITE:
+        if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
+            struct ParallelIOArg *parg = arg;
+            int n = write(fd, parg->buffer, parg->count);
+            if (n != parg->count) {
+                return -EIO;
+            }
+        }
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+static void qemu_chr_open_pp_fd(Chardev *chr,
+                                int fd,
+                                bool *be_opened,
+                                Error **errp)
+{
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+
+    if (ioctl(fd, PPCLAIM) < 0) {
+        error_setg_errno(errp, errno, "not a parallel port");
+        close(fd);
+        return;
+    }
+
+    drv->fd = fd;
+    drv->mode = IEEE1284_MODE_COMPAT;
+}
+#endif /* __linux__ */
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+
+typedef struct {
+    Chardev parent;
+    int fd;
+} ParallelChardev;
+
+#define PARALLEL_CHARDEV(obj)                                   \
+    OBJECT_CHECK(ParallelChardev, (obj), TYPE_CHARDEV_PARALLEL)
+
+static int pp_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+    uint8_t b;
+
+    switch (cmd) {
+    case CHR_IOCTL_PP_READ_DATA:
+        if (ioctl(drv->fd, PPIGDATA, &b) < 0) {
+            return -ENOTSUP;
+        }
+        *(uint8_t *)arg = b;
+        break;
+    case CHR_IOCTL_PP_WRITE_DATA:
+        b = *(uint8_t *)arg;
+        if (ioctl(drv->fd, PPISDATA, &b) < 0) {
+            return -ENOTSUP;
+        }
+        break;
+    case CHR_IOCTL_PP_READ_CONTROL:
+        if (ioctl(drv->fd, PPIGCTRL, &b) < 0) {
+            return -ENOTSUP;
+        }
+        *(uint8_t *)arg = b;
+        break;
+    case CHR_IOCTL_PP_WRITE_CONTROL:
+        b = *(uint8_t *)arg;
+        if (ioctl(drv->fd, PPISCTRL, &b) < 0) {
+            return -ENOTSUP;
+        }
+        break;
+    case CHR_IOCTL_PP_READ_STATUS:
+        if (ioctl(drv->fd, PPIGSTATUS, &b) < 0) {
+            return -ENOTSUP;
+        }
+        *(uint8_t *)arg = b;
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+static void qemu_chr_open_pp_fd(Chardev *chr,
+                                int fd,
+                                bool *be_opened,
+                                Error **errp)
+{
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+    drv->fd = fd;
+    *be_opened = false;
+}
+#endif
+
+#ifdef HAVE_CHARDEV_PARPORT
+static void qmp_chardev_open_parallel(Chardev *chr,
+                                      ChardevBackend *backend,
+                                      bool *be_opened,
+                                      Error **errp)
+{
+    ChardevHostdev *parallel = backend->u.parallel.data;
+    int fd;
+
+    fd = qmp_chardev_open_file_source(parallel->device, O_RDWR, errp);
+    if (fd < 0) {
+        return;
+    }
+    qemu_chr_open_pp_fd(chr, fd, be_opened, errp);
+}
+
+static void qemu_chr_parse_parallel(QemuOpts *opts, ChardevBackend *backend,
+                                    Error **errp)
+{
+    const char *device = qemu_opt_get(opts, "path");
+    ChardevHostdev *parallel;
+
+    if (device == NULL) {
+        error_setg(errp, "chardev: parallel: no device path given");
+        return;
+    }
+    backend->type = CHARDEV_BACKEND_KIND_PARALLEL;
+    parallel = backend->u.parallel.data = g_new0(ChardevHostdev, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(parallel));
+    parallel->device = g_strdup(device);
+}
+
+static void char_parallel_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_parallel;
+    cc->open = qmp_chardev_open_parallel;
+#if defined(__linux__)
+    cc->chr_ioctl = pp_ioctl;
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+    defined(__DragonFly__)
+    cc->chr_ioctl = pp_ioctl;
+#endif
+}
+
+static void char_parallel_finalize(Object *obj)
+{
+#if defined(__linux__)
+    Chardev *chr = CHARDEV(obj);
+    ParallelChardev *drv = PARALLEL_CHARDEV(chr);
+    int fd = drv->fd;
+
+    pp_hw_mode(drv, IEEE1284_MODE_COMPAT);
+    ioctl(fd, PPRELEASE);
+    close(fd);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+    defined(__DragonFly__)
+    /* FIXME: close fd? */
+#endif
+}
+
+static const TypeInfo char_parallel_type_info = {
+    .name = TYPE_CHARDEV_PARALLEL,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(ParallelChardev),
+    .instance_finalize = char_parallel_finalize,
+    .class_init = char_parallel_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_parallel_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-parallel.h
+++ b/chardev/char-parallel.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_PARALLEL_H
+#define CHAR_PARALLEL_H
+
+#if defined(__linux__) || defined(__FreeBSD__) || \
+    defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#define HAVE_CHARDEV_PARPORT 1
+#endif
+
+#endif /* CHAR_PARALLEL_H */
--- a/chardev/char-pipe.c
+++ b/chardev/char-pipe.c
@@ -0,0 +1,191 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include "char-fd.h"
+#endif
+
+#ifdef _WIN32
+#define MAXCONNECT 1
+#define NTIMEOUT 5000
+
+static int win_chr_pipe_init(Chardev *chr, const char *filename,
+                             Error **errp)
+{
+    WinChardev *s = WIN_CHARDEV(chr);
+    OVERLAPPED ov;
+    int ret;
+    DWORD size;
+    char *openname;
+
+    s->fpipe = TRUE;
+
+    s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (!s->hsend) {
+        error_setg(errp, "Failed CreateEvent");
+        goto fail;
+    }
+    s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (!s->hrecv) {
+        error_setg(errp, "Failed CreateEvent");
+        goto fail;
+    }
+
+    openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
+    s->hcom = CreateNamedPipe(openname,
+                              PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
+                              PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
+                              PIPE_WAIT,
+                              MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
+    g_free(openname);
+    if (s->hcom == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
+        s->hcom = NULL;
+        goto fail;
+    }
+
+    ZeroMemory(&ov, sizeof(ov));
+    ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+    ret = ConnectNamedPipe(s->hcom, &ov);
+    if (ret) {
+        error_setg(errp, "Failed ConnectNamedPipe");
+        goto fail;
+    }
+
+    ret = GetOverlappedResult(s->hcom, &ov, &size, TRUE);
+    if (!ret) {
+        error_setg(errp, "Failed GetOverlappedResult");
+        if (ov.hEvent) {
+            CloseHandle(ov.hEvent);
+            ov.hEvent = NULL;
+        }
+        goto fail;
+    }
+
+    if (ov.hEvent) {
+        CloseHandle(ov.hEvent);
+        ov.hEvent = NULL;
+    }
+    qemu_add_polling_cb(win_chr_pipe_poll, chr);
+    return 0;
+
+ fail:
+    return -1;
+}
+
+static void qemu_chr_open_pipe(Chardev *chr,
+                               ChardevBackend *backend,
+                               bool *be_opened,
+                               Error **errp)
+{
+    ChardevHostdev *opts = backend->u.pipe.data;
+    const char *filename = opts->device;
+
+    if (win_chr_pipe_init(chr, filename, errp) < 0) {
+        return;
+    }
+}
+
+#else
+
+static void qemu_chr_open_pipe(Chardev *chr,
+                               ChardevBackend *backend,
+                               bool *be_opened,
+                               Error **errp)
+{
+    ChardevHostdev *opts = backend->u.pipe.data;
+    int fd_in, fd_out;
+    char *filename_in;
+    char *filename_out;
+    const char *filename = opts->device;
+
+    filename_in = g_strdup_printf("%s.in", filename);
+    filename_out = g_strdup_printf("%s.out", filename);
+    TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY));
+    TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY));
+    g_free(filename_in);
+    g_free(filename_out);
+    if (fd_in < 0 || fd_out < 0) {
+        if (fd_in >= 0) {
+            close(fd_in);
+        }
+        if (fd_out >= 0) {
+            close(fd_out);
+        }
+        TFR(fd_in = fd_out = qemu_open(filename, O_RDWR | O_BINARY));
+        if (fd_in < 0) {
+            error_setg_file_open(errp, errno, filename);
+            return;
+        }
+    }
+    qemu_chr_open_fd(chr, fd_in, fd_out);
+}
+
+#endif /* !_WIN32 */
+
+static void qemu_chr_parse_pipe(QemuOpts *opts, ChardevBackend *backend,
+                                Error **errp)
+{
+    const char *device = qemu_opt_get(opts, "path");
+    ChardevHostdev *dev;
+
+    if (device == NULL) {
+        error_setg(errp, "chardev: pipe: no device path given");
+        return;
+    }
+    backend->type = CHARDEV_BACKEND_KIND_PIPE;
+    dev = backend->u.pipe.data = g_new0(ChardevHostdev, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(dev));
+    dev->device = g_strdup(device);
+}
+
+static void char_pipe_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_pipe;
+    cc->open = qemu_chr_open_pipe;
+}
+
+static const TypeInfo char_pipe_type_info = {
+    .name = TYPE_CHARDEV_PIPE,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .class_init = char_pipe_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_pipe_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@@ -0,0 +1,300 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+#include "io/channel-file.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+
+#include "char-io.h"
+
+#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)      \
+    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
+    || defined(__GLIBC__)
+
+typedef struct {
+    Chardev parent;
+    QIOChannel *ioc;
+    int read_bytes;
+
+    /* Protected by the Chardev chr_write_lock.  */
+    int connected;
+    guint timer_tag;
+    guint open_tag;
+} PtyChardev;
+
+#define PTY_CHARDEV(obj) OBJECT_CHECK(PtyChardev, (obj), TYPE_CHARDEV_PTY)
+
+static void pty_chr_update_read_handler_locked(Chardev *chr);
+static void pty_chr_state(Chardev *chr, int connected);
+
+static gboolean pty_chr_timer(gpointer opaque)
+{
+    struct Chardev *chr = CHARDEV(opaque);
+    PtyChardev *s = PTY_CHARDEV(opaque);
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    s->timer_tag = 0;
+    s->open_tag = 0;
+    if (!s->connected) {
+        /* Next poll ... */
+        pty_chr_update_read_handler_locked(chr);
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+    return FALSE;
+}
+
+/* Called with chr_write_lock held.  */
+static void pty_chr_rearm_timer(Chardev *chr, int ms)
+{
+    PtyChardev *s = PTY_CHARDEV(chr);
+    char *name;
+
+    if (s->timer_tag) {
+        g_source_remove(s->timer_tag);
+        s->timer_tag = 0;
+    }
+
+    if (ms == 1000) {
+        name = g_strdup_printf("pty-timer-secs-%s", chr->label);
+        s->timer_tag = g_timeout_add_seconds(1, pty_chr_timer, chr);
+    } else {
+        name = g_strdup_printf("pty-timer-ms-%s", chr->label);
+        s->timer_tag = g_timeout_add(ms, pty_chr_timer, chr);
+    }
+    g_source_set_name_by_id(s->timer_tag, name);
+    g_free(name);
+}
+
+/* Called with chr_write_lock held.  */
+static void pty_chr_update_read_handler_locked(Chardev *chr)
+{
+    PtyChardev *s = PTY_CHARDEV(chr);
+    GPollFD pfd;
+    int rc;
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc);
+
+    pfd.fd = fioc->fd;
+    pfd.events = G_IO_OUT;
+    pfd.revents = 0;
+    do {
+        rc = g_poll(&pfd, 1, 0);
+    } while (rc == -1 && errno == EINTR);
+    assert(rc >= 0);
+
+    if (pfd.revents & G_IO_HUP) {
+        pty_chr_state(chr, 0);
+    } else {
+        pty_chr_state(chr, 1);
+    }
+}
+
+static void pty_chr_update_read_handler(Chardev *chr,
+                                        GMainContext *context)
+{
+    qemu_mutex_lock(&chr->chr_write_lock);
+    pty_chr_update_read_handler_locked(chr);
+    qemu_mutex_unlock(&chr->chr_write_lock);
+}
+
+/* Called with chr_write_lock held.  */
+static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    PtyChardev *s = PTY_CHARDEV(chr);
+
+    if (!s->connected) {
+        /* guest sends data, check for (re-)connect */
+        pty_chr_update_read_handler_locked(chr);
+        if (!s->connected) {
+            return len;
+        }
+    }
+    return io_channel_send(s->ioc, buf, len);
+}
+
+static GSource *pty_chr_add_watch(Chardev *chr, GIOCondition cond)
+{
+    PtyChardev *s = PTY_CHARDEV(chr);
+    if (!s->connected) {
+        return NULL;
+    }
+    return qio_channel_create_watch(s->ioc, cond);
+}
+
+static int pty_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    PtyChardev *s = PTY_CHARDEV(opaque);
+
+    s->read_bytes = qemu_chr_be_can_write(chr);
+    return s->read_bytes;
+}
+
+static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    PtyChardev *s = PTY_CHARDEV(opaque);
+    gsize len;
+    uint8_t buf[CHR_READ_BUF_LEN];
+    ssize_t ret;
+
+    len = sizeof(buf);
+    if (len > s->read_bytes) {
+        len = s->read_bytes;
+    }
+    if (len == 0) {
+        return TRUE;
+    }
+    ret = qio_channel_read(s->ioc, (char *)buf, len, NULL);
+    if (ret <= 0) {
+        pty_chr_state(chr, 0);
+        return FALSE;
+    } else {
+        pty_chr_state(chr, 1);
+        qemu_chr_be_write(chr, buf, ret);
+    }
+    return TRUE;
+}
+
+static gboolean qemu_chr_be_generic_open_func(gpointer opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    PtyChardev *s = PTY_CHARDEV(opaque);
+
+    s->open_tag = 0;
+    qemu_chr_be_generic_open(chr);
+    return FALSE;
+}
+
+/* Called with chr_write_lock held.  */
+static void pty_chr_state(Chardev *chr, int connected)
+{
+    PtyChardev *s = PTY_CHARDEV(chr);
+
+    if (!connected) {
+        if (s->open_tag) {
+            g_source_remove(s->open_tag);
+            s->open_tag = 0;
+        }
+        remove_fd_in_watch(chr, NULL);
+        s->connected = 0;
+        /* (re-)connect poll interval for idle guests: once per second.
+         * We check more frequently in case the guests sends data to
+         * the virtual device linked to our pty. */
+        pty_chr_rearm_timer(chr, 1000);
+    } else {
+        if (s->timer_tag) {
+            g_source_remove(s->timer_tag);
+            s->timer_tag = 0;
+        }
+        if (!s->connected) {
+            g_assert(s->open_tag == 0);
+            s->connected = 1;
+            s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
+        }
+        if (!chr->fd_in_tag) {
+            chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+                                               pty_chr_read_poll,
+                                               pty_chr_read,
+                                               chr, NULL);
+        }
+    }
+}
+
+static void char_pty_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    PtyChardev *s = PTY_CHARDEV(obj);
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    pty_chr_state(chr, 0);
+    object_unref(OBJECT(s->ioc));
+    if (s->timer_tag) {
+        g_source_remove(s->timer_tag);
+        s->timer_tag = 0;
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+static void char_pty_open(Chardev *chr,
+                          ChardevBackend *backend,
+                          bool *be_opened,
+                          Error **errp)
+{
+    PtyChardev *s;
+    int master_fd, slave_fd;
+    char pty_name[PATH_MAX];
+    char *name;
+
+    master_fd = qemu_openpty_raw(&slave_fd, pty_name);
+    if (master_fd < 0) {
+        error_setg_errno(errp, errno, "Failed to create PTY");
+        return;
+    }
+
+    close(slave_fd);
+    qemu_set_nonblock(master_fd);
+
+    chr->filename = g_strdup_printf("pty:%s", pty_name);
+    error_report("char device redirected to %s (label %s)",
+                 pty_name, chr->label);
+
+    s = PTY_CHARDEV(chr);
+    s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd));
+    name = g_strdup_printf("chardev-pty-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(s->ioc), name);
+    g_free(name);
+    s->timer_tag = 0;
+    *be_opened = false;
+}
+
+static void char_pty_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = char_pty_open;
+    cc->chr_write = char_pty_chr_write;
+    cc->chr_update_read_handler = pty_chr_update_read_handler;
+    cc->chr_add_watch = pty_chr_add_watch;
+}
+
+static const TypeInfo char_pty_type_info = {
+    .name = TYPE_CHARDEV_PTY,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(PtyChardev),
+    .instance_finalize = char_pty_finalize,
+    .class_init = char_pty_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_pty_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-ringbuf.c
+++ b/chardev/char-ringbuf.c
@@ -0,0 +1,249 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "qmp-commands.h"
+#include "qemu/base64.h"
+
+/* Ring buffer chardev */
+
+typedef struct {
+    Chardev parent;
+    size_t size;
+    size_t prod;
+    size_t cons;
+    uint8_t *cbuf;
+} RingBufChardev;
+
+#define RINGBUF_CHARDEV(obj)                                    \
+    OBJECT_CHECK(RingBufChardev, (obj), TYPE_CHARDEV_RINGBUF)
+
+static size_t ringbuf_count(const Chardev *chr)
+{
+    const RingBufChardev *d = RINGBUF_CHARDEV(chr);
+
+    return d->prod - d->cons;
+}
+
+static int ringbuf_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    RingBufChardev *d = RINGBUF_CHARDEV(chr);
+    int i;
+
+    if (!buf || (len < 0)) {
+        return -1;
+    }
+
+    for (i = 0; i < len; i++) {
+        d->cbuf[d->prod++ & (d->size - 1)] = buf[i];
+        if (d->prod - d->cons > d->size) {
+            d->cons = d->prod - d->size;
+        }
+    }
+
+    return len;
+}
+
+static int ringbuf_chr_read(Chardev *chr, uint8_t *buf, int len)
+{
+    RingBufChardev *d = RINGBUF_CHARDEV(chr);
+    int i;
+
+    qemu_mutex_lock(&chr->chr_write_lock);
+    for (i = 0; i < len && d->cons != d->prod; i++) {
+        buf[i] = d->cbuf[d->cons++ & (d->size - 1)];
+    }
+    qemu_mutex_unlock(&chr->chr_write_lock);
+
+    return i;
+}
+
+static void char_ringbuf_finalize(Object *obj)
+{
+    RingBufChardev *d = RINGBUF_CHARDEV(obj);
+
+    g_free(d->cbuf);
+}
+
+static void qemu_chr_open_ringbuf(Chardev *chr,
+                                  ChardevBackend *backend,
+                                  bool *be_opened,
+                                  Error **errp)
+{
+    ChardevRingbuf *opts = backend->u.ringbuf.data;
+    RingBufChardev *d = RINGBUF_CHARDEV(chr);
+
+    d->size = opts->has_size ? opts->size : 65536;
+
+    /* The size must be power of 2 */
+    if (d->size & (d->size - 1)) {
+        error_setg(errp, "size of ringbuf chardev must be power of two");
+        return;
+    }
+
+    d->prod = 0;
+    d->cons = 0;
+    d->cbuf = g_malloc0(d->size);
+}
+
+void qmp_ringbuf_write(const char *device, const char *data,
+                       bool has_format, enum DataFormat format,
+                       Error **errp)
+{
+    Chardev *chr;
+    const uint8_t *write_data;
+    int ret;
+    gsize write_count;
+
+    chr = qemu_chr_find(device);
+    if (!chr) {
+        error_setg(errp, "Device '%s' not found", device);
+        return;
+    }
+
+    if (!CHARDEV_IS_RINGBUF(chr)) {
+        error_setg(errp, "%s is not a ringbuf device", device);
+        return;
+    }
+
+    if (has_format && (format == DATA_FORMAT_BASE64)) {
+        write_data = qbase64_decode(data, -1,
+                                    &write_count,
+                                    errp);
+        if (!write_data) {
+            return;
+        }
+    } else {
+        write_data = (uint8_t *)data;
+        write_count = strlen(data);
+    }
+
+    ret = ringbuf_chr_write(chr, write_data, write_count);
+
+    if (write_data != (uint8_t *)data) {
+        g_free((void *)write_data);
+    }
+
+    if (ret < 0) {
+        error_setg(errp, "Failed to write to device %s", device);
+        return;
+    }
+}
+
+char *qmp_ringbuf_read(const char *device, int64_t size,
+                       bool has_format, enum DataFormat format,
+                       Error **errp)
+{
+    Chardev *chr;
+    uint8_t *read_data;
+    size_t count;
+    char *data;
+
+    chr = qemu_chr_find(device);
+    if (!chr) {
+        error_setg(errp, "Device '%s' not found", device);
+        return NULL;
+    }
+
+    if (!CHARDEV_IS_RINGBUF(chr)) {
+        error_setg(errp, "%s is not a ringbuf device", device);
+        return NULL;
+    }
+
+    if (size <= 0) {
+        error_setg(errp, "size must be greater than zero");
+        return NULL;
+    }
+
+    count = ringbuf_count(chr);
+    size = size > count ? count : size;
+    read_data = g_malloc(size + 1);
+
+    ringbuf_chr_read(chr, read_data, size);
+
+    if (has_format && (format == DATA_FORMAT_BASE64)) {
+        data = g_base64_encode(read_data, size);
+        g_free(read_data);
+    } else {
+        /*
+         * FIXME should read only complete, valid UTF-8 characters up
+         * to @size bytes.  Invalid sequences should be replaced by a
+         * suitable replacement character.  Except when (and only
+         * when) ring buffer lost characters since last read, initial
+         * continuation characters should be dropped.
+         */
+        read_data[size] = 0;
+        data = (char *)read_data;
+    }
+
+    return data;
+}
+
+static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend,
+                                   Error **errp)
+{
+    int val;
+    ChardevRingbuf *ringbuf;
+
+    backend->type = CHARDEV_BACKEND_KIND_RINGBUF;
+    ringbuf = backend->u.ringbuf.data = g_new0(ChardevRingbuf, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevRingbuf_base(ringbuf));
+
+    val = qemu_opt_get_size(opts, "size", 0);
+    if (val != 0) {
+        ringbuf->has_size = true;
+        ringbuf->size = val;
+    }
+}
+
+static void char_ringbuf_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_ringbuf;
+    cc->open = qemu_chr_open_ringbuf;
+    cc->chr_write = ringbuf_chr_write;
+}
+
+static const TypeInfo char_ringbuf_type_info = {
+    .name = TYPE_CHARDEV_RINGBUF,
+    .parent = TYPE_CHARDEV,
+    .class_init = char_ringbuf_class_init,
+    .instance_size = sizeof(RingBufChardev),
+    .instance_finalize = char_ringbuf_finalize,
+};
+
+/* Bug-compatibility: */
+static const TypeInfo char_memory_type_info = {
+    .name = TYPE_CHARDEV_MEMORY,
+    .parent = TYPE_CHARDEV_RINGBUF,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_ringbuf_type_info);
+    type_register_static(&char_memory_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-serial.c
+++ b/chardev/char-serial.c
@@ -0,0 +1,318 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "io/channel-file.h"
+#include "qapi/error.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#else
+#include <sys/ioctl.h>
+#include <termios.h>
+#include "char-fd.h"
+#endif
+
+#include "char-serial.h"
+
+#ifdef _WIN32
+
+static void qmp_chardev_open_serial(Chardev *chr,
+                                    ChardevBackend *backend,
+                                    bool *be_opened,
+                                    Error **errp)
+{
+    ChardevHostdev *serial = backend->u.serial.data;
+
+    win_chr_init(chr, serial->device, errp);
+}
+
+#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)      \
+    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
+    || defined(__GLIBC__)
+
+static void tty_serial_init(int fd, int speed,
+                            int parity, int data_bits, int stop_bits)
+{
+    struct termios tty;
+    speed_t spd;
+
+#if 0
+    printf("tty_serial_init: speed=%d parity=%c data=%d stop=%d\n",
+           speed, parity, data_bits, stop_bits);
+#endif
+    tcgetattr(fd, &tty);
+
+#define check_speed(val) if (speed <= val) { spd = B##val; break; }
+    speed = speed * 10 / 11;
+    do {
+        check_speed(50);
+        check_speed(75);
+        check_speed(110);
+        check_speed(134);
+        check_speed(150);
+        check_speed(200);
+        check_speed(300);
+        check_speed(600);
+        check_speed(1200);
+        check_speed(1800);
+        check_speed(2400);
+        check_speed(4800);
+        check_speed(9600);
+        check_speed(19200);
+        check_speed(38400);
+        /* Non-Posix values follow. They may be unsupported on some systems. */
+        check_speed(57600);
+        check_speed(115200);
+#ifdef B230400
+        check_speed(230400);
+#endif
+#ifdef B460800
+        check_speed(460800);
+#endif
+#ifdef B500000
+        check_speed(500000);
+#endif
+#ifdef B576000
+        check_speed(576000);
+#endif
+#ifdef B921600
+        check_speed(921600);
+#endif
+#ifdef B1000000
+        check_speed(1000000);
+#endif
+#ifdef B1152000
+        check_speed(1152000);
+#endif
+#ifdef B1500000
+        check_speed(1500000);
+#endif
+#ifdef B2000000
+        check_speed(2000000);
+#endif
+#ifdef B2500000
+        check_speed(2500000);
+#endif
+#ifdef B3000000
+        check_speed(3000000);
+#endif
+#ifdef B3500000
+        check_speed(3500000);
+#endif
+#ifdef B4000000
+        check_speed(4000000);
+#endif
+        spd = B115200;
+    } while (0);
+
+    cfsetispeed(&tty, spd);
+    cfsetospeed(&tty, spd);
+
+    tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+                     | INLCR | IGNCR | ICRNL | IXON);
+    tty.c_oflag |= OPOST;
+    tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN | ISIG);
+    tty.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
+    switch (data_bits) {
+    default:
+    case 8:
+        tty.c_cflag |= CS8;
+        break;
+    case 7:
+        tty.c_cflag |= CS7;
+        break;
+    case 6:
+        tty.c_cflag |= CS6;
+        break;
+    case 5:
+        tty.c_cflag |= CS5;
+        break;
+    }
+    switch (parity) {
+    default:
+    case 'N':
+        break;
+    case 'E':
+        tty.c_cflag |= PARENB;
+        break;
+    case 'O':
+        tty.c_cflag |= PARENB | PARODD;
+        break;
+    }
+    if (stop_bits == 2) {
+        tty.c_cflag |= CSTOPB;
+    }
+
+    tcsetattr(fd, TCSANOW, &tty);
+}
+
+static int tty_serial_ioctl(Chardev *chr, int cmd, void *arg)
+{
+    FDChardev *s = FD_CHARDEV(chr);
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in);
+
+    switch (cmd) {
+    case CHR_IOCTL_SERIAL_SET_PARAMS:
+        {
+            QEMUSerialSetParams *ssp = arg;
+            tty_serial_init(fioc->fd,
+                            ssp->speed, ssp->parity,
+                            ssp->data_bits, ssp->stop_bits);
+        }
+        break;
+    case CHR_IOCTL_SERIAL_SET_BREAK:
+        {
+            int enable = *(int *)arg;
+            if (enable) {
+                tcsendbreak(fioc->fd, 1);
+            }
+        }
+        break;
+    case CHR_IOCTL_SERIAL_GET_TIOCM:
+        {
+            int sarg = 0;
+            int *targ = (int *)arg;
+            ioctl(fioc->fd, TIOCMGET, &sarg);
+            *targ = 0;
+            if (sarg & TIOCM_CTS) {
+                *targ |= CHR_TIOCM_CTS;
+            }
+            if (sarg & TIOCM_CAR) {
+                *targ |= CHR_TIOCM_CAR;
+            }
+            if (sarg & TIOCM_DSR) {
+                *targ |= CHR_TIOCM_DSR;
+            }
+            if (sarg & TIOCM_RI) {
+                *targ |= CHR_TIOCM_RI;
+            }
+            if (sarg & TIOCM_DTR) {
+                *targ |= CHR_TIOCM_DTR;
+            }
+            if (sarg & TIOCM_RTS) {
+                *targ |= CHR_TIOCM_RTS;
+            }
+        }
+        break;
+    case CHR_IOCTL_SERIAL_SET_TIOCM:
+        {
+            int sarg = *(int *)arg;
+            int targ = 0;
+            ioctl(fioc->fd, TIOCMGET, &targ);
+            targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR
+                     | CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS);
+            if (sarg & CHR_TIOCM_CTS) {
+                targ |= TIOCM_CTS;
+            }
+            if (sarg & CHR_TIOCM_CAR) {
+                targ |= TIOCM_CAR;
+            }
+            if (sarg & CHR_TIOCM_DSR) {
+                targ |= TIOCM_DSR;
+            }
+            if (sarg & CHR_TIOCM_RI) {
+                targ |= TIOCM_RI;
+            }
+            if (sarg & CHR_TIOCM_DTR) {
+                targ |= TIOCM_DTR;
+            }
+            if (sarg & CHR_TIOCM_RTS) {
+                targ |= TIOCM_RTS;
+            }
+            ioctl(fioc->fd, TIOCMSET, &targ);
+        }
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+static void qmp_chardev_open_serial(Chardev *chr,
+                                    ChardevBackend *backend,
+                                    bool *be_opened,
+                                    Error **errp)
+{
+    ChardevHostdev *serial = backend->u.serial.data;
+    int fd;
+
+    fd = qmp_chardev_open_file_source(serial->device, O_RDWR, errp);
+    if (fd < 0) {
+        return;
+    }
+    qemu_set_nonblock(fd);
+    tty_serial_init(fd, 115200, 'N', 8, 1);
+
+    qemu_chr_open_fd(chr, fd, fd);
+}
+#endif /* __linux__ || __sun__ */
+
+#ifdef HAVE_CHARDEV_SERIAL
+static void qemu_chr_parse_serial(QemuOpts *opts, ChardevBackend *backend,
+                                  Error **errp)
+{
+    const char *device = qemu_opt_get(opts, "path");
+    ChardevHostdev *serial;
+
+    if (device == NULL) {
+        error_setg(errp, "chardev: serial/tty: no device path given");
+        return;
+    }
+    backend->type = CHARDEV_BACKEND_KIND_SERIAL;
+    serial = backend->u.serial.data = g_new0(ChardevHostdev, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(serial));
+    serial->device = g_strdup(device);
+}
+
+static void char_serial_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_serial;
+    cc->open = qmp_chardev_open_serial;
+#ifndef _WIN32
+    cc->chr_ioctl = tty_serial_ioctl;
+#endif
+}
+
+
+static const TypeInfo char_serial_type_info = {
+    .name = TYPE_CHARDEV_SERIAL,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .class_init = char_serial_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_serial_type_info);
+}
+
+type_init(register_types);
+
+#endif
--- a/chardev/char-serial.h
+++ b/chardev/char-serial.h
@@ -0,0 +1,35 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_SERIAL_H
+#define CHAR_SERIAL_H
+
+#ifdef _WIN32
+#define HAVE_CHARDEV_SERIAL 1
+#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)    \
+    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
+    || defined(__GLIBC__)
+#define HAVE_CHARDEV_SERIAL 1
+#endif
+
+#endif
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
--- a/chardev/char-stdio.c
+++ b/chardev/char-stdio.c
@@ -0,0 +1,164 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/char.h"
+
+#ifdef _WIN32
+#include "char-win.h"
+#include "char-win-stdio.h"
+#else
+#include <termios.h>
+#include "char-fd.h"
+#endif
+
+#ifndef _WIN32
+/* init terminal so that we can grab keys */
+static struct termios oldtty;
+static int old_fd0_flags;
+static bool stdio_in_use;
+static bool stdio_allow_signal;
+static bool stdio_echo_state;
+
+static void term_exit(void)
+{
+    tcsetattr(0, TCSANOW, &oldtty);
+    fcntl(0, F_SETFL, old_fd0_flags);
+}
+
+static void qemu_chr_set_echo_stdio(Chardev *chr, bool echo)
+{
+    struct termios tty;
+
+    stdio_echo_state = echo;
+    tty = oldtty;
+    if (!echo) {
+        tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+                         | INLCR | IGNCR | ICRNL | IXON);
+        tty.c_oflag |= OPOST;
+        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+        tty.c_cflag &= ~(CSIZE | PARENB);
+        tty.c_cflag |= CS8;
+        tty.c_cc[VMIN] = 1;
+        tty.c_cc[VTIME] = 0;
+    }
+    if (!stdio_allow_signal) {
+        tty.c_lflag &= ~ISIG;
+    }
+
+    tcsetattr(0, TCSANOW, &tty);
+}
+
+static void term_stdio_handler(int sig)
+{
+    /* restore echo after resume from suspend. */
+    qemu_chr_set_echo_stdio(NULL, stdio_echo_state);
+}
+
+static void qemu_chr_open_stdio(Chardev *chr,
+                                ChardevBackend *backend,
+                                bool *be_opened,
+                                Error **errp)
+{
+    ChardevStdio *opts = backend->u.stdio.data;
+    struct sigaction act;
+
+    if (is_daemonized()) {
+        error_setg(errp, "cannot use stdio with -daemonize");
+        return;
+    }
+
+    if (stdio_in_use) {
+        error_setg(errp, "cannot use stdio by multiple character devices");
+        return;
+    }
+
+    stdio_in_use = true;
+    old_fd0_flags = fcntl(0, F_GETFL);
+    tcgetattr(0, &oldtty);
+    qemu_set_nonblock(0);
+    atexit(term_exit);
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = term_stdio_handler;
+    sigaction(SIGCONT, &act, NULL);
+
+    qemu_chr_open_fd(chr, 0, 1);
+
+    if (opts->has_signal) {
+        stdio_allow_signal = opts->signal;
+    }
+    qemu_chr_set_echo_stdio(chr, false);
+}
+#endif
+
+static void qemu_chr_parse_stdio(QemuOpts *opts, ChardevBackend *backend,
+                                 Error **errp)
+{
+    ChardevStdio *stdio;
+
+    backend->type = CHARDEV_BACKEND_KIND_STDIO;
+    stdio = backend->u.stdio.data = g_new0(ChardevStdio, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevStdio_base(stdio));
+    stdio->has_signal = true;
+    stdio->signal = qemu_opt_get_bool(opts, "signal", true);
+}
+
+static void char_stdio_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_stdio;
+#ifndef _WIN32
+    cc->open = qemu_chr_open_stdio;
+    cc->chr_set_echo = qemu_chr_set_echo_stdio;
+#endif
+}
+
+static void char_stdio_finalize(Object *obj)
+{
+#ifndef _WIN32
+    term_exit();
+#endif
+}
+
+static const TypeInfo char_stdio_type_info = {
+    .name = TYPE_CHARDEV_STDIO,
+#ifdef _WIN32
+    .parent = TYPE_CHARDEV_WIN_STDIO,
+#else
+    .parent = TYPE_CHARDEV_FD,
+#endif
+    .instance_finalize = char_stdio_finalize,
+    .class_init = char_stdio_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_stdio_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-udp.c
+++ b/chardev/char-udp.c
@@ -0,0 +1,233 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/char.h"
+#include "io/channel-socket.h"
+#include "qapi/error.h"
+
+#include "char-io.h"
+
+/***********************************************************/
+/* UDP Net console */
+
+typedef struct {
+    Chardev parent;
+    QIOChannel *ioc;
+    uint8_t buf[CHR_READ_BUF_LEN];
+    int bufcnt;
+    int bufptr;
+    int max_size;
+} UdpChardev;
+
+#define UDP_CHARDEV(obj) OBJECT_CHECK(UdpChardev, (obj), TYPE_CHARDEV_UDP)
+
+/* Called with chr_write_lock held.  */
+static int udp_chr_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    UdpChardev *s = UDP_CHARDEV(chr);
+
+    return qio_channel_write(
+        s->ioc, (const char *)buf, len, NULL);
+}
+
+static int udp_chr_read_poll(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    UdpChardev *s = UDP_CHARDEV(opaque);
+
+    s->max_size = qemu_chr_be_can_write(chr);
+
+    /* If there were any stray characters in the queue process them
+     * first
+     */
+    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
+        qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
+        s->bufptr++;
+        s->max_size = qemu_chr_be_can_write(chr);
+    }
+    return s->max_size;
+}
+
+static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    UdpChardev *s = UDP_CHARDEV(opaque);
+    ssize_t ret;
+
+    if (s->max_size == 0) {
+        return TRUE;
+    }
+    ret = qio_channel_read(
+        s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
+    if (ret <= 0) {
+        remove_fd_in_watch(chr, NULL);
+        return FALSE;
+    }
+    s->bufcnt = ret;
+
+    s->bufptr = 0;
+    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
+        qemu_chr_be_write(chr, &s->buf[s->bufptr], 1);
+        s->bufptr++;
+        s->max_size = qemu_chr_be_can_write(chr);
+    }
+
+    return TRUE;
+}
+
+static void udp_chr_update_read_handler(Chardev *chr,
+                                        GMainContext *context)
+{
+    UdpChardev *s = UDP_CHARDEV(chr);
+
+    remove_fd_in_watch(chr, NULL);
+    if (s->ioc) {
+        chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
+                                           udp_chr_read_poll,
+                                           udp_chr_read, chr,
+                                           context);
+    }
+}
+
+static void char_udp_finalize(Object *obj)
+{
+    Chardev *chr = CHARDEV(obj);
+    UdpChardev *s = UDP_CHARDEV(obj);
+
+    remove_fd_in_watch(chr, NULL);
+    if (s->ioc) {
+        object_unref(OBJECT(s->ioc));
+    }
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+}
+
+static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend,
+                               Error **errp)
+{
+    const char *host = qemu_opt_get(opts, "host");
+    const char *port = qemu_opt_get(opts, "port");
+    const char *localaddr = qemu_opt_get(opts, "localaddr");
+    const char *localport = qemu_opt_get(opts, "localport");
+    bool has_local = false;
+    SocketAddress *addr;
+    ChardevUdp *udp;
+
+    backend->type = CHARDEV_BACKEND_KIND_UDP;
+    if (host == NULL || strlen(host) == 0) {
+        host = "localhost";
+    }
+    if (port == NULL || strlen(port) == 0) {
+        error_setg(errp, "chardev: udp: remote port not specified");
+        return;
+    }
+    if (localport == NULL || strlen(localport) == 0) {
+        localport = "0";
+    } else {
+        has_local = true;
+    }
+    if (localaddr == NULL || strlen(localaddr) == 0) {
+        localaddr = "";
+    } else {
+        has_local = true;
+    }
+
+    udp = backend->u.udp.data = g_new0(ChardevUdp, 1);
+    qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp));
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_KIND_INET;
+    addr->u.inet.data = g_new(InetSocketAddress, 1);
+    *addr->u.inet.data = (InetSocketAddress) {
+        .host = g_strdup(host),
+        .port = g_strdup(port),
+        .has_ipv4 = qemu_opt_get(opts, "ipv4"),
+        .ipv4 = qemu_opt_get_bool(opts, "ipv4", 0),
+        .has_ipv6 = qemu_opt_get(opts, "ipv6"),
+        .ipv6 = qemu_opt_get_bool(opts, "ipv6", 0),
+    };
+    udp->remote = addr;
+
+    if (has_local) {
+        udp->has_local = true;
+        addr = g_new0(SocketAddress, 1);
+        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr->u.inet.data = g_new(InetSocketAddress, 1);
+        *addr->u.inet.data = (InetSocketAddress) {
+            .host = g_strdup(localaddr),
+            .port = g_strdup(localport),
+        };
+        udp->local = addr;
+    }
+}
+
+static void qmp_chardev_open_udp(Chardev *chr,
+                                 ChardevBackend *backend,
+                                 bool *be_opened,
+                                 Error **errp)
+{
+    ChardevUdp *udp = backend->u.udp.data;
+    QIOChannelSocket *sioc = qio_channel_socket_new();
+    char *name;
+    UdpChardev *s = UDP_CHARDEV(chr);
+
+    if (qio_channel_socket_dgram_sync(sioc,
+                                      udp->local, udp->remote,
+                                      errp) < 0) {
+        object_unref(OBJECT(sioc));
+        return;
+    }
+
+    name = g_strdup_printf("chardev-udp-%s", chr->label);
+    qio_channel_set_name(QIO_CHANNEL(sioc), name);
+    g_free(name);
+
+    s->ioc = QIO_CHANNEL(sioc);
+    /* be isn't opened until we get a connection */
+    *be_opened = false;
+}
+
+static void char_udp_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->parse = qemu_chr_parse_udp;
+    cc->open = qmp_chardev_open_udp;
+    cc->chr_write = udp_chr_write;
+    cc->chr_update_read_handler = udp_chr_update_read_handler;
+}
+
+static const TypeInfo char_udp_type_info = {
+    .name = TYPE_CHARDEV_UDP,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(UdpChardev),
+    .instance_finalize = char_udp_finalize,
+    .class_init = char_udp_class_init,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_udp_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-win-stdio.c
+++ b/chardev/char-win-stdio.c
@@ -0,0 +1,266 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "char-win.h"
+#include "char-win-stdio.h"
+
+typedef struct {
+    Chardev parent;
+    HANDLE  hStdIn;
+    HANDLE  hInputReadyEvent;
+    HANDLE  hInputDoneEvent;
+    HANDLE  hInputThread;
+    uint8_t win_stdio_buf;
+} WinStdioChardev;
+
+#define WIN_STDIO_CHARDEV(obj)                                          \
+    OBJECT_CHECK(WinStdioChardev, (obj), TYPE_CHARDEV_WIN_STDIO)
+
+static void win_stdio_wait_func(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(opaque);
+    INPUT_RECORD       buf[4];
+    int                ret;
+    DWORD              dwSize;
+    int                i;
+
+    ret = ReadConsoleInput(stdio->hStdIn, buf, ARRAY_SIZE(buf), &dwSize);
+
+    if (!ret) {
+        /* Avoid error storm */
+        qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
+        return;
+    }
+
+    for (i = 0; i < dwSize; i++) {
+        KEY_EVENT_RECORD *kev = &buf[i].Event.KeyEvent;
+
+        if (buf[i].EventType == KEY_EVENT && kev->bKeyDown) {
+            int j;
+            if (kev->uChar.AsciiChar != 0) {
+                for (j = 0; j < kev->wRepeatCount; j++) {
+                    if (qemu_chr_be_can_write(chr)) {
+                        uint8_t c = kev->uChar.AsciiChar;
+                        qemu_chr_be_write(chr, &c, 1);
+                    }
+                }
+            }
+        }
+    }
+}
+
+static DWORD WINAPI win_stdio_thread(LPVOID param)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(param);
+    int                ret;
+    DWORD              dwSize;
+
+    while (1) {
+
+        /* Wait for one byte */
+        ret = ReadFile(stdio->hStdIn, &stdio->win_stdio_buf, 1, &dwSize, NULL);
+
+        /* Exit in case of error, continue if nothing read */
+        if (!ret) {
+            break;
+        }
+        if (!dwSize) {
+            continue;
+        }
+
+        /* Some terminal emulator returns \r\n for Enter, just pass \n */
+        if (stdio->win_stdio_buf == '\r') {
+            continue;
+        }
+
+        /* Signal the main thread and wait until the byte was eaten */
+        if (!SetEvent(stdio->hInputReadyEvent)) {
+            break;
+        }
+        if (WaitForSingleObject(stdio->hInputDoneEvent, INFINITE)
+            != WAIT_OBJECT_0) {
+            break;
+        }
+    }
+
+    qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
+    return 0;
+}
+
+static void win_stdio_thread_wait_func(void *opaque)
+{
+    Chardev *chr = CHARDEV(opaque);
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(opaque);
+
+    if (qemu_chr_be_can_write(chr)) {
+        qemu_chr_be_write(chr, &stdio->win_stdio_buf, 1);
+    }
+
+    SetEvent(stdio->hInputDoneEvent);
+}
+
+static void qemu_chr_set_echo_win_stdio(Chardev *chr, bool echo)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
+    DWORD              dwMode = 0;
+
+    GetConsoleMode(stdio->hStdIn, &dwMode);
+
+    if (echo) {
+        SetConsoleMode(stdio->hStdIn, dwMode | ENABLE_ECHO_INPUT);
+    } else {
+        SetConsoleMode(stdio->hStdIn, dwMode & ~ENABLE_ECHO_INPUT);
+    }
+}
+
+static void qemu_chr_open_stdio(Chardev *chr,
+                                ChardevBackend *backend,
+                                bool *be_opened,
+                                Error **errp)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(chr);
+    DWORD              dwMode;
+    int                is_console = 0;
+
+    stdio->hStdIn = GetStdHandle(STD_INPUT_HANDLE);
+    if (stdio->hStdIn == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "cannot open stdio: invalid handle");
+        return;
+    }
+
+    is_console = GetConsoleMode(stdio->hStdIn, &dwMode) != 0;
+
+    if (is_console) {
+        if (qemu_add_wait_object(stdio->hStdIn,
+                                 win_stdio_wait_func, chr)) {
+            error_setg(errp, "qemu_add_wait_object: failed");
+            goto err1;
+        }
+    } else {
+        DWORD   dwId;
+
+        stdio->hInputReadyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+        stdio->hInputDoneEvent  = CreateEvent(NULL, FALSE, FALSE, NULL);
+        if (stdio->hInputReadyEvent == INVALID_HANDLE_VALUE
+            || stdio->hInputDoneEvent == INVALID_HANDLE_VALUE) {
+            error_setg(errp, "cannot create event");
+            goto err2;
+        }
+        if (qemu_add_wait_object(stdio->hInputReadyEvent,
+                                 win_stdio_thread_wait_func, chr)) {
+            error_setg(errp, "qemu_add_wait_object: failed");
+            goto err2;
+        }
+        stdio->hInputThread     = CreateThread(NULL, 0, win_stdio_thread,
+                                               chr, 0, &dwId);
+
+        if (stdio->hInputThread == INVALID_HANDLE_VALUE) {
+            error_setg(errp, "cannot create stdio thread");
+            goto err3;
+        }
+    }
+
+    dwMode |= ENABLE_LINE_INPUT;
+
+    if (is_console) {
+        /* set the terminal in raw mode */
+        /* ENABLE_QUICK_EDIT_MODE | ENABLE_EXTENDED_FLAGS */
+        dwMode |= ENABLE_PROCESSED_INPUT;
+    }
+
+    SetConsoleMode(stdio->hStdIn, dwMode);
+
+    qemu_chr_set_echo_win_stdio(chr, false);
+
+    return;
+
+err3:
+    qemu_del_wait_object(stdio->hInputReadyEvent, NULL, NULL);
+err2:
+    CloseHandle(stdio->hInputReadyEvent);
+    CloseHandle(stdio->hInputDoneEvent);
+err1:
+    qemu_del_wait_object(stdio->hStdIn, NULL, NULL);
+}
+
+static void char_win_stdio_finalize(Object *obj)
+{
+    WinStdioChardev *stdio = WIN_STDIO_CHARDEV(obj);
+
+    if (stdio->hInputReadyEvent != INVALID_HANDLE_VALUE) {
+        CloseHandle(stdio->hInputReadyEvent);
+    }
+    if (stdio->hInputDoneEvent != INVALID_HANDLE_VALUE) {
+        CloseHandle(stdio->hInputDoneEvent);
+    }
+    if (stdio->hInputThread != INVALID_HANDLE_VALUE) {
+        TerminateThread(stdio->hInputThread, 0);
+    }
+}
+
+static int win_stdio_write(Chardev *chr, const uint8_t *buf, int len)
+{
+    HANDLE  hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
+    DWORD   dwSize;
+    int     len1;
+
+    len1 = len;
+
+    while (len1 > 0) {
+        if (!WriteFile(hStdOut, buf, len1, &dwSize, NULL)) {
+            break;
+        }
+        buf  += dwSize;
+        len1 -= dwSize;
+    }
+
+    return len - len1;
+}
+
+static void char_win_stdio_class_init(ObjectClass *oc, void *data)
+{
+    ChardevClass *cc = CHARDEV_CLASS(oc);
+
+    cc->open = qemu_chr_open_stdio;
+    cc->chr_write = win_stdio_write;
+    cc->chr_set_echo = qemu_chr_set_echo_win_stdio;
+}
+
+static const TypeInfo char_win_stdio_type_info = {
+    .name = TYPE_CHARDEV_WIN_STDIO,
+    .parent = TYPE_CHARDEV,
+    .instance_size = sizeof(WinStdioChardev),
+    .instance_finalize = char_win_stdio_finalize,
+    .class_init = char_win_stdio_class_init,
+    .abstract = true,
+};
+
+static void register_types(void)
+{
+    type_register_static(&char_win_stdio_type_info);
+}
+
+type_init(register_types);
--- a/chardev/char-win-stdio.h
+++ b/chardev/char-win-stdio.h
@@ -0,0 +1,29 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef CHAR_WIN_STDIO_H
+#define CHAR_WIN_STDIO_H
+
+#define TYPE_CHARDEV_WIN_STDIO "chardev-win-stdio"
+
+#endif /* CHAR_WIN_STDIO_H */
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .8.50
 .8.93