SPARC64: add icount support

This patch adds gen_io_start()/gen_io_end() to various instructions as required in order to boot my OpenBIOS test images on qemu-system-sparc64 with icount enabled. Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
hw/sparc/sun4m: Fix problems with device introspection
2018-06-17 11:13:06 +01:00 · 2018-06-17 11:12:53 +01:00 · 2018-06-17 11:12:41 +01:00 · 2018-06-15 18:13:35 +01:00 · 2018-06-15 17:28:37 +01:00 · 2018-06-15 16:30:27 +01:00
1324 changed files with 62960 additions and 36259 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@
 /qapi/qapi-commands-common.[ch]
 /qapi/qapi-commands-crypto.[ch]
 /qapi/qapi-commands-introspect.[ch]
+/qapi/qapi-commands-job.[ch]
 /qapi/qapi-commands-migration.[ch]
 /qapi/qapi-commands-misc.[ch]
 /qapi/qapi-commands-net.[ch]
@@ -53,6 +54,7 @@
 /qapi/qapi-events-common.[ch]
 /qapi/qapi-events-crypto.[ch]
 /qapi/qapi-events-introspect.[ch]
+/qapi/qapi-events-job.[ch]
 /qapi/qapi-events-migration.[ch]
 /qapi/qapi-events-misc.[ch]
 /qapi/qapi-events-net.[ch]
@@ -71,6 +73,7 @@
 /qapi/qapi-types-common.[ch]
 /qapi/qapi-types-crypto.[ch]
 /qapi/qapi-types-introspect.[ch]
+/qapi/qapi-types-job.[ch]
 /qapi/qapi-types-migration.[ch]
 /qapi/qapi-types-misc.[ch]
 /qapi/qapi-types-net.[ch]
@@ -88,6 +91,7 @@
 /qapi/qapi-visit-common.[ch]
 /qapi/qapi-visit-crypto.[ch]
 /qapi/qapi-visit-introspect.[ch]
+/qapi/qapi-visit-job.[ch]
 /qapi/qapi-visit-migration.[ch]
 /qapi/qapi-visit-misc.[ch]
 /qapi/qapi-visit-net.[ch]
@@ -206,3 +210,4 @@ trace-dtrace-root.h
 trace-dtrace-root.dtrace
 trace-ust-all.h
 trace-ust-all.c
+/target/arm/decode-sve.inc.c
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -35,13 +35,5 @@ build:
    options: "-e HOME=/root"
  ci:
    - unset CC
-    # some targets require newer up to date packages, for example TARGET_LIST matching
-    # aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
-    # see the configure script:
-    #    error_exit "DTC (libfdt) version >= 1.4.2 not present. Your options:"
-    #    "  (1) Preferred: Install the DTC (libfdt) devel package"
-    #    "  (2) Fetch the DTC submodule, using:"
-    #    "      git submodule update --init dtc"
-    - dpkg --compare-versions `dpkg-query --showformat='${Version}' --show libfdt-dev` ge 1.4.2 || git submodule update --init dtc
    - ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST}
    - make -j$(($(getconf _NPROCESSORS_ONLN) + 1))
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,8 @@
+# The current Travis default is a container based 14.04 Trust on EC2
+# Additional builds with specific requirements for a full VM need to
+# be added as additional matrix: entries later on
 sudo: false
+dist: trusty
 language: c
 python:
  - "2.6"
@@ -65,28 +69,31 @@ before_install:
  - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
  - git submodule update --init --recursive
 before_script:
-  - ./configure ${CONFIG}
+  - ./configure ${CONFIG} || { cat config.log && exit 1; }
 script:
  - make ${MAKEFLAGS} && ${TEST_CMD}
 matrix:
  include:
-    # Test with CLang for compile portability
-    - env: CONFIG=""
+    # Test with Clang for compile portability (Travis uses clang-5.0)
+    - env: CONFIG="--disable-system"
+      compiler: clang
+    - env: CONFIG="--disable-user"
      compiler: clang
    # gprof/gcov are GCC features
-    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
+    - env: CONFIG="--enable-gprof --enable-gcov --disable-pie --target-list=aarch64-softmmu,arm-softmmu,i386-softmmu,mips-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu"
      compiler: gcc
    # We manually include builds which we disable "make check" for
    - env: CONFIG="--enable-debug --enable-tcg-interpreter"
           TEST_CMD=""
      compiler: gcc
-    - env: CONFIG="--enable-trace-backends=simple"
+    # We don't need to exercise every backend with every front-end
+    - env: CONFIG="--enable-trace-backends=log,simple,syslog --disable-system"
           TEST_CMD=""
      compiler: gcc
-    - env: CONFIG="--enable-trace-backends=ftrace"
+    - env: CONFIG="--enable-trace-backends=ftrace --target-list=x86_64-softmmu"
           TEST_CMD=""
      compiler: gcc
-    - env: CONFIG="--enable-trace-backends=ust"
+    - env: CONFIG="--enable-trace-backends=ust --target-list=x86_64-softmmu"
           TEST_CMD=""
      compiler: gcc
    - env: CONFIG="--disable-tcg"
@@ -95,80 +102,24 @@ matrix:
    - env: CONFIG=""
      os: osx
      compiler: clang
-    # Plain Trusty System Build
-    - env: CONFIG="--disable-linux-user"
-      sudo: required
-      addons:
-      dist: trusty
-      compiler: gcc
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-    # Plain Trusty Linux User Build
-    - env: CONFIG="--disable-system"
-      sudo: required
-      addons:
-      dist: trusty
-      compiler: gcc
-      before_install:
-        - sudo apt-get update -qq
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-    # Trusty System build with latest stable clang & python 3.0
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
+    # Python builds
+    - env: CONFIG="--target-list=x86_64-softmmu"
      python:
        - "3.0"
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-linux-user --cc=clang-3.9 --cxx=clang++-3.9 --python=/usr/bin/python3"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
-    # Trusty Linux User build with latest stable clang & python 3.6
-    - sudo: required
-      addons:
-      dist: trusty
-      language: generic
-      compiler: none
+    - env: CONFIG="--target-list=x86_64-softmmu"
      python:
        - "3.6"
-      env:
-        - COMPILER_NAME=clang CXX=clang++-3.9 CC=clang-3.9
-        - CONFIG="--disable-system --cc=clang-3.9 --cxx=clang++-3.9 --python=/usr/bin/python3"
-      before_install:
-        - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-        - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-        - sudo apt-get update -qq
-        - sudo apt-get install -qq -y clang-3.9
-        - sudo apt-get build-dep -qq qemu
-        - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
-        - git submodule update --init --recursive
-      before_script:
-        - ./configure ${CONFIG} || cat config.log
    # Using newer GCC with sanitizers
    - addons:
        apt:
+          update: true
          sources:
            # PPAs for newer toolchains
            - ubuntu-toolchain-r-test
          packages:
            # Extra toolchains
-            - gcc-5
-            - g++-5
+            - gcc-7
+            - g++-7
            # Build dependencies
            - libaio-dev
            - libattr1-dev
@@ -197,8 +148,8 @@ matrix:
      language: generic
      compiler: none
      env:
-        - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5
-        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user"
+        - COMPILER_NAME=gcc CXX=g++-7 CC=gcc-7
+        - CONFIG="--cc=gcc-7 --cxx=g++-7 --disable-pie --disable-linux-user"
        - TEST_CMD=""
      before_script:
-        - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log
+        - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || { cat config.log && exit 1; }
--- a/17
+++ b/17
@@ -124,6 +124,23 @@ We use traditional C-style /* */ comments and avoid // comments.
 Rationale: The // form is valid in C99, so this is purely a matter of
 consistency of style. The checkpatch script will warn you about this.

+Multiline comment blocks should have a row of stars on the left,
+and the initial /* and terminating */ both on their own lines:
+    /*
+     * like
+     * this
+     */
+This is the same format required by the Linux kernel coding style.
+
+(Some of the existing comments in the codebase use the GNU Coding
+Standards form which does not have stars on the left, or other
+variations; avoid these when writing new comments, but don't worry
+about converting to the preferred form unless you're editing that
+comment anyway.)
+
+Rationale: Consistency, and ease of visually picking out a multiline
+comment from the surrounding code.
+
 8. trace-events style

 8.1 0x prefix
--- a/9
+++ b/9
@@ -118,6 +118,15 @@ Please note that g_malloc will exit on allocation failure, so there
 is no need to test for failure (as you would have to with malloc).
 Calling g_malloc with a zero size is valid and will return NULL.

+Prefer g_new(T, n) instead of g_malloc(sizeof(T) * n) for the following
+reasons:
+
+  a. It catches multiplication overflowing size_t;
+  b. It returns T * instead of void *, letting compiler catch more type
+     errors.
+
+Declarations like T *v = g_malloc(sizeof(*v)) are acceptable, though.
+
 Memory allocated by qemu_memalign or qemu_blockalign must be freed with
 qemu_vfree, since breaking this will cause problems on Win32.

--- a/59
+++ b/59
@@ -447,6 +447,8 @@ F: hw/timer/cmsdk-apb-timer.c
 F: include/hw/timer/cmsdk-apb-timer.h
 F: hw/char/cmsdk-apb-uart.c
 F: include/hw/char/cmsdk-apb-uart.h
+F: hw/misc/tz-ppc.c
+F: include/hw/misc/tz-ppc.h

 ARM cores
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -515,8 +517,11 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/mps2.c
-F: hw/misc/mps2-scc.c
-F: include/hw/misc/mps2-scc.h
+F: hw/arm/mps2-tz.c
+F: hw/misc/mps2-*.c
+F: include/hw/misc/mps2-*.h
+F: hw/arm/iotkit.c
+F: include/hw/arm/iotkit.h

 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
@@ -761,8 +766,11 @@ F: hw/ppc/mac_newworld.c
 F: hw/pci-host/uninorth.c
 F: hw/pci-bridge/dec.[hc]
 F: hw/misc/macio/
-F: include/hw/ppc/mac_dbdma.h
+F: hw/misc/mos6522.c
 F: hw/nvram/mac_nvram.c
+F: include/hw/misc/macio/
+F: include/hw/misc/mos6522.h
+F: include/hw/ppc/mac_dbdma.h

 Old World
 M: Alexander Graf <agraf@suse.de>
@@ -909,7 +917,7 @@ X86 Machines
 ------------
 PC
 M: Michael S. Tsirkin <mst@redhat.com>
-M: Marcel Apfelbaum <marcel@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
 S: Supported
 F: include/hw/i386/
 F: hw/i386/
@@ -959,7 +967,7 @@ F: include/hw/timer/mc146818rtc*

 Machine core
 M: Eduardo Habkost <ehabkost@redhat.com>
-M: Marcel Apfelbaum <marcel@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
 S: Supported
 F: hw/core/machine.c
 F: hw/core/null-machine.c
@@ -998,6 +1006,7 @@ F: hw/block/cdrom.c
 F: hw/block/hd-geometry.c
 F: tests/ide-test.c
 F: tests/ahci-test.c
+F: tests/cdrom-test.c
 F: tests/libqos/ahci*
 T: git git://github.com/jnsnow/qemu.git ide

@@ -1033,7 +1042,7 @@ F: hw/ipack/

 PCI
 M: Michael S. Tsirkin <mst@redhat.com>
-M: Marcel Apfelbaum <marcel@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
 S: Supported
 F: include/hw/pci/*
 F: hw/misc/pci-testdev.c
@@ -1314,6 +1323,27 @@ S: Maintained
 F: include/hw/misc/unimp.h
 F: hw/misc/unimp.c

+Standard VGA
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/display/vga*
+F: hw/display/bochs-display.c
+F: include/hw/display/vga.h
+F: include/hw/display/bochs-vbe.h
+
+virtio-gpu
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/display/virtio-gpu*
+F: hw/display/virtio-vga.c
+F: include/hw/virtio/virtio-gpu.h
+
+Cirrus VGA
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
+F: hw/display/cirrus*
+
 Subsystems
 ----------
 Audio
@@ -1339,6 +1369,8 @@ F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
 F: util/qemu-progress.c
+F: qobject/block-qdict.c
+F: test/check-block-qdict.c
 T: git git://repo.or.cz/qemu/kevin.git block

 Block I/O path
@@ -1369,10 +1401,14 @@ L: qemu-block@nongnu.org
 S: Supported
 F: blockjob.c
 F: include/block/blockjob.h
+F: job.c
+F: job-qmp.c
+F: include/block/job.h
 F: block/backup.c
 F: block/commit.c
 F: block/stream.c
 F: block/mirror.c
+F: qapi/job.json
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block

 Block QAPI, monitor, command line
@@ -1645,6 +1681,7 @@ S: Maintained
 F: slirp/
 F: net/slirp.c
 F: include/net/slirp.h
+T: git https://people.debian.org/~sthibault/qemu.git slirp
 T: git git://git.kiszka.org/qemu.git queues/slirp

 Stubs
@@ -1656,6 +1693,8 @@ Tracing
 M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: trace/
+F: trace-events
+F: qemu-option-trace.texi
 F: scripts/tracetool.py
 F: scripts/tracetool/
 F: docs/devel/tracing.txt
@@ -1781,6 +1820,12 @@ F: include/sysemu/replay.h
 F: docs/replay.txt
 F: stubs/replay.c

+IOVA Tree
+M: Peter Xu <peterx@redhat.com>
+S: Maintained
+F: include/qemu/iova-tree.h
+F: util/iova-tree.c
+
 Usermode Emulation
 ------------------
 Overall
@@ -2075,7 +2120,7 @@ F: docs/block-replication.txt

 PVRDMA
 M: Yuval Shaia <yuval.shaia@oracle.com>
-M: Marcel Apfelbaum <marcel@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
 S: Maintained
 F: hw/rdma/*
 F: hw/rdma/vmw/*
--- a/38
+++ b/38
@@ -62,8 +62,8 @@ seems to have been used for an in-tree build. You can fix this by running \
 endif
 endif

-CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
-CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
+CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_LIST)),y)
+CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_LIST)),y)
 CONFIG_XEN := $(CONFIG_XEN_BACKEND)
 CONFIG_ALL=y
 -include config-all-devices.mak
@@ -98,6 +98,7 @@ GENERATED_FILES += qapi/qapi-types-char.h qapi/qapi-types-char.c
 GENERATED_FILES += qapi/qapi-types-common.h qapi/qapi-types-common.c
 GENERATED_FILES += qapi/qapi-types-crypto.h qapi/qapi-types-crypto.c
 GENERATED_FILES += qapi/qapi-types-introspect.h qapi/qapi-types-introspect.c
+GENERATED_FILES += qapi/qapi-types-job.h qapi/qapi-types-job.c
 GENERATED_FILES += qapi/qapi-types-migration.h qapi/qapi-types-migration.c
 GENERATED_FILES += qapi/qapi-types-misc.h qapi/qapi-types-misc.c
 GENERATED_FILES += qapi/qapi-types-net.h qapi/qapi-types-net.c
@@ -116,6 +117,7 @@ GENERATED_FILES += qapi/qapi-visit-char.h qapi/qapi-visit-char.c
 GENERATED_FILES += qapi/qapi-visit-common.h qapi/qapi-visit-common.c
 GENERATED_FILES += qapi/qapi-visit-crypto.h qapi/qapi-visit-crypto.c
 GENERATED_FILES += qapi/qapi-visit-introspect.h qapi/qapi-visit-introspect.c
+GENERATED_FILES += qapi/qapi-visit-job.h qapi/qapi-visit-job.c
 GENERATED_FILES += qapi/qapi-visit-migration.h qapi/qapi-visit-migration.c
 GENERATED_FILES += qapi/qapi-visit-misc.h qapi/qapi-visit-misc.c
 GENERATED_FILES += qapi/qapi-visit-net.h qapi/qapi-visit-net.c
@@ -133,6 +135,7 @@ GENERATED_FILES += qapi/qapi-commands-char.h qapi/qapi-commands-char.c
 GENERATED_FILES += qapi/qapi-commands-common.h qapi/qapi-commands-common.c
 GENERATED_FILES += qapi/qapi-commands-crypto.h qapi/qapi-commands-crypto.c
 GENERATED_FILES += qapi/qapi-commands-introspect.h qapi/qapi-commands-introspect.c
+GENERATED_FILES += qapi/qapi-commands-job.h qapi/qapi-commands-job.c
 GENERATED_FILES += qapi/qapi-commands-migration.h qapi/qapi-commands-migration.c
 GENERATED_FILES += qapi/qapi-commands-misc.h qapi/qapi-commands-misc.c
 GENERATED_FILES += qapi/qapi-commands-net.h qapi/qapi-commands-net.c
@@ -150,6 +153,7 @@ GENERATED_FILES += qapi/qapi-events-char.h qapi/qapi-events-char.c
 GENERATED_FILES += qapi/qapi-events-common.h qapi/qapi-events-common.c
 GENERATED_FILES += qapi/qapi-events-crypto.h qapi/qapi-events-crypto.c
 GENERATED_FILES += qapi/qapi-events-introspect.h qapi/qapi-events-introspect.c
+GENERATED_FILES += qapi/qapi-events-job.h qapi/qapi-events-job.c
 GENERATED_FILES += qapi/qapi-events-migration.h qapi/qapi-events-migration.c
 GENERATED_FILES += qapi/qapi-events-misc.h qapi/qapi-events-misc.c
 GENERATED_FILES += qapi/qapi-events-net.h qapi/qapi-events-net.c
@@ -318,6 +322,7 @@ KEYCODEMAP_FILES = \
 		 ui/input-keymap-xorgkbd-to-qcode.c \
 		 ui/input-keymap-xorgxquartz-to-qcode.c \
 		 ui/input-keymap-xorgxwin-to-qcode.c \
+		 ui/input-keymap-osx-to-qcode.c \
 		 $(NULL)

 GENERATED_FILES += $(KEYCODEMAP_FILES)
@@ -347,7 +352,7 @@ $(call set-vpath, $(SRC_PATH))

 LIBS+=-lz $(LIBS_TOOLS)

-HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
+HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF)

 ifdef BUILD_DOCS
 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
@@ -362,8 +367,8 @@ DOCS=
 endif

 SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory --quiet) BUILD_DIR=$(BUILD_DIR)
-SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_DIRS))
-SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %-config-devices.mak.d, $(TARGET_DIRS))
+SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_LIST))
+SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %-config-devices.mak.d, $(TARGET_LIST))

 ifeq ($(SUBDIR_DEVICES_MAK),)
 config-all-devices.mak:
@@ -466,7 +471,7 @@ config-host.h-timestamp: config-host.mak
 qemu-options.def: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@")

-SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
+SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_LIST))
 SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))

 $(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
@@ -485,7 +490,7 @@ subdir-dtc: .git-submodule-status dtc/libfdt dtc/tests
 	$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)

 dtc/%: .git-submodule-status
-	mkdir -p $@
+	@mkdir -p $@

 # Overriding CFLAGS causes us to lose defines added in the sub-makefile.
 # Not overriding CFLAGS leads to mis-matches between compilation modes.
@@ -510,7 +515,7 @@ ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 romsubdir-%:
 	$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/" CFLAGS="$(filter -O% -g%,$(CFLAGS))",)

-ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))
+ALL_SUBDIRS=$(TARGET_LIST) $(patsubst %,pc-bios/%, $(ROMS))

 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)

@@ -563,7 +568,6 @@ $(SRC_PATH)/scripts/qapi/types.py \
 $(SRC_PATH)/scripts/qapi/visit.py \
 $(SRC_PATH)/scripts/qapi/common.py \
 $(SRC_PATH)/scripts/qapi/doc.py \
-$(SRC_PATH)/scripts/ordereddict.py \
 $(SRC_PATH)/scripts/qapi-gen.py

 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h \
@@ -582,6 +586,7 @@ qapi-modules = $(SRC_PATH)/qapi/qapi-schema.json $(SRC_PATH)/qapi/common.json \
               $(SRC_PATH)/qapi/char.json \
               $(SRC_PATH)/qapi/crypto.json \
               $(SRC_PATH)/qapi/introspect.json \
+               $(SRC_PATH)/qapi/job.json \
               $(SRC_PATH)/qapi/migration.json \
               $(SRC_PATH)/qapi/misc.json \
               $(SRC_PATH)/qapi/net.json \
@@ -601,6 +606,7 @@ qapi/qapi-types-char.c qapi/qapi-types-char.h \
 qapi/qapi-types-common.c qapi/qapi-types-common.h \
 qapi/qapi-types-crypto.c qapi/qapi-types-crypto.h \
 qapi/qapi-types-introspect.c qapi/qapi-types-introspect.h \
+qapi/qapi-types-job.c qapi/qapi-types-job.h \
 qapi/qapi-types-migration.c qapi/qapi-types-migration.h \
 qapi/qapi-types-misc.c qapi/qapi-types-misc.h \
 qapi/qapi-types-net.c qapi/qapi-types-net.h \
@@ -619,6 +625,7 @@ qapi/qapi-visit-char.c qapi/qapi-visit-char.h \
 qapi/qapi-visit-common.c qapi/qapi-visit-common.h \
 qapi/qapi-visit-crypto.c qapi/qapi-visit-crypto.h \
 qapi/qapi-visit-introspect.c qapi/qapi-visit-introspect.h \
+qapi/qapi-visit-job.c qapi/qapi-visit-job.h \
 qapi/qapi-visit-migration.c qapi/qapi-visit-migration.h \
 qapi/qapi-visit-misc.c qapi/qapi-visit-misc.h \
 qapi/qapi-visit-net.c qapi/qapi-visit-net.h \
@@ -636,6 +643,7 @@ qapi/qapi-commands-char.c qapi/qapi-commands-char.h \
 qapi/qapi-commands-common.c qapi/qapi-commands-common.h \
 qapi/qapi-commands-crypto.c qapi/qapi-commands-crypto.h \
 qapi/qapi-commands-introspect.c qapi/qapi-commands-introspect.h \
+qapi/qapi-commands-job.c qapi/qapi-commands-job.h \
 qapi/qapi-commands-migration.c qapi/qapi-commands-migration.h \
 qapi/qapi-commands-misc.c qapi/qapi-commands-misc.h \
 qapi/qapi-commands-net.c qapi/qapi-commands-net.h \
@@ -653,6 +661,7 @@ qapi/qapi-events-char.c qapi/qapi-events-char.h \
 qapi/qapi-events-common.c qapi/qapi-events-common.h \
 qapi/qapi-events-crypto.c qapi/qapi-events-crypto.h \
 qapi/qapi-events-introspect.c qapi/qapi-events-introspect.h \
+qapi/qapi-events-job.c qapi/qapi-events-job.h \
 qapi/qapi-events-migration.c qapi/qapi-events-migration.h \
 qapi/qapi-events-misc.c qapi/qapi-events-misc.h \
 qapi/qapi-events-net.c qapi/qapi-events-net.h \
@@ -763,7 +772,7 @@ distclean: clean
 	rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf
 	rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html
 	rm -f docs/qemu-block-drivers.7
-	for d in $(TARGET_DIRS); do \
+	for d in $(TARGET_LIST); do \
 	rm -rf $$d || exit 1 ; \
        done
 	rm -Rf .sdk
@@ -864,7 +873,7 @@ endif
 		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
 	done
 	$(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all"
-	for d in $(TARGET_DIRS); do \
+	for d in $(TARGET_LIST); do \
 	$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
        done

@@ -1047,9 +1056,6 @@ endif
 include $(SRC_PATH)/tests/docker/Makefile.include
 include $(SRC_PATH)/tests/vm/Makefile.include

-printgen:
-	@echo $(GENERATED_FILES)
-
 .PHONY: help
 help:
 	@echo  'Generic targets:'
@@ -1062,9 +1068,9 @@ endif
 	@echo  '  ctags/TAGS      - Generate tags file for editors'
 	@echo  '  cscope          - Generate cscope index'
 	@echo  ''
-	@$(if $(TARGET_DIRS), \
+	@$(if $(TARGET_LIST), \
 		echo 'Architecture specific targets:'; \
-		$(foreach t, $(TARGET_DIRS), \
+		$(foreach t, $(TARGET_LIST), \
 		printf "  %-30s - Build for %s\\n" $(patsubst %,subdir-%,$(t)) $(t);) \
 		echo '')
 	@echo  'Cleaning targets:'
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -10,6 +10,7 @@ util-obj-y += qapi/qapi-types-char.o
 util-obj-y += qapi/qapi-types-common.o
 util-obj-y += qapi/qapi-types-crypto.o
 util-obj-y += qapi/qapi-types-introspect.o
+util-obj-y += qapi/qapi-types-job.o
 util-obj-y += qapi/qapi-types-migration.o
 util-obj-y += qapi/qapi-types-misc.o
 util-obj-y += qapi/qapi-types-net.o
@@ -28,6 +29,7 @@ util-obj-y += qapi/qapi-visit-char.o
 util-obj-y += qapi/qapi-visit-common.o
 util-obj-y += qapi/qapi-visit-crypto.o
 util-obj-y += qapi/qapi-visit-introspect.o
+util-obj-y += qapi/qapi-visit-job.o
 util-obj-y += qapi/qapi-visit-migration.o
 util-obj-y += qapi/qapi-visit-misc.o
 util-obj-y += qapi/qapi-visit-net.o
@@ -45,6 +47,7 @@ util-obj-y += qapi/qapi-events-char.o
 util-obj-y += qapi/qapi-events-common.o
 util-obj-y += qapi/qapi-events-crypto.o
 util-obj-y += qapi/qapi-events-introspect.o
+util-obj-y += qapi/qapi-events-job.o
 util-obj-y += qapi/qapi-events-migration.o
 util-obj-y += qapi/qapi-events-misc.o
 util-obj-y += qapi/qapi-events-net.o
@@ -63,7 +66,7 @@ chardev-obj-y = chardev/
 # block-obj-y is code used by both qemu system emulation and qemu-img

 block-obj-y += nbd/
-block-obj-y += block.o blockjob.o
+block-obj-y += block.o blockjob.o job.o
 block-obj-y += block/ scsi/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
@@ -94,6 +97,7 @@ io-obj-y = io/
 ifeq ($(CONFIG_SOFTMMU),y)
 common-obj-y = blockdev.o blockdev-nbd.o block/
 common-obj-y += bootdevice.o iothread.o
+common-obj-y += job-qmp.o
 common-obj-y += net/
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
@@ -140,6 +144,7 @@ common-obj-y += qapi/qapi-commands-char.o
 common-obj-y += qapi/qapi-commands-common.o
 common-obj-y += qapi/qapi-commands-crypto.o
 common-obj-y += qapi/qapi-commands-introspect.o
+common-obj-y += qapi/qapi-commands-job.o
 common-obj-y += qapi/qapi-commands-migration.o
 common-obj-y += qapi/qapi-commands-misc.o
 common-obj-y += qapi/qapi-commands-net.o
@@ -191,66 +196,67 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/

 ######################################################################
 trace-events-subdirs =
-trace-events-subdirs += util
-trace-events-subdirs += crypto
-trace-events-subdirs += io
-trace-events-subdirs += migration
+trace-events-subdirs += accel/kvm
+trace-events-subdirs += accel/tcg
+trace-events-subdirs += audio
 trace-events-subdirs += block
 trace-events-subdirs += chardev
+trace-events-subdirs += crypto
+trace-events-subdirs += hw/9pfs
+trace-events-subdirs += hw/acpi
+trace-events-subdirs += hw/alpha
+trace-events-subdirs += hw/arm
+trace-events-subdirs += hw/audio
 trace-events-subdirs += hw/block
 trace-events-subdirs += hw/block/dataplane
 trace-events-subdirs += hw/char
-trace-events-subdirs += hw/intc
-trace-events-subdirs += hw/net
-trace-events-subdirs += hw/rdma
-trace-events-subdirs += hw/rdma/vmw
-trace-events-subdirs += hw/virtio
-trace-events-subdirs += hw/audio
-trace-events-subdirs += hw/misc
-trace-events-subdirs += hw/misc/macio
-trace-events-subdirs += hw/usb
-trace-events-subdirs += hw/scsi
-trace-events-subdirs += hw/nvram
 trace-events-subdirs += hw/display
-trace-events-subdirs += hw/input
-trace-events-subdirs += hw/timer
 trace-events-subdirs += hw/dma
-trace-events-subdirs += hw/sparc
-trace-events-subdirs += hw/sparc64
-trace-events-subdirs += hw/sd
-trace-events-subdirs += hw/isa
-trace-events-subdirs += hw/mem
+trace-events-subdirs += hw/hppa
+trace-events-subdirs += hw/i2c
 trace-events-subdirs += hw/i386
 trace-events-subdirs += hw/i386/xen
-trace-events-subdirs += hw/9pfs
-trace-events-subdirs += hw/ppc
+trace-events-subdirs += hw/ide
+trace-events-subdirs += hw/input
+trace-events-subdirs += hw/intc
+trace-events-subdirs += hw/isa
+trace-events-subdirs += hw/mem
+trace-events-subdirs += hw/misc
+trace-events-subdirs += hw/misc/macio
+trace-events-subdirs += hw/net
+trace-events-subdirs += hw/nvram
 trace-events-subdirs += hw/pci
 trace-events-subdirs += hw/pci-host
+trace-events-subdirs += hw/ppc
+trace-events-subdirs += hw/rdma
+trace-events-subdirs += hw/rdma/vmw
 trace-events-subdirs += hw/s390x
-trace-events-subdirs += hw/vfio
-trace-events-subdirs += hw/acpi
-trace-events-subdirs += hw/arm
-trace-events-subdirs += hw/alpha
-trace-events-subdirs += hw/hppa
-trace-events-subdirs += hw/xen
-trace-events-subdirs += hw/ide
+trace-events-subdirs += hw/scsi
+trace-events-subdirs += hw/sd
+trace-events-subdirs += hw/sparc
+trace-events-subdirs += hw/sparc64
+trace-events-subdirs += hw/timer
 trace-events-subdirs += hw/tpm
-trace-events-subdirs += ui
-trace-events-subdirs += audio
+trace-events-subdirs += hw/usb
+trace-events-subdirs += hw/vfio
+trace-events-subdirs += hw/virtio
+trace-events-subdirs += hw/xen
+trace-events-subdirs += io
+trace-events-subdirs += linux-user
+trace-events-subdirs += migration
+trace-events-subdirs += nbd
 trace-events-subdirs += net
+trace-events-subdirs += qapi
+trace-events-subdirs += qom
+trace-events-subdirs += scsi
 trace-events-subdirs += target/arm
 trace-events-subdirs += target/i386
 trace-events-subdirs += target/mips
-trace-events-subdirs += target/sparc
-trace-events-subdirs += target/s390x
 trace-events-subdirs += target/ppc
-trace-events-subdirs += qom
-trace-events-subdirs += linux-user
-trace-events-subdirs += qapi
-trace-events-subdirs += accel/tcg
-trace-events-subdirs += accel/kvm
-trace-events-subdirs += nbd
-trace-events-subdirs += scsi
+trace-events-subdirs += target/s390x
+trace-events-subdirs += target/sparc
+trace-events-subdirs += ui
+trace-events-subdirs += util

 trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)

--- a/Makefile.target
+++ b/Makefile.target
@@ -97,7 +97,7 @@ obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o tcg/tcg-op-gvec.o
 obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
-obj-y += fpu/softfloat.o
+obj-$(CONFIG_TCG) += fpu/softfloat.o
 obj-y += target/$(TARGET_BASE_ARCH)/
 obj-y += disas.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
--- a/2
+++ b/2
@@ -1 +1 @@
-2.11.94
+2.12.50
--- a/accel/accel.c
+++ b/accel/accel.c
@@ -70,8 +70,8 @@ static int accel_init_machine(AccelClass *acc, MachineState *ms)

 void configure_accelerator(MachineState *ms)
 {
-    const char *accel, *p;
-    char buf[10];
+    const char *accel;
+    char **accel_list, **tmp;
    int ret;
    bool accel_initialised = false;
    bool init_failed = false;
@@ -83,13 +83,10 @@ void configure_accelerator(MachineState *ms)
        accel = "tcg";
    }

-    p = accel;
-    while (!accel_initialised && *p != '\0') {
-        if (*p == ':') {
-            p++;
-        }
-        p = get_opt_name(buf, sizeof(buf), p, ':');
-        acc = accel_find(buf);
+    accel_list = g_strsplit(accel, ":", 0);
+
+    for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+        acc = accel_find(*tmp);
        if (!acc) {
            continue;
        }
@@ -107,6 +104,7 @@ void configure_accelerator(MachineState *ms)
            accel_initialised = true;
        }
    }
+    g_strfreev(accel_list);

    if (!accel_initialised) {
        if (!init_failed) {
@@ -126,6 +124,15 @@ void accel_register_compat_props(AccelState *accel)
    register_compat_props_array(class->global_props);
 }

+void accel_setup_post(MachineState *ms)
+{
+    AccelState *accel = ms->accelerator;
+    AccelClass *acc = ACCEL_GET_CLASS(accel);
+    if (acc->setup_post) {
+        acc->setup_post(ms, accel);
+    }
+}
+
 static void register_accel_types(void)
 {
    type_register_static(&accel_type);
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -25,18 +25,22 @@
 #elif DATA_SIZE == 8
 # define SUFFIX     q
 # define DATA_TYPE  uint64_t
+# define SDATA_TYPE int64_t
 # define BSWAP      bswap64
 #elif DATA_SIZE == 4
 # define SUFFIX     l
 # define DATA_TYPE  uint32_t
+# define SDATA_TYPE int32_t
 # define BSWAP      bswap32
 #elif DATA_SIZE == 2
 # define SUFFIX     w
 # define DATA_TYPE  uint16_t
+# define SDATA_TYPE int16_t
 # define BSWAP      bswap16
 #elif DATA_SIZE == 1
 # define SUFFIX     b
 # define DATA_TYPE  uint8_t
+# define SDATA_TYPE int8_t
 # define BSWAP
 #else
 # error unsupported data size
@@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)

 #undef GEN_ATOMIC_HELPER
+
+/* These helpers are, as a whole, full barriers.  Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE cmp, old, new, val = xval;                           \
+    smp_mb();                                                       \
+    cmp = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        old = cmp; new = FN(old, val);                              \
+        cmp = atomic_cmpxchg__nocheck(haddr, old, new);             \
+    } while (cmp != old);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN,  DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX,  DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA SIZE >= 16 */

 #undef END
@@ -192,47 +229,45 @@ GEN_ATOMIC_HELPER(xor_fetch)

 #undef GEN_ATOMIC_HELPER

+/* These helpers are, as a whole, full barriers.  Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
+    smp_mb();                                                       \
+    ldn = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
+        ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
+    } while (ldo != ldn);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN,  DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX,  DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN,  DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
+
 /* Note that for addition, we need to use a separate cmpxchg loop instead
   of bswaps for the reverse-host-endian helpers.  */
-ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
-                         ABI_TYPE val EXTRA_ARGS)
-{
-    ATOMIC_MMU_DECLS;
-    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
-    DATA_TYPE ldo, ldn, ret, sto;
+#define ADD(X, Y)   (X + Y)
+GEN_ATOMIC_HELPER_FN(fetch_add, ADD, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
+#undef ADD

-    ldo = atomic_read__nocheck(haddr);
-    while (1) {
-        ret = BSWAP(ldo);
-        sto = BSWAP(ret + val);
-        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
-        if (ldn == ldo) {
-            ATOMIC_MMU_CLEANUP;
-            return ret;
-        }
-        ldo = ldn;
-    }
-}
-
-ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
-                         ABI_TYPE val EXTRA_ARGS)
-{
-    ATOMIC_MMU_DECLS;
-    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
-    DATA_TYPE ldo, ldn, ret, sto;
-
-    ldo = atomic_read__nocheck(haddr);
-    while (1) {
-        ret = BSWAP(ldo) + val;
-        sto = BSWAP(ret);
-        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
-        if (ldn == ldo) {
-            ATOMIC_MMU_CLEANUP;
-            return ret;
-        }
-        ldo = ldn;
-    }
-}
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA_SIZE >= 16 */

 #undef END
@@ -241,5 +276,6 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
 #undef BSWAP
 #undef ABI_TYPE
 #undef DATA_TYPE
+#undef SDATA_TYPE
 #undef SUFFIX
 #undef DATA_SIZE
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,7 +25,6 @@
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
 #include "qemu/timer.h"
-#include "exec/address-spaces.h"
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
 #include "exec/tb-lookup.h"
@@ -156,11 +155,14 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
        && qemu_log_in_addr_range(itb->pc)) {
        qemu_log_lock();
+        int flags = 0;
+        if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
+            flags |= CPU_DUMP_FPU;
+        }
 #if defined(TARGET_I386)
-        log_cpu_state(cpu, CPU_DUMP_CCOP);
-#else
-        log_cpu_state(cpu, 0);
+        flags |= CPU_DUMP_CCOP;
 #endif
+        log_cpu_state(cpu, flags);
        qemu_log_unlock();
    }
 #endif /* DEBUG_DISAS */
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -632,7 +632,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    }

    sz = size;
-    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
+    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,
+                                                attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
@@ -664,6 +665,18 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
    env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];

    /* refill the tlb */
+    /*
+     * At this point iotlb contains a physical section number in the lower
+     * TARGET_PAGE_BITS, and either
+     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
+     *  + the offset within section->mr of the page base (otherwise)
+     * We subtract the vaddr (which is page aligned and thus won't
+     * disturb the low bits) to give an offset which can be added to the
+     * (non-page-aligned) vaddr of the eventual memory access to get
+     * the MemoryRegion offset for the access. Note that the vaddr we
+     * subtract here is that of the page base, and not the same as the
+     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
+     */
    env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
    env->iotlb[mmu_idx][index].attrs = attrs;

@@ -765,13 +778,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         target_ulong addr, uintptr_t retaddr, int size)
 {
    CPUState *cpu = ENV_GET_CPU(env);
-    hwaddr physaddr = iotlbentry->addr;
-    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+    hwaddr mr_offset;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

-    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
+    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
@@ -783,9 +799,13 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
        qemu_mutex_lock_iothread();
        locked = true;
    }
-    r = memory_region_dispatch_read(mr, physaddr,
+    r = memory_region_dispatch_read(mr, mr_offset,
                                    &val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
+        hwaddr physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
+
        cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_LOAD,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
@@ -802,12 +822,15 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      uintptr_t retaddr, int size)
 {
    CPUState *cpu = ENV_GET_CPU(env);
-    hwaddr physaddr = iotlbentry->addr;
-    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+    hwaddr mr_offset;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

-    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
+    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
@@ -818,9 +841,13 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
        qemu_mutex_lock_iothread();
        locked = true;
    }
-    r = memory_region_dispatch_write(mr, physaddr,
+    r = memory_region_dispatch_write(mr, mr_offset,
                                     val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
+        hwaddr physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
+
        cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
@@ -868,12 +895,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
 */
 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 {
-    int mmu_idx, index, pd;
+    int mmu_idx, index;
    void *p;
    MemoryRegion *mr;
+    MemoryRegionSection *section;
    CPUState *cpu = ENV_GET_CPU(env);
    CPUIOTLBEntry *iotlbentry;
-    hwaddr physaddr;
+    hwaddr physaddr, mr_offset;

    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env, true);
@@ -884,8 +912,8 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
        }
    }
    iotlbentry = &env->iotlb[mmu_idx][index];
-    pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    mr = section->mr;
    if (memory_region_is_unassigned(mr)) {
        qemu_mutex_lock_iothread();
        if (memory_region_request_mmio_ptr(mr, addr)) {
@@ -906,7 +934,10 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
         * and use the MemTXResult it produced). However it is the
         * simplest place we have currently available for the check.
         */
-        physaddr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+        mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+        physaddr = mr_offset +
+            section->offset_within_address_space -
+            section->offset_within_region;
        cpu_transaction_failed(cpu, physaddr, addr, 0, MMU_INST_FETCH, mmu_idx,
                               iotlbentry->attrs, MEMTX_DECODE_ERROR, 0);

--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -125,11 +125,19 @@ GEN_ATOMIC_HELPERS(fetch_add)
 GEN_ATOMIC_HELPERS(fetch_and)
 GEN_ATOMIC_HELPERS(fetch_or)
 GEN_ATOMIC_HELPERS(fetch_xor)
+GEN_ATOMIC_HELPERS(fetch_smin)
+GEN_ATOMIC_HELPERS(fetch_umin)
+GEN_ATOMIC_HELPERS(fetch_smax)
+GEN_ATOMIC_HELPERS(fetch_umax)

 GEN_ATOMIC_HELPERS(add_fetch)
 GEN_ATOMIC_HELPERS(and_fetch)
 GEN_ATOMIC_HELPERS(or_fetch)
 GEN_ATOMIC_HELPERS(xor_fetch)
+GEN_ATOMIC_HELPERS(smin_fetch)
+GEN_ATOMIC_HELPERS(umin_fetch)
+GEN_ATOMIC_HELPERS(smax_fetch)
+GEN_ATOMIC_HELPERS(umax_fetch)

 GEN_ATOMIC_HELPERS(xchg)

--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -644,11 +644,8 @@ static inline void *alloc_code_gen_buffer(void)
 static inline void *alloc_code_gen_buffer(void)
 {
    size_t size = tcg_ctx->code_gen_buffer_size;
-    void *buf;
-
-    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
+    return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                        PAGE_EXECUTE_READWRITE);
-    return buf;
 }
 #else
 static inline void *alloc_code_gen_buffer(void)
@@ -1672,14 +1669,14 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
 }

 #if !defined(CONFIG_USER_ONLY)
-void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
+void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
 {
    ram_addr_t ram_addr;
    MemoryRegion *mr;
    hwaddr l = 1;

    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr, &l, false);
+    mr = address_space_translate(as, addr, &addr, &l, false, attrs);
    if (!(memory_region_is_ram(mr)
          || memory_region_is_romd(mr))) {
        rcu_read_unlock();
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -34,8 +34,6 @@ void translator_loop_temp_check(DisasContextBase *db)
 void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
                     CPUState *cpu, TranslationBlock *tb)
 {
-    int max_insns;
-
    /* Initialize DisasContext */
    db->tb = tb;
    db->pc_first = tb->pc;
@@ -45,18 +43,18 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
    db->singlestep_enabled = cpu->singlestep_enabled;

    /* Instruction counting */
-    max_insns = tb_cflags(db->tb) & CF_COUNT_MASK;
-    if (max_insns == 0) {
-        max_insns = CF_COUNT_MASK;
+    db->max_insns = tb_cflags(db->tb) & CF_COUNT_MASK;
+    if (db->max_insns == 0) {
+        db->max_insns = CF_COUNT_MASK;
    }
-    if (max_insns > TCG_MAX_INSNS) {
-        max_insns = TCG_MAX_INSNS;
+    if (db->max_insns > TCG_MAX_INSNS) {
+        db->max_insns = TCG_MAX_INSNS;
    }
    if (db->singlestep_enabled || singlestep) {
-        max_insns = 1;
+        db->max_insns = 1;
    }

-    max_insns = ops->init_disas_context(db, cpu, max_insns);
+    ops->init_disas_context(db, cpu);
    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */

    /* Reset the temp count so that we can identify leaks */
@@ -95,7 +93,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
           update db->pc_next and db->is_jmp to indicate what should be
           done next -- either exiting this loop or locate the start of
           the next instruction.  */
-        if (db->num_insns == max_insns && (tb_cflags(db->tb) & CF_LAST_IO)) {
+        if (db->num_insns == db->max_insns
+            && (tb_cflags(db->tb) & CF_LAST_IO)) {
            /* Accept I/O on the last instruction.  */
            gen_io_start();
            ops->translate_insn(db, cpu);
@@ -111,7 +110,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,

        /* Stop translation if the output buffer is full,
           or we have executed all of the allowed instructions.  */
-        if (tcg_op_buf_full() || db->num_insns >= max_insns) {
+        if (tcg_op_buf_full() || db->num_insns >= db->max_insns) {
            db->is_jmp = DISAS_TOO_MANY;
            break;
        }
--- a/arch_init.c
+++ b/arch_init.c
@@ -29,6 +29,7 @@
 #include "hw/pci/pci.h"
 #include "hw/audio/soundhw.h"
 #include "qapi/qapi-commands-misc.h"
+#include "qapi/error.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "hw/acpi/acpi.h"
@@ -51,14 +52,14 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_ARM
 #elif defined(TARGET_CRIS)
 #define QEMU_ARCH QEMU_ARCH_CRIS
-#elif defined(TARGET_I386)
-#define QEMU_ARCH QEMU_ARCH_I386
 #elif defined(TARGET_HPPA)
 #define QEMU_ARCH QEMU_ARCH_HPPA
-#elif defined(TARGET_M68K)
-#define QEMU_ARCH QEMU_ARCH_M68K
+#elif defined(TARGET_I386)
+#define QEMU_ARCH QEMU_ARCH_I386
 #elif defined(TARGET_LM32)
 #define QEMU_ARCH QEMU_ARCH_LM32
+#elif defined(TARGET_M68K)
+#define QEMU_ARCH QEMU_ARCH_M68K
 #elif defined(TARGET_MICROBLAZE)
 #define QEMU_ARCH QEMU_ARCH_MICROBLAZE
 #elif defined(TARGET_MIPS)
@@ -79,12 +80,12 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_SH4
 #elif defined(TARGET_SPARC)
 #define QEMU_ARCH QEMU_ARCH_SPARC
-#elif defined(TARGET_XTENSA)
-#define QEMU_ARCH QEMU_ARCH_XTENSA
-#elif defined(TARGET_UNICORE32)
-#define QEMU_ARCH QEMU_ARCH_UNICORE32
 #elif defined(TARGET_TRICORE)
 #define QEMU_ARCH QEMU_ARCH_TRICORE
+#elif defined(TARGET_UNICORE32)
+#define QEMU_ARCH QEMU_ARCH_UNICORE32
+#elif defined(TARGET_XTENSA)
+#define QEMU_ARCH QEMU_ARCH_XTENSA
 #endif

 const uint32_t arch_type = QEMU_ARCH;
@@ -112,7 +113,8 @@ TargetInfo *qmp_query_target(Error **errp)
 {
    TargetInfo *info = g_malloc0(sizeof(*info));

-    info->arch = g_strdup(TARGET_NAME);
+    info->arch = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME, -1,
+                                 &error_abort);

    return info;
 }
--- a/backends/cryptodev-vhost-user.c
+++ b/backends/cryptodev-vhost-user.c
@@ -26,6 +26,7 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
+#include "hw/virtio/vhost-user.h"
 #include "standard-headers/linux/virtio_crypto.h"
 #include "sysemu/cryptodev-vhost.h"
 #include "chardev/char-fe.h"
@@ -46,6 +47,7 @@
 typedef struct CryptoDevBackendVhostUser {
    CryptoDevBackend parent_obj;

+    VhostUserState *vhost_user;
    CharBackend chr;
    char *chr_name;
    bool opened;
@@ -102,7 +104,7 @@ cryptodev_vhost_user_start(int queues,
            continue;
        }

-        options.opaque = &s->chr;
+        options.opaque = s->vhost_user;
        options.backend_type = VHOST_BACKEND_TYPE_USER;
        options.cc = b->conf.peers.ccs[i];
        s->vhost_crypto[i] = cryptodev_vhost_init(&options);
@@ -185,6 +187,7 @@ static void cryptodev_vhost_user_init(
    size_t i;
    Error *local_err = NULL;
    Chardev *chr;
+    VhostUserState *user;
    CryptoDevBackendClient *cc;
    CryptoDevBackendVhostUser *s =
                      CRYPTODEV_BACKEND_VHOST_USER(backend);
@@ -215,6 +218,15 @@ static void cryptodev_vhost_user_init(
        }
    }

+    user = vhost_user_init();
+    if (!user) {
+        error_setg(errp, "Failed to init vhost_user");
+        return;
+    }
+
+    user->chr = &s->chr;
+    s->vhost_user = user;
+
    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                     cryptodev_vhost_user_event, NULL, s, NULL, true);

@@ -299,6 +311,12 @@ static void cryptodev_vhost_user_cleanup(
            backend->conf.peers.ccs[i] = NULL;
        }
    }
+
+    if (s->vhost_user) {
+        vhost_user_cleanup(s->vhost_user);
+        g_free(s->vhost_user);
+        s->vhost_user = NULL;
+    }
 }

 static void cryptodev_vhost_user_set_chardev(Object *obj,
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -18,6 +18,7 @@
 #include "qapi/visitor.h"
 #include "qemu/config-file.h"
 #include "qom/object_interfaces.h"
+#include "qemu/mmap-alloc.h"

 #ifdef CONFIG_NUMA
 #include <numaif.h>
@@ -262,6 +263,23 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
    return backend->is_mapped;
 }

+#ifdef __linux__
+size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
+{
+    Object *obj = OBJECT(memdev);
+    char *path = object_property_get_str(obj, "mem-path", NULL);
+    size_t pagesize = qemu_mempath_getpagesize(path);
+
+    g_free(path);
+    return pagesize;
+}
+#else
+size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
+{
+    return getpagesize();
+}
+#endif
+
 static void
 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 {
@@ -351,24 +369,6 @@ host_memory_backend_can_be_deleted(UserCreatable *uc)
    }
 }

-static char *get_id(Object *o, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    return g_strdup(backend->id);
-}
-
-static void set_id(Object *o, const char *str, Error **errp)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-
-    if (backend->id) {
-        error_setg(errp, "cannot change property value");
-        return;
-    }
-    backend->id = g_strdup(str);
-}
-
 static bool host_memory_backend_get_share(Object *o, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
@@ -416,18 +416,11 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
-    object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share,
        &error_abort);
 }

-static void host_memory_backend_finalize(Object *o)
-{
-    HostMemoryBackend *backend = MEMORY_BACKEND(o);
-    g_free(backend->id);
-}
-
 static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
@@ -436,7 +429,6 @@ static const TypeInfo host_memory_backend_info = {
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
-    .instance_finalize = host_memory_backend_finalize,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
--- a/block.c
+++ b/block.c
@@ -27,6 +27,7 @@
 #include "block/block_int.h"
 #include "block/blockjob.h"
 #include "block/nbd.h"
+#include "block/qdict.h"
 #include "qemu/error-report.h"
 #include "module_block.h"
 #include "qemu/module.h"
@@ -1227,9 +1228,9 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,

    ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
    if (ret < 0) {
-        QDECREF(bs->explicit_options);
+        qobject_unref(bs->explicit_options);
        bs->explicit_options = NULL;
-        QDECREF(bs->options);
+        qobject_unref(bs->options);
        bs->options = NULL;
        bdrv_unref(bs);
        return NULL;
@@ -1460,7 +1461,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp)

    options = qobject_to(QDict, options_obj);
    if (!options) {
-        qobject_decref(options_obj);
+        qobject_unref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }
@@ -1490,7 +1491,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
    /* Options given in the filename have lower priority than options
     * specified directly */
    qdict_join(options, json_options, false);
-    QDECREF(json_options);
+    qobject_unref(json_options);
    *pfilename = NULL;
 }

@@ -1620,13 +1621,24 @@ static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)

 /* Returns whether the image file can be written to after the reopen queue @q
 * has been successfully applied, or right now if @q is NULL. */
-static bool bdrv_is_writable(BlockDriverState *bs, BlockReopenQueue *q)
+static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
+                                          BlockReopenQueue *q)
 {
    int flags = bdrv_reopen_get_flags(q, bs);

    return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
 }

+/*
+ * Return whether the BDS can be written to.  This is not necessarily
+ * the same as !bdrv_is_read_only(bs), as inactivated images may not
+ * be written to but do not count as read-only images.
+ */
+bool bdrv_is_writable(BlockDriverState *bs)
+{
+    return bdrv_is_writable_after_reopen(bs, NULL);
+}
+
 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
                            BdrvChild *c, const BdrvChildRole *role,
                            BlockReopenQueue *reopen_queue,
@@ -1664,7 +1676,7 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,

    /* Write permissions never work with read-only images */
    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
-        !bdrv_is_writable(bs, q))
+        !bdrv_is_writable_after_reopen(bs, q))
    {
        error_setg(errp, "Block node is read-only");
        return -EPERM;
@@ -1956,7 +1968,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                                  &perm, &shared);

        /* Format drivers may touch metadata even if the guest doesn't write */
-        if (bdrv_is_writable(bs, reopen_queue)) {
+        if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
            perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
        }

@@ -2273,7 +2285,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
    if (reference || qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
-        QDECREF(options);
+        qobject_unref(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
@@ -2281,7 +2293,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
-            QDECREF(options);
+            qobject_unref(options);
            goto free_exit;
        }
    }
@@ -2289,7 +2301,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
-        QDECREF(options);
+        qobject_unref(options);
        goto free_exit;
    }

@@ -2323,7 +2335,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,

 free_exit:
    g_free(backing_filename);
-    QDECREF(tmp_parent_options);
+    qobject_unref(tmp_parent_options);
    return ret;
 }

@@ -2356,7 +2368,7 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
        }
-        QDECREF(image_options);
+        qobject_unref(image_options);
        goto done;
    }

@@ -2449,7 +2461,7 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
    obj = NULL;

 fail:
-    qobject_decref(obj);
+    qobject_unref(obj);
    visit_free(v);
    return bs;
 }
@@ -2519,7 +2531,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
    }

 out:
-    QDECREF(snapshot_options);
+    qobject_unref(snapshot_options);
    g_free(tmp_filename);
    return bs_snapshot;
 }
@@ -2530,7 +2542,7 @@ out:
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use QINCREF() before calling bdrv_open.
+ * dictionary, it needs to use qobject_ref() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
@@ -2561,7 +2573,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,

    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
-        QDECREF(options);
+        qobject_unref(options);

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
@@ -2752,7 +2764,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,

    bdrv_parent_cb_change_media(bs, true);

-    QDECREF(options);
+    qobject_unref(options);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
@@ -2776,10 +2788,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,

 fail:
    blk_unref(file);
-    QDECREF(snapshot_options);
-    QDECREF(bs->explicit_options);
-    QDECREF(bs->options);
-    QDECREF(options);
+    qobject_unref(snapshot_options);
+    qobject_unref(bs->explicit_options);
+    qobject_unref(bs->options);
+    qobject_unref(options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    bdrv_unref(bs);
@@ -2788,8 +2800,8 @@ fail:

 close_and_fail:
    bdrv_unref(bs);
-    QDECREF(snapshot_options);
-    QDECREF(options);
+    qobject_unref(snapshot_options);
+    qobject_unref(options);
    error_propagate(errp, local_err);
    return NULL;
 }
@@ -2884,7 +2896,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
        old_options = qdict_clone_shallow(bs->explicit_options);
    }
    bdrv_join_options(bs, options, old_options);
-    QDECREF(old_options);
+    qobject_unref(old_options);

    explicit_options = qdict_clone_shallow(options);

@@ -2899,13 +2911,13 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
        qemu_opts_absorb_qdict(opts, options_copy, NULL);
        update_flags_from_options(&flags, opts);
        qemu_opts_del(opts);
-        QDECREF(options_copy);
+        qobject_unref(options_copy);
    }

    /* Old values are used for options that aren't set yet */
    old_options = qdict_clone_shallow(bs->options);
    bdrv_join_options(bs, options, old_options);
-    QDECREF(old_options);
+    qobject_unref(old_options);

    /* bdrv_open_inherit() sets and clears some additional flags internally */
    flags &= ~BDRV_O_PROTOCOL;
@@ -2917,8 +2929,8 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
        bs_entry = g_new0(BlockReopenQueueEntry, 1);
        QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
    } else {
-        QDECREF(bs_entry->state.options);
-        QDECREF(bs_entry->state.explicit_options);
+        qobject_unref(bs_entry->state.options);
+        qobject_unref(bs_entry->state.explicit_options);
    }

    bs_entry->state.bs = bs;
@@ -3008,9 +3020,9 @@ cleanup:
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        } else if (ret) {
-            QDECREF(bs_entry->state.explicit_options);
+            qobject_unref(bs_entry->state.explicit_options);
        }
-        QDECREF(bs_entry->state.options);
+        qobject_unref(bs_entry->state.options);
        g_free(bs_entry);
    }
    g_free(bs_queue);
@@ -3253,7 +3265,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
    }

    /* set BDS specific flags now */
-    QDECREF(bs->explicit_options);
+    qobject_unref(bs->explicit_options);

    bs->explicit_options   = reopen_state->explicit_options;
    bs->open_flags         = reopen_state->flags;
@@ -3296,7 +3308,7 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
        drv->bdrv_reopen_abort(reopen_state);
    }

-    QDECREF(reopen_state->explicit_options);
+    qobject_unref(reopen_state->explicit_options);

    bdrv_abort_perm_update(reopen_state->bs);
 }
@@ -3343,11 +3355,11 @@ static void bdrv_close(BlockDriverState *bs)
    bs->total_sectors = 0;
    bs->encrypted = false;
    bs->sg = false;
-    QDECREF(bs->options);
-    QDECREF(bs->explicit_options);
+    qobject_unref(bs->options);
+    qobject_unref(bs->explicit_options);
    bs->options = NULL;
    bs->explicit_options = NULL;
-    QDECREF(bs->full_open_options);
+    qobject_unref(bs->full_open_options);
    bs->full_open_options = NULL;

    bdrv_release_named_dirty_bitmaps(bs);
@@ -3362,7 +3374,7 @@ static void bdrv_close(BlockDriverState *bs)

 void bdrv_close_all(void)
 {
-    block_job_cancel_sync_all();
+    assert(job_next(NULL) == NULL);
    nbd_export_close_all();

    /* Drop references from requests still in flight, such as canceled block
@@ -4996,15 +5008,19 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
 }

 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
-                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
+                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+                       Error **errp)
 {
    if (!bs->drv) {
+        error_setg(errp, "Node is ejected");
        return -ENOMEDIUM;
    }
    if (!bs->drv->bdrv_amend_options) {
+        error_setg(errp, "Block driver '%s' does not support option amendment",
+                   bs->drv->format_name);
        return -ENOTSUP;
    }
-    return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
+    return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp);
 }

 /* This function will be called by the bdrv_recurse_is_first_non_filter method
@@ -5134,8 +5150,8 @@ static bool append_open_options(QDict *d, BlockDriverState *bs)
            continue;
        }

-        qobject_incref(qdict_entry_value(entry));
-        qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
+        qdict_put_obj(d, qdict_entry_key(entry),
+                      qobject_ref(qdict_entry_value(entry)));
        found_any = true;
    }

@@ -5174,21 +5190,21 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
-            QDECREF(bs->full_open_options);
+            qobject_unref(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        append_open_options(opts, bs);
        drv->bdrv_refresh_filename(bs, opts);
-        QDECREF(opts);
+        qobject_unref(opts);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
-            QDECREF(bs->full_open_options);
+            qobject_unref(bs->full_open_options);
            bs->full_open_options = NULL;
        }

@@ -5207,12 +5223,12 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->bs->full_open_options) {
            qdict_put_str(opts, "driver", drv->format_name);
-            QINCREF(bs->file->bs->full_open_options);
-            qdict_put(opts, "file", bs->file->bs->full_open_options);
+            qdict_put(opts, "file",
+                      qobject_ref(bs->file->bs->full_open_options));

            bs->full_open_options = opts;
        } else {
-            QDECREF(opts);
+            qobject_unref(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
@@ -5246,7 +5262,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
-        QDECREF(json);
+        qobject_unref(json);
    }
 }

--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-y += throttle.o
+block-obj-y += throttle.o copy-on-read.o

 block-obj-y += crypto.o

--- a/block/backup.c
+++ b/block/backup.c
@@ -27,7 +27,6 @@
 #include "qemu/error-report.h"

 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-#define SLICE_TIME 100000000ULL /* ns */

 typedef struct BackupBlockJob {
    BlockJob common;
@@ -35,10 +34,10 @@ typedef struct BackupBlockJob {
    /* bitmap for sync=incremental */
    BdrvDirtyBitmap *sync_bitmap;
    MirrorSyncMode sync_mode;
-    RateLimit limit;
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    CoRwlock flush_rwlock;
+    uint64_t len;
    uint64_t bytes_read;
    int64_t cluster_size;
    bool compress;
@@ -48,6 +47,8 @@ typedef struct BackupBlockJob {
    HBitmap *copy_bitmap;
 } BackupBlockJob;

+static const BlockJobDriver backup_job_driver;
+
 /* See if in-flight requests overlap and wait for them to complete */
 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
                                                       int64_t start,
@@ -118,7 +119,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,

        trace_backup_do_cow_process(job, start);

-        n = MIN(job->cluster_size, job->common.len - start);
+        n = MIN(job->cluster_size, job->len - start);

        if (!bounce_buffer) {
            bounce_buffer = blk_blockalign(blk, job->cluster_size);
@@ -159,7 +160,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
         * offset field is an opaque progress value, it is not a disk offset.
         */
        job->bytes_read += n;
-        job->common.offset += n;
+        job_progress_update(&job->common.job, n);
    }

 out:
@@ -190,17 +191,6 @@ static int coroutine_fn backup_before_write_notify(
    return backup_do_cow(job, req->offset, req->bytes, NULL, true);
 }

-static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
-}
-
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
    BdrvDirtyBitmap *bm;
@@ -217,25 +207,25 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
    }
 }

-static void backup_commit(BlockJob *job)
+static void backup_commit(Job *job)
 {
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, 0);
    }
 }

-static void backup_abort(BlockJob *job)
+static void backup_abort(Job *job)
 {
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, -1);
    }
 }

-static void backup_clean(BlockJob *job)
+static void backup_clean(Job *job)
 {
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    assert(s->target);
    blk_unref(s->target);
    s->target = NULL;
@@ -253,7 +243,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    int64_t len;

-    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+    assert(block_job_driver(job) == &backup_job_driver);

    if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
        error_setg(errp, "The backup job only supports block checkpoint in"
@@ -261,7 +251,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
        return;
    }

-    len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
+    len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
    hbitmap_set(backup_job->copy_bitmap, 0, len);
 }

@@ -271,7 +261,7 @@ void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    int64_t start, end;

-    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+    assert(block_job_driver(job) == &backup_job_driver);

    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
@@ -284,7 +274,7 @@ void backup_cow_request_begin(CowRequest *req, BlockJob *job,
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    int64_t start, end;

-    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+    assert(block_job_driver(job) == &backup_job_driver);

    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
@@ -327,33 +317,29 @@ typedef struct {
    int ret;
 } BackupCompleteData;

-static void backup_complete(BlockJob *job, void *opaque)
+static void backup_complete(Job *job, void *opaque)
 {
    BackupCompleteData *data = opaque;

-    block_job_completed(job, data->ret);
+    job_completed(job, data->ret, NULL);
    g_free(data);
 }

 static bool coroutine_fn yield_and_check(BackupBlockJob *job)
 {
-    if (block_job_is_cancelled(&job->common)) {
+    uint64_t delay_ns;
+
+    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

-    /* we need to yield so that bdrv_drain_all() returns.
-     * (without, VM does not reboot)
-     */
-    if (job->common.speed) {
-        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->bytes_read);
-        job->bytes_read = 0;
-        block_job_sleep_ns(&job->common, delay_ns);
-    } else {
-        block_job_sleep_ns(&job->common, 0);
-    }
+    /* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
+     * return. Without a yield, the VM would not reboot. */
+    delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
+    job->bytes_read = 0;
+    job_sleep_ns(&job->common.job, delay_ns);

-    if (block_job_is_cancelled(&job->common)) {
+    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

@@ -420,8 +406,9 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
        bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
    }

-    job->common.offset = job->common.len -
-                         hbitmap_count(job->copy_bitmap) * job->cluster_size;
+    /* TODO job_progress_set_remaining() would make more sense */
+    job_progress_update(&job->common.job,
+        job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);

    bdrv_dirty_iter_free(dbi);
 }
@@ -437,7 +424,9 @@ static void coroutine_fn backup_run(void *opaque)
    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

-    nb_clusters = DIV_ROUND_UP(job->common.len, job->cluster_size);
+    nb_clusters = DIV_ROUND_UP(job->len, job->cluster_size);
+    job_progress_set_remaining(&job->common.job, job->len);
+
    job->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
    if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        backup_incremental_init_copy_bitmap(job);
@@ -452,16 +441,16 @@ static void coroutine_fn backup_run(void *opaque)
    if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
        /* All bits are set in copy_bitmap to allow any cluster to be copied.
         * This does not actually require them to be copied. */
-        while (!block_job_is_cancelled(&job->common)) {
+        while (!job_is_cancelled(&job->common.job)) {
            /* Yield until the job is cancelled.  We just let our before_write
             * notify callback service CoW requests. */
-            block_job_yield(&job->common);
+            job_yield(&job->common.job);
        }
    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (offset = 0; offset < job->common.len;
+        for (offset = 0; offset < job->len;
             offset += job->cluster_size) {
            bool error_is_read;
            int alloced = 0;
@@ -530,17 +519,21 @@ static void coroutine_fn backup_run(void *opaque)

    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    block_job_defer_to_main_loop(&job->common, backup_complete, data);
+    job_defer_to_main_loop(&job->common.job, backup_complete, data);
 }

 static const BlockJobDriver backup_job_driver = {
-    .instance_size          = sizeof(BackupBlockJob),
-    .job_type               = BLOCK_JOB_TYPE_BACKUP,
-    .start                  = backup_run,
-    .set_speed              = backup_set_speed,
-    .commit                 = backup_commit,
-    .abort                  = backup_abort,
-    .clean                  = backup_clean,
+    .job_driver = {
+        .instance_size          = sizeof(BackupBlockJob),
+        .job_type               = JOB_TYPE_BACKUP,
+        .free                   = block_job_free,
+        .user_resume            = block_job_user_resume,
+        .drain                  = block_job_drain,
+        .start                  = backup_run,
+        .commit                 = backup_commit,
+        .abort                  = backup_abort,
+        .clean                  = backup_clean,
+    },
    .attached_aio_context   = backup_attached_aio_context,
    .drain                  = backup_drain,
 };
@@ -553,7 +546,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockdevOnError on_target_error,
                  int creation_flags,
                  BlockCompletionFunc *cb, void *opaque,
-                  BlockJobTxn *txn, Error **errp)
+                  JobTxn *txn, Error **errp)
 {
    int64_t len;
    BlockDriverInfo bdi;
@@ -620,7 +613,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

-    /* job->common.len is fixed, so we can't allow resize */
+    /* job->len is fixed, so we can't allow resize */
    job = block_job_create(job_id, &backup_job_driver, txn, bs,
                           BLK_PERM_CONSISTENT_READ,
                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
@@ -676,7 +669,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    /* Required permissions are already taken with target's blk_new() */
    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                       &error_abort);
-    job->common.len = len;
+    job->len = len;

    return &job->common;

@@ -685,8 +678,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
    }
    if (job) {
-        backup_clean(&job->common);
-        block_job_early_fail(&job->common);
+        backup_clean(&job->common.job);
+        job_early_fail(&job->common.job);
    }

    return NULL;
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
        goto out;
    }

-    bs->supported_write_flags = BDRV_REQ_FUA &
-        bs->file->bs->supported_write_flags;
-    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-        bs->file->bs->supported_zero_flags;
+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+            bs->file->bs->supported_zero_flags);
    ret = -EINVAL;

    /* Set alignment overrides */
@@ -845,13 +846,12 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    opts = qdict_new();
    qdict_put_str(opts, "driver", "blkdebug");

-    QINCREF(bs->file->bs->full_open_options);
-    qdict_put(opts, "image", bs->file->bs->full_open_options);
+    qdict_put(opts, "image", qobject_ref(bs->file->bs->full_open_options));

    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
        if (strcmp(qdict_entry_key(e), "x-image")) {
-            qobject_incref(qdict_entry_value(e));
-            qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
+            qdict_put_obj(opts, qdict_entry_key(e),
+                          qobject_ref(qdict_entry_value(e)));
        }
    }

--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
    ret = 0;
 fail:
    return ret;
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
    ret = 0;
 fail:
    qemu_opts_del(opts);
@@ -291,10 +294,10 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
        QDict *opts = qdict_new();
        qdict_put_str(opts, "driver", "blkverify");

-        QINCREF(bs->file->bs->full_open_options);
-        qdict_put(opts, "raw", bs->file->bs->full_open_options);
-        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put(opts, "test", s->test_file->bs->full_open_options);
+        qdict_put(opts, "raw",
+                  qobject_ref(bs->file->bs->full_open_options));
+        qdict_put(opts, "test",
+                  qobject_ref(s->test_file->bs->full_open_options));

        bs->full_open_options = opts;
    }
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -419,7 +419,6 @@ static void drive_info_del(DriveInfo *dinfo)
        return;
    }
    qemu_opts_del(dinfo->opts);
-    g_free(dinfo->serial);
    g_free(dinfo);
 }

@@ -1865,13 +1864,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)

 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bdrv_get_aio_context(bs);
-    } else {
-        return qemu_get_aio_context();
-    }
+    return bdrv_get_aio_context(blk_bs(blk));
 }

 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
@@ -2217,3 +2210,21 @@ void blk_unregister_buf(BlockBackend *blk, void *host)
 {
    bdrv_unregister_buf(blk_bs(blk), host);
 }
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int bytes, BdrvRequestFlags flags)
+{
+    int r;
+    r = blk_check_byte_request(blk_in, off_in, bytes);
+    if (r) {
+        return r;
+    }
+    r = blk_check_byte_request(blk_out, off_out, bytes);
+    if (r) {
+        return r;
+    }
+    return bdrv_co_copy_range(blk_in->root, off_in,
+                              blk_out->root, off_out,
+                              bytes, flags);
+}
--- a/block/commit.c
+++ b/block/commit.c
@@ -31,11 +31,8 @@ enum {
    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
 };

-#define SLICE_TIME 100000000ULL /* ns */
-
 typedef struct CommitBlockJob {
    BlockJob common;
-    RateLimit limit;
    BlockDriverState *commit_top_bs;
    BlockBackend *top;
    BlockBackend *base;
@@ -75,9 +72,10 @@ typedef struct {
    int ret;
 } CommitCompleteData;

-static void commit_complete(BlockJob *job, void *opaque)
+static void commit_complete(Job *job, void *opaque)
 {
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+    BlockJob *bjob = &s->common;
    CommitCompleteData *data = opaque;
    BlockDriverState *top = blk_bs(s->top);
    BlockDriverState *base = blk_bs(s->base);
@@ -93,7 +91,7 @@ static void commit_complete(BlockJob *job, void *opaque)
     * the normal backing chain can be restored. */
    blk_unref(s->base);

-    if (!block_job_is_cancelled(&s->common) && ret == 0) {
+    if (!job_is_cancelled(job) && ret == 0) {
        /* success */
        ret = bdrv_drop_intermediate(s->commit_top_bs, base,
                                     s->backing_file_str);
@@ -114,12 +112,12 @@ static void commit_complete(BlockJob *job, void *opaque)
    blk_unref(s->top);

    /* If there is more than one reference to the job (e.g. if called from
-     * block_job_finish_sync()), block_job_completed() won't free it and
-     * therefore the blockers on the intermediate nodes remain. This would
-     * cause bdrv_set_backing_hd() to fail. */
-    block_job_remove_all_bdrv(job);
+     * job_finish_sync()), job_completed() won't free it and therefore the
+     * blockers on the intermediate nodes remain. This would cause
+     * bdrv_set_backing_hd() to fail. */
+    block_job_remove_all_bdrv(bjob);

-    block_job_completed(&s->common, ret);
+    job_completed(job, ret, NULL);
    g_free(data);

    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
@@ -146,21 +144,21 @@ static void coroutine_fn commit_run(void *opaque)
    int64_t n = 0; /* bytes */
    void *buf = NULL;
    int bytes_written = 0;
-    int64_t base_len;
+    int64_t len, base_len;

-    ret = s->common.len = blk_getlength(s->top);
-
-    if (s->common.len < 0) {
+    ret = len = blk_getlength(s->top);
+    if (len < 0) {
        goto out;
    }
+    job_progress_set_remaining(&s->common.job, len);

    ret = base_len = blk_getlength(s->base);
    if (base_len < 0) {
        goto out;
    }

-    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
+    if (base_len < len) {
+        ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL);
        if (ret) {
            goto out;
        }
@@ -168,14 +166,14 @@ static void coroutine_fn commit_run(void *opaque)

    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);

-    for (offset = 0; offset < s->common.len; offset += n) {
+    for (offset = 0; offset < len; offset += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
-        block_job_sleep_ns(&s->common, delay_ns);
-        if (block_job_is_cancelled(&s->common)) {
+        job_sleep_ns(&s->common.job, delay_ns);
+        if (job_is_cancelled(&s->common.job)) {
            break;
        }
        /* Copy if allocated above the base */
@@ -198,10 +196,10 @@ static void coroutine_fn commit_run(void *opaque)
            }
        }
        /* Publish progress */
-        s->common.offset += n;
+        job_progress_update(&s->common.job, n);

-        if (copy && s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, n);
+        if (copy) {
+            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
@@ -214,25 +212,18 @@ out:

    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, commit_complete, data);
-}
-
-static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
+    job_defer_to_main_loop(&s->common.job, commit_complete, data);
 }

 static const BlockJobDriver commit_job_driver = {
-    .instance_size = sizeof(CommitBlockJob),
-    .job_type      = BLOCK_JOB_TYPE_COMMIT,
-    .set_speed     = commit_set_speed,
-    .start         = commit_run,
+    .job_driver = {
+        .instance_size = sizeof(CommitBlockJob),
+        .job_type      = JOB_TYPE_COMMIT,
+        .free          = block_job_free,
+        .user_resume   = block_job_user_resume,
+        .drain         = block_job_drain,
+        .start         = commit_run,
+    },
 };

 static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
@@ -292,7 +283,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    }

    s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
-                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+                         speed, JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        return;
    }
@@ -382,7 +373,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    s->on_error = on_error;

    trace_commit_start(bs, base, top, s);
-    block_job_start(&s->common);
+    job_start(&s->common.job);
    return;

 fail:
@@ -395,7 +386,7 @@ fail:
    if (commit_top_bs) {
        bdrv_replace_node(commit_top_bs, top, &error_abort);
    }
-    block_job_early_fail(&s->common);
+    job_early_fail(&s->common.job);
 }


--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -0,0 +1,173 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author:
+ *   Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+
+
+static int cor_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
+                               errp);
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+                                (BDRV_REQ_FUA &
+                                    bs->file->bs->supported_write_flags);
+
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+                               ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+                                    bs->file->bs->supported_zero_flags);
+
+    return 0;
+}
+
+
+static void cor_close(BlockDriverState *bs)
+{
+}
+
+
+#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+                          | BLK_PERM_WRITE \
+                          | BLK_PERM_RESIZE)
+#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
+
+static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
+                           const BdrvChildRole *role,
+                           BlockReopenQueue *reopen_queue,
+                           uint64_t perm, uint64_t shared,
+                           uint64_t *nperm, uint64_t *nshared)
+{
+    if (c == NULL) {
+        *nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED;
+        *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
+        return;
+    }
+
+    *nperm = (perm & PERM_PASSTHROUGH) |
+             (c->perm & PERM_UNCHANGED);
+    *nshared = (shared & PERM_PASSTHROUGH) |
+               (c->shared_perm & PERM_UNCHANGED);
+}
+
+
+static int64_t cor_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file->bs);
+}
+
+
+static int cor_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
+{
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
+}
+
+
+static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
+                                      uint64_t offset, uint64_t bytes,
+                                      QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov,
+                          flags | BDRV_REQ_COPY_ON_READ);
+}
+
+
+static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
+                                       uint64_t offset, uint64_t bytes,
+                                       QEMUIOVector *qiov, int flags)
+{
+
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+
+static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
+                                             int64_t offset, int bytes,
+                                             BdrvRequestFlags flags)
+{
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+
+static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
+                                        int64_t offset, int bytes)
+{
+    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+}
+
+
+static void cor_eject(BlockDriverState *bs, bool eject_flag)
+{
+    bdrv_eject(bs->file->bs, eject_flag);
+}
+
+
+static void cor_lock_medium(BlockDriverState *bs, bool locked)
+{
+    bdrv_lock_medium(bs->file->bs, locked);
+}
+
+
+static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
+                                            BlockDriverState *candidate)
+{
+    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
+}
+
+
+BlockDriver bdrv_copy_on_read = {
+    .format_name                        = "copy-on-read",
+
+    .bdrv_open                          = cor_open,
+    .bdrv_close                         = cor_close,
+    .bdrv_child_perm                    = cor_child_perm,
+
+    .bdrv_getlength                     = cor_getlength,
+    .bdrv_truncate                      = cor_truncate,
+
+    .bdrv_co_preadv                     = cor_co_preadv,
+    .bdrv_co_pwritev                    = cor_co_pwritev,
+    .bdrv_co_pwrite_zeroes              = cor_co_pwrite_zeroes,
+    .bdrv_co_pdiscard                   = cor_co_pdiscard,
+
+    .bdrv_eject                         = cor_eject,
+    .bdrv_lock_medium                   = cor_lock_medium,
+
+    .bdrv_co_block_status               = bdrv_co_block_status_from_file,
+
+    .bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
+
+    .has_variable_length                = true,
+    .is_filter                          = true,
+};
+
+static void bdrv_copy_on_read_init(void)
+{
+    bdrv_register(&bdrv_copy_on_read);
+}
+
+block_init(bdrv_copy_on_read_init);
--- a/block/create.c
+++ b/block/create.c
@@ -24,28 +24,51 @@

 #include "qemu/osdep.h"
 #include "block/block_int.h"
+#include "qemu/job.h"
 #include "qapi/qapi-commands-block-core.h"
+#include "qapi/qapi-visit-block-core.h"
+#include "qapi/clone-visitor.h"
 #include "qapi/error.h"

-typedef struct BlockdevCreateCo {
+typedef struct BlockdevCreateJob {
+    Job common;
    BlockDriver *drv;
    BlockdevCreateOptions *opts;
    int ret;
-    Error **errp;
-} BlockdevCreateCo;
+    Error *err;
+} BlockdevCreateJob;

-static void coroutine_fn bdrv_co_create_co_entry(void *opaque)
+static void blockdev_create_complete(Job *job, void *opaque)
 {
-    BlockdevCreateCo *cco = opaque;
-    cco->ret = cco->drv->bdrv_co_create(cco->opts, cco->errp);
+    BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
+
+    job_completed(job, s->ret, s->err);
 }

-void qmp_x_blockdev_create(BlockdevCreateOptions *options, Error **errp)
+static void coroutine_fn blockdev_create_run(void *opaque)
 {
+    BlockdevCreateJob *s = opaque;
+
+    job_progress_set_remaining(&s->common, 1);
+    s->ret = s->drv->bdrv_co_create(s->opts, &s->err);
+    job_progress_update(&s->common, 1);
+
+    qapi_free_BlockdevCreateOptions(s->opts);
+    job_defer_to_main_loop(&s->common, blockdev_create_complete, NULL);
+}
+
+static const JobDriver blockdev_create_job_driver = {
+    .instance_size = sizeof(BlockdevCreateJob),
+    .job_type      = JOB_TYPE_CREATE,
+    .start         = blockdev_create_run,
+};
+
+void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
+                         Error **errp)
+{
+    BlockdevCreateJob *s;
    const char *fmt = BlockdevDriver_str(options->driver);
    BlockDriver *drv = bdrv_find_format(fmt);
-    Coroutine *co;
-    BlockdevCreateCo cco;

    /* If the driver is in the schema, we know that it exists. But it may not
     * be whitelisted. */
@@ -55,22 +78,24 @@ void qmp_x_blockdev_create(BlockdevCreateOptions *options, Error **errp)
        return;
    }

-    /* Call callback if it exists */
+    /* Error out if the driver doesn't support .bdrv_co_create */
    if (!drv->bdrv_co_create) {
        error_setg(errp, "Driver does not support blockdev-create");
        return;
    }

-    cco = (BlockdevCreateCo) {
-        .drv = drv,
-        .opts = options,
-        .ret = -EINPROGRESS,
-        .errp = errp,
-    };
-
-    co = qemu_coroutine_create(bdrv_co_create_co_entry, &cco);
-    qemu_coroutine_enter(co);
-    while (cco.ret == -EINPROGRESS) {
-        aio_poll(qemu_get_aio_context(), true);
+    /* Create the block job */
+    /* TODO Running in the main context. Block drivers need to error out or add
+     * locking when they use a BDS in a different AioContext. */
+    s = job_create(job_id, &blockdev_create_job_driver, NULL,
+                   qemu_get_aio_context(), JOB_DEFAULT | JOB_MANUAL_DISMISS,
+                   NULL, NULL, errp);
+    if (!s) {
+        return;
    }
+
+    s->drv = drv,
+    s->opts = QAPI_CLONE(BlockdevCreateOptions, options),
+
+    job_start(&s->common);
 }
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -21,15 +21,15 @@
 #include "qemu/osdep.h"

 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "crypto/block.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/qapi-visit-crypto.h"
-#include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/error.h"
 #include "qemu/option.h"
-#include "block/crypto.h"
+#include "crypto.h"

 typedef struct BlockCrypto BlockCrypto;

@@ -159,7 +159,10 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockOpenOptions, 1);
    ret->format = format;

-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = qobject_input_visitor_new_flat_confused(opts, &local_err);
+    if (local_err) {
+        goto out;
+    }

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -210,7 +213,10 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockCreateOptions, 1);
    ret->format = format;

-    v = qobject_input_visitor_new_keyval(QOBJECT(opts));
+    v = qobject_input_visitor_new_flat_confused(opts, &local_err);
+    if (local_err) {
+        goto out;
+    }

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -305,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,

    ret = 0;
 cleanup:
-    QDECREF(cryptoopts);
+    qobject_unref(cryptoopts);
    qapi_free_QCryptoBlockOpenOptions(open_opts);
    return ret;
 }
@@ -635,7 +641,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename,
 fail:
    bdrv_unref(bs);
    qapi_free_QCryptoBlockCreateOptions(create_opts);
-    QDECREF(cryptoopts);
+    qobject_unref(cryptoopts);
    return ret;
 }

--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -97,15 +97,6 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
    return NULL;
 }

-/* Called with BQL taken.  */
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    g_free(bitmap->name);
-    bitmap->name = NULL;
-    bitmap->persistent = false;
-}
-
 /* Called with BQL taken.  */
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
@@ -258,49 +249,16 @@ void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap)
    qemu_mutex_unlock(bitmap->mutex);
 }

-/* Called within bdrv_dirty_bitmap_lock..unlock */
-static void bdrv_do_release_matching_dirty_bitmap_locked(
-    BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-    bool (*cond)(BdrvDirtyBitmap *bitmap))
+/* Called within bdrv_dirty_bitmap_lock..unlock and with BQL taken.  */
+static void bdrv_release_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap)
 {
-    BdrvDirtyBitmap *bm, *next;
-
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
-            assert(!bm->active_iterators);
-            assert(!bdrv_dirty_bitmap_frozen(bm));
-            assert(!bm->meta);
-            QLIST_REMOVE(bm, list);
-            hbitmap_free(bm->bitmap);
-            g_free(bm->name);
-            g_free(bm);
-
-            if (bitmap) {
-                return;
-            }
-        }
-    }
-
-    if (bitmap) {
-        abort();
-    }
-}
-
-/* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(
-    BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-    bool (*cond)(BdrvDirtyBitmap *bitmap))
-{
-    bdrv_dirty_bitmaps_lock(bs);
-    bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, cond);
-    bdrv_dirty_bitmaps_unlock(bs);
-}
-
-/* Called within bdrv_dirty_bitmap_lock..unlock */
-static void bdrv_release_dirty_bitmap_locked(BlockDriverState *bs,
-                                             BdrvDirtyBitmap *bitmap)
-{
-    bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, NULL);
+    assert(!bitmap->active_iterators);
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    assert(!bitmap->meta);
+    QLIST_REMOVE(bitmap, list);
+    hbitmap_free(bitmap->bitmap);
+    g_free(bitmap->name);
+    g_free(bitmap);
 }

 /**
@@ -353,7 +311,7 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
        error_setg(errp, "Merging of parent and successor bitmap failed");
        return NULL;
    }
-    bdrv_release_dirty_bitmap_locked(bs, successor);
+    bdrv_release_dirty_bitmap_locked(successor);
    parent->successor = NULL;

    return parent;
@@ -391,15 +349,12 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
    bdrv_dirty_bitmaps_unlock(bs);
 }

-static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
-{
-    return !!bdrv_dirty_bitmap_name(bitmap);
-}
-
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
+    bdrv_dirty_bitmaps_lock(bs);
+    bdrv_release_dirty_bitmap_locked(bitmap);
+    bdrv_dirty_bitmaps_unlock(bs);
 }

 /**
@@ -410,7 +365,15 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name);
+    BdrvDirtyBitmap *bm, *next;
+
+    bdrv_dirty_bitmaps_lock(bs);
+    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+        if (bdrv_dirty_bitmap_name(bm)) {
+            bdrv_release_dirty_bitmap_locked(bm);
+        }
+    }
+    bdrv_dirty_bitmaps_unlock(bs);
 }

 /**
@@ -418,11 +381,19 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 * bdrv_inactivate_recurse()).
 * There must not be any frozen bitmaps attached.
 * This function does not remove persistent bitmaps from the storage.
+ * Called with BQL taken.
 */
 void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL,
-                                          bdrv_dirty_bitmap_get_persistance);
+    BdrvDirtyBitmap *bm, *next;
+
+    bdrv_dirty_bitmaps_lock(bs);
+    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+        if (bdrv_dirty_bitmap_get_persistance(bm)) {
+            bdrv_release_dirty_bitmap_locked(bm);
+        }
+    }
+    bdrv_dirty_bitmaps_unlock(bs);
 }

 /**
@@ -442,18 +413,20 @@ void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
    }
 }

-/* Called with BQL taken.  */
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
+    bdrv_dirty_bitmap_lock(bitmap);
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
+    bdrv_dirty_bitmap_unlock(bitmap);
 }

-/* Called with BQL taken.  */
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
+    bdrv_dirty_bitmap_lock(bitmap);
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
+    bdrv_dirty_bitmap_unlock(bitmap);
 }

 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
@@ -755,3 +728,21 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t offset)
 {
    return hbitmap_next_zero(bitmap->bitmap, offset);
 }
+
+void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+                             Error **errp)
+{
+    /* only bitmaps from one bds are supported */
+    assert(dest->mutex == src->mutex);
+
+    qemu_mutex_lock(dest->mutex);
+
+    assert(bdrv_dirty_bitmap_enabled(dest));
+    assert(!bdrv_dirty_bitmap_readonly(dest));
+
+    if (!hbitmap_merge(dest->bitmap, src->bitmap)) {
+        error_setg(errp, "Bitmaps are incompatible and can't be merged");
+    }
+
+    qemu_mutex_unlock(dest->mutex);
+}
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -59,6 +59,7 @@
 #ifdef __linux__
 #include <sys/ioctl.h>
 #include <sys/param.h>
+#include <sys/syscall.h>
 #include <linux/cdrom.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
@@ -161,6 +162,7 @@ typedef struct BDRVRawState {
    bool page_cache_inconsistent:1;
    bool has_fallocate;
    bool needs_alignment;
+    bool check_cache_dropped;

    PRManager *pr_mgr;
 } BDRVRawState;
@@ -168,6 +170,7 @@ typedef struct BDRVRawState {
 typedef struct BDRVRawReopenState {
    int fd;
    int open_flags;
+    bool check_cache_dropped;
 } BDRVRawReopenState;

 static int fd_open(BlockDriverState *bs);
@@ -185,6 +188,8 @@ typedef struct RawPosixAIOData {
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
    off_t aio_offset;
    int aio_type;
+    int aio_fd2;
+    off_t aio_offset2;
 } RawPosixAIOData;

 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -415,6 +420,11 @@ static QemuOptsList raw_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "id of persistent reservation manager object (default: none)",
        },
+        {
+            .name = "x-check-cache-dropped",
+            .type = QEMU_OPT_BOOL,
+            .help = "check that page cache was dropped on live migration (default: off)"
+        },
        { /* end of list */ }
    },
 };
@@ -500,6 +510,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
        }
    }

+    s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
+                                               false);
+
    s->open_flags = open_flags;
    raw_parse_flags(bdrv_flags, &s->open_flags);

@@ -630,7 +643,7 @@ typedef enum {
 * file; if @unlock == true, also unlock the unneeded bytes.
 * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
 */
-static int raw_apply_lock_bytes(BDRVRawState *s,
+static int raw_apply_lock_bytes(int fd,
                                uint64_t perm_lock_bits,
                                uint64_t shared_perm_lock_bits,
                                bool unlock, Error **errp)
@@ -641,13 +654,13 @@ static int raw_apply_lock_bytes(BDRVRawState *s,
    PERM_FOREACH(i) {
        int off = RAW_LOCK_PERM_BASE + i;
        if (perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            ret = qemu_lock_fd(fd, off, 1, false);
            if (ret) {
                error_setg(errp, "Failed to lock byte %d", off);
                return ret;
            }
        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            ret = qemu_unlock_fd(fd, off, 1);
            if (ret) {
                error_setg(errp, "Failed to unlock byte %d", off);
                return ret;
@@ -657,13 +670,13 @@ static int raw_apply_lock_bytes(BDRVRawState *s,
    PERM_FOREACH(i) {
        int off = RAW_LOCK_SHARED_BASE + i;
        if (shared_perm_lock_bits & (1ULL << i)) {
-            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            ret = qemu_lock_fd(fd, off, 1, false);
            if (ret) {
                error_setg(errp, "Failed to lock byte %d", off);
                return ret;
            }
        } else if (unlock) {
-            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            ret = qemu_unlock_fd(fd, off, 1);
            if (ret) {
                error_setg(errp, "Failed to unlock byte %d", off);
                return ret;
@@ -674,8 +687,7 @@ static int raw_apply_lock_bytes(BDRVRawState *s,
 }

 /* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
-static int raw_check_lock_bytes(BDRVRawState *s,
-                                uint64_t perm, uint64_t shared_perm,
+static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm,
                                Error **errp)
 {
    int ret;
@@ -685,7 +697,7 @@ static int raw_check_lock_bytes(BDRVRawState *s,
        int off = RAW_LOCK_SHARED_BASE + i;
        uint64_t p = 1ULL << i;
        if (perm & p) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            ret = qemu_lock_fd_test(fd, off, 1, true);
            if (ret) {
                char *perm_name = bdrv_perm_names(p);
                error_setg(errp,
@@ -702,7 +714,7 @@ static int raw_check_lock_bytes(BDRVRawState *s,
        int off = RAW_LOCK_PERM_BASE + i;
        uint64_t p = 1ULL << i;
        if (!(shared_perm & p)) {
-            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            ret = qemu_lock_fd_test(fd, off, 1, true);
            if (ret) {
                char *perm_name = bdrv_perm_names(p);
                error_setg(errp,
@@ -739,11 +751,11 @@ static int raw_handle_perm_lock(BlockDriverState *bs,

    switch (op) {
    case RAW_PL_PREPARE:
-        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+        ret = raw_apply_lock_bytes(s->lock_fd, s->perm | new_perm,
                                   ~s->shared_perm | ~new_shared,
                                   false, errp);
        if (!ret) {
-            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            ret = raw_check_lock_bytes(s->lock_fd, new_perm, new_shared, errp);
            if (!ret) {
                return 0;
            }
@@ -751,7 +763,8 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
        op = RAW_PL_ABORT;
        /* fall through to unlock bytes. */
    case RAW_PL_ABORT:
-        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        raw_apply_lock_bytes(s->lock_fd, s->perm, ~s->shared_perm,
+                             true, &local_err);
        if (local_err) {
            /* Theoretically the above call only unlocks bytes and it cannot
             * fail. Something weird happened, report it.
@@ -760,7 +773,8 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
        }
        break;
    case RAW_PL_COMMIT:
-        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        raw_apply_lock_bytes(s->lock_fd, new_perm, ~new_shared,
+                             true, &local_err);
        if (local_err) {
            /* Theoretically the above call only unlocks bytes and it cannot
             * fail. Something weird happened, report it.
@@ -777,6 +791,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
 {
    BDRVRawState *s;
    BDRVRawReopenState *rs;
+    QemuOpts *opts;
    int ret = 0;
    Error *local_err = NULL;

@@ -787,6 +802,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,

    state->opaque = g_new0(BDRVRawReopenState, 1);
    rs = state->opaque;
+    rs->fd = -1;
+
+    /* Handle options changes */
+    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, state->options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    rs->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
+                                                s->check_cache_dropped);

    if (s->type == FTYPE_CD) {
        rs->open_flags |= O_NONBLOCK;
@@ -794,8 +822,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,

    raw_parse_flags(state->flags, &rs->open_flags);

-    rs->fd = -1;
-
    int fcntl_flags = O_APPEND | O_NONBLOCK;
 #ifdef O_NOATIME
    fcntl_flags |= O_NOATIME;
@@ -850,6 +876,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
        }
    }

+out:
+    qemu_opts_del(opts);
    return ret;
 }

@@ -858,6 +886,7 @@ static void raw_reopen_commit(BDRVReopenState *state)
    BDRVRawReopenState *rs = state->opaque;
    BDRVRawState *s = state->bs->opaque;

+    s->check_cache_dropped = rs->check_cache_dropped;
    s->open_flags = rs->open_flags;

    qemu_close(s->fd);
@@ -1421,6 +1450,49 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
    return -ENOTSUP;
 }

+#ifndef HAVE_COPY_FILE_RANGE
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
+                             off_t *out_off, size_t len, unsigned int flags)
+{
+#ifdef __NR_copy_file_range
+    return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
+                   out_off, len, flags);
+#else
+    errno = ENOSYS;
+    return -1;
+#endif
+}
+#endif
+
+static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
+{
+    uint64_t bytes = aiocb->aio_nbytes;
+    off_t in_off = aiocb->aio_offset;
+    off_t out_off = aiocb->aio_offset2;
+
+    while (bytes) {
+        ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
+                                      aiocb->aio_fd2, &out_off,
+                                      bytes, 0);
+        if (ret == -EINTR) {
+            continue;
+        }
+        if (ret < 0) {
+            if (errno == ENOSYS) {
+                return -ENOTSUP;
+            } else {
+                return -errno;
+            }
+        }
+        if (!ret) {
+            /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */
+            return -ENOTSUP;
+        }
+        bytes -= ret;
+    }
+    return 0;
+}
+
 static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
 {
    int ret = -EOPNOTSUPP;
@@ -1501,6 +1573,9 @@ static int aio_worker(void *arg)
    case QEMU_AIO_WRITE_ZEROES:
        ret = handle_aiocb_write_zeroes(aiocb);
        break;
+    case QEMU_AIO_COPY_RANGE:
+        ret = handle_aiocb_copy_range(aiocb);
+        break;
    default:
        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
        ret = -EINVAL;
@@ -1511,9 +1586,10 @@ static int aio_worker(void *arg)
    return ret;
 }

-static int paio_submit_co(BlockDriverState *bs, int fd,
-                          int64_t offset, QEMUIOVector *qiov,
-                          int bytes, int type)
+static int paio_submit_co_full(BlockDriverState *bs, int fd,
+                               int64_t offset, int fd2, int64_t offset2,
+                               QEMUIOVector *qiov,
+                               int bytes, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
    ThreadPool *pool;
@@ -1521,6 +1597,8 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    acb->bs = bs;
    acb->aio_type = type;
    acb->aio_fildes = fd;
+    acb->aio_fd2 = fd2;
+    acb->aio_offset2 = offset2;

    acb->aio_nbytes = bytes;
    acb->aio_offset = offset;
@@ -1536,6 +1614,13 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    return thread_pool_submit_co(pool, aio_worker, acb);
 }

+static inline int paio_submit_co(BlockDriverState *bs, int fd,
+                                 int64_t offset, QEMUIOVector *qiov,
+                                 int bytes, int type)
+{
+    return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
+}
+
 static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        int64_t offset, QEMUIOVector *qiov, int bytes,
        BlockCompletionFunc *cb, void *opaque, int type)
@@ -1991,6 +2076,7 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
 {
    BlockdevCreateOptionsFile *file_opts;
    int fd;
+    int perm, shared;
    int result = 0;

    /* Validate options and set default values */
@@ -2005,14 +2091,44 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
    }

    /* Create file */
-    fd = qemu_open(file_opts->filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
-                   0644);
+    fd = qemu_open(file_opts->filename, O_RDWR | O_CREAT | O_BINARY, 0644);
    if (fd < 0) {
        result = -errno;
        error_setg_errno(errp, -result, "Could not create file");
        goto out;
    }

+    /* Take permissions: We want to discard everything, so we need
+     * BLK_PERM_WRITE; and truncation to the desired size requires
+     * BLK_PERM_RESIZE.
+     * On the other hand, we cannot share the RESIZE permission
+     * because we promise that after this function, the file has the
+     * size given in the options.  If someone else were to resize it
+     * concurrently, we could not guarantee that.
+     * Note that after this function, we can no longer guarantee that
+     * the file is not touched by a third party, so it may be resized
+     * then. */
+    perm = BLK_PERM_WRITE | BLK_PERM_RESIZE;
+    shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
+
+    /* Step one: Take locks */
+    result = raw_apply_lock_bytes(fd, perm, shared, false, errp);
+    if (result < 0) {
+        goto out_close;
+    }
+
+    /* Step two: Check that nobody else has taken conflicting locks */
+    result = raw_check_lock_bytes(fd, perm, shared, errp);
+    if (result < 0) {
+        goto out_close;
+    }
+
+    /* Clear the file by truncating it to 0 */
+    result = raw_regular_truncate(fd, 0, PREALLOC_MODE_OFF, errp);
+    if (result < 0) {
+        goto out_close;
+    }
+
    if (file_opts->nocow) {
 #ifdef __linux__
        /* Set NOCOW flag to solve performance issue on fs like btrfs.
@@ -2028,6 +2144,8 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
 #endif
    }

+    /* Resize and potentially preallocate the file to the desired
+     * final size */
    result = raw_regular_truncate(fd, file_opts->size, file_opts->preallocation,
                                  errp);
    if (result < 0) {
@@ -2236,6 +2354,120 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
    return ret | BDRV_BLOCK_OFFSET_VALID;
 }

+#if defined(__linux__)
+/* Verify that the file is not in the page cache */
+static void check_cache_dropped(BlockDriverState *bs, Error **errp)
+{
+    const size_t window_size = 128 * 1024 * 1024;
+    BDRVRawState *s = bs->opaque;
+    void *window = NULL;
+    size_t length = 0;
+    unsigned char *vec;
+    size_t page_size;
+    off_t offset;
+    off_t end;
+
+    /* mincore(2) page status information requires 1 byte per page */
+    page_size = sysconf(_SC_PAGESIZE);
+    vec = g_malloc(DIV_ROUND_UP(window_size, page_size));
+
+    end = raw_getlength(bs);
+
+    for (offset = 0; offset < end; offset += window_size) {
+        void *new_window;
+        size_t new_length;
+        size_t vec_end;
+        size_t i;
+        int ret;
+
+        /* Unmap previous window if size has changed */
+        new_length = MIN(end - offset, window_size);
+        if (new_length != length) {
+            munmap(window, length);
+            window = NULL;
+            length = 0;
+        }
+
+        new_window = mmap(window, new_length, PROT_NONE, MAP_PRIVATE,
+                          s->fd, offset);
+        if (new_window == MAP_FAILED) {
+            error_setg_errno(errp, errno, "mmap failed");
+            break;
+        }
+
+        window = new_window;
+        length = new_length;
+
+        ret = mincore(window, length, vec);
+        if (ret < 0) {
+            error_setg_errno(errp, errno, "mincore failed");
+            break;
+        }
+
+        vec_end = DIV_ROUND_UP(length, page_size);
+        for (i = 0; i < vec_end; i++) {
+            if (vec[i] & 0x1) {
+                error_setg(errp, "page cache still in use!");
+                break;
+            }
+        }
+    }
+
+    if (window) {
+        munmap(window, length);
+    }
+
+    g_free(vec);
+}
+#endif /* __linux__ */
+
+static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
+                                                 Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    ret = fd_open(bs);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "The file descriptor is not open");
+        return;
+    }
+
+    if (s->open_flags & O_DIRECT) {
+        return; /* No host kernel page cache */
+    }
+
+#if defined(__linux__)
+    /* This sets the scene for the next syscall... */
+    ret = bdrv_co_flush(bs);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "flush failed");
+        return;
+    }
+
+    /* Linux does not invalidate pages that are dirty, locked, or mmapped by a
+     * process.  These limitations are okay because we just fsynced the file,
+     * we don't use mmap, and the file should not be in use by other processes.
+     */
+    ret = posix_fadvise(s->fd, 0, 0, POSIX_FADV_DONTNEED);
+    if (ret != 0) { /* the return value is a positive errno */
+        error_setg_errno(errp, ret, "fadvise failed");
+        return;
+    }
+
+    if (s->check_cache_dropped) {
+        check_cache_dropped(bs, errp);
+    }
+#else /* __linux__ */
+    /* Do nothing.  Live migration to a remote host with cache.direct=off is
+     * unsupported on other host operating systems.  Cache consistency issues
+     * may occur but no error is reported here, partly because that's the
+     * historical behavior and partly because it's hard to differentiate valid
+     * configurations that should not cause errors.
+     */
+#endif /* !__linux__ */
+}
+
 static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
    int64_t offset, int bytes,
    BlockCompletionFunc *cb, void *opaque)
@@ -2312,6 +2544,35 @@ static void raw_abort_perm_update(BlockDriverState *bs)
    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
 }

+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+                                               BdrvChild *src, uint64_t src_offset,
+                                               BdrvChild *dst, uint64_t dst_offset,
+                                               uint64_t bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
+}
+
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+                                             BdrvChild *src, uint64_t src_offset,
+                                             BdrvChild *dst, uint64_t dst_offset,
+                                             uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVRawState *s = bs->opaque;
+    BDRVRawState *src_s;
+
+    assert(dst->bs == bs);
+    if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) {
+        return -ENOTSUP;
+    }
+
+    src_s = src->bs->opaque;
+    if (fd_open(bs) < 0 || fd_open(bs) < 0) {
+        return -EIO;
+    }
+    return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
+                               NULL, bytes, QEMU_AIO_COPY_RANGE);
+}
+
 BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
@@ -2328,12 +2589,15 @@ BlockDriver bdrv_file = {
    .bdrv_co_create_opts = raw_co_create_opts,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_block_status = raw_co_block_status,
+    .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
    .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,

    .bdrv_co_preadv         = raw_co_preadv,
    .bdrv_co_pwritev        = raw_co_pwritev,
    .bdrv_aio_flush = raw_aio_flush,
    .bdrv_aio_pdiscard = raw_aio_pdiscard,
+    .bdrv_co_copy_range_from = raw_co_copy_range_from,
+    .bdrv_co_copy_range_to  = raw_co_copy_range_to,
    .bdrv_refresh_limits = raw_refresh_limits,
    .bdrv_io_plug = raw_aio_plug,
    .bdrv_io_unplug = raw_aio_unplug,
@@ -2805,12 +3069,15 @@ static BlockDriver bdrv_host_device = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_co_create_opts = hdev_co_create_opts,
    .create_opts         = &raw_create_opts,
+    .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
    .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,

    .bdrv_co_preadv         = raw_co_preadv,
    .bdrv_co_pwritev        = raw_co_pwritev,
    .bdrv_aio_flush	= raw_aio_flush,
    .bdrv_aio_pdiscard   = hdev_aio_pdiscard,
+    .bdrv_co_copy_range_from = raw_co_copy_range_from,
+    .bdrv_co_copy_range_to  = raw_co_copy_range_to,
    .bdrv_refresh_limits = raw_refresh_limits,
    .bdrv_io_plug = raw_aio_plug,
    .bdrv_io_unplug = raw_aio_unplug,
@@ -2927,6 +3194,7 @@ static BlockDriver bdrv_host_cdrom = {
    .bdrv_reopen_abort   = raw_reopen_abort,
    .bdrv_co_create_opts = hdev_co_create_opts,
    .create_opts         = &raw_create_opts,
+    .bdrv_co_invalidate_cache = raw_co_invalidate_cache,


    .bdrv_co_preadv         = raw_co_preadv,
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -251,7 +251,11 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
                         &dg.Geometry.BytesPerSector,
                         &freeClusters, &totalClusters);
        bs->bl.request_alignment = dg.Geometry.BytesPerSector;
+        return;
    }
+
+    /* XXX Does Windows support AIO on less than 512-byte alignment? */
+    bs->bl.request_alignment = 512;
 }

 static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
@@ -410,32 +414,32 @@ fail:
    return ret;
 }

-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                         BlockCompletionFunc *cb, void *opaque)
+static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
+                                  uint64_t offset, uint64_t bytes,
+                                  QEMUIOVector *qiov, int flags,
+                                  BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
-        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
-                                nb_sectors, cb, opaque, QEMU_AIO_READ);
+        return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov,
+                                cb, opaque, QEMU_AIO_READ);
    } else {
-        return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
-                           nb_sectors << BDRV_SECTOR_BITS,
+        return paio_submit(bs, s->hfile, offset, qiov, bytes,
                           cb, opaque, QEMU_AIO_READ);
    }
 }

-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
+static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs,
+                                   uint64_t offset, uint64_t bytes,
+                                   QEMUIOVector *qiov, int flags,
+                                   BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
    if (s->aio) {
-        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
-                                nb_sectors, cb, opaque, QEMU_AIO_WRITE);
+        return win32_aio_submit(bs, s->aio, s->hfile, offset, bytes, qiov,
+                                cb, opaque, QEMU_AIO_WRITE);
    } else {
-        return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
-                           nb_sectors << BDRV_SECTOR_BITS,
+        return paio_submit(bs, s->hfile, offset, qiov, bytes,
                           cb, opaque, QEMU_AIO_WRITE);
    }
 }
@@ -632,8 +636,8 @@ BlockDriver bdrv_file = {
    .bdrv_co_create_opts = raw_co_create_opts,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,

-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_preadv    = raw_aio_preadv,
+    .bdrv_aio_pwritev   = raw_aio_pwritev,
    .bdrv_aio_flush     = raw_aio_flush,

    .bdrv_truncate	= raw_truncate,
@@ -708,6 +712,12 @@ static void hdev_parse_filename(const char *filename, QDict *options,
    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
 }

+static void hdev_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    /* XXX Does Windows support AIO on less than 512-byte alignment? */
+    bs->bl.request_alignment = 512;
+}
+
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -793,9 +803,10 @@ static BlockDriver bdrv_host_device = {
    .bdrv_probe_device	= hdev_probe_device,
    .bdrv_file_open	= hdev_open,
    .bdrv_close		= raw_close,
+    .bdrv_refresh_limits = hdev_refresh_limits,

-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_preadv    = raw_aio_preadv,
+    .bdrv_aio_pwritev   = raw_aio_pwritev,
    .bdrv_aio_flush     = raw_aio_flush,

    .bdrv_detach_aio_context = raw_detach_aio_context,
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include <glusterfs/api/glfs.h>
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
@@ -650,7 +651,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
        }
        gsconf = NULL;

-        QDECREF(backing_options);
+        qobject_unref(backing_options);
        backing_options = NULL;
        g_free(str);
        str = NULL;
@@ -663,7 +664,7 @@ out:
    qapi_free_SocketAddress(gsconf);
    qemu_opts_del(opts);
    g_free(str);
-    QDECREF(backing_options);
+    qobject_unref(backing_options);
    errno = EINVAL;
    return -errno;
 }
@@ -1194,8 +1195,10 @@ static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
 static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
                                               int64_t sector_num,
                                               int nb_sectors,
-                                               QEMUIOVector *qiov)
+                                               QEMUIOVector *qiov,
+                                               int flags)
 {
+    assert(!flags);
    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }

--- a/block/io.c
+++ b/block/io.c
@@ -92,7 +92,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
    }

    /* Default alignment based on whether driver has byte interface */
-    bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+    bs->bl.request_alignment = (drv->bdrv_co_preadv ||
+                                drv->bdrv_aio_preadv) ? 1 : 512;

    /* Take some limits from the children as a default */
    if (bs->file) {
@@ -924,23 +925,14 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
        return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
    }

-    sector_num = offset >> BDRV_SECTOR_BITS;
-    nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
-
-    if (drv->bdrv_co_readv) {
-        return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-    } else {
+    if (drv->bdrv_aio_preadv) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

-        acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
-                                      bdrv_co_io_em_complete, &co);
+        acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
+                                   bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
@@ -948,6 +940,16 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
            return co.ret;
        }
    }
+
+    sector_num = offset >> BDRV_SECTOR_BITS;
+    nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+    assert(drv->bdrv_co_readv);
+
+    return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
 }

 static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
@@ -972,6 +974,25 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
        goto emulate_flags;
    }

+    if (drv->bdrv_aio_pwritev) {
+        BlockAIOCB *acb;
+        CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+        };
+
+        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
+                                    flags & bs->supported_write_flags,
+                                    bdrv_co_io_em_complete, &co);
+        flags &= ~bs->supported_write_flags;
+        if (acb == NULL) {
+            ret = -EIO;
+        } else {
+            qemu_coroutine_yield();
+            ret = co.ret;
+        }
+        goto emulate_flags;
+    }
+
    sector_num = offset >> BDRV_SECTOR_BITS;
    nb_sectors = bytes >> BDRV_SECTOR_BITS;

@@ -979,28 +1000,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);

-    if (drv->bdrv_co_writev_flags) {
-        ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
-                                        flags & bs->supported_write_flags);
-        flags &= ~bs->supported_write_flags;
-    } else if (drv->bdrv_co_writev) {
-        assert(!bs->supported_write_flags);
-        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
-    } else {
-        BlockAIOCB *acb;
-        CoroutineIOCompletion co = {
-            .coroutine = qemu_coroutine_self(),
-        };
-
-        acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
-                                       bdrv_co_io_em_complete, &co);
-        if (acb == NULL) {
-            ret = -EIO;
-        } else {
-            qemu_coroutine_yield();
-            ret = co.ret;
-        }
-    }
+    assert(drv->bdrv_co_writev);
+    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
+                              flags & bs->supported_write_flags);
+    flags &= ~bs->supported_write_flags;

 emulate_flags:
    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
@@ -1115,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
                /* FIXME: Should we (perhaps conditionally) be setting
                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                 * that still correctly reads as zero? */
-                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+                                               BDRV_REQ_WRITE_UNCHANGED);
            } else {
                /* This does not change the data on the disk, it is not
                 * necessary to flush even in cache=writethrough mode.
                 */
                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
-                                          &local_qiov, 0);
+                                          &local_qiov,
+                                          BDRV_REQ_WRITE_UNCHANGED);
            }

            if (ret < 0) {
@@ -1501,7 +1506,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    assert(child->perm & BLK_PERM_WRITE);
+    if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+        assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+    } else {
+        assert(child->perm & BLK_PERM_WRITE);
+    }
    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
@@ -2826,3 +2835,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
        bdrv_unregister_buf(child->bs, host);
    }
 }
+
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
+                                                    uint64_t src_offset,
+                                                    BdrvChild *dst,
+                                                    uint64_t dst_offset,
+                                                    uint64_t bytes,
+                                                    BdrvRequestFlags flags,
+                                                    bool recurse_src)
+{
+    int ret;
+
+    if (!src || !dst || !src->bs || !dst->bs) {
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
+    if (ret) {
+        return ret;
+    }
+    if (flags & BDRV_REQ_ZERO_WRITE) {
+        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
+    }
+
+    if (!src->bs->drv->bdrv_co_copy_range_from
+        || !dst->bs->drv->bdrv_co_copy_range_to
+        || src->bs->encrypted || dst->bs->encrypted) {
+        return -ENOTSUP;
+    }
+    if (recurse_src) {
+        return src->bs->drv->bdrv_co_copy_range_from(src->bs,
+                                                     src, src_offset,
+                                                     dst, dst_offset,
+                                                     bytes, flags);
+    } else {
+        return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
+                                                   src, src_offset,
+                                                   dst, dst_offset,
+                                                   bytes, flags);
+    }
+}
+
+/* Copy range from @src to @dst.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics. */
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
+                                         BdrvChild *dst, uint64_t dst_offset,
+                                         uint64_t bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, true);
+}
+
+/* Copy range from @src to @dst.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics. */
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
+                                       BdrvChild *dst, uint64_t dst_offset,
+                                       uint64_t bytes, BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, false);
+}
+
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+                                    BdrvChild *dst, uint64_t dst_offset,
+                                    uint64_t bytes, BdrvRequestFlags flags)
+{
+    BdrvTrackedRequest src_req, dst_req;
+    BlockDriverState *src_bs = src->bs;
+    BlockDriverState *dst_bs = dst->bs;
+    int ret;
+
+    bdrv_inc_in_flight(src_bs);
+    bdrv_inc_in_flight(dst_bs);
+    tracked_request_begin(&src_req, src_bs, src_offset,
+                          bytes, BDRV_TRACKED_READ);
+    tracked_request_begin(&dst_req, dst_bs, dst_offset,
+                          bytes, BDRV_TRACKED_WRITE);
+
+    wait_serialising_requests(&src_req);
+    wait_serialising_requests(&dst_req);
+    ret = bdrv_co_copy_range_from(src, src_offset,
+                                  dst, dst_offset,
+                                  bytes, flags);
+
+    tracked_request_end(&src_req);
+    tracked_request_end(&dst_req);
+    bdrv_dec_in_flight(src_bs);
+    bdrv_dec_in_flight(dst_bs);
+    return ret;
+}
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -33,6 +33,7 @@
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "scsi/constants.h"
 #include "qemu/iov.h"
 #include "qemu/option.h"
@@ -68,6 +69,7 @@ typedef struct IscsiLun {
    QemuMutex mutex;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
+    struct scsi_inquiry_device_designator *dd;
    unsigned char *zeroblock;
    /* The allocmap tracks which clusters (pages) on the iSCSI target are
     * allocated and which are not. In case a target returns zeros for
@@ -555,9 +557,20 @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
                               offset / iscsilun->cluster_size) == size);
 }

+static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
+                                                IscsiLun *iscsilun)
+{
+    while (!iTask->complete) {
+        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
+        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
+    }
+}
+
 static int coroutine_fn
-iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                      QEMUIOVector *iov, int flags)
+iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                QEMUIOVector *iov, int flags)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
@@ -616,12 +629,7 @@ retry:
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
 #endif
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }
+    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
@@ -692,13 +700,7 @@ retry:
        ret = -ENOMEM;
        goto out_unlock;
    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }
+    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.do_retry) {
        if (iTask.task != NULL) {
@@ -862,13 +864,8 @@ retry:
 #if LIBISCSI_API_VERSION < (20160603)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
 #endif
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }

+    iscsi_co_wait_for_task(&iTask, iscsilun);
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
@@ -905,12 +902,7 @@ retry:
        return -ENOMEM;
    }

-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }
+    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
@@ -1142,12 +1134,7 @@ retry:
        goto out_unlock;
    }

-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }
+    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
@@ -1243,12 +1230,7 @@ retry:
        return -ENOMEM;
    }

-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_mutex_unlock(&iscsilun->mutex);
-        qemu_coroutine_yield();
-        qemu_mutex_lock(&iscsilun->mutex);
-    }
+    iscsi_co_wait_for_task(&iTask, iscsilun);

    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
@@ -1732,14 +1714,34 @@ static QemuOptsList runtime_opts = {
            .name = "timeout",
            .type = QEMU_OPT_NUMBER,
        },
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-        },
        { /* end of list */ }
    },
 };

+static void iscsi_save_designator(IscsiLun *lun,
+                                  struct scsi_inquiry_device_identification *inq_di)
+{
+    struct scsi_inquiry_device_designator *desig, *copy = NULL;
+
+    for (desig = inq_di->designators; desig; desig = desig->next) {
+        if (desig->association ||
+            desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
+            continue;
+        }
+        /* NAA works better than T10 vendor ID based designator. */
+        if (!copy || copy->designator_type < desig->designator_type) {
+            copy = desig;
+        }
+    }
+    if (copy) {
+        lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
+        *lun->dd = *copy;
+        lun->dd->next = NULL;
+        lun->dd->designator = g_malloc(copy->designator_length);
+        memcpy(lun->dd->designator, copy->designator, copy->designator_length);
+    }
+}
+
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
 {
@@ -1751,27 +1753,12 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
    char *initiator_name = NULL;
    QemuOpts *opts;
    Error *local_err = NULL;
-    const char *transport_name, *portal, *target, *filename;
+    const char *transport_name, *portal, *target;
 #if LIBISCSI_API_VERSION >= (20160603)
    enum iscsi_transport_type transport;
 #endif
    int i, ret = 0, timeout = 0, lun;

-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        warn_report("'filename' option specified. "
-                    "This is an unsupported option, and may be deprecated "
-                    "in the future");
-        iscsi_parse_filename(filename, options, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
-
    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
@@ -1922,6 +1909,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
        struct scsi_task *inq_task;
        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
        struct scsi_inquiry_block_limits *inq_bl;
+        struct scsi_inquiry_device_identification *inq_di;
        switch (inq_vpd->pages[i]) {
        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1947,6 +1935,17 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                   sizeof(struct scsi_inquiry_block_limits));
            scsi_free_scsi_task(inq_task);
            break;
+        case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
+            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+                                    SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
+                                    (void **) &inq_di, errp);
+            if (inq_task == NULL) {
+                ret = -EINVAL;
+                goto out;
+            }
+            iscsi_save_designator(iscsilun, inq_di);
+            scsi_free_scsi_task(inq_task);
+            break;
        default:
            break;
        }
@@ -1989,7 +1988,7 @@ out:
        }
        memset(iscsilun, 0, sizeof(IscsiLun));
    }
-exit:
+
    return ret;
 }

@@ -2003,6 +2002,10 @@ static void iscsi_close(BlockDriverState *bs)
        iscsi_logout_sync(iscsi);
    }
    iscsi_destroy_context(iscsi);
+    if (iscsilun->dd) {
+        g_free(iscsilun->dd->designator);
+        g_free(iscsilun->dd);
+    }
    g_free(iscsilun->zeroblock);
    iscsi_allocmap_free(iscsilun);
    qemu_mutex_destroy(&iscsilun->mutex);
@@ -2143,7 +2146,7 @@ static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opt
    } else {
        ret = iscsi_open(bs, bs_options, 0, NULL);
    }
-    QDECREF(bs_options);
+    qobject_unref(bs_options);

    if (ret != 0) {
        goto out;
@@ -2184,6 +2187,221 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
    iscsi_allocmap_invalidate(iscsilun);
 }

+static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
+                                                 BdrvChild *src,
+                                                 uint64_t src_offset,
+                                                 BdrvChild *dst,
+                                                 uint64_t dst_offset,
+                                                 uint64_t bytes,
+                                                 BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
+}
+
+static struct scsi_task *iscsi_xcopy_task(int param_len)
+{
+    struct scsi_task *task;
+
+    task = g_new0(struct scsi_task, 1);
+
+    task->cdb[0]     = EXTENDED_COPY;
+    task->cdb[10]    = (param_len >> 24) & 0xFF;
+    task->cdb[11]    = (param_len >> 16) & 0xFF;
+    task->cdb[12]    = (param_len >> 8) & 0xFF;
+    task->cdb[13]    = param_len & 0xFF;
+    task->cdb_size   = 16;
+    task->xfer_dir   = SCSI_XFER_WRITE;
+    task->expxferlen = param_len;
+
+    return task;
+}
+
+static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
+{
+    struct scsi_inquiry_device_designator *dd = lun->dd;
+
+    memset(desc, 0, 32);
+    desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
+    desc[4] = dd->code_set;
+    desc[5] = (dd->designator_type & 0xF)
+        | ((dd->association & 3) << 4);
+    desc[7] = dd->designator_length;
+    memcpy(desc + 8, dd->designator, dd->designator_length);
+
+    desc[28] = 0;
+    desc[29] = (lun->block_size >> 16) & 0xFF;
+    desc[30] = (lun->block_size >> 8) & 0xFF;
+    desc[31] = lun->block_size & 0xFF;
+}
+
+static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
+                                 int dst_index)
+{
+    hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
+    hdr[1] = ((dc << 1) | cat) & 0xFF;
+    hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
+    /* don't account for the first 4 bytes in descriptor header*/
+    hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
+    hdr[4] = (src_index >> 8) & 0xFF;
+    hdr[5] = src_index & 0xFF;
+    hdr[6] = (dst_index >> 8) & 0xFF;
+    hdr[7] = dst_index & 0xFF;
+}
+
+static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
+                                      int src_index, int dst_index, int num_blks,
+                                      uint64_t src_lba, uint64_t dst_lba)
+{
+    iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
+
+    /* The caller should verify the request size */
+    assert(num_blks < 65536);
+    desc[10] = (num_blks >> 8) & 0xFF;
+    desc[11] = num_blks & 0xFF;
+    desc[12] = (src_lba >> 56) & 0xFF;
+    desc[13] = (src_lba >> 48) & 0xFF;
+    desc[14] = (src_lba >> 40) & 0xFF;
+    desc[15] = (src_lba >> 32) & 0xFF;
+    desc[16] = (src_lba >> 24) & 0xFF;
+    desc[17] = (src_lba >> 16) & 0xFF;
+    desc[18] = (src_lba >> 8) & 0xFF;
+    desc[19] = src_lba & 0xFF;
+    desc[20] = (dst_lba >> 56) & 0xFF;
+    desc[21] = (dst_lba >> 48) & 0xFF;
+    desc[22] = (dst_lba >> 40) & 0xFF;
+    desc[23] = (dst_lba >> 32) & 0xFF;
+    desc[24] = (dst_lba >> 24) & 0xFF;
+    desc[25] = (dst_lba >> 16) & 0xFF;
+    desc[26] = (dst_lba >> 8) & 0xFF;
+    desc[27] = dst_lba & 0xFF;
+}
+
+static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
+                                        int list_id_usage, int prio,
+                                        int tgt_desc_len,
+                                        int seg_desc_len, int inline_data_len)
+{
+    buf[0] = list_id;
+    buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
+    buf[2] = (tgt_desc_len >> 8) & 0xFF;
+    buf[3] = tgt_desc_len & 0xFF;
+    buf[8] = (seg_desc_len >> 24) & 0xFF;
+    buf[9] = (seg_desc_len >> 16) & 0xFF;
+    buf[10] = (seg_desc_len >> 8) & 0xFF;
+    buf[11] = seg_desc_len & 0xFF;
+    buf[12] = (inline_data_len >> 24) & 0xFF;
+    buf[13] = (inline_data_len >> 16) & 0xFF;
+    buf[14] = (inline_data_len >> 8) & 0xFF;
+    buf[15] = inline_data_len & 0xFF;
+}
+
+static void iscsi_xcopy_data(struct iscsi_data *data,
+                             IscsiLun *src, int64_t src_lba,
+                             IscsiLun *dst, int64_t dst_lba,
+                             uint16_t num_blocks)
+{
+    uint8_t *buf;
+    const int src_offset = XCOPY_DESC_OFFSET;
+    const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
+    const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
+
+    data->size = XCOPY_DESC_OFFSET +
+                 IDENT_DESCR_TGT_DESCR_SIZE * 2 +
+                 XCOPY_BLK2BLK_SEG_DESC_SIZE;
+    data->data = g_malloc0(data->size);
+    buf = data->data;
+
+    /* Initialise the parameter list header */
+    iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
+                                0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
+                                XCOPY_BLK2BLK_SEG_DESC_SIZE,
+                                0);
+
+    /* Initialise CSCD list with one src + one dst descriptor */
+    iscsi_populate_target_desc(&buf[src_offset], src);
+    iscsi_populate_target_desc(&buf[dst_offset], dst);
+
+    /* Initialise one segment descriptor */
+    iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
+                              src_lba, dst_lba);
+}
+
+static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
+                                               BdrvChild *src,
+                                               uint64_t src_offset,
+                                               BdrvChild *dst,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
+                                               BdrvRequestFlags flags)
+{
+    IscsiLun *dst_lun = dst->bs->opaque;
+    IscsiLun *src_lun;
+    struct IscsiTask iscsi_task;
+    struct iscsi_data data;
+    int r = 0;
+    int block_size;
+
+    if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
+        return -ENOTSUP;
+    }
+    src_lun = src->bs->opaque;
+
+    if (!src_lun->dd || !dst_lun->dd) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
+        return -ENOTSUP;
+    }
+    if (dst_lun->block_size != src_lun->block_size ||
+        !dst_lun->block_size) {
+        return -ENOTSUP;
+    }
+
+    block_size = dst_lun->block_size;
+    if (bytes / block_size > 65535) {
+        return -ENOTSUP;
+    }
+
+    iscsi_xcopy_data(&data,
+                     src_lun, src_offset / block_size,
+                     dst_lun, dst_offset / block_size,
+                     bytes / block_size);
+
+    iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
+
+    qemu_mutex_lock(&dst_lun->mutex);
+    iscsi_task.task = iscsi_xcopy_task(data.size);
+retry:
+    if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
+                                 iscsi_task.task, iscsi_co_generic_cb,
+                                 &data,
+                                 &iscsi_task) != 0) {
+        r = -EIO;
+        goto out_unlock;
+    }
+
+    iscsi_co_wait_for_task(&iscsi_task, dst_lun);
+
+    if (iscsi_task.do_retry) {
+        iscsi_task.complete = 0;
+        goto retry;
+    }
+
+    if (iscsi_task.status != SCSI_STATUS_GOOD) {
+        r = iscsi_task.err_code;
+        goto out_unlock;
+    }
+
+out_unlock:
+    g_free(iscsi_task.task);
+    qemu_mutex_unlock(&dst_lun->mutex);
+    g_free(iscsi_task.err_str);
+    return r;
+}
+
 static QemuOptsList iscsi_create_opts = {
    .name = "iscsi-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -2218,9 +2436,11 @@ static BlockDriver bdrv_iscsi = {

    .bdrv_co_block_status  = iscsi_co_block_status,
    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
+    .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+    .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
-    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
+    .bdrv_co_writev        = iscsi_co_writev,
    .bdrv_co_flush_to_disk = iscsi_co_flush,

 #ifdef __linux__
@@ -2253,9 +2473,11 @@ static BlockDriver bdrv_iser = {

    .bdrv_co_block_status  = iscsi_co_block_status,
    .bdrv_co_pdiscard      = iscsi_co_pdiscard,
+    .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+    .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
    .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
-    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
+    .bdrv_co_writev        = iscsi_co_writev,
    .bdrv_co_flush_to_disk = iscsi_co_flush,

 #ifdef __linux__
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -22,7 +22,6 @@
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"

-#define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
 #define MAX_IO_BYTES (1 << 20) /* 1 Mb */
 #define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)
@@ -36,7 +35,6 @@ typedef struct MirrorBuffer {

 typedef struct MirrorBlockJob {
    BlockJob common;
-    RateLimit limit;
    BlockBackend *target;
    BlockDriverState *mirror_top_bs;
    BlockDriverState *source;
@@ -121,14 +119,14 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
        if (!s->initial_zeroing_ongoing) {
-            s->common.offset += op->bytes;
+            job_progress_update(&s->common.job, op->bytes);
        }
    }
    qemu_iovec_destroy(&op->qiov);
    g_free(op);

    if (s->waiting_for_io) {
-        qemu_coroutine_enter(s->common.co);
+        qemu_coroutine_enter(s->common.job.co);
    }
 }

@@ -347,7 +345,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        mirror_wait_for_io(s);
    }

-    block_job_pause_point(&s->common);
+    job_pause_point(&s->common.job);

    /* Find the number of consective dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
@@ -449,9 +447,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        assert(io_bytes);
        offset += io_bytes;
        nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
-        if (s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct);
-        }
+        delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
    }
    return delay_ns;
 }
@@ -488,9 +484,10 @@ typedef struct {
    int ret;
 } MirrorExitData;

-static void mirror_exit(BlockJob *job, void *opaque)
+static void mirror_exit(Job *job, void *opaque)
 {
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+    BlockJob *bjob = &s->common;
    MirrorExitData *data = opaque;
    AioContext *replace_aio_context = NULL;
    BlockDriverState *src = s->source;
@@ -501,7 +498,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
    bdrv_release_dirty_bitmap(src, s->dirty_bitmap);

    /* Make sure that the source BDS doesn't go away before we called
-     * block_job_completed(). */
+     * job_completed(). */
    bdrv_ref(src);
    bdrv_ref(mirror_top_bs);
    bdrv_ref(target_bs);
@@ -572,7 +569,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
     * the blockers on the intermediate nodes so that the resulting state is
     * valid. Also give up permissions on mirror_top_bs->backing, which might
     * block the removal. */
-    block_job_remove_all_bdrv(job);
+    block_job_remove_all_bdrv(bjob);
    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                            &error_abort);
    bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
@@ -580,11 +577,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
    /* We just changed the BDS the job BB refers to (with either or both of the
     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
     * the right thing. We don't need any permissions any more now. */
-    blk_remove_bs(job->blk);
-    blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
-    blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
+    blk_remove_bs(bjob->blk);
+    blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
+    blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);

-    block_job_completed(&s->common, data->ret);
+    job_completed(job, data->ret, NULL);

    g_free(data);
    bdrv_drained_end(src);
@@ -596,11 +593,11 @@ static void mirror_throttle(MirrorBlockJob *s)
 {
    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

-    if (now - s->last_pause_ns > SLICE_TIME) {
+    if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) {
        s->last_pause_ns = now;
-        block_job_sleep_ns(&s->common, 0);
+        job_sleep_ns(&s->common.job, 0);
    } else {
-        block_job_pause_point(&s->common);
+        job_pause_point(&s->common.job);
    }
 }

@@ -626,7 +623,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)

            mirror_throttle(s);

-            if (block_job_is_cancelled(&s->common)) {
+            if (job_is_cancelled(&s->common.job)) {
                s->initial_zeroing_ongoing = false;
                return 0;
            }
@@ -654,7 +651,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)

        mirror_throttle(s);

-        if (block_job_is_cancelled(&s->common)) {
+        if (job_is_cancelled(&s->common.job)) {
            return 0;
        }

@@ -699,7 +696,7 @@ static void coroutine_fn mirror_run(void *opaque)
                                 checking for a NULL string */
    int ret = 0;

-    if (block_job_is_cancelled(&s->common)) {
+    if (job_is_cancelled(&s->common.job)) {
        goto immediate_exit;
    }

@@ -730,13 +727,13 @@ static void coroutine_fn mirror_run(void *opaque)
    }

    if (s->bdev_length == 0) {
-        /* Report BLOCK_JOB_READY and wait for complete. */
-        block_job_event_ready(&s->common);
+        /* Transition to the READY state and wait for complete. */
+        job_transition_to_ready(&s->common.job);
        s->synced = true;
-        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
-            block_job_yield(&s->common);
+        while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
+            job_yield(&s->common.job);
        }
-        s->common.cancelled = false;
+        s->common.job.cancelled = false;
        goto immediate_exit;
    }

@@ -772,7 +769,7 @@ static void coroutine_fn mirror_run(void *opaque)
    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    if (!s->is_none_mode) {
        ret = mirror_dirty_init(s);
-        if (ret < 0 || block_job_is_cancelled(&s->common)) {
+        if (ret < 0 || job_is_cancelled(&s->common.job)) {
            goto immediate_exit;
        }
    }
@@ -789,22 +786,20 @@ static void coroutine_fn mirror_run(void *opaque)
            goto immediate_exit;
        }

-        block_job_pause_point(&s->common);
+        job_pause_point(&s->common.job);

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
-        /* s->common.offset contains the number of bytes already processed so
-         * far, cnt is the number of dirty bytes remaining and
-         * s->bytes_in_flight is the number of bytes currently being
-         * processed; together those are the current total operation length */
-        s->common.len = s->common.offset + s->bytes_in_flight + cnt;
+        /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
+         * the number of bytes currently being processed; together those are
+         * the current remaining operation length */
+        job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
-         * We do so every SLICE_TIME nanoseconds, or when there is an error,
-         * or when the source is clean, whichever comes first.
-         */
+         * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is
+         * an error, or when the source is clean, whichever comes first. */
        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
-        if (delta < SLICE_TIME &&
+        if (delta < BLOCK_JOB_SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
@@ -829,12 +824,12 @@ static void coroutine_fn mirror_run(void *opaque)
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
-                block_job_event_ready(&s->common);
+                job_transition_to_ready(&s->common.job);
                s->synced = true;
            }

            should_complete = s->should_complete ||
-                block_job_is_cancelled(&s->common);
+                job_is_cancelled(&s->common.job);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

@@ -862,18 +857,23 @@ static void coroutine_fn mirror_run(void *opaque)
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
-            s->common.cancelled = false;
+            s->common.job.cancelled = false;
            need_drain = false;
            break;
        }

        ret = 0;
+
+        if (s->synced && !should_complete) {
+            delay_ns = (s->in_flight == 0 &&
+                        cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
+        }
        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
-        if (block_job_is_cancelled(&s->common) && s->common.force) {
+        job_sleep_ns(&s->common.job, delay_ns);
+        if (job_is_cancelled(&s->common.job) &&
+            (!s->synced || s->common.job.force_cancel))
+        {
            break;
-        } else if (!should_complete) {
-            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
-            block_job_sleep_ns(&s->common, delay_ns);
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
@@ -884,8 +884,8 @@ immediate_exit:
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
-        assert(ret < 0 || ((s->common.force || !s->synced) &&
-               block_job_is_cancelled(&s->common)));
+        assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) &&
+               job_is_cancelled(&s->common.job)));
        assert(need_drain);
        mirror_wait_for_all_io(s);
    }
@@ -902,23 +902,12 @@ immediate_exit:
    if (need_drain) {
        bdrv_drained_begin(bs);
    }
-    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
+    job_defer_to_main_loop(&s->common.job, mirror_exit, data);
 }

-static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
+static void mirror_complete(Job *job, Error **errp)
 {
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
-}
-
-static void mirror_complete(BlockJob *job, Error **errp)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockDriverState *target;

    target = blk_bs(s->target);
@@ -965,12 +954,12 @@ static void mirror_complete(BlockJob *job, Error **errp)
    }

    s->should_complete = true;
-    block_job_enter(&s->common);
+    job_enter(job);
 }

-static void mirror_pause(BlockJob *job)
+static void mirror_pause(Job *job)
 {
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);

    mirror_wait_for_all_io(s);
 }
@@ -998,23 +987,31 @@ static void mirror_drain(BlockJob *job)
 }

 static const BlockJobDriver mirror_job_driver = {
-    .instance_size          = sizeof(MirrorBlockJob),
-    .job_type               = BLOCK_JOB_TYPE_MIRROR,
-    .set_speed              = mirror_set_speed,
-    .start                  = mirror_run,
-    .complete               = mirror_complete,
-    .pause                  = mirror_pause,
+    .job_driver = {
+        .instance_size          = sizeof(MirrorBlockJob),
+        .job_type               = JOB_TYPE_MIRROR,
+        .free                   = block_job_free,
+        .user_resume            = block_job_user_resume,
+        .drain                  = block_job_drain,
+        .start                  = mirror_run,
+        .pause                  = mirror_pause,
+        .complete               = mirror_complete,
+    },
    .attached_aio_context   = mirror_attached_aio_context,
    .drain                  = mirror_drain,
 };

 static const BlockJobDriver commit_active_job_driver = {
-    .instance_size          = sizeof(MirrorBlockJob),
-    .job_type               = BLOCK_JOB_TYPE_COMMIT,
-    .set_speed              = mirror_set_speed,
-    .start                  = mirror_run,
-    .complete               = mirror_complete,
-    .pause                  = mirror_pause,
+    .job_driver = {
+        .instance_size          = sizeof(MirrorBlockJob),
+        .job_type               = JOB_TYPE_COMMIT,
+        .free                   = block_job_free,
+        .user_resume            = block_job_user_resume,
+        .drain                  = block_job_drain,
+        .start                  = mirror_run,
+        .pause                  = mirror_pause,
+        .complete               = mirror_complete,
+    },
    .attached_aio_context   = mirror_attached_aio_context,
    .drain                  = mirror_drain,
 };
@@ -1148,6 +1145,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
        mirror_top_bs->implicit = true;
    }
    mirror_top_bs->total_sectors = bs->total_sectors;
+    mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+    mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));

    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
@@ -1249,7 +1248,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
    }

    trace_mirror_start(bs, s, opaque);
-    block_job_start(&s->common);
+    job_start(&s->common.job);
    return;

 fail:
@@ -1260,7 +1259,7 @@ fail:

        g_free(s->replaces);
        blk_unref(s->target);
-        block_job_early_fail(&s->common);
+        job_early_fail(&s->common.job);
    }

    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
@@ -1287,7 +1286,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
-    mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
+    mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces,
                     speed, granularity, buf_size, backing_mode,
                     on_source_error, on_target_error, unmap, NULL, NULL,
                     &mirror_job_driver, is_none_mode, base, false,
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -259,14 +259,18 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,

    if (extent->length == 0 ||
        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
-                                                    client->info.min_block)) ||
-        extent->length > orig_length)
-    {
+                                                    client->info.min_block))) {
        error_setg(errp, "Protocol error: server sent status chunk with "
                   "invalid length");
        return -EINVAL;
    }

+    /* The server is allowed to send us extra information on the final
+     * extent; just clamp it to the length we requested. */
+    if (extent->length > orig_length) {
+        extent->length = orig_length;
+    }
+
    return 0;
 }

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -27,7 +27,8 @@
 */

 #include "qemu/osdep.h"
-#include "block/nbd-client.h"
+#include "nbd-client.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/uri.h"
 #include "block/block_int.h"
@@ -262,7 +263,6 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
 {
    SocketAddress *saddr = NULL;
    QDict *addr = NULL;
-    QObject *crumpled_addr = NULL;
    Visitor *iv = NULL;
    Error *local_err = NULL;

@@ -272,20 +272,11 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
        goto done;
    }

-    crumpled_addr = qdict_crumple(addr, errp);
-    if (!crumpled_addr) {
+    iv = qobject_input_visitor_new_flat_confused(addr, errp);
+    if (!iv) {
        goto done;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_addr);
    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -293,8 +284,7 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
    }

 done:
-    QDECREF(addr);
-    qobject_decref(crumpled_addr);
+    qobject_unref(addr);
    visit_free(iv);
    return saddr;
 }
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -29,6 +29,7 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "trace.h"
 #include "qemu/iov.h"
 #include "qemu/option.h"
@@ -555,24 +556,29 @@ static BlockdevOptionsNfs *nfs_options_qdict_to_qapi(QDict *options,
                                                     Error **errp)
 {
    BlockdevOptionsNfs *opts = NULL;
-    QObject *crumpled = NULL;
    Visitor *v;
+    const QDictEntry *e;
    Error *local_err = NULL;

-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
        return NULL;
    }

-    v = qobject_input_visitor_new_keyval(crumpled);
    visit_type_BlockdevOptionsNfs(v, NULL, &opts, &local_err);
    visit_free(v);
-    qobject_decref(crumpled);

    if (local_err) {
+        error_propagate(errp, local_err);
        return NULL;
    }

+    /* Remove the processed options from the QDict (the visitor processes
+     * _all_ options in the QDict) */
+    while ((e = qdict_first(options))) {
+        qdict_del(options, e->key);
+    }
+
    return opts;
 }

@@ -683,7 +689,7 @@ static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts,

    ret = 0;
 out:
-    QDECREF(options);
+    qobject_unref(options);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
--- a/block/null.c
+++ b/block/null.c
@@ -93,6 +93,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
    }
    s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
    qemu_opts_del(opts);
+    bs->supported_write_flags = BDRV_REQ_FUA;
    return ret;
 }

@@ -116,22 +117,22 @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
    return 0;
 }

-static coroutine_fn int null_co_readv(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      QEMUIOVector *qiov)
+static coroutine_fn int null_co_preadv(BlockDriverState *bs,
+                                       uint64_t offset, uint64_t bytes,
+                                       QEMUIOVector *qiov, int flags)
 {
    BDRVNullState *s = bs->opaque;

    if (s->read_zeroes) {
-        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+        qemu_iovec_memset(qiov, 0, 0, bytes);
    }

    return null_co_common(bs);
 }

-static coroutine_fn int null_co_writev(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors,
-                                       QEMUIOVector *qiov)
+static coroutine_fn int null_co_pwritev(BlockDriverState *bs,
+                                        uint64_t offset, uint64_t bytes,
+                                        QEMUIOVector *qiov, int flags)
 {
    return null_co_common(bs);
 }
@@ -186,26 +187,26 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
    return &acb->common;
 }

-static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
-                                  int64_t sector_num, QEMUIOVector *qiov,
-                                  int nb_sectors,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
+static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
+                                   uint64_t offset, uint64_t bytes,
+                                   QEMUIOVector *qiov, int flags,
+                                   BlockCompletionFunc *cb,
+                                   void *opaque)
 {
    BDRVNullState *s = bs->opaque;

    if (s->read_zeroes) {
-        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+        qemu_iovec_memset(qiov, 0, 0, bytes);
    }

    return null_aio_common(bs, cb, opaque);
 }

-static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
+static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs,
+                                    uint64_t offset, uint64_t bytes,
+                                    QEMUIOVector *qiov, int flags,
+                                    BlockCompletionFunc *cb,
+                                    void *opaque)
 {
    return null_aio_common(bs, cb, opaque);
 }
@@ -244,7 +245,6 @@ static int coroutine_fn null_co_block_status(BlockDriverState *bs,

 static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
 {
-    QINCREF(opts);
    qdict_del(opts, "filename");

    if (!qdict_size(opts)) {
@@ -253,7 +253,7 @@ static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
    }

    qdict_put_str(opts, "driver", bs->drv->format_name);
-    bs->full_open_options = opts;
+    bs->full_open_options = qobject_ref(opts);
 }

 static BlockDriver bdrv_null_co = {
@@ -266,8 +266,8 @@ static BlockDriver bdrv_null_co = {
    .bdrv_close             = null_close,
    .bdrv_getlength         = null_getlength,

-    .bdrv_co_readv          = null_co_readv,
-    .bdrv_co_writev         = null_co_writev,
+    .bdrv_co_preadv         = null_co_preadv,
+    .bdrv_co_pwritev        = null_co_pwritev,
    .bdrv_co_flush_to_disk  = null_co_flush,
    .bdrv_reopen_prepare    = null_reopen_prepare,

@@ -286,8 +286,8 @@ static BlockDriver bdrv_null_aio = {
    .bdrv_close             = null_close,
    .bdrv_getlength         = null_getlength,

-    .bdrv_aio_readv         = null_aio_readv,
-    .bdrv_aio_writev        = null_aio_writev,
+    .bdrv_aio_preadv        = null_aio_preadv,
+    .bdrv_aio_pwritev       = null_aio_pwritev,
    .bdrv_aio_flush         = null_aio_flush,
    .bdrv_reopen_prepare    = null_reopen_prepare,

--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1073,7 +1073,6 @@ static int nvme_reopen_prepare(BDRVReopenState *reopen_state,

 static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts)
 {
-    QINCREF(opts);
    qdict_del(opts, "filename");

    if (!qdict_size(opts)) {
@@ -1082,7 +1081,7 @@ static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts)
    }

    qdict_put_str(opts, "driver", bs->drv->format_name);
-    bs->full_open_options = opts;
+    bs->full_open_options = qobject_ref(opts);
 }

 static void nvme_refresh_limits(BlockDriverState *bs, Error **errp)
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -31,6 +31,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -311,13 +312,15 @@ static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
 }

 static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+                                            int64_t sector_num, int nb_sectors,
+                                            QEMUIOVector *qiov, int flags)
 {
    BDRVParallelsState *s = bs->opaque;
    uint64_t bytes_done = 0;
    QEMUIOVector hd_qiov;
    int ret = 0;

+    assert(!flags);
    qemu_iovec_init(&hd_qiov, qiov->niov);

    while (nb_sectors > 0) {
@@ -613,8 +616,7 @@ static int coroutine_fn parallels_co_create_opts(const char *filename,
    BlockdevCreateOptions *create_options = NULL;
    Error *local_err = NULL;
    BlockDriverState *bs = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    int ret;

@@ -650,15 +652,12 @@ static int coroutine_fn parallels_co_create_opts(const char *filename,
    qdict_put_str(qdict, "driver", "parallels");
    qdict_put_str(qdict, "file", bs->node_name);

-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto done;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -682,7 +681,7 @@ static int coroutine_fn parallels_co_create_opts(const char *filename,
    ret = 0;

 done:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -773,7 +773,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
    visit_complete(v, &obj);
    data = qdict_get(qobject_to(QDict, obj), "data");
    dump_qobject(func_fprintf, f, 1, data);
-    qobject_decref(obj);
+    qobject_unref(obj);
    visit_free(v);
 }

--- a/block/qcow.c
+++ b/block/qcow.c
@@ -26,6 +26,7 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -37,7 +38,7 @@
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto/block.h"
 #include "migration/blocker.h"
-#include "block/crypto.h"
+#include "crypto.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -315,7 +316,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    QDECREF(encryptopts);
+    qobject_unref(encryptopts);
    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    qemu_co_mutex_init(&s->lock);
    return 0;
@@ -326,7 +327,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    qcrypto_block_free(s->crypto);
-    QDECREF(encryptopts);
+    qobject_unref(encryptopts);
    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    return ret;
 }
@@ -720,7 +721,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
 }

 static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov)
+                                       int nb_sectors, QEMUIOVector *qiov,
+                                       int flags)
 {
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
@@ -731,6 +733,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    uint8_t *buf;
    void *orig_buf;

+    assert(!flags);
    s->cluster_cache_offset = -1; /* disable compressed cache */

    /* We must always copy the iov when encrypting, so we
@@ -943,8 +946,7 @@ static int coroutine_fn qcow_co_create_opts(const char *filename,
 {
    BlockdevCreateOptions *create_options = NULL;
    BlockDriverState *bs = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    const char *val;
    Error *local_err = NULL;
@@ -994,15 +996,12 @@ static int coroutine_fn qcow_co_create_opts(const char *filename,
    qdict_put_str(qdict, "driver", "qcow");
    qdict_put_str(qdict, "file", bs->node_name);

-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto fail;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -1025,7 +1024,7 @@ static int coroutine_fn qcow_co_create_opts(const char *filename,

    ret = 0;
 fail:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
@@ -1110,7 +1109,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
        ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS,
-                             bytes >> BDRV_SECTOR_BITS, qiov);
+                             bytes >> BDRV_SECTOR_BITS, qiov, 0);
        if (ret < 0) {
            goto fail;
        }
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -30,7 +30,7 @@
 #include "qemu/cutils.h"

 #include "block/block_int.h"
-#include "block/qcow2.h"
+#include "qcow2.h"

 /* NOTICE: BME here means Bitmaps Extension and used as a namespace for
 * _internal_ constants. Please do not use this _internal_ abbreviation for
@@ -254,7 +254,6 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)

    ret = bitmap_table_load(bs, tb, &bitmap_table);
    if (ret < 0) {
-        assert(bitmap_table == NULL);
        return ret;
    }

--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -28,7 +28,7 @@
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
-#include "block/qcow2.h"
+#include "qcow2.h"
 #include "qemu/bswap.h"
 #include "trace.h"

--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -26,7 +26,7 @@
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
-#include "block/qcow2.h"
+#include "qcow2.h"
 #include "qemu/range.h"
 #include "qemu/bswap.h"
 #include "qemu/cutils.h"
@@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
        case QCOW2_CLUSTER_COMPRESSED:
            /* Compressed clusters don't have QCOW_OFLAG_COPIED */
            if (l2_entry & QCOW_OFLAG_COPIED) {
-                fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+                fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
                    "copied flag must never be set for compressed "
-                    "clusters\n", l2_entry >> s->cluster_bits);
+                    "clusters\n", l2_entry & s->cluster_offset_mask);
                l2_entry &= ~QCOW_OFLAG_COPIED;
                res->corruptions++;
            }
@@ -1799,6 +1799,19 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
    int ret;
    uint64_t refcount;
    int i, j;
+    bool repair;
+
+    if (fix & BDRV_FIX_ERRORS) {
+        /* Always repair */
+        repair = true;
+    } else if (fix & BDRV_FIX_LEAKS) {
+        /* Repair only if that seems safe: This function is always
+         * called after the refcounts have been fixed, so the refcount
+         * is accurate if that repair was successful */
+        repair = !res->check_errors && !res->corruptions && !res->leaks;
+    } else {
+        repair = false;
+    }

    for (i = 0; i < s->l1_size; i++) {
        uint64_t l1_entry = s->l1_table[i];
@@ -1818,10 +1831,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
                    "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                            "ERROR",
-                    i, l1_entry, refcount);
-            if (fix & BDRV_FIX_ERRORS) {
+                    repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
+            if (repair) {
                s->l1_table[i] = refcount == 1
                               ? l1_entry |  QCOW_OFLAG_COPIED
                               : l1_entry & ~QCOW_OFLAG_COPIED;
@@ -1862,10 +1873,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
                    fprintf(stderr, "%s OFLAG_COPIED data cluster: "
                            "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
-                            fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                                    "ERROR",
-                            l2_entry, refcount);
-                    if (fix & BDRV_FIX_ERRORS) {
+                            repair ? "Repairing" : "ERROR", l2_entry, refcount);
+                    if (repair) {
                        l2_table[j] = cpu_to_be64(refcount == 1
                                    ? l2_entry |  QCOW_OFLAG_COPIED
                                    : l2_entry & ~QCOW_OFLAG_COPIED);
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -25,7 +25,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
-#include "block/qcow2.h"
+#include "qcow2.h"
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,10 +24,11 @@

 #include "qemu/osdep.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
-#include "block/qcow2.h"
+#include "qcow2.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block-core.h"
@@ -39,7 +40,7 @@
 #include "qemu/bswap.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
-#include "block/crypto.h"
+#include "crypto.h"

 /*
  Differences with QCOW:
@@ -768,6 +769,7 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
    BDRVQcow2State *s = bs->opaque;
    uint64_t combined_cache_size;
    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
+    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;

    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
@@ -802,23 +804,28 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
        } else if (refcount_cache_size_set) {
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        } else {
-            *refcount_cache_size = combined_cache_size
-                                 / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
-            *l2_cache_size = combined_cache_size - *refcount_cache_size;
+            uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+            uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+
+            /* Assign as much memory as possible to the L2 cache, and
+             * use the remainder for the refcount cache */
+            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
+                *l2_cache_size = max_l2_cache;
+                *refcount_cache_size = combined_cache_size - *l2_cache_size;
+            } else {
+                *refcount_cache_size =
+                    MIN(combined_cache_size, min_refcount_cache);
+                *l2_cache_size = combined_cache_size - *refcount_cache_size;
+            }
        }
    } else {
-        if (!l2_cache_size_set && !refcount_cache_size_set) {
+        if (!l2_cache_size_set) {
            *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
                                 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
                                 * s->cluster_size);
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!l2_cache_size_set) {
-            *l2_cache_size = *refcount_cache_size
-                           * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!refcount_cache_size_set) {
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
+        }
+        if (!refcount_cache_size_set) {
+            *refcount_cache_size = min_refcount_cache;
        }
    }

@@ -1063,7 +1070,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,

    ret = 0;
 fail:
-    QDECREF(encryptopts);
+    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
@@ -1755,6 +1762,39 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
    return status;
 }

+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
+                                            QCowL2Meta **pl2meta,
+                                            bool link_l2)
+{
+    int ret = 0;
+    QCowL2Meta *l2meta = *pl2meta;
+
+    while (l2meta != NULL) {
+        QCowL2Meta *next;
+
+        if (!ret && link_l2) {
+            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+            if (ret) {
+                goto out;
+            }
+        }
+
+        /* Take the request off the list of running requests */
+        if (l2meta->nb_clusters != 0) {
+            QLIST_REMOVE(l2meta, next_in_flight);
+        }
+
+        qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+        next = l2meta->next;
+        g_free(l2meta);
+        l2meta = next;
+    }
+out:
+    *pl2meta = l2meta;
+    return ret;
+}
+
 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                        uint64_t bytes, QEMUIOVector *qiov,
                                        int flags)
@@ -2041,24 +2081,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
            }
        }

-        while (l2meta != NULL) {
-            QCowL2Meta *next;
-
-            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            /* Take the request off the list of running requests */
-            if (l2meta->nb_clusters != 0) {
-                QLIST_REMOVE(l2meta, next_in_flight);
-            }
-
-            qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-            next = l2meta->next;
-            g_free(l2meta);
-            l2meta = next;
+        ret = qcow2_handle_l2meta(bs, &l2meta, true);
+        if (ret) {
+            goto fail;
        }

        bytes -= cur_bytes;
@@ -2069,18 +2094,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
    ret = 0;

 fail:
-    while (l2meta != NULL) {
-        QCowL2Meta *next;
-
-        if (l2meta->nb_clusters != 0) {
-            QLIST_REMOVE(l2meta, next_in_flight);
-        }
-        qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-        next = l2meta->next;
-        g_free(l2meta);
-        l2meta = next;
-    }
+    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

@@ -2183,7 +2197,7 @@ static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_do_open(bs, options, flags, &local_err);
    qemu_co_mutex_unlock(&s->lock);
-    QDECREF(options);
+    qobject_unref(options);
    if (local_err) {
        error_propagate(errp, local_err);
        error_prepend(errp, "Could not reopen qcow2 layer: ");
@@ -3066,8 +3080,7 @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt
                                             Error **errp)
 {
    BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;
@@ -3138,15 +3151,12 @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt
    qdict_put_str(qdict, "file", bs->node_name);

    /* Now get the QAPI type BlockdevCreateOptions */
-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto finish;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -3168,7 +3178,7 @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt

    ret = 0;
 finish:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
@@ -3267,6 +3277,166 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
    return ret;
 }

+static int coroutine_fn
+qcow2_co_copy_range_from(BlockDriverState *bs,
+                         BdrvChild *src, uint64_t src_offset,
+                         BdrvChild *dst, uint64_t dst_offset,
+                         uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int ret;
+    unsigned int cur_bytes; /* number of bytes in current iteration */
+    BdrvChild *child = NULL;
+    BdrvRequestFlags cur_flags;
+
+    assert(!bs->encrypted);
+    qemu_co_mutex_lock(&s->lock);
+
+    while (bytes != 0) {
+        uint64_t copy_offset = 0;
+        /* prepare next request */
+        cur_bytes = MIN(bytes, INT_MAX);
+        cur_flags = flags;
+
+        ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
+        if (ret < 0) {
+            goto out;
+        }
+
+        switch (ret) {
+        case QCOW2_CLUSTER_UNALLOCATED:
+            if (bs->backing && bs->backing->bs) {
+                int64_t backing_length = bdrv_getlength(bs->backing->bs);
+                if (src_offset >= backing_length) {
+                    cur_flags |= BDRV_REQ_ZERO_WRITE;
+                } else {
+                    child = bs->backing;
+                    cur_bytes = MIN(cur_bytes, backing_length - src_offset);
+                    copy_offset = src_offset;
+                }
+            } else {
+                cur_flags |= BDRV_REQ_ZERO_WRITE;
+            }
+            break;
+
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
+            cur_flags |= BDRV_REQ_ZERO_WRITE;
+            break;
+
+        case QCOW2_CLUSTER_COMPRESSED:
+            ret = -ENOTSUP;
+            goto out;
+            break;
+
+        case QCOW2_CLUSTER_NORMAL:
+            child = bs->file;
+            copy_offset += offset_into_cluster(s, src_offset);
+            if ((copy_offset & 511) != 0) {
+                ret = -EIO;
+                goto out;
+            }
+            break;
+
+        default:
+            abort();
+        }
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_copy_range_from(child,
+                                      copy_offset,
+                                      dst, dst_offset,
+                                      cur_bytes, cur_flags);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto out;
+        }
+
+        bytes -= cur_bytes;
+        src_offset += cur_bytes;
+        dst_offset += cur_bytes;
+    }
+    ret = 0;
+
+out:
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn
+qcow2_co_copy_range_to(BlockDriverState *bs,
+                       BdrvChild *src, uint64_t src_offset,
+                       BdrvChild *dst, uint64_t dst_offset,
+                       uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int offset_in_cluster;
+    int ret;
+    unsigned int cur_bytes; /* number of sectors in current iteration */
+    uint64_t cluster_offset;
+    uint8_t *cluster_data = NULL;
+    QCowL2Meta *l2meta = NULL;
+
+    assert(!bs->encrypted);
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+
+    qemu_co_mutex_lock(&s->lock);
+
+    while (bytes != 0) {
+
+        l2meta = NULL;
+
+        offset_in_cluster = offset_into_cluster(s, dst_offset);
+        cur_bytes = MIN(bytes, INT_MAX);
+
+        /* TODO:
+         * If src->bs == dst->bs, we could simply copy by incrementing
+         * the refcnt, without copying user data.
+         * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
+        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
+                                         &cluster_offset, &l2meta);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        assert((cluster_offset & 511) == 0);
+
+        ret = qcow2_pre_write_overlap_check(bs, 0,
+                cluster_offset + offset_in_cluster, cur_bytes);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_copy_range_to(src, src_offset,
+                                    bs->file,
+                                    cluster_offset + offset_in_cluster,
+                                    cur_bytes, flags);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        ret = qcow2_handle_l2meta(bs, &l2meta, true);
+        if (ret) {
+            goto fail;
+        }
+
+        bytes -= cur_bytes;
+        dst_offset += cur_bytes;
+    }
+    ret = 0;
+
+fail:
+    qcow2_handle_l2meta(bs, &l2meta, false);
+
+    qemu_co_mutex_unlock(&s->lock);
+
+    qemu_vfree(cluster_data);
+    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
+
+    return ret;
+}
+
 static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
                          PreallocMode prealloc, Error **errp)
 {
@@ -4042,22 +4212,21 @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
 * have to be removed.
 */
 static int qcow2_downgrade(BlockDriverState *bs, int target_version,
-                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
+                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+                           Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
    int current_version = s->qcow_version;
    int ret;

-    if (target_version == current_version) {
-        return 0;
-    } else if (target_version > current_version) {
-        return -EINVAL;
-    } else if (target_version != 2) {
-        return -EINVAL;
-    }
+    /* This is qcow2_downgrade(), not qcow2_upgrade() */
+    assert(target_version < current_version);
+
+    /* There are no other versions (now) that you can downgrade to */
+    assert(target_version == 2);

    if (s->refcount_order != 4) {
-        error_report("compat=0.10 requires refcount_bits=16");
+        error_setg(errp, "compat=0.10 requires refcount_bits=16");
        return -ENOTSUP;
    }

@@ -4065,6 +4234,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to make the image clean");
            return ret;
        }
    }
@@ -4074,6 +4244,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
     * best thing to do anyway */

    if (s->incompatible_features) {
+        error_setg(errp, "Cannot downgrade an image with incompatible features "
+                   "%#" PRIx64 " set", s->incompatible_features);
        return -ENOTSUP;
    }

@@ -4087,6 +4259,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,

    ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
        return ret;
    }

@@ -4094,6 +4267,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
    ret = qcow2_update_header(bs);
    if (ret < 0) {
        s->qcow_version = current_version;
+        error_setg_errno(errp, -ret, "Failed to update the image header");
        return ret;
    }
    return 0;
@@ -4171,7 +4345,8 @@ static void qcow2_amend_helper_cb(BlockDriverState *bs,

 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque)
+                               void *cb_opaque,
+                               Error **errp)
 {
    BDRVQcow2State *s = bs->opaque;
    int old_version = s->qcow_version, new_version = old_version;
@@ -4183,7 +4358,6 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    bool encrypt;
    int encformat;
    int refcount_bits = s->refcount_bits;
-    Error *local_err = NULL;
    int ret;
    QemuOptDesc *desc = opts->list->desc;
    Qcow2AmendHelperCBInfo helper_cb_info;
@@ -4204,11 +4378,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            } else if (!strcmp(compat, "1.1")) {
                new_version = 3;
            } else {
-                error_report("Unknown compatibility level %s", compat);
+                error_setg(errp, "Unknown compatibility level %s", compat);
                return -EINVAL;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
-            error_report("Cannot change preallocation mode");
+            error_setg(errp, "Cannot change preallocation mode");
            return -ENOTSUP;
        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
@@ -4221,7 +4395,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                                        !!s->crypto);

            if (encrypt != !!s->crypto) {
-                error_report("Changing the encryption flag is not supported");
+                error_setg(errp,
+                           "Changing the encryption flag is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
@@ -4229,17 +4404,19 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));

            if (encformat != s->crypt_method_header) {
-                error_report("Changing the encryption format is not supported");
+                error_setg(errp,
+                           "Changing the encryption format is not supported");
                return -ENOTSUP;
            }
        } else if (g_str_has_prefix(desc->name, "encrypt.")) {
-            error_report("Changing the encryption parameters is not supported");
+            error_setg(errp,
+                       "Changing the encryption parameters is not supported");
            return -ENOTSUP;
        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
                                             cluster_size);
            if (cluster_size != s->cluster_size) {
-                error_report("Changing the cluster size is not supported");
+                error_setg(errp, "Changing the cluster size is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
@@ -4252,8 +4429,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            if (refcount_bits <= 0 || refcount_bits > 64 ||
                !is_power_of_2(refcount_bits))
            {
-                error_report("Refcount width must be a power of two and may "
-                             "not exceed 64 bits");
+                error_setg(errp, "Refcount width must be a power of two and "
+                           "may not exceed 64 bits");
                return -EINVAL;
            }
        } else {
@@ -4278,6 +4455,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            s->qcow_version = old_version;
+            error_setg_errno(errp, -ret, "Failed to update the image header");
            return ret;
        }
    }
@@ -4286,18 +4464,17 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
        int refcount_order = ctz32(refcount_bits);

        if (new_version < 3 && refcount_bits != 16) {
-            error_report("Different refcount widths than 16 bits require "
-                         "compatibility level 1.1 or above (use compat=1.1 or "
-                         "greater)");
+            error_setg(errp, "Refcount widths other than 16 bits require "
+                       "compatibility level 1.1 or above (use compat=1.1 or "
+                       "greater)");
            return -EINVAL;
        }

        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
        ret = qcow2_change_refcount_order(bs, refcount_order,
                                          &qcow2_amend_helper_cb,
-                                          &helper_cb_info, &local_err);
+                                          &helper_cb_info, errp);
        if (ret < 0) {
-            error_report_err(local_err);
            return ret;
        }
    }
@@ -4307,6 +4484,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                    backing_file ?: s->image_backing_file,
                    backing_format ?: s->image_backing_format);
        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to change the backing file");
            return ret;
        }
    }
@@ -4314,14 +4492,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    if (s->use_lazy_refcounts != lazy_refcounts) {
        if (lazy_refcounts) {
            if (new_version < 3) {
-                error_report("Lazy refcounts only supported with compatibility "
-                             "level 1.1 and above (use compat=1.1 or greater)");
+                error_setg(errp, "Lazy refcounts only supported with "
+                           "compatibility level 1.1 and above (use compat=1.1 "
+                           "or greater)");
                return -EINVAL;
            }
            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
+                error_setg_errno(errp, -ret, "Failed to update the image header");
                return ret;
            }
            s->use_lazy_refcounts = true;
@@ -4329,6 +4509,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            /* make image clean first */
            ret = qcow2_mark_clean(bs);
            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Failed to make the image clean");
                return ret;
            }
            /* now disallow lazy refcounts */
@@ -4336,6 +4517,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
+                error_setg_errno(errp, -ret, "Failed to update the image header");
                return ret;
            }
            s->use_lazy_refcounts = false;
@@ -4344,17 +4526,15 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,

    if (new_size) {
        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
-        ret = blk_insert_bs(blk, bs, &local_err);
+        ret = blk_insert_bs(blk, bs, errp);
        if (ret < 0) {
-            error_report_err(local_err);
            blk_unref(blk);
            return ret;
        }

-        ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, &local_err);
+        ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp);
        blk_unref(blk);
        if (ret < 0) {
-            error_report_err(local_err);
            return ret;
        }
    }
@@ -4363,7 +4543,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
    if (new_version < old_version) {
        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
-                              &helper_cb_info);
+                              &helper_cb_info, errp);
        if (ret < 0) {
            return ret;
        }
@@ -4386,7 +4566,7 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
    char *message;
    va_list ap;

-    fatal = fatal && !bs->read_only;
+    fatal = fatal && bdrv_is_writable(bs);

    if (s->signaled_corruption &&
        (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
@@ -4515,6 +4695,8 @@ BlockDriver bdrv_qcow2 = {

    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
+    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
+    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
    .bdrv_truncate          = qcow2_truncate,
    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
    .bdrv_make_empty        = qcow2_make_empty,
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -77,10 +77,6 @@
 #define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
 #define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */

-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
-
 #define DEFAULT_CLUSTER_SIZE 65536


--- a/block/qed.c
+++ b/block/qed.c
@@ -13,6 +13,7 @@
 */

 #include "qemu/osdep.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/timer.h"
 #include "qemu/bswap.h"
@@ -721,8 +722,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
                                                Error **errp)
 {
    BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;
@@ -762,15 +762,12 @@ static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
    qdict_put_str(qdict, "driver", "qed");
    qdict_put_str(qdict, "file", bs->node_name);

-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto fail;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -789,7 +786,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
    ret = bdrv_qed_co_create(create_options, errp);

 fail:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
@@ -1437,8 +1434,9 @@ static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,

 static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
                                           int64_t sector_num, int nb_sectors,
-                                           QEMUIOVector *qiov)
+                                           QEMUIOVector *qiov, int flags)
 {
+    assert(!flags);
    return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
 }

--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,6 +17,7 @@
 #include "qemu/cutils.h"
 #include "qemu/option.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block.h"
 #include "qapi/qmp/qdict.h"
@@ -115,6 +116,7 @@ struct QuorumAIOCB {
    /* Request metadata */
    uint64_t offset;
    uint64_t bytes;
+    int flags;

    QEMUIOVector *qiov;         /* calling IOV */

@@ -157,7 +159,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
 static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
                                   QEMUIOVector *qiov,
                                   uint64_t offset,
-                                   uint64_t bytes)
+                                   uint64_t bytes,
+                                   int flags)
 {
    BDRVQuorumState *s = bs->opaque;
    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@@ -168,6 +171,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
        .bs                 = bs,
        .offset             = offset,
        .bytes              = bytes,
+        .flags              = flags,
        .qiov               = qiov,
        .votes.compare      = quorum_sha256_compare,
        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
@@ -271,9 +275,11 @@ static void quorum_rewrite_entry(void *opaque)
    BDRVQuorumState *s = acb->bs->opaque;

    /* Ignore any errors, it's just a correction attempt for already
-     * corrupted data. */
+     * corrupted data.
+     * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
+     * area with different data from the other children. */
    bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
-                    acb->qiov, 0);
+                    acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);

    /* Wake up the caller after the last rewrite */
    acb->rewrite_count--;
@@ -608,7 +614,7 @@ static void read_quorum_children_entry(void *opaque)
 static int read_quorum_children(QuorumAIOCB *acb)
 {
    BDRVQuorumState *s = acb->bs->opaque;
-    int i, ret;
+    int i;

    acb->children_read = s->num_children;
    for (i = 0; i < s->num_children; i++) {
@@ -643,9 +649,7 @@ static int read_quorum_children(QuorumAIOCB *acb)
        qemu_coroutine_yield();
    }

-    ret = acb->vote_ret;
-
-    return ret;
+    return acb->vote_ret;
 }

 static int read_fifo_child(QuorumAIOCB *acb)
@@ -673,7 +677,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
                            uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
    int ret;

    acb->is_read = true;
@@ -699,7 +703,7 @@ static void write_quorum_entry(void *opaque)

    sacb->bs = s->children[i]->bs;
    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-                                acb->qiov, 0);
+                                acb->qiov, acb->flags);
    if (sacb->ret == 0) {
        acb->success_count++;
    } else {
@@ -719,7 +723,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
                             uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
    int i, ret;

    for (i = 0; i < s->num_children; i++) {
@@ -961,6 +965,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
    }
    s->next_child_index = s->num_children;

+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+
    g_free(opened);
    goto exit;

@@ -1082,8 +1088,8 @@ static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)

    children = qlist_new();
    for (i = 0; i < s->num_children; i++) {
-        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append(children, s->children[i]->bs->full_open_options);
+        qlist_append(children,
+                     qobject_ref(s->children[i]->bs->full_open_options));
    }

    opts = qdict_new();
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -167,16 +167,37 @@ static void raw_reopen_abort(BDRVReopenState *state)
    state->opaque = NULL;
 }

+/* Check and adjust the offset, against 'offset' and 'size' options. */
+static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
+                                    uint64_t bytes, bool is_write)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
+        /* There's not enough space for the write, or the read request is
+         * out-of-range. Don't read/write anything to prevent leaking out of
+         * the size specified in options. */
+        return is_write ? -ENOSPC : -EINVAL;;
+    }
+
+    if (*offset > INT64_MAX - s->offset) {
+        return -EINVAL;
+    }
+    *offset += s->offset;
+
+    return 0;
+}
+
 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
                                      uint64_t bytes, QEMUIOVector *qiov,
                                      int flags)
 {
-    BDRVRawState *s = bs->opaque;
+    int ret;

-    if (offset > UINT64_MAX - s->offset) {
-        return -EINVAL;
+    ret = raw_adjust_offset(bs, &offset, bytes, false);
+    if (ret) {
+        return ret;
    }
-    offset += s->offset;

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -186,23 +207,11 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                       uint64_t bytes, QEMUIOVector *qiov,
                                       int flags)
 {
-    BDRVRawState *s = bs->opaque;
    void *buf = NULL;
    BlockDriver *drv;
    QEMUIOVector local_qiov;
    int ret;

-    if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
-        /* There's not enough space for the data. Don't write anything and just
-         * fail to prevent leaking out of the size specified in options. */
-        return -ENOSPC;
-    }
-
-    if (offset > UINT64_MAX - s->offset) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
        /* Handling partial writes would be a pain - so we just
         * require that guests have 512-byte request alignment if
@@ -237,7 +246,10 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
        qiov = &local_qiov;
    }

-    offset += s->offset;
+    ret = raw_adjust_offset(bs, &offset, bytes, true);
+    if (ret) {
+        goto fail;
+    }

    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -267,22 +279,24 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int bytes,
                                             BdrvRequestFlags flags)
 {
-    BDRVRawState *s = bs->opaque;
-    if (offset > UINT64_MAX - s->offset) {
-        return -EINVAL;
+    int ret;
+
+    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+    if (ret) {
+        return ret;
    }
-    offset += s->offset;
    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }

 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
                                        int64_t offset, int bytes)
 {
-    BDRVRawState *s = bs->opaque;
-    if (offset > UINT64_MAX - s->offset) {
-        return -EINVAL;
+    int ret;
+
+    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+    if (ret) {
+        return ret;
    }
-    offset += s->offset;
    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }

@@ -415,10 +429,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    }

    bs->sg = bs->file->bs->sg;
-    bs->supported_write_flags = BDRV_REQ_FUA &
-        bs->file->bs->supported_write_flags;
-    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-        bs->file->bs->supported_zero_flags;
+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+            bs->file->bs->supported_zero_flags);

    if (bs->probed && !bdrv_is_read_only(bs)) {
        fprintf(stderr,
@@ -482,6 +497,36 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
    return bdrv_probe_geometry(bs->file->bs, geo);
 }

+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+                                               BdrvChild *src, uint64_t src_offset,
+                                               BdrvChild *dst, uint64_t dst_offset,
+                                               uint64_t bytes, BdrvRequestFlags flags)
+{
+    int ret;
+
+    ret = raw_adjust_offset(bs, &src_offset, bytes, false);
+    if (ret) {
+        return ret;
+    }
+    return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
+                                   bytes, flags);
+}
+
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+                                             BdrvChild *src, uint64_t src_offset,
+                                             BdrvChild *dst, uint64_t dst_offset,
+                                             uint64_t bytes, BdrvRequestFlags flags)
+{
+    int ret;
+
+    ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
+    if (ret) {
+        return ret;
+    }
+    return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
+                                 flags);
+}
+
 BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .instance_size        = sizeof(BDRVRawState),
@@ -498,6 +543,8 @@ BlockDriver bdrv_raw = {
    .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
    .bdrv_co_pdiscard     = &raw_co_pdiscard,
    .bdrv_co_block_status = &raw_co_block_status,
+    .bdrv_co_copy_range_from = &raw_co_copy_range_from,
+    .bdrv_co_copy_range_to  = &raw_co_copy_range_to,
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -18,6 +18,7 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
@@ -226,27 +227,57 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,

 done:
    g_free(buf);
-    QDECREF(keypairs);
+    qobject_unref(keypairs);
    return;
 }


-static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    /* XXX Does RBD support AIO on less than 512-byte alignment? */
+    bs->bl.request_alignment = 512;
+}
+
+
+static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
                             Error **errp)
 {
-    if (secretid == 0) {
-        return 0;
+    char *key, *acr;
+    int r;
+    GString *accu;
+    RbdAuthModeList *auth;
+
+    if (opts->key_secret) {
+        key = qcrypto_secret_lookup_as_base64(opts->key_secret, errp);
+        if (!key) {
+            return -EIO;
+        }
+        r = rados_conf_set(cluster, "key", key);
+        g_free(key);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "Could not set 'key'");
+            return r;
+        }
    }

-    gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
-                                                    errp);
-    if (!secret) {
-        return -1;
+    if (opts->has_auth_client_required) {
+        accu = g_string_new("");
+        for (auth = opts->auth_client_required; auth; auth = auth->next) {
+            if (accu->str[0]) {
+                g_string_append_c(accu, ';');
+            }
+            g_string_append(accu, RbdAuthMode_str(auth->value));
+        }
+        acr = g_string_free(accu, FALSE);
+        r = rados_conf_set(cluster, "auth_client_required", acr);
+        g_free(acr);
+        if (r < 0) {
+            error_setg_errno(errp, -r,
+                             "Could not set 'auth_client_required'");
+            return r;
+        }
    }

-    rados_conf_set(cluster, "key", secret);
-    g_free(secret);
-
    return 0;
 }

@@ -275,17 +306,17 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
        key = qstring_get_str(name);

        ret = rados_conf_set(cluster, key, qstring_get_str(value));
-        QDECREF(value);
+        qobject_unref(value);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "invalid conf option %s", key);
-            QDECREF(name);
+            qobject_unref(name);
            ret = -EINVAL;
            break;
        }
-        QDECREF(name);
+        qobject_unref(name);
    }

-    QDECREF(keypairs);
+    qobject_unref(keypairs);
    return ret;
 }

@@ -337,9 +368,7 @@ static QemuOptsList runtime_opts = {
    },
 };

-/* FIXME Deprecate and remove keypairs or make it available in QMP.
- * password_secret should eventually be configurable in opts->location. Support
- * for it in .bdrv_open will make it work here as well. */
+/* FIXME Deprecate and remove keypairs or make it available in QMP. */
 static int qemu_rbd_do_create(BlockdevCreateOptions *options,
                              const char *keypairs, const char *password_secret,
                              Error **errp)
@@ -449,7 +478,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(const char *filename,
    }

 exit:
-    QDECREF(options);
+    qobject_unref(options);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -545,6 +574,16 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
    Error *local_err = NULL;
    int r;

+    if (secretid) {
+        if (opts->key_secret) {
+            error_setg(errp,
+                       "Legacy 'password-secret' clashes with 'key-secret'");
+            return -EINVAL;
+        }
+        opts->key_secret = g_strdup(secretid);
+        opts->has_key_secret = true;
+    }
+
    mon_host = qemu_rbd_mon_host(opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -577,8 +616,8 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
        }
    }

-    if (qemu_rbd_set_auth(*cluster, secretid, errp) < 0) {
-        r = -EIO;
+    r = qemu_rbd_set_auth(*cluster, opts, errp);
+    if (r < 0) {
        goto failed_shutdown;
    }

@@ -622,28 +661,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    BDRVRBDState *s = bs->opaque;
    BlockdevOptionsRbd *opts = NULL;
    Visitor *v;
-    QObject *crumpled = NULL;
    const QDictEntry *e;
    Error *local_err = NULL;
-    const char *filename;
    char *keypairs, *secretid;
    int r;

-    /* If we are given a filename, parse the filename, with precedence given to
-     * filename encoded options */
-    filename = qdict_get_try_str(options, "filename");
-    if (filename) {
-        warn_report("'filename' option specified. "
-                    "This is an unsupported option, and may be deprecated "
-                    "in the future");
-        qemu_rbd_parse_filename(filename, options, &local_err);
-        qdict_del(options, "filename");
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return -EINVAL;
-        }
-    }
-
    keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
    if (keypairs) {
        qdict_del(options, "=keyvalue-pairs");
@@ -655,16 +677,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
    }

    /* Convert the remaining options into a QAPI object */
-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
        r = -EINVAL;
        goto out;
    }

-    v = qobject_input_visitor_new_keyval(crumpled);
    visit_type_BlockdevOptionsRbd(v, NULL, &opts, &local_err);
    visit_free(v);
-    qobject_decref(crumpled);

    if (local_err) {
        error_propagate(errp, local_err);
@@ -899,27 +919,23 @@ failed:
    return NULL;
 }

-static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
-                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
-                         RBD_AIO_READ);
-}
-
-static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov,
-                                       int nb_sectors,
+static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
+                                       uint64_t offset, uint64_t bytes,
+                                       QEMUIOVector *qiov, int flags,
                                       BlockCompletionFunc *cb,
                                       void *opaque)
 {
-    return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
-                         (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
+    return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
+                         RBD_AIO_READ);
+}
+
+static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
+                                        uint64_t offset, uint64_t bytes,
+                                        QEMUIOVector *qiov, int flags,
+                                        BlockCompletionFunc *cb,
+                                        void *opaque)
+{
+    return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
                         RBD_AIO_WRITE);
 }

@@ -1158,6 +1174,7 @@ static BlockDriver bdrv_rbd = {
    .format_name            = "rbd",
    .instance_size          = sizeof(BDRVRBDState),
    .bdrv_parse_filename    = qemu_rbd_parse_filename,
+    .bdrv_refresh_limits    = qemu_rbd_refresh_limits,
    .bdrv_file_open         = qemu_rbd_open,
    .bdrv_close             = qemu_rbd_close,
    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
@@ -1170,8 +1187,8 @@ static BlockDriver bdrv_rbd = {
    .bdrv_truncate          = qemu_rbd_truncate,
    .protocol_name          = "rbd",

-    .bdrv_aio_readv         = qemu_rbd_aio_readv,
-    .bdrv_aio_writev        = qemu_rbd_aio_writev,
+    .bdrv_aio_preadv        = qemu_rbd_aio_preadv,
+    .bdrv_aio_pwritev       = qemu_rbd_aio_pwritev,

 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
    .bdrv_aio_flush         = qemu_rbd_aio_flush,
--- a/block/replication.c
+++ b/block/replication.c
@@ -145,7 +145,7 @@ static void replication_close(BlockDriverState *bs)
        replication_stop(s->rs, false, NULL);
    }
    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
-        block_job_cancel_sync(s->active_disk->bs->job);
+        job_cancel_sync(&s->active_disk->bs->job->job);
    }

    if (s->mode == REPLICATION_MODE_SECONDARY) {
@@ -260,7 +260,8 @@ out:
 static coroutine_fn int replication_co_writev(BlockDriverState *bs,
                                              int64_t sector_num,
                                              int remaining_sectors,
-                                              QEMUIOVector *qiov)
+                                              QEMUIOVector *qiov,
+                                              int flags)
 {
    BDRVReplicationState *s = bs->opaque;
    QEMUIOVector hd_qiov;
@@ -271,6 +272,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
    int ret;
    int64_t n;

+    assert(!flags);
    ret = replication_get_io_status(s);
    if (ret < 0) {
        goto out;
@@ -566,7 +568,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
        job = backup_job_create(NULL, s->secondary_disk->bs, s->hidden_disk->bs,
                                0, MIRROR_SYNC_MODE_NONE, NULL, false,
                                BLOCKDEV_ON_ERROR_REPORT,
-                                BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL,
+                                BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
                                backup_job_completed, bs, NULL, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
@@ -574,7 +576,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
            aio_context_release(aio_context);
            return;
        }
-        block_job_start(job);
+        job_start(&job->job);
        break;
    default:
        aio_context_release(aio_context);
@@ -679,7 +681,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
         * disk, secondary disk in backup_job_completed().
         */
        if (s->secondary_disk->bs->job) {
-            block_job_cancel_sync(s->secondary_disk->bs->job);
+            job_cancel_sync(&s->secondary_disk->bs->job->job);
        }

        if (!failover) {
@@ -691,7 +693,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)

        s->stage = BLOCK_REPLICATION_FAILOVER;
        commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
-                            BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
+                            JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
                            NULL, replication_done, bs, true, errp);
        break;
    default:
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -24,6 +24,7 @@
 #include "qemu/option.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
 #include "qemu/cutils.h"
@@ -538,27 +539,17 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
 static SocketAddress *sd_server_config(QDict *options, Error **errp)
 {
    QDict *server = NULL;
-    QObject *crumpled_server = NULL;
    Visitor *iv = NULL;
    SocketAddress *saddr = NULL;
    Error *local_err = NULL;

    qdict_extract_subqdict(options, &server, "server.");

-    crumpled_server = qdict_crumple(server, errp);
-    if (!crumpled_server) {
+    iv = qobject_input_visitor_new_flat_confused(server, errp);
+    if (!iv) {
        goto done;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_server);
    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
@@ -567,8 +558,7 @@ static SocketAddress *sd_server_config(QDict *options, Error **errp)

 done:
    visit_free(iv);
-    qobject_decref(crumpled_server);
-    QDECREF(server);
+    qobject_unref(server);
    return saddr;
 }

@@ -1859,9 +1849,7 @@ out:
        error_setg_errno(errp, -ret, "Can't pre-allocate");
    }
 out_with_err_set:
-    if (blk) {
-        blk_unref(blk);
-    }
+    blk_unref(blk);
    g_free(buf);

    return ret;
@@ -1883,7 +1871,7 @@ static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size,

    if (local_err) {
        error_propagate(errp, local_err);
-        qobject_decref(obj);
+        qobject_unref(obj);
        return -EINVAL;
    }

@@ -1901,7 +1889,7 @@ static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size,
    ret = sd_prealloc(bs, 0, size, errp);
 fail:
    bdrv_unref(bs);
-    QDECREF(qdict);
+    qobject_unref(qdict);
    return ret;
 }

@@ -1987,6 +1975,7 @@ static SheepdogRedundancy *parse_redundancy_str(const char *opt)
    } else {
        ret = qemu_strtol(n2, NULL, 10, &parity);
        if (ret < 0) {
+            g_free(redundancy);
            return NULL;
        }

@@ -2181,9 +2170,8 @@ static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
 {
    BlockdevCreateOptions *create_options = NULL;
    QDict *qdict, *location_qdict;
-    QObject *crumpled;
    Visitor *v;
-    const char *redundancy;
+    char *redundancy;
    Error *local_err = NULL;
    int ret;

@@ -2217,16 +2205,14 @@ static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
    }

    /* Get the QAPI object */
-    crumpled = qdict_crumple(qdict, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto fail;
    }

-    v = qobject_input_visitor_new_keyval(crumpled);
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);
-    qobject_decref(crumpled);

    if (local_err) {
        error_propagate(errp, local_err);
@@ -2252,7 +2238,8 @@ static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
    ret = sd_co_create(create_options, errp);
 fail:
    qapi_free_BlockdevCreateOptions(create_options);
-    QDECREF(qdict);
+    qobject_unref(qdict);
+    g_free(redundancy);
    return ret;
 }

@@ -2335,7 +2322,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
    }

    /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+    datalen = SD_INODE_HEADER_SIZE;
    s->inode.vdi_size = offset;
    ret = write_object(fd, s->bs, (char *)&s->inode,
                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
@@ -2612,13 +2599,15 @@ static void sd_aio_complete(SheepdogAIOCB *acb)
 }

 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
+                                     int nb_sectors, QEMUIOVector *qiov,
+                                     int flags)
 {
    SheepdogAIOCB acb;
    int ret;
    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
    BDRVSheepdogState *s = bs->opaque;

+    assert(!flags);
    if (offset > s->inode.vdi_size) {
        ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
        if (ret < 0) {
@@ -2701,7 +2690,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     */
    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
    /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+    datalen = SD_INODE_HEADER_SIZE;
    inode = g_malloc(datalen);

    /* refresh inode. */
@@ -2936,13 +2925,14 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    QEMUSnapshotInfo *sn_tab = NULL;
    unsigned wlen, rlen;
    int found = 0;
-    static SheepdogInode inode;
+    SheepdogInode *inode;
    unsigned long *vdi_inuse;
    unsigned int start_nr;
    uint64_t hval;
    uint32_t vid;

    vdi_inuse = g_malloc(max);
+    inode = g_malloc(SD_INODE_HEADER_SIZE);

    fd = connect_to_sdog(s, &local_err);
    if (fd < 0) {
@@ -2985,26 +2975,26 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
        }

        /* we don't need to read entire object */
-        ret = read_object(fd, s->bs, (char *)&inode,
+        ret = read_object(fd, s->bs, (char *)inode,
                          vid_to_vdi_oid(vid),
-                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+                          0, SD_INODE_HEADER_SIZE, 0,
                          s->cache_flags);

        if (ret) {
            continue;
        }

-        if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
-            sn_tab[found].date_sec = inode.snap_ctime >> 32;
-            sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
-            sn_tab[found].vm_state_size = inode.vm_state_size;
-            sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
+        if (!strcmp(inode->name, s->name) && is_snapshot(inode)) {
+            sn_tab[found].date_sec = inode->snap_ctime >> 32;
+            sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff;
+            sn_tab[found].vm_state_size = inode->vm_state_size;
+            sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec;

            snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str),
-                     "%" PRIu32, inode.snap_id);
+                     "%" PRIu32, inode->snap_id);
            pstrcpy(sn_tab[found].name,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
-                    inode.tag);
+                    MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)),
+                    inode->tag);
            found++;
        }
    }
@@ -3014,6 +3004,7 @@ out:
    *psn_tab = sn_tab;

    g_free(vdi_inuse);
+    g_free(inode);

    if (ret < 0) {
        return ret;
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "block/snapshot.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
@@ -214,7 +215,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
        bdrv_ref(file);

        qdict_extract_subqdict(options, &file_options, "file.");
-        QDECREF(file_options);
+        qobject_unref(file_options);
        qdict_put_str(options, "file", bdrv_get_node_name(file));

        drv->bdrv_close(bs);
@@ -223,7 +224,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,

        ret = bdrv_snapshot_goto(file, snapshot_id, errp);
        open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
-        QDECREF(options);
+        qobject_unref(options);
        if (open_ret < 0) {
            bdrv_unref(file);
            bs->drv = NULL;
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -28,6 +28,7 @@
 #include <libssh2_sftp.h>

 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
@@ -605,7 +606,6 @@ static BlockdevOptionsSsh *ssh_parse_options(QDict *options, Error **errp)
    BlockdevOptionsSsh *result = NULL;
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
-    QObject *crumpled;
    const QDictEntry *e;
    Visitor *v;

@@ -622,23 +622,13 @@ static BlockdevOptionsSsh *ssh_parse_options(QDict *options, Error **errp)
    }

    /* Create the QAPI object */
-    crumpled = qdict_crumple(options, errp);
-    if (crumpled == NULL) {
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
        goto fail;
    }

-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
-     * .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    v = qobject_input_visitor_new(crumpled);
    visit_type_BlockdevOptionsSsh(v, NULL, &result, &local_err);
    visit_free(v);
-    qobject_decref(crumpled);

    if (local_err) {
        error_propagate(errp, local_err);
@@ -917,7 +907,7 @@ static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts,
    ret = ssh_co_create(create_options, errp);

 out:
-    QDECREF(uri_options);
+    qobject_unref(uri_options);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -1164,11 +1154,13 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,

 static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
                                      int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
+                                      int nb_sectors, QEMUIOVector *qiov,
+                                      int flags)
 {
    BDRVSSHState *s = bs->opaque;
    int ret;

+    assert(!flags);
    qemu_co_mutex_lock(&s->lock);
    ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
                    nb_sectors * BDRV_SECTOR_SIZE, qiov);
--- a/block/stream.c
+++ b/block/stream.c
@@ -29,11 +29,8 @@ enum {
    STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
 };

-#define SLICE_TIME 100000000ULL /* ns */
-
 typedef struct StreamBlockJob {
    BlockJob common;
-    RateLimit limit;
    BlockDriverState *base;
    BlockdevOnError on_error;
    char *backing_file_str;
@@ -61,16 +58,16 @@ typedef struct {
    int ret;
 } StreamCompleteData;

-static void stream_complete(BlockJob *job, void *opaque)
+static void stream_complete(Job *job, void *opaque)
 {
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
+    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
+    BlockJob *bjob = &s->common;
    StreamCompleteData *data = opaque;
-    BlockDriverState *bs = blk_bs(job->blk);
+    BlockDriverState *bs = blk_bs(bjob->blk);
    BlockDriverState *base = s->base;
    Error *local_err = NULL;

-    if (!block_job_is_cancelled(&s->common) && bs->backing &&
-        data->ret == 0) {
+    if (!job_is_cancelled(job) && bs->backing && data->ret == 0) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (base) {
            base_id = s->backing_file_str;
@@ -91,12 +88,12 @@ out:
    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_flags != bdrv_get_flags(bs)) {
        /* Give up write permissions before making it read-only */
-        blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
+        blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
        bdrv_reopen(bs, s->bs_flags, NULL);
    }

    g_free(s->backing_file_str);
-    block_job_completed(&s->common, data->ret);
+    job_completed(job, data->ret, NULL);
    g_free(data);
 }

@@ -107,6 +104,7 @@ static void coroutine_fn stream_run(void *opaque)
    BlockBackend *blk = s->common.blk;
    BlockDriverState *bs = blk_bs(blk);
    BlockDriverState *base = s->base;
+    int64_t len;
    int64_t offset = 0;
    uint64_t delay_ns = 0;
    int error = 0;
@@ -118,11 +116,12 @@ static void coroutine_fn stream_run(void *opaque)
        goto out;
    }

-    s->common.len = bdrv_getlength(bs);
-    if (s->common.len < 0) {
-        ret = s->common.len;
+    len = bdrv_getlength(bs);
+    if (len < 0) {
+        ret = len;
        goto out;
    }
+    job_progress_set_remaining(&s->common.job, len);

    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);

@@ -135,14 +134,14 @@ static void coroutine_fn stream_run(void *opaque)
        bdrv_enable_copy_on_read(bs);
    }

-    for ( ; offset < s->common.len; offset += n) {
+    for ( ; offset < len; offset += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
-        block_job_sleep_ns(&s->common, delay_ns);
-        if (block_job_is_cancelled(&s->common)) {
+        job_sleep_ns(&s->common.job, delay_ns);
+        if (job_is_cancelled(&s->common.job)) {
            break;
        }

@@ -159,7 +158,7 @@ static void coroutine_fn stream_run(void *opaque)

            /* Finish early if end of backing file has been reached */
            if (ret == 0 && n == 0) {
-                n = s->common.len - offset;
+                n = len - offset;
            }

            copy = (ret == 1);
@@ -185,9 +184,9 @@ static void coroutine_fn stream_run(void *opaque)
        ret = 0;

        /* Publish progress */
-        s->common.offset += n;
-        if (copy && s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, n);
+        job_progress_update(&s->common.job, n);
+        if (copy) {
+            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
@@ -206,25 +205,18 @@ out:
    /* Modify backing chain and close BDSes in main loop */
    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, stream_complete, data);
-}
-
-static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
+    job_defer_to_main_loop(&s->common.job, stream_complete, data);
 }

 static const BlockJobDriver stream_job_driver = {
-    .instance_size = sizeof(StreamBlockJob),
-    .job_type      = BLOCK_JOB_TYPE_STREAM,
-    .set_speed     = stream_set_speed,
-    .start         = stream_run,
+    .job_driver = {
+        .instance_size = sizeof(StreamBlockJob),
+        .job_type      = JOB_TYPE_STREAM,
+        .free          = block_job_free,
+        .start         = stream_run,
+        .user_resume   = block_job_user_resume,
+        .drain         = block_job_drain,
+    },
 };

 void stream_start(const char *job_id, BlockDriverState *bs,
@@ -251,7 +243,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
                         BLK_PERM_GRAPH_MOD,
                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                         BLK_PERM_WRITE,
-                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+                         speed, JOB_DEFAULT, NULL, NULL, errp);
    if (!s) {
        goto fail;
    }
@@ -272,7 +264,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,

    s->on_error = on_error;
    trace_stream_start(bs, base, s);
-    block_job_start(&s->common);
+    job_start(&s->common.job);
    return;

 fail:
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -36,9 +36,12 @@ static QemuOptsList throttle_opts = {
    },
 };

-static int throttle_configure_tgm(BlockDriverState *bs,
-                                  ThrottleGroupMember *tgm,
-                                  QDict *options, Error **errp)
+/*
+ * If this function succeeds then the throttle group name is stored in
+ * @group and must be freed by the caller.
+ * If there's an error then @group remains unmodified.
+ */
+static int throttle_parse_options(QDict *options, char **group, Error **errp)
 {
    int ret;
    const char *group_name;
@@ -63,8 +66,7 @@ static int throttle_configure_tgm(BlockDriverState *bs,
        goto fin;
    }

-    /* Register membership to group with name group_name */
-    throttle_group_register_tgm(tgm, group_name, bdrv_get_aio_context(bs));
+    *group = g_strdup(group_name);
    ret = 0;
 fin:
    qemu_opts_del(opts);
@@ -75,16 +77,27 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
                         int flags, Error **errp)
 {
    ThrottleGroupMember *tgm = bs->opaque;
+    char *group;
+    int ret;

    bs->file = bdrv_open_child(NULL, options, "file", bs,
                               &child_file, false, errp);
    if (!bs->file) {
        return -EINVAL;
    }
-    bs->supported_write_flags = bs->file->bs->supported_write_flags;
-    bs->supported_zero_flags = bs->file->bs->supported_zero_flags;
+    bs->supported_write_flags = bs->file->bs->supported_write_flags |
+                                BDRV_REQ_WRITE_UNCHANGED;
+    bs->supported_zero_flags = bs->file->bs->supported_zero_flags |
+                               BDRV_REQ_WRITE_UNCHANGED;

-    return throttle_configure_tgm(bs, tgm, options, errp);
+    ret = throttle_parse_options(options, &group, errp);
+    if (ret == 0) {
+        /* Register membership to group with name group_name */
+        throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs));
+        g_free(group);
+    }
+
+    return ret;
 }

 static void throttle_close(BlockDriverState *bs)
@@ -160,35 +173,36 @@ static void throttle_attach_aio_context(BlockDriverState *bs,
 static int throttle_reopen_prepare(BDRVReopenState *reopen_state,
                                   BlockReopenQueue *queue, Error **errp)
 {
-    ThrottleGroupMember *tgm;
+    int ret;
+    char *group = NULL;

    assert(reopen_state != NULL);
    assert(reopen_state->bs != NULL);

-    reopen_state->opaque = g_new0(ThrottleGroupMember, 1);
-    tgm = reopen_state->opaque;
-
-    return throttle_configure_tgm(reopen_state->bs, tgm, reopen_state->options,
-            errp);
+    ret = throttle_parse_options(reopen_state->options, &group, errp);
+    reopen_state->opaque = group;
+    return ret;
 }

 static void throttle_reopen_commit(BDRVReopenState *reopen_state)
 {
-    ThrottleGroupMember *old_tgm = reopen_state->bs->opaque;
-    ThrottleGroupMember *new_tgm = reopen_state->opaque;
+    BlockDriverState *bs = reopen_state->bs;
+    ThrottleGroupMember *tgm = bs->opaque;
+    char *group = reopen_state->opaque;

-    throttle_group_unregister_tgm(old_tgm);
-    g_free(old_tgm);
-    reopen_state->bs->opaque = new_tgm;
+    assert(group);
+
+    if (strcmp(group, throttle_group_get_name(tgm))) {
+        throttle_group_unregister_tgm(tgm);
+        throttle_group_register_tgm(tgm, group, bdrv_get_aio_context(bs));
+    }
+    g_free(reopen_state->opaque);
    reopen_state->opaque = NULL;
 }

 static void throttle_reopen_abort(BDRVReopenState *reopen_state)
 {
-    ThrottleGroupMember *tgm = reopen_state->opaque;
-
-    throttle_group_unregister_tgm(tgm);
-    g_free(tgm);
+    g_free(reopen_state->opaque);
    reopen_state->opaque = NULL;
 }

--- a/block/trace-events
+++ b/block/trace-events
@@ -4,11 +4,6 @@
 bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags 0x%x format_name \"%s\""
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"

-# blockjob.c
-block_job_completed(void *job, int ret, int jret) "job %p ret %d corrected ret %d"
-block_job_state_transition(void *job,  int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)"
-block_job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)"
-
 # block/block-backend.c
 blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -51,10 +51,10 @@

 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -865,6 +865,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
        }
    }

+    ret = 0;
 exit:
    blk_unref(blk);
    bdrv_unref(bs_file);
@@ -933,7 +934,11 @@ static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts,
    }

    /* Get the QAPI object */
-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
+        ret = -EINVAL;
+        goto done;
+    }
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -951,7 +956,7 @@ static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts,
    /* Create the vdi image (format layer) */
    ret = vdi_co_do_create(create_options, block_size, errp);
 done:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    qapi_free_BlockdevCreateOptions(create_options);
    bdrv_unref(bs_file);
    return ret;
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -19,7 +19,7 @@
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "qemu/bswap.h"
-#include "block/vhdx.h"
+#include "vhdx.h"

 /*
 * All the VHDX formats on disk are little endian - the following
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -24,7 +24,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
-#include "block/vhdx.h"
+#include "vhdx.h"


 typedef struct VHDXLogSequence {
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -18,12 +18,13 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/crc32c.h"
 #include "qemu/bswap.h"
-#include "block/vhdx.h"
+#include "vhdx.h"
 #include "migration/blocker.h"
 #include "qemu/uuid.h"
 #include "qapi/qmp/qdict.h"
@@ -1226,7 +1227,8 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
 }

 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
+                                       int nb_sectors, QEMUIOVector *qiov,
+                                       int flags)
 {
    int ret = -ENOTSUP;
    BDRVVHDXState *s = bs->opaque;
@@ -1242,6 +1244,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
    uint64_t bat_prior_offset = 0;
    bool bat_update = false;

+    assert(!flags);
    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);
@@ -1949,7 +1952,7 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
        goto delete_and_exit;
    }

-
+    ret = 0;
 delete_and_exit:
    blk_unref(blk);
    bdrv_unref(bs);
@@ -1962,8 +1965,7 @@ static int coroutine_fn vhdx_co_create_opts(const char *filename,
                                            Error **errp)
 {
    BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;
@@ -2002,15 +2004,12 @@ static int coroutine_fn vhdx_co_create_opts(const char *filename,
    qdict_put_str(qdict, "driver", "vhdx");
    qdict_put_str(qdict, "file", bs->node_name);

-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto fail;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -2049,7 +2048,7 @@ static int coroutine_fn vhdx_co_create_opts(const char *filename,
    ret = vhdx_co_create(create_options, errp);

 fail:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -1080,8 +1081,7 @@ static int coroutine_fn vpc_co_create_opts(const char *filename,
                                           QemuOpts *opts, Error **errp)
 {
    BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL;
-    QObject *qobj;
+    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;
@@ -1118,15 +1118,12 @@ static int coroutine_fn vpc_co_create_opts(const char *filename,
    qdict_put_str(qdict, "driver", "vpc");
    qdict_put_str(qdict, "file", bs->node_name);

-    qobj = qdict_crumple(qdict, errp);
-    QDECREF(qdict);
-    qdict = qobject_to(QDict, qobj);
-    if (qdict == NULL) {
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
        ret = -EINVAL;
        goto fail;
    }

-    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

@@ -1157,7 +1154,7 @@ static int coroutine_fn vpc_co_create_opts(const char *filename,
    ret = vpc_co_create(create_options, errp);

 fail:
-    QDECREF(qdict);
+    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -27,6 +27,7 @@
 #include <dirent.h>
 #include "qapi/error.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/bswap.h"
@@ -3179,7 +3180,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
    qdict_put_str(options, "write-target.driver", "qcow");
    s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
                              &child_vvfat_qcow, false, errp);
-    QDECREF(options);
+    qobject_unref(options);
    if (!s->qcow) {
        ret = -EINVAL;
        goto err;
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -12,6 +12,7 @@
 #include <qnio/qnio_api.h>
 #include <sys/param.h>
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
@@ -216,6 +217,12 @@ static void vxhs_parse_filename(const char *filename, QDict *options,
    }
 }

+static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    /* XXX Does VXHS support AIO on less than 512-byte alignment? */
+    bs->bl.request_alignment = 512;
+}
+
 static int vxhs_init_and_ref(void)
 {
    if (vxhs_ref++ == 0) {
@@ -396,7 +403,7 @@ static int vxhs_open(BlockDriverState *bs, QDict *options,

 out:
    g_free(of_vsa_addr);
-    QDECREF(backing_options);
+    qobject_unref(backing_options);
    qemu_opts_del(tcp_opts);
    qemu_opts_del(opts);
    g_free(cacert);
@@ -424,21 +431,17 @@ static const AIOCBInfo vxhs_aiocb_info = {
 * and is passed to QNIO. When QNIO completes the work,
 * it will be passed back through the callback.
 */
-static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
-                               QEMUIOVector *qiov, int nb_sectors,
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset,
+                               QEMUIOVector *qiov, uint64_t size,
                               BlockCompletionFunc *cb, void *opaque,
                               VDISKAIOCmd iodir)
 {
    VXHSAIOCB *acb = NULL;
    BDRVVXHSState *s = bs->opaque;
-    size_t size;
-    uint64_t offset;
    int iio_flags = 0;
    int ret = 0;
    void *dev_handle = s->vdisk_hostinfo.dev_handle;

-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);

    /*
@@ -451,11 +454,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
    switch (iodir) {
    case VDISK_AIO_WRITE:
            ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
-                             offset, (uint64_t)size, iio_flags);
+                             offset, size, iio_flags);
            break;
    case VDISK_AIO_READ:
            ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
-                            offset, (uint64_t)size, iio_flags);
+                            offset, size, iio_flags);
            break;
    default:
            trace_vxhs_aio_rw_invalid(iodir);
@@ -474,22 +477,20 @@ errout:
    return NULL;
 }

-static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
+static BlockAIOCB *vxhs_aio_preadv(BlockDriverState *bs,
+                                   uint64_t offset, uint64_t bytes,
+                                   QEMUIOVector *qiov, int flags,
                                   BlockCompletionFunc *cb, void *opaque)
 {
-    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
-                       opaque, VDISK_AIO_READ);
+    return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_READ);
 }

-static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb, void *opaque)
+static BlockAIOCB *vxhs_aio_pwritev(BlockDriverState *bs,
+                                    uint64_t offset, uint64_t bytes,
+                                    QEMUIOVector *qiov, int flags,
+                                    BlockCompletionFunc *cb, void *opaque)
 {
-    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
-                       cb, opaque, VDISK_AIO_WRITE);
+    return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_WRITE);
 }

 static void vxhs_close(BlockDriverState *bs)
@@ -561,10 +562,11 @@ static BlockDriver bdrv_vxhs = {
    .instance_size                = sizeof(BDRVVXHSState),
    .bdrv_file_open               = vxhs_open,
    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_refresh_limits          = vxhs_refresh_limits,
    .bdrv_close                   = vxhs_close,
    .bdrv_getlength               = vxhs_getlength,
-    .bdrv_aio_readv               = vxhs_aio_readv,
-    .bdrv_aio_writev              = vxhs_aio_writev,
+    .bdrv_aio_preadv              = vxhs_aio_preadv,
+    .bdrv_aio_pwritev             = vxhs_aio_pwritev,
 };

 static void bdrv_vxhs_init(void)
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -112,15 +112,14 @@ static const AIOCBInfo win32_aiocb_info = {

 BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
        QEMUWin32AIOState *aio, HANDLE hfile,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
    struct QEMUWin32AIOCB *waiocb;
-    uint64_t offset = sector_num * 512;
    DWORD rc;

    waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque);
-    waiocb->nbytes = nb_sectors * 512;
+    waiocb->nbytes = bytes;
    waiocb->qiov = qiov;
    waiocb->is_read = (type == QEMU_AIO_READ);

--- a/blockdev.c
+++ b/blockdev.c
@@ -35,6 +35,7 @@
 #include "sysemu/blockdev.h"
 #include "hw/block/block.h"
 #include "block/blockjob.h"
+#include "block/qdict.h"
 #include "block/throttle-groups.h"
 #include "monitor/monitor.h"
 #include "qemu/error-report.h"
@@ -150,7 +151,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
        aio_context_acquire(aio_context);

        if (bs->job) {
-            block_job_cancel(bs->job, false);
+            job_cancel(&bs->job->job, false);
        }

        aio_context_release(aio_context);
@@ -576,7 +577,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
        blk_rs->read_only     = read_only;
        blk_rs->detect_zeroes = detect_zeroes;

-        QDECREF(bs_opts);
+        qobject_unref(bs_opts);
    } else {
        if (file && !*file) {
            file = NULL;
@@ -632,16 +633,16 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,

 err_no_bs_opts:
    qemu_opts_del(opts);
-    QDECREF(interval_dict);
-    QDECREF(interval_list);
+    qobject_unref(interval_dict);
+    qobject_unref(interval_list);
    return blk;

 early_err:
    qemu_opts_del(opts);
-    QDECREF(interval_dict);
-    QDECREF(interval_list);
+    qobject_unref(interval_dict);
+    qobject_unref(interval_list);
 err_no_opts:
-    QDECREF(bs_opts);
+    qobject_unref(bs_opts);
    return NULL;
 }

@@ -729,30 +730,6 @@ QemuOptsList qemu_legacy_drive_opts = {
            .name = "if",
            .type = QEMU_OPT_STRING,
            .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
-        },{
-            .name = "cyls",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of cylinders (ide disk geometry)",
-        },{
-            .name = "heads",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of heads (ide disk geometry)",
-        },{
-            .name = "secs",
-            .type = QEMU_OPT_NUMBER,
-            .help = "number of sectors (ide disk geometry)",
-        },{
-            .name = "trans",
-            .type = QEMU_OPT_STRING,
-            .help = "chs translation (auto, lba, none)",
-        },{
-            .name = "addr",
-            .type = QEMU_OPT_STRING,
-            .help = "pci address (virtio only)",
-        },{
-            .name = "serial",
-            .type = QEMU_OPT_STRING,
-            .help = "disk serial number",
        },{
            .name = "file",
            .type = QEMU_OPT_STRING,
@@ -791,19 +768,13 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    QemuOpts *legacy_opts;
    DriveMediaType media = MEDIA_DISK;
    BlockInterfaceType type;
-    int cyls, heads, secs, translation;
    int max_devs, bus_id, unit_id, index;
-    const char *devaddr;
    const char *werror, *rerror;
    bool read_only = false;
    bool copy_on_read;
-    const char *serial;
    const char *filename;
    Error *local_err = NULL;
    int i;
-    const char *deprecated[] = {
-        "serial", "trans", "secs", "heads", "cyls", "addr"
-    };

    /* Change legacy command line options into QMP ones */
    static const struct {
@@ -880,16 +851,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        goto fail;
    }

-    /* Other deprecated options */
-    if (!qtest_enabled()) {
-        for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
-            if (qemu_opt_get(legacy_opts, deprecated[i]) != NULL) {
-                error_report("'%s' is deprecated, please use the corresponding "
-                             "option of '-device' instead", deprecated[i]);
-            }
-        }
-    }
-
    /* Media type */
    value = qemu_opt_get(legacy_opts, "media");
    if (value) {
@@ -931,57 +892,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        type = block_default_type;
    }

-    /* Geometry */
-    cyls  = qemu_opt_get_number(legacy_opts, "cyls", 0);
-    heads = qemu_opt_get_number(legacy_opts, "heads", 0);
-    secs  = qemu_opt_get_number(legacy_opts, "secs", 0);
-
-    if (cyls || heads || secs) {
-        if (cyls < 1) {
-            error_report("invalid physical cyls number");
-            goto fail;
-        }
-        if (heads < 1) {
-            error_report("invalid physical heads number");
-            goto fail;
-        }
-        if (secs < 1) {
-            error_report("invalid physical secs number");
-            goto fail;
-        }
-    }
-
-    translation = BIOS_ATA_TRANSLATION_AUTO;
-    value = qemu_opt_get(legacy_opts, "trans");
-    if (value != NULL) {
-        if (!cyls) {
-            error_report("'%s' trans must be used with cyls, heads and secs",
-                         value);
-            goto fail;
-        }
-        if (!strcmp(value, "none")) {
-            translation = BIOS_ATA_TRANSLATION_NONE;
-        } else if (!strcmp(value, "lba")) {
-            translation = BIOS_ATA_TRANSLATION_LBA;
-        } else if (!strcmp(value, "large")) {
-            translation = BIOS_ATA_TRANSLATION_LARGE;
-        } else if (!strcmp(value, "rechs")) {
-            translation = BIOS_ATA_TRANSLATION_RECHS;
-        } else if (!strcmp(value, "auto")) {
-            translation = BIOS_ATA_TRANSLATION_AUTO;
-        } else {
-            error_report("'%s' invalid translation type", value);
-            goto fail;
-        }
-    }
-
-    if (media == MEDIA_CDROM) {
-        if (cyls || secs || heads) {
-            error_report("CHS can't be set with media=cdrom");
-            goto fail;
-        }
-    }
-
    /* Device address specified by bus/unit or index.
     * If none was specified, try to find the first free one. */
    bus_id  = qemu_opt_get_number(legacy_opts, "bus", 0);
@@ -1021,9 +931,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        goto fail;
    }

-    /* Serial number */
-    serial = qemu_opt_get(legacy_opts, "serial");
-
    /* no id supplied -> create one */
    if (qemu_opts_id(all_opts) == NULL) {
        char *new_id;
@@ -1043,12 +950,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    }

    /* Add virtio block device */
-    devaddr = qemu_opt_get(legacy_opts, "addr");
-    if (devaddr && type != IF_VIRTIO) {
-        error_report("addr is not supported by this bus type");
-        goto fail;
-    }
-
    if (type == IF_VIRTIO) {
        QemuOpts *devopts;
        devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
@@ -1060,9 +961,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
        }
        qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
                     &error_abort);
-        if (devaddr) {
-            qemu_opt_set(devopts, "addr", devaddr, &error_abort);
-        }
    }

    filename = qemu_opt_get(legacy_opts, "file");
@@ -1104,16 +1002,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    dinfo = g_malloc0(sizeof(*dinfo));
    dinfo->opts = all_opts;

-    dinfo->cyls = cyls;
-    dinfo->heads = heads;
-    dinfo->secs = secs;
-    dinfo->trans = translation;
-
    dinfo->type = type;
    dinfo->bus = bus_id;
    dinfo->unit = unit_id;
-    dinfo->devaddr = devaddr;
-    dinfo->serial = g_strdup(serial);

    blk_set_legacy_dinfo(blk, dinfo);

@@ -1130,7 +1021,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)

 fail:
    qemu_opts_del(legacy_opts);
-    QDECREF(bs_opts);
+    qobject_unref(bs_opts);
    return dinfo;
 }

@@ -1446,7 +1337,7 @@ typedef struct BlkActionOps {
 struct BlkActionState {
    TransactionAction *action;
    const BlkActionOps *ops;
-    BlockJobTxn *block_job_txn;
+    JobTxn *block_job_txn;
    TransactionProperties *txn_props;
    QSIMPLEQ_ENTRY(BlkActionState) entry;
 };
@@ -1864,7 +1755,7 @@ typedef struct DriveBackupState {
    BlockJob *job;
 } DriveBackupState;

-static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
                            Error **errp);

 static void drive_backup_prepare(BlkActionState *common, Error **errp)
@@ -1910,7 +1801,7 @@ static void drive_backup_commit(BlkActionState *common)
    aio_context_acquire(aio_context);

    assert(state->job);
-    block_job_start(state->job);
+    job_start(&state->job->job);

    aio_context_release(aio_context);
 }
@@ -1925,7 +1816,7 @@ static void drive_backup_abort(BlkActionState *common)
        aio_context = bdrv_get_aio_context(state->bs);
        aio_context_acquire(aio_context);

-        block_job_cancel_sync(state->job);
+        job_cancel_sync(&state->job->job);

        aio_context_release(aio_context);
    }
@@ -1954,7 +1845,7 @@ typedef struct BlockdevBackupState {
    BlockJob *job;
 } BlockdevBackupState;

-static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
                                    Error **errp);

 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
@@ -2008,7 +1899,7 @@ static void blockdev_backup_commit(BlkActionState *common)
    aio_context_acquire(aio_context);

    assert(state->job);
-    block_job_start(state->job);
+    job_start(&state->job->job);

    aio_context_release(aio_context);
 }
@@ -2023,7 +1914,7 @@ static void blockdev_backup_abort(BlkActionState *common)
        aio_context = bdrv_get_aio_context(state->bs);
        aio_context_acquire(aio_context);

-        block_job_cancel_sync(state->job);
+        job_cancel_sync(&state->job->job);

        aio_context_release(aio_context);
    }
@@ -2052,6 +1943,7 @@ typedef struct BlockDirtyBitmapState {
    BlockDriverState *bs;
    HBitmap *backup;
    bool prepared;
+    bool was_enabled;
 } BlockDirtyBitmapState;

 static void block_dirty_bitmap_add_prepare(BlkActionState *common,
@@ -2072,6 +1964,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
                               action->has_granularity, action->granularity,
                               action->has_persistent, action->persistent,
                               action->has_autoload, action->autoload,
+                               action->has_x_disabled, action->x_disabled,
                               &local_err);

    if (!local_err) {
@@ -2151,6 +2044,74 @@ static void block_dirty_bitmap_clear_commit(BlkActionState *common)
    hbitmap_free(state->backup);
 }

+static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
+                                              Error **errp)
+{
+    BlockDirtyBitmap *action;
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+
+    if (action_check_completion_mode(common, errp) < 0) {
+        return;
+    }
+
+    action = common->action->u.x_block_dirty_bitmap_enable.data;
+    state->bitmap = block_dirty_bitmap_lookup(action->node,
+                                              action->name,
+                                              NULL,
+                                              errp);
+    if (!state->bitmap) {
+        return;
+    }
+
+    state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
+    bdrv_enable_dirty_bitmap(state->bitmap);
+}
+
+static void block_dirty_bitmap_enable_abort(BlkActionState *common)
+{
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+
+    if (!state->was_enabled) {
+        bdrv_disable_dirty_bitmap(state->bitmap);
+    }
+}
+
+static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
+                                               Error **errp)
+{
+    BlockDirtyBitmap *action;
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+
+    if (action_check_completion_mode(common, errp) < 0) {
+        return;
+    }
+
+    action = common->action->u.x_block_dirty_bitmap_disable.data;
+    state->bitmap = block_dirty_bitmap_lookup(action->node,
+                                              action->name,
+                                              NULL,
+                                              errp);
+    if (!state->bitmap) {
+        return;
+    }
+
+    state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
+    bdrv_disable_dirty_bitmap(state->bitmap);
+}
+
+static void block_dirty_bitmap_disable_abort(BlkActionState *common)
+{
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+
+    if (state->was_enabled) {
+        bdrv_enable_dirty_bitmap(state->bitmap);
+    }
+}
+
 static void abort_prepare(BlkActionState *common, Error **errp)
 {
    error_setg(errp, "Transaction aborted using Abort action");
@@ -2211,7 +2172,17 @@ static const BlkActionOps actions[] = {
        .prepare = block_dirty_bitmap_clear_prepare,
        .commit = block_dirty_bitmap_clear_commit,
        .abort = block_dirty_bitmap_clear_abort,
-    }
+    },
+    [TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_ENABLE] = {
+        .instance_size = sizeof(BlockDirtyBitmapState),
+        .prepare = block_dirty_bitmap_enable_prepare,
+        .abort = block_dirty_bitmap_enable_abort,
+    },
+    [TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_DISABLE] = {
+        .instance_size = sizeof(BlockDirtyBitmapState),
+        .prepare = block_dirty_bitmap_disable_prepare,
+        .abort = block_dirty_bitmap_disable_abort,
+     }
 };

 /**
@@ -2243,7 +2214,7 @@ void qmp_transaction(TransactionActionList *dev_list,
                     Error **errp)
 {
    TransactionActionList *dev_entry = dev_list;
-    BlockJobTxn *block_job_txn = NULL;
+    JobTxn *block_job_txn = NULL;
    BlkActionState *state, *next;
    Error *local_err = NULL;

@@ -2251,11 +2222,11 @@ void qmp_transaction(TransactionActionList *dev_list,
    QSIMPLEQ_INIT(&snap_bdrv_states);

    /* Does this transaction get canceled as a group on failure?
-     * If not, we don't really need to make a BlockJobTxn.
+     * If not, we don't really need to make a JobTxn.
     */
    props = get_transaction_properties(props);
    if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
-        block_job_txn = block_job_txn_new();
+        block_job_txn = job_txn_new();
    }

    /* drain all i/o before any operations */
@@ -2314,7 +2285,7 @@ exit:
    if (!has_props) {
        qapi_free_TransactionProperties(props);
    }
-    block_job_txn_unref(block_job_txn);
+    job_txn_unref(block_job_txn);
 }

 void qmp_eject(bool has_device, const char *device,
@@ -2801,6 +2772,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
                                bool has_granularity, uint32_t granularity,
                                bool has_persistent, bool persistent,
                                bool has_autoload, bool autoload,
+                                bool has_disabled, bool disabled,
                                Error **errp)
 {
    BlockDriverState *bs;
@@ -2835,6 +2807,10 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
        warn_report("Autoload option is deprecated and its value is ignored");
    }

+    if (!has_disabled) {
+        disabled = false;
+    }
+
    if (persistent &&
        !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
    {
@@ -2846,6 +2822,10 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
        return;
    }

+    if (disabled) {
+        bdrv_disable_dirty_bitmap(bitmap);
+    }
+
    bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
 }

@@ -2881,7 +2861,6 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
        }
    }

-    bdrv_dirty_bitmap_make_anon(bitmap);
    bdrv_release_dirty_bitmap(bs, bitmap);
 }

@@ -2923,6 +2902,78 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
    bdrv_clear_dirty_bitmap(bitmap, NULL);
 }

+void qmp_x_block_dirty_bitmap_enable(const char *node, const char *name,
+                                   Error **errp)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
+    if (!bitmap) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be enabled",
+                   name);
+        return;
+    }
+
+    bdrv_enable_dirty_bitmap(bitmap);
+}
+
+void qmp_x_block_dirty_bitmap_disable(const char *node, const char *name,
+                                    Error **errp)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
+    if (!bitmap) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be disabled",
+                   name);
+        return;
+    }
+
+    bdrv_disable_dirty_bitmap(bitmap);
+}
+
+void qmp_x_block_dirty_bitmap_merge(const char *node, const char *dst_name,
+                                    const char *src_name, Error **errp)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *dst, *src;
+
+    dst = block_dirty_bitmap_lookup(node, dst_name, &bs, errp);
+    if (!dst) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(dst)) {
+        error_setg(errp, "Bitmap '%s' is frozen and cannot be modified",
+                   dst_name);
+        return;
+    } else if (bdrv_dirty_bitmap_readonly(dst)) {
+        error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
+                   dst_name);
+        return;
+    }
+
+    src = bdrv_find_dirty_bitmap(bs, src_name);
+    if (!src) {
+        error_setg(errp, "Dirty bitmap '%s' not found", src_name);
+        return;
+    }
+
+    bdrv_merge_dirty_bitmap(dst, src, errp);
+}
+
 BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
                                                              const char *name,
                                                              Error **errp)
@@ -3244,7 +3295,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
            goto out;
        }
        commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-                            BLOCK_JOB_DEFAULT, speed, on_error,
+                            JOB_DEFAULT, speed, on_error,
                            filter_node_name, NULL, NULL, false, &local_err);
    } else {
        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
@@ -3264,7 +3315,7 @@ out:
    aio_context_release(aio_context);
 }

-static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
+static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
                                 Error **errp)
 {
    BlockDriverState *bs;
@@ -3275,7 +3326,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
    AioContext *aio_context;
    QDict *options = NULL;
    Error *local_err = NULL;
-    int flags, job_flags = BLOCK_JOB_DEFAULT;
+    int flags, job_flags = JOB_DEFAULT;
    int64_t size;
    bool set_backing_hd = false;

@@ -3398,10 +3449,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
        }
    }
    if (!backup->auto_finalize) {
-        job_flags |= BLOCK_JOB_MANUAL_FINALIZE;
+        job_flags |= JOB_MANUAL_FINALIZE;
    }
    if (!backup->auto_dismiss) {
-        job_flags |= BLOCK_JOB_MANUAL_DISMISS;
+        job_flags |= JOB_MANUAL_DISMISS;
    }

    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
@@ -3425,7 +3476,7 @@ void qmp_drive_backup(DriveBackup *arg, Error **errp)
    BlockJob *job;
    job = do_drive_backup(arg, NULL, errp);
    if (job) {
-        block_job_start(job);
+        job_start(&job->job);
    }
 }

@@ -3434,7 +3485,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
    return bdrv_named_nodes_list(errp);
 }

-BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
+BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
                             Error **errp)
 {
    BlockDriverState *bs;
@@ -3442,7 +3493,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
    Error *local_err = NULL;
    AioContext *aio_context;
    BlockJob *job = NULL;
-    int job_flags = BLOCK_JOB_DEFAULT;
+    int job_flags = JOB_DEFAULT;

    if (!backup->has_speed) {
        backup->speed = 0;
@@ -3491,10 +3542,10 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
        }
    }
    if (!backup->auto_finalize) {
-        job_flags |= BLOCK_JOB_MANUAL_FINALIZE;
+        job_flags |= JOB_MANUAL_FINALIZE;
    }
    if (!backup->auto_dismiss) {
-        job_flags |= BLOCK_JOB_MANUAL_DISMISS;
+        job_flags |= JOB_MANUAL_DISMISS;
    }
    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
                            backup->sync, NULL, backup->compress,
@@ -3513,7 +3564,7 @@ void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp)
    BlockJob *job;
    job = do_blockdev_backup(arg, NULL, errp);
    if (job) {
-        block_job_start(job);
+        job_start(&job->job);
    }
 }

@@ -3844,14 +3895,14 @@ void qmp_block_job_cancel(const char *device,
        force = false;
    }

-    if (block_job_user_paused(job) && !force) {
+    if (job_user_paused(&job->job) && !force) {
        error_setg(errp, "The block job for device '%s' is currently paused",
                   device);
        goto out;
    }

    trace_qmp_block_job_cancel(job);
-    block_job_user_cancel(job, force, errp);
+    job_user_cancel(&job->job, force, errp);
 out:
    aio_context_release(aio_context);
 }
@@ -3866,7 +3917,7 @@ void qmp_block_job_pause(const char *device, Error **errp)
    }

    trace_qmp_block_job_pause(job);
-    block_job_user_pause(job, errp);
+    job_user_pause(&job->job, errp);
    aio_context_release(aio_context);
 }

@@ -3880,7 +3931,7 @@ void qmp_block_job_resume(const char *device, Error **errp)
    }

    trace_qmp_block_job_resume(job);
-    block_job_user_resume(job, errp);
+    job_user_resume(&job->job, errp);
    aio_context_release(aio_context);
 }

@@ -3894,7 +3945,7 @@ void qmp_block_job_complete(const char *device, Error **errp)
    }

    trace_qmp_block_job_complete(job);
-    block_job_complete(job, errp);
+    job_complete(&job->job, errp);
    aio_context_release(aio_context);
 }

@@ -3908,21 +3959,23 @@ void qmp_block_job_finalize(const char *id, Error **errp)
    }

    trace_qmp_block_job_finalize(job);
-    block_job_finalize(job, errp);
+    job_finalize(&job->job, errp);
    aio_context_release(aio_context);
 }

 void qmp_block_job_dismiss(const char *id, Error **errp)
 {
    AioContext *aio_context;
-    BlockJob *job = find_block_job(id, &aio_context, errp);
+    BlockJob *bjob = find_block_job(id, &aio_context, errp);
+    Job *job;

-    if (!job) {
+    if (!bjob) {
        return;
    }

-    trace_qmp_block_job_dismiss(job);
-    block_job_dismiss(&job, errp);
+    trace_qmp_block_job_dismiss(bjob);
+    job = &bjob->job;
+    job_dismiss(&job, errp);
    aio_context_release(aio_context);
 }

@@ -4022,7 +4075,7 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr)
    qdict = qemu_opts_to_qdict(opts, NULL);

    if (!qdict_get_try_str(qdict, "node-name")) {
-        QDECREF(qdict);
+        qobject_unref(qdict);
        error_report("'node-name' needs to be specified");
        goto out;
    }
--- a/blockjob.c
+++ b/blockjob.c
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -898,9 +898,10 @@ int main(int argc, char **argv)
        cpu_model = "any";
 #endif
    }
+
+    /* init tcg before creating CPUs and to get qemu_host_page_size */
    tcg_exec_init(0);
-    /* NOTE: we need to init the CPU at this stage to get
-       qemu_host_page_size */
+
    cpu_type = parse_cpu_model(cpu_model);
    cpu = cpu_create(cpu_type);
    env = cpu->env_ptr;
@@ -917,7 +918,7 @@ int main(int argc, char **argv)
    envlist_free(envlist);

    /*
-     * Now that page sizes are configured in cpu_init() we can do
+     * Now that page sizes are configured in tcg_exec_init() we can do
     * proper page alignment for guest_base.
     */
    guest_base = HOST_PAGE_ALIGN(guest_base);
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -21,6 +21,7 @@
 #include "qemu.h"
 #include "qemu-common.h"
 #include "bsd-mman.h"
+#include "exec/exec-all.h"

 //#define DEBUG_MMAP

--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -19,7 +19,6 @@


 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"

 #undef DEBUG_REMAP
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -304,6 +304,7 @@ void mux_set_focus(Chardev *chr, int focus)
    }

    d->focus = focus;
+    chr->be = d->backends[focus];
    mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
 }

--- a/chardev/char-serial.c
+++ b/chardev/char-serial.c
@@ -139,7 +139,7 @@ static void tty_serial_init(int fd, int speed,

    tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
                     | INLCR | IGNCR | ICRNL | IXON);
-    tty.c_oflag |= OPOST;
+    tty.c_oflag &= ~OPOST;
    tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN | ISIG);
    tty.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
    switch (data_bits) {
--- a/117
+++ b/117
@@ -60,6 +60,11 @@ do_compiler() {
    # is compiler binary to execute.
    local compiler="$1"
    shift
+    if test -n "$BASH_VERSION"; then eval '
+        echo >>config.log "
+funcs: ${FUNCNAME[*]}
+lines: ${BASH_LINENO[*]}"
+    '; fi
    echo $compiler "$@" >> config.log
    $compiler "$@" >> config.log 2>&1 || return $?
    # Test passed. If this is an --enable-werror build, rerun
@@ -451,6 +456,7 @@ jemalloc="no"
 replication="yes"
 vxhs=""
 libxml2=""
+docker="no"

 supported_cpu="no"
 supported_os="no"
@@ -959,6 +965,8 @@ for opt do
  ;;
  --firmwarepath=*) firmwarepath="$optarg"
  ;;
+  --host=*|--build=*|\
+  --disable-dependency-tracking|\
  --sbindir=*|--sharedstatedir=*|\
  --oldincludedir=*|--datarootdir=*|--infodir=*|--localedir=*|\
  --htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*)
@@ -1581,7 +1589,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  virtfs          VirtFS
  mpath           Multipath persistent reservation passthrough
  xen             xen backend driver support
-  xen-pci-passthrough
+  xen-pci-passthrough    PCI passthrough support for Xen
  brlapi          BrlAPI (Braile)
  curl            curl connectivity
  membarrier      membarrier system call (for Linux 4.14+ or Windows)
@@ -1643,8 +1651,8 @@ fi

 # Note that if the Python conditional here evaluates True we will exit
 # with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6))'; then
-  error_exit "Cannot use '$python', Python 2 >= 2.6 or Python 3 is required." \
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,7))'; then
+  error_exit "Cannot use '$python', Python 2 >= 2.7 or Python 3 is required." \
      "Use --python=/path/to/python to specify a supported Python."
 fi

@@ -2189,6 +2197,9 @@ if test "$xen" != "no" ; then
    xen=yes
    xen_pc="xencontrol xenstore xenguest xenforeignmemory xengnttab"
    xen_pc="$xen_pc xenevtchn xendevicemodel"
+    if $pkg_config --exists xentoolcore; then
+      xen_pc="$xen_pc xentoolcore"
+    fi
    QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags $xen_pc)"
    libs_softmmu="$($pkg_config --libs $xen_pc) $libs_softmmu"
    LDFLAGS="$($pkg_config --libs $xen_pc) $LDFLAGS"
@@ -2218,20 +2229,46 @@ EOF
    # Xen unstable
    elif
        cat > $TMPC <<EOF &&
+#undef XC_WANT_COMPAT_DEVICEMODEL_API
+#define __XEN_TOOLS__
+#include <xendevicemodel.h>
+#include <xenforeignmemory.h>
+int main(void) {
+  xendevicemodel_handle *xd;
+  xenforeignmemory_handle *xfmem;
+
+  xd = xendevicemodel_open(0, 0);
+  xendevicemodel_pin_memory_cacheattr(xd, 0, 0, 0, 0);
+
+  xfmem = xenforeignmemory_open(0, 0);
+  xenforeignmemory_map_resource(xfmem, 0, 0, 0, 0, 0, NULL, 0, 0);
+
+  return 0;
+}
+EOF
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore"
+      then
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore"
+      xen_ctrl_version=41100
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 #undef XC_WANT_COMPAT_MAP_FOREIGN_API
 #include <xenforeignmemory.h>
+#include <xentoolcore.h>
 int main(void) {
  xenforeignmemory_handle *xfmem;

  xfmem = xenforeignmemory_open(0, 0);
  xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
+  xentoolcore_restrict_all(0);

  return 0;
 }
 EOF
-        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore"
      then
-      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore"
      xen_ctrl_version=41000
      xen=yes
    elif
@@ -2493,20 +2530,7 @@ fi
 ##########################################
 # Windows Hypervisor Platform accelerator (WHPX) check
 if test "$whpx" != "no" ; then
-    cat > $TMPC << EOF
-#include <windows.h>
-#include <WinHvPlatform.h>
-#include <WinHvEmulation.h>
-int main(void) {
-    WHV_CAPABILITY whpx_cap;
-    UINT32 writtenSize;
-    WHvGetCapability(WHvCapabilityCodeFeatures, &whpx_cap, sizeof(whpx_cap),
-                     &writtenSize);
-    return 0;
-}
-EOF
-    if compile_prog "" "-lWinHvPlatform -lWinHvEmulation" ; then
-        libs_softmmu="$libs_softmmu -lWinHvPlatform -lWinHvEmulation"
+    if check_include "WinHvPlatform.h" && check_include "WinHvEmulation.h"; then
        whpx="yes"
    else
        if test "$whpx" = "yes"; then
@@ -3732,7 +3756,7 @@ fi
 fdt_required=no
 for target in $target_list; do
  case $target in
-    aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu)
+    aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu|riscv*-softmmu)
      fdt_required=yes
    ;;
  esac
@@ -3758,22 +3782,22 @@ int main(void) { fdt_first_subnode(0, 0); return 0; }
 EOF
  if compile_prog "" "$fdt_libs" ; then
    # system DTC is good - use it
-    fdt=yes
+    fdt=system
  else
      # have GIT checkout, so activate dtc submodule
      if test -e "${source_path}/.git" ; then
          git_submodules="${git_submodules} dtc"
      fi
      if test -d "${source_path}/dtc/libfdt" || test -e "${source_path}/.git" ; then
-          fdt=yes
-          dtc_internal="yes"
+          fdt=git
          mkdir -p dtc
          if [ "$pwd_is_source_path" != "y" ] ; then
              symlink "$source_path/dtc/Makefile" "dtc/Makefile"
              symlink "$source_path/dtc/scripts" "dtc/scripts"
          fi
          fdt_cflags="-I\$(SRC_PATH)/dtc/libfdt"
-          fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
+          fdt_ldflags="-L\$(BUILD_DIR)/dtc/libfdt"
+          fdt_libs="$fdt_libs"
      elif test "$fdt" = "yes" ; then
          # Not a git build & no libfdt found, prompt for system install
          error_exit "DTC (libfdt) version >= 1.4.2 not present." \
@@ -4449,7 +4473,7 @@ fi

 # check for smartcard support
 if test "$smartcard" != "no"; then
-    if $pkg_config libcacard; then
+    if $pkg_config --atleast-version=2.5.1 libcacard; then
        libcacard_cflags=$($pkg_config --cflags libcacard)
        libcacard_libs=$($pkg_config --libs libcacard)
        smartcard="yes"
@@ -5147,6 +5171,20 @@ if test "$fortify_source" != "no"; then
  fi
 fi

+###############################################
+# Check if copy_file_range is provided by glibc
+have_copy_file_range=no
+cat > $TMPC << EOF
+#include <unistd.h>
+int main(void) {
+  copy_file_range(0, NULL, 0, NULL, 0, 0);
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    have_copy_file_range=yes
+fi
+
 ##########################################
 # check if struct fsxattr is available via linux/fs.h

@@ -5413,6 +5451,17 @@ EOF
  fi
 fi

+##########################################
+# Docker and cross-compiler support
+#
+# This is specifically for building test
+# cases for foreign architectures, not
+# cross-compiling QEMU itself.
+
+if has "docker"; then
+    docker=$($python $source_path/tests/docker/docker.py probe)
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5715,6 +5764,7 @@ echo_version() {

 # prepend pixman and ftd flags after all config tests are done
 QEMU_CFLAGS="$pixman_cflags $fdt_cflags $QEMU_CFLAGS"
+QEMU_LDFLAGS="$fdt_ldflags $QEMU_LDFLAGS"
 libs_softmmu="$pixman_libs $libs_softmmu"

 echo "Install prefix    $prefix"
@@ -5745,6 +5795,7 @@ echo "ARFLAGS           $ARFLAGS"
 echo "CFLAGS            $CFLAGS"
 echo "QEMU_CFLAGS       $QEMU_CFLAGS"
 echo "LDFLAGS           $LDFLAGS"
+echo "QEMU_LDFLAGS      $QEMU_LDFLAGS"
 echo "make              $make"
 echo "install           $install"
 echo "python            $python"
@@ -5874,6 +5925,7 @@ echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
 echo "VxHS block device $vxhs"
 echo "capstone          $capstone"
+echo "docker            $docker"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -6076,7 +6128,7 @@ qemu_version=$(head $source_path/VERSION)
 echo "VERSION=$qemu_version" >>$config_host_mak
 echo "PKGVERSION=$pkgversion" >>$config_host_mak
 echo "SRC_PATH=$source_path" >> $config_host_mak
-echo "TARGET_DIRS=$target_list" >> $config_host_mak
+echo "TARGET_LIST=$target_list" >> $config_host_mak
 if [ "$docs" = "yes" ] ; then
  echo "BUILD_DOCS=yes" >> $config_host_mak
 fi
@@ -6248,6 +6300,9 @@ fi
 if test "$have_fsxattr" = "yes" ; then
    echo "HAVE_FSXATTR=y" >> $config_host_mak
 fi
+if test "$have_copy_file_range" = "yes" ; then
+    echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak
+fi
 if test "$vte" = "yes" ; then
  echo "CONFIG_VTE=y" >> $config_host_mak
  echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
@@ -6304,7 +6359,7 @@ fi
 if test "$preadv" = "yes" ; then
  echo "CONFIG_PREADV=y" >> $config_host_mak
 fi
-if test "$fdt" = "yes" ; then
+if test "$fdt" != "no" ; then
  echo "CONFIG_FDT=y" >> $config_host_mak
 fi
 if test "$membarrier" = "yes" ; then
@@ -6679,6 +6734,7 @@ else
 fi
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
 echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
+echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak
 echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
 echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
 echo "LIBS+=$LIBS" >> $config_host_mak
@@ -6697,6 +6753,10 @@ if test "$gcov" = "yes" ; then
  echo "GCOV=$gcov_tool" >> $config_host_mak
 fi

+if test "$docker" != "no"; then
+    echo "HAVE_USER_DOCKER=y" >> $config_host_mak
+fi
+
 # use included Linux headers
 if test "$linux" = "yes" ; then
  mkdir -p linux-headers
@@ -6810,6 +6870,7 @@ case "$target_name" in
  microblaze|microblazeel)
    TARGET_ARCH=microblaze
    bflt="yes"
+    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  mips|mipsel)
    TARGET_ARCH=mips
@@ -7105,7 +7166,7 @@ echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak

 done # for target in $targets

-if [ "$dtc_internal" = "yes" ]; then
+if [ "$fdt" = "git" ]; then
  echo "config-host.h: subdir-dtc" >> $config_host_mak
 fi
 if [ "$capstone" = "git" -o "$capstone" = "internal" ]; then
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -314,22 +314,19 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
        msg.msg_controllen = 0;
    }

-    /* Set the version in the flags when sending the reply */
-    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
-    vmsg->flags |= VHOST_USER_VERSION;
-    vmsg->flags |= VHOST_USER_REPLY_MASK;
-
    do {
        rc = sendmsg(conn_fd, &msg, 0);
    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));

-    do {
-        if (vmsg->data) {
-            rc = write(conn_fd, vmsg->data, vmsg->size);
-        } else {
-            rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
-        }
-    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    if (vmsg->size) {
+        do {
+            if (vmsg->data) {
+                rc = write(conn_fd, vmsg->data, vmsg->size);
+            } else {
+                rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
+            }
+        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    }

    if (rc <= 0) {
        vu_panic(dev, "Error while writing: %s", strerror(errno));
@@ -339,6 +336,39 @@ vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
    return true;
 }

+static bool
+vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    /* Set the version in the flags when sending the reply */
+    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
+    vmsg->flags |= VHOST_USER_VERSION;
+    vmsg->flags |= VHOST_USER_REPLY_MASK;
+
+    return vu_message_write(dev, conn_fd, vmsg);
+}
+
+static bool
+vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
+{
+    VhostUserMsg msg_reply;
+
+    if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
+        return true;
+    }
+
+    if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
+        return false;
+    }
+
+    if (msg_reply.request != vmsg->request) {
+        DPRINT("Received unexpected msg type. Expected %d received %d",
+               vmsg->request, msg_reply.request);
+        return false;
+    }
+
+    return msg_reply.payload.u64 == 0;
+}
+
 /* Kick the log_call_fd if required. */
 static void
 vu_log_kick(VuDev *dev)
@@ -534,7 +564,7 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)

    /* Send the message back to qemu with the addresses filled in */
    vmsg->fd_num = 0;
-    if (!vu_message_write(dev, dev->sock, vmsg)) {
+    if (!vu_send_reply(dev, dev->sock, vmsg)) {
        vu_panic(dev, "failed to respond to set-mem-table for postcopy");
        return false;
    }
@@ -914,6 +944,41 @@ void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
    }
 }

+bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
+                                int size, int offset)
+{
+    int qidx = vq - dev->vq;
+    int fd_num = 0;
+    VhostUserMsg vmsg = {
+        .request = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
+        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+        .size = sizeof(vmsg.payload.area),
+        .payload.area = {
+            .u64 = qidx & VHOST_USER_VRING_IDX_MASK,
+            .size = size,
+            .offset = offset,
+        },
+    };
+
+    if (fd == -1) {
+        vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
+    } else {
+        vmsg.fds[fd_num++] = fd;
+    }
+
+    vmsg.fd_num = fd_num;
+
+    if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) {
+        return false;
+    }
+
+    if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
+        return false;
+    }
+
+    return vu_process_message_reply(dev, &vmsg);
+}
+
 static bool
 vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
@@ -966,7 +1031,9 @@ static bool
 vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
-                        1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ;
+                        1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ |
+                        1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
+                        1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD;

    if (have_userfault()) {
        features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
@@ -1250,7 +1317,7 @@ vu_dispatch(VuDev *dev)
        goto end;
    }

-    if (!vu_message_write(dev, dev->sock, &vmsg)) {
+    if (!vu_send_reply(dev, dev->sock, &vmsg)) {
        goto end;
    }

--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -51,6 +51,8 @@ enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
+    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
+    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,

    VHOST_USER_PROTOCOL_F_MAX
 };
@@ -92,6 +94,14 @@ typedef enum VhostUserRequest {
    VHOST_USER_MAX
 } VhostUserRequest;

+typedef enum VhostUserSlaveRequest {
+    VHOST_USER_SLAVE_NONE = 0,
+    VHOST_USER_SLAVE_IOTLB_MSG = 1,
+    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
+    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+    VHOST_USER_SLAVE_MAX
+}  VhostUserSlaveRequest;
+
 typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
@@ -122,6 +132,12 @@ static VhostUserConfig c __attribute__ ((unused));
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

+typedef struct VhostUserVringArea {
+    uint64_t u64;
+    uint64_t size;
+    uint64_t offset;
+} VhostUserVringArea;
+
 #if defined(_WIN32)
 # define VU_PACKED __attribute__((gcc_struct, packed))
 #else
@@ -133,6 +149,7 @@ typedef struct VhostUserMsg {

 #define VHOST_USER_VERSION_MASK     (0x3)
 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

@@ -145,6 +162,7 @@ typedef struct VhostUserMsg {
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
+        VhostUserVringArea area;
    } payload;

    int fds[VHOST_MEMORY_MAX_NREGIONS];
@@ -368,6 +386,20 @@ VuVirtq *vu_get_queue(VuDev *dev, int qidx);
 void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);

+/**
+ * vu_set_queue_host_notifier:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @fd: a file descriptor
+ * @size: host page size
+ * @offset: notifier offset in @fd file
+ *
+ * Set queue's host notifier. This function may be called several
+ * times for the same queue. If called with -1 @fd, the notifier
+ * is removed.
+ */
+bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
+                                int size, int offset);

 /**
 * vu_queue_set_notification:
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -31,6 +31,7 @@ typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
+    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
 } VubDev;
@@ -301,14 +302,33 @@ static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 static uint64_t
 vub_get_features(VuDev *dev)
 {
-    return 1ull << VIRTIO_BLK_F_SIZE_MAX |
-           1ull << VIRTIO_BLK_F_SEG_MAX |
-           1ull << VIRTIO_BLK_F_TOPOLOGY |
-           1ull << VIRTIO_BLK_F_BLK_SIZE |
-           1ull << VIRTIO_BLK_F_FLUSH |
-           1ull << VIRTIO_BLK_F_CONFIG_WCE |
-           1ull << VIRTIO_F_VERSION_1 |
-           1ull << VHOST_USER_F_PROTOCOL_FEATURES;
+    uint64_t features;
+    VugDev *gdev;
+    VubDev *vdev_blk;
+
+    gdev = container_of(dev, VugDev, parent);
+    vdev_blk = container_of(gdev, VubDev, parent);
+
+    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
+               1ull << VIRTIO_BLK_F_SEG_MAX |
+               1ull << VIRTIO_BLK_F_TOPOLOGY |
+               1ull << VIRTIO_BLK_F_BLK_SIZE |
+               1ull << VIRTIO_BLK_F_FLUSH |
+               1ull << VIRTIO_BLK_F_CONFIG_WCE |
+               1ull << VIRTIO_F_VERSION_1 |
+               1ull << VHOST_USER_F_PROTOCOL_FEATURES;
+
+    if (vdev_blk->enable_ro) {
+        features |= 1ull << VIRTIO_BLK_F_RO;
+    }
+
+    return features;
+}
+
+static uint64_t
+vub_get_protocol_features(VuDev *dev)
+{
+    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
 }

 static int
@@ -373,6 +393,7 @@ vub_set_config(VuDev *vu_dev, const uint8_t *data,
 static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
+    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
 };
@@ -469,6 +490,7 @@ vub_new(char *blk_file)
        vub_free(vdev_blk);
        return NULL;
    }
+    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

@@ -483,10 +505,11 @@ int main(int argc, char **argv)
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
+    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

-    while ((opt = getopt(argc, argv, "b:s:h")) != -1) {
+    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
@@ -494,17 +517,20 @@ int main(int argc, char **argv)
        case 's':
            unix_socket = g_strdup(optarg);
            break;
+        case 'r':
+            enable_ro = true;
+            break;
        case 'h':
        default:
-            printf("Usage: %s [-b block device or file, -s UNIX domain socket]"
-                   " | [ -h ]\n", argv[0]);
+            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
+                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
-        printf("Usage: %s [-b block device or file, -s UNIX domain socket] |"
-               " [ -h ]\n", argv[0]);
+        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
+               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
        return -1;
    }

@@ -523,6 +549,9 @@ int main(int argc, char **argv)
    if (!vdev_blk) {
        goto err;
    }
+    if (enable_ro) {
+        vdev_blk->enable_ro = true;
+    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

--- a/cpus.c
+++ b/cpus.c
@@ -1648,7 +1648,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
    /* process any pending work */
    cpu->exit_request = 1;

-    while (1) {
+    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
@@ -2043,7 +2043,6 @@ int vm_stop(RunState state)
 int vm_prepare_start(void)
 {
    RunState requested;
-    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
@@ -2057,17 +2056,18 @@ int vm_prepare_start(void)
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
-        res = -1;
-    } else {
-        replay_enable_events();
-        cpu_enable_ticks();
-        runstate_set(RUN_STATE_RUNNING);
-        vm_state_notify(1, RUN_STATE_RUNNING);
+        qapi_event_send_resume(&error_abort);
+        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
-    return res;
+
+    replay_enable_events();
+    cpu_enable_ticks();
+    runstate_set(RUN_STATE_RUNNING);
+    vm_state_notify(1, RUN_STATE_RUNNING);
+    return 0;
 }

 void vm_start(void)
@@ -2187,6 +2187,59 @@ CpuInfoList *qmp_query_cpus(Error **errp)
    return head;
 }

+static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
+{
+    /*
+     * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
+     * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
+     */
+    switch (target) {
+    case SYS_EMU_TARGET_I386:
+    case SYS_EMU_TARGET_X86_64:
+        return CPU_INFO_ARCH_X86;
+
+    case SYS_EMU_TARGET_PPC:
+    case SYS_EMU_TARGET_PPCEMB:
+    case SYS_EMU_TARGET_PPC64:
+        return CPU_INFO_ARCH_PPC;
+
+    case SYS_EMU_TARGET_SPARC:
+    case SYS_EMU_TARGET_SPARC64:
+        return CPU_INFO_ARCH_SPARC;
+
+    case SYS_EMU_TARGET_MIPS:
+    case SYS_EMU_TARGET_MIPSEL:
+    case SYS_EMU_TARGET_MIPS64:
+    case SYS_EMU_TARGET_MIPS64EL:
+        return CPU_INFO_ARCH_MIPS;
+
+    case SYS_EMU_TARGET_TRICORE:
+        return CPU_INFO_ARCH_TRICORE;
+
+    case SYS_EMU_TARGET_S390X:
+        return CPU_INFO_ARCH_S390;
+
+    case SYS_EMU_TARGET_RISCV32:
+    case SYS_EMU_TARGET_RISCV64:
+        return CPU_INFO_ARCH_RISCV;
+
+    default:
+        return CPU_INFO_ARCH_OTHER;
+    }
+}
+
+static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
+{
+#ifdef TARGET_S390X
+    S390CPU *s390_cpu = S390_CPU(cpu);
+    CPUS390XState *env = &s390_cpu->env;
+
+    info->cpu_state = env->cpu_state;
+#else
+    abort();
+#endif
+}
+
 /*
 * fast means: we NEVER interrupt vCPU threads to retrieve
 * information from KVM.
@@ -2196,11 +2249,9 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoFastList *head = NULL, *cur_item = NULL;
+    SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
+                                          -1, &error_abort);
    CPUState *cpu;
-#if defined(TARGET_S390X)
-    S390CPU *s390_cpu;
-    CPUS390XState *env;
-#endif

    CPU_FOREACH(cpu) {
        CpuInfoFastList *info = g_malloc0(sizeof(*info));
@@ -2218,12 +2269,14 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
            info->value->props = props;
        }

-#if defined(TARGET_S390X)
-        s390_cpu = S390_CPU(cpu);
-        env = &s390_cpu->env;
-        info->value->arch = CPU_INFO_ARCH_S390;
-        info->value->u.s390.cpu_state = env->cpu_state;
-#endif
+        info->value->arch = sysemu_target_to_cpuinfo_arch(target);
+        info->value->target = target;
+        if (target == SYS_EMU_TARGET_S390X) {
+            cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
+        } else {
+            /* do nothing for @CpuInfoOther */
+        }
+
        if (!cur_item) {
            head = cur_item = info;
        } else {
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -22,7 +22,7 @@
 #include "qapi/error.h"
 #include "qemu/bswap.h"

-#include "crypto/block-luks.h"
+#include "block-luks.h"

 #include "crypto/hash.h"
 #include "crypto/afsplit.h"
--- a/crypto/block-luks.h
+++ b/crypto/block-luks.h
@@ -21,7 +21,7 @@
 #ifndef QCRYPTO_BLOCK_LUKS_H
 #define QCRYPTO_BLOCK_LUKS_H

-#include "crypto/blockpriv.h"
+#include "blockpriv.h"

 extern const QCryptoBlockDriver qcrypto_block_driver_luks;

--- a/crypto/block-qcow.c
+++ b/crypto/block-qcow.c
@@ -27,7 +27,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"

-#include "crypto/block-qcow.h"
+#include "block-qcow.h"
 #include "crypto/secret.h"

 #define QCRYPTO_BLOCK_QCOW_SECTOR_SIZE 512
--- a/crypto/block-qcow.h
+++ b/crypto/block-qcow.h
@@ -21,7 +21,7 @@
 #ifndef QCRYPTO_BLOCK_QCOW_H
 #define QCRYPTO_BLOCK_QCOW_H

-#include "crypto/blockpriv.h"
+#include "blockpriv.h"

 extern const QCryptoBlockDriver qcrypto_block_driver_qcow;

--- a/crypto/block.c
+++ b/crypto/block.c
@@ -20,9 +20,9 @@

 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "crypto/blockpriv.h"
-#include "crypto/block-qcow.h"
-#include "crypto/block-luks.h"
+#include "blockpriv.h"
+#include "block-qcow.h"
+#include "block-luks.h"

 static const QCryptoBlockDriver *qcrypto_block_drivers[] = {
    [Q_CRYPTO_BLOCK_FORMAT_QCOW] = &qcrypto_block_driver_qcow,
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -150,11 +150,11 @@ qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
 #endif /* CONFIG_GCRYPT || CONFIG_NETTLE */

 #ifdef CONFIG_GCRYPT
-#include "crypto/cipher-gcrypt.c"
+#include "cipher-gcrypt.c"
 #elif defined CONFIG_NETTLE
-#include "crypto/cipher-nettle.c"
+#include "cipher-nettle.c"
 #else
-#include "crypto/cipher-builtin.c"
+#include "cipher-builtin.c"
 #endif

 QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
--- a/crypto/ivgen-essiv.c
+++ b/crypto/ivgen-essiv.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
-#include "crypto/ivgen-essiv.h"
+#include "ivgen-essiv.h"

 typedef struct QCryptoIVGenESSIV QCryptoIVGenESSIV;
 struct QCryptoIVGenESSIV {
--- a/crypto/ivgen-essiv.h
+++ b/crypto/ivgen-essiv.h
@@ -18,7 +18,7 @@
 *
 */

-#include "crypto/ivgenpriv.h"
+#include "ivgenpriv.h"

 #ifndef QCRYPTO_IVGEN_ESSIV_H__
 #define QCRYPTO_IVGEN_ESSIV_H__
--- a/crypto/ivgen-plain.c
+++ b/crypto/ivgen-plain.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
-#include "crypto/ivgen-plain.h"
+#include "ivgen-plain.h"

 static int qcrypto_ivgen_plain_init(QCryptoIVGen *ivgen,
                                    const uint8_t *key, size_t nkey,
--- a/crypto/ivgen-plain.h
+++ b/crypto/ivgen-plain.h
@@ -18,7 +18,7 @@
 *
 */

-#include "crypto/ivgenpriv.h"
+#include "ivgenpriv.h"

 #ifndef QCRYPTO_IVGEN_PLAIN_H__
 #define QCRYPTO_IVGEN_PLAIN_H__
--- a/crypto/ivgen-plain64.c
+++ b/crypto/ivgen-plain64.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
-#include "crypto/ivgen-plain.h"
+#include "ivgen-plain.h"

 static int qcrypto_ivgen_plain_init(QCryptoIVGen *ivgen,
                                    const uint8_t *key, size_t nkey,
--- a/crypto/ivgen-plain64.h
+++ b/crypto/ivgen-plain64.h
@@ -18,7 +18,7 @@
 *
 */

-#include "crypto/ivgenpriv.h"
+#include "ivgenpriv.h"

 #ifndef QCRYPTO_IVGEN_PLAIN64_H__
 #define QCRYPTO_IVGEN_PLAIN64_H__
--- a/crypto/ivgen.c
+++ b/crypto/ivgen.c
@@ -21,10 +21,10 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"

-#include "crypto/ivgenpriv.h"
-#include "crypto/ivgen-plain.h"
-#include "crypto/ivgen-plain64.h"
-#include "crypto/ivgen-essiv.h"
+#include "ivgenpriv.h"
+#include "ivgen-plain.h"
+#include "ivgen-plain64.h"
+#include "ivgen-essiv.h"


 QCryptoIVGen *qcrypto_ivgen_new(QCryptoIVGenAlgorithm alg,
--- a/crypto/tlscreds.c
+++ b/crypto/tlscreds.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "crypto/tlscredspriv.h"
+#include "tlscredspriv.h"
 #include "trace.h"

 #define DH_BITS 2048
--- a/crypto/tlscredsanon.c
+++ b/crypto/tlscredsanon.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/tlscredsanon.h"
-#include "crypto/tlscredspriv.h"
+#include "tlscredspriv.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
 #include "trace.h"
--- a/crypto/tlscredsx509.c
+++ b/crypto/tlscredsx509.c
@@ -20,7 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/tlscredsx509.h"
-#include "crypto/tlscredspriv.h"
+#include "tlscredspriv.h"
 #include "crypto/secret.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
--- a/default-configs/aarch64-softmmu.mak
+++ b/default-configs/aarch64-softmmu.mak
@@ -8,3 +8,4 @@ CONFIG_DDC=y
 CONFIG_DPCD=y
 CONFIG_XLNX_ZYNQMP=y
 CONFIG_XLNX_ZYNQMP_ARM=y
+CONFIG_ARM_SMMUV3=y
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .11.94
 .12.50